Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Fix frame cache to always re-read current frame for accurate line num…
…bers

The frame cache was returning stale line numbers when execution moved
within the same function between samples. By always reading the current
frame from memory and only caching parent frames, we get correct line
numbers while still benefiting from cache hits on the unchanged call
stack. This also skips the expensive stack chunk copying in cache mode
since direct memory reads are cheaper when most frames come from cache
anyway.
  • Loading branch information
pablogsal committed Dec 5, 2025
commit 8df21063b1c0f237236afcbe485dd76979405f1e
4 changes: 2 additions & 2 deletions Lib/profiling/sampling/sample.py
Original file line number Diff line number Diff line change
Expand Up @@ -307,7 +307,7 @@ def sample(
native=native,
gc=gc,
skip_non_matching_threads=skip_non_matching_threads,
collect_stats=realtime_stats # Collect stats when realtime_stats is enabled
collect_stats=realtime_stats,
)
profiler.realtime_stats = realtime_stats

Expand Down Expand Up @@ -363,7 +363,7 @@ def sample_live(
native=native,
gc=gc,
skip_non_matching_threads=skip_non_matching_threads,
collect_stats=realtime_stats # Collect stats when realtime_stats is enabled
collect_stats=realtime_stats,
)
profiler.realtime_stats = realtime_stats

Expand Down
83 changes: 76 additions & 7 deletions Lib/test/test_external_inspection.py
Original file line number Diff line number Diff line change
Expand Up @@ -2131,7 +2131,12 @@ def _sample_frames(self, client_socket, unwinder, wait_signal, send_ack, require
"Test only runs on Linux with process_vm_readv support",
)
def test_cache_hit_same_stack(self):
"""Test that 3 consecutive samples reuse cached frame objects (identity check)."""
"""Test that consecutive samples reuse cached parent frame objects.

The current frame (index 0) is always re-read from memory to get
updated line numbers, so it may be a different object. Parent frames
(index 1+) should be identical objects from cache.
"""
script_body = """\
def level3():
sock.sendall(b"sync1")
Expand Down Expand Up @@ -2164,11 +2169,71 @@ def level1():
self.assertEqual(len(frames1), len(frames2))
self.assertEqual(len(frames2), len(frames3))

# All frames must be identical objects (cache reuse)
for i, (f1, f2, f3) in enumerate(zip(frames1, frames2, frames3)):
# Current frame (index 0) is always re-read, so check value equality
self.assertEqual(frames1[0].funcname, frames2[0].funcname)
self.assertEqual(frames2[0].funcname, frames3[0].funcname)

# Parent frames (index 1+) must be identical objects (cache reuse)
for i in range(1, len(frames1)):
f1, f2, f3 = frames1[i], frames2[i], frames3[i]
self.assertIs(f1, f2, f"Frame {i}: samples 1-2 must be same object")
self.assertIs(f2, f3, f"Frame {i}: samples 2-3 must be same object")
self.assertIs(f1, f3, f"Frame {i}: samples 1-3 must be same object")

@skip_if_not_supported
@unittest.skipIf(
sys.platform == "linux" and not PROCESS_VM_READV_SUPPORTED,
"Test only runs on Linux with process_vm_readv support",
)
def test_line_number_updates_in_same_frame(self):
"""Test that line numbers are correctly updated when execution moves within a function.

When the profiler samples at different points within the same function,
it must report the correct line number for each sample, not stale cached values.
"""
script_body = """\
def outer():
inner()

def inner():
sock.sendall(b"line_a"); sock.recv(16)
sock.sendall(b"line_b"); sock.recv(16)
sock.sendall(b"line_c"); sock.recv(16)
sock.sendall(b"line_d"); sock.recv(16)

outer()
"""

with self._target_process(script_body) as (p, client_socket, make_unwinder):
unwinder = make_unwinder(cache_frames=True)

frames_a = self._sample_frames(client_socket, unwinder, b"line_a", b"ack", {"inner"})
frames_b = self._sample_frames(client_socket, unwinder, b"line_b", b"ack", {"inner"})
frames_c = self._sample_frames(client_socket, unwinder, b"line_c", b"ack", {"inner"})
frames_d = self._sample_frames(client_socket, unwinder, b"line_d", b"done", {"inner"})

self.assertIsNotNone(frames_a)
self.assertIsNotNone(frames_b)
self.assertIsNotNone(frames_c)
self.assertIsNotNone(frames_d)

# Get the 'inner' frame from each sample (should be index 0)
inner_a = frames_a[0]
inner_b = frames_b[0]
inner_c = frames_c[0]
inner_d = frames_d[0]

self.assertEqual(inner_a.funcname, "inner")
self.assertEqual(inner_b.funcname, "inner")
self.assertEqual(inner_c.funcname, "inner")
self.assertEqual(inner_d.funcname, "inner")

# Line numbers must be different and increasing (execution moves forward)
self.assertLess(inner_a.lineno, inner_b.lineno,
"Line B should be after line A")
self.assertLess(inner_b.lineno, inner_c.lineno,
"Line C should be after line B")
self.assertLess(inner_c.lineno, inner_d.lineno,
"Line D should be after line C")

@skip_if_not_supported
@unittest.skipIf(
Expand Down Expand Up @@ -2351,9 +2416,13 @@ def recurse(n):

self.assertEqual(len(frames1), len(frames2))

# All frames should be identical objects (cache reuse)
for i, (f1, f2) in enumerate(zip(frames1, frames2)):
self.assertIs(f1, f2, f"Frame {i}: recursive frames must be same object")
# Current frame (index 0) is re-read, check value equality
self.assertEqual(frames1[0].funcname, frames2[0].funcname)

# Parent frames (index 1+) should be identical objects (cache reuse)
for i in range(1, len(frames1)):
self.assertIs(frames1[i], frames2[i],
f"Frame {i}: recursive frames must be same object")

@skip_if_not_supported
@unittest.skipIf(
Expand Down
6 changes: 5 additions & 1 deletion Modules/_remote_debugging/frame_cache.c
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,11 @@ int
frame_cache_init(RemoteUnwinderObject *unwinder)
{
unwinder->frame_cache = PyMem_Calloc(FRAME_CACHE_MAX_THREADS, sizeof(FrameCacheEntry));
return unwinder->frame_cache ? 0 : -1;
if (!unwinder->frame_cache) {
PyErr_NoMemory();
return -1;
}
return 0;
}

void
Expand Down
52 changes: 43 additions & 9 deletions Modules/_remote_debugging/frames.c
Original file line number Diff line number Diff line change
Expand Up @@ -435,8 +435,12 @@ clear_last_profiled_frames(RemoteUnwinderObject *unwinder)
return 0;
}

// Fast path: check if we have a full cache hit (entire stack unchanged)
// Returns: 1 if full hit (frame_info populated), 0 if miss, -1 on error
// Fast path: check if we have a full cache hit (parent stack unchanged)
// A "full hit" means current frame == last profiled frame, so we can reuse
// cached parent frames. We always read the current frame from memory to get
// updated line numbers (the line within a frame can change between samples).
// Returns: 1 if full hit (frame_info populated with current frame + cached parents),
// 0 if miss, -1 on error
static int
try_full_cache_hit(
RemoteUnwinderObject *unwinder,
Expand All @@ -463,14 +467,44 @@ try_full_cache_hit(
return 0;
}

// Full hit! Extend frame_info with entire cached list
Py_ssize_t cur_size = PyList_GET_SIZE(frame_info);
int result = PyList_SetSlice(frame_info, cur_size, cur_size, entry->frame_list);
if (result >= 0) {
STATS_INC(unwinder, frame_cache_hits);
STATS_ADD(unwinder, frames_read_from_cache, PyList_GET_SIZE(entry->frame_list));
// Always read the current frame from memory to get updated line number
PyObject *current_frame = NULL;
uintptr_t code_object_addr = 0;
uintptr_t previous_frame = 0;
int parse_result = parse_frame_object(unwinder, &current_frame, frame_addr,
&code_object_addr, &previous_frame);
if (parse_result < 0) {
return -1;
}
return result < 0 ? -1 : 1;

// Add current frame if valid
if (current_frame != NULL) {
if (PyList_Append(frame_info, current_frame) < 0) {
Py_DECREF(current_frame);
return -1;
}
Py_DECREF(current_frame);
STATS_ADD(unwinder, frames_read_from_memory, 1);
}

// Extend with cached parent frames (from index 1 onwards, skipping the current frame)
Py_ssize_t cached_size = PyList_GET_SIZE(entry->frame_list);
if (cached_size > 1) {
PyObject *parent_slice = PyList_GetSlice(entry->frame_list, 1, cached_size);
if (!parent_slice) {
return -1;
}
Py_ssize_t cur_size = PyList_GET_SIZE(frame_info);
int result = PyList_SetSlice(frame_info, cur_size, cur_size, parent_slice);
Py_DECREF(parent_slice);
if (result < 0) {
return -1;
}
STATS_ADD(unwinder, frames_read_from_cache, cached_size - 1);
}

STATS_INC(unwinder, frame_cache_hits);
return 1;
}

// High-level helper: collect frames with cache optimization
Expand Down
3 changes: 1 addition & 2 deletions Modules/_remote_debugging/module.c
Original file line number Diff line number Diff line change
Expand Up @@ -290,7 +290,7 @@ _remote_debugging_RemoteUnwinder___init___impl(RemoteUnwinderObject *self,
#ifdef Py_GIL_DISABLED
if (only_active_thread) {
PyErr_SetString(PyExc_ValueError,
"only_active_thread is not supported when Py_GIL_DISABLED is not defined");
"only_active_thread is not supported in free-threaded builds");
return -1;
}
#endif
Expand Down Expand Up @@ -389,7 +389,6 @@ _remote_debugging_RemoteUnwinder___init___impl(RemoteUnwinderObject *self,
#endif

if (cache_frames && frame_cache_init(self) < 0) {
PyErr_NoMemory();
return -1;
}

Expand Down
9 changes: 6 additions & 3 deletions Modules/_remote_debugging/threads.c
Original file line number Diff line number Diff line change
Expand Up @@ -383,9 +383,12 @@ unwind_stack_for_thread(
goto error;
}

if (copy_stack_chunks(unwinder, *current_tstate, &chunks) < 0) {
set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to copy stack chunks");
goto error;
// In cache mode, copying stack chunks is more expensive than direct memory reads
if (!unwinder->cache_frames) {
if (copy_stack_chunks(unwinder, *current_tstate, &chunks) < 0) {
set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to copy stack chunks");
goto error;
}
}

if (unwinder->cache_frames) {
Expand Down
Loading