Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Rework the cache
  • Loading branch information
pablogsal committed Dec 1, 2025
commit cf15c90f079e4763d1e326361e19b20c54145560
55 changes: 55 additions & 0 deletions Lib/test/test_external_inspection.py
Original file line number Diff line number Diff line change
Expand Up @@ -2616,6 +2616,61 @@ def level1():
self.assertEqual(len(frames1), len(frames2),
"New unwinder should return complete stack despite stale last_profiled_frame")

@skip_if_not_supported
@unittest.skipIf(
    sys.platform == "linux" and not PROCESS_VM_READV_SUPPORTED,
    "Test only runs on Linux with process_vm_readv support",
)
def test_cache_exhaustion(self):
    """Test cache works when frame limit (1024) is exceeded.

    FRAME_CACHE_MAX_FRAMES=1024. With 1100 recursive frames,
    the cache can't store all of them but should still work:
    frame_cache_store clamps the stored address array to the limit,
    so sampling must still report the full stack in both cached and
    uncached modes.
    """
    # Use 1100 to exceed FRAME_CACHE_MAX_FRAMES=1024
    depth = 1100
    # The child script recurses `depth` times, then rendezvouses twice
    # over the socket so we can take one sample per unwinder while the
    # full stack is alive.  Lines below are the literal child source.
    script_body = f"""\
import sys
sys.setrecursionlimit(2000)

def recurse(n):
    if n <= 0:
        sock.sendall(b"ready")
        sock.recv(16)  # wait for ack
        sock.sendall(b"ready2")
        sock.recv(16)  # wait for done
        return
    recurse(n - 1)

recurse({depth})
"""

    with self._target_process(script_body) as (p, client_socket, make_unwinder):
        # Two unwinders against the same target: one exercising the
        # frame cache, one as a cache-free control.
        unwinder_cache = make_unwinder(cache_frames=True)
        unwinder_no_cache = make_unwinder(cache_frames=False)

        # b"ack"/b"done" are presumably the reply payloads sent back to
        # unblock the child's two recv() calls -- NOTE(review): confirm
        # against _sample_frames' signature.
        frames_cached = self._sample_frames(
            client_socket, unwinder_cache, b"ready", b"ack", {"recurse"}
        )
        # Sample again with no cache for comparison
        frames_no_cache = self._sample_frames(
            client_socket, unwinder_no_cache, b"ready2", b"done", {"recurse"}
        )

        self.assertIsNotNone(frames_cached)
        self.assertIsNotNone(frames_no_cache)

        # Both should have many recurse frames (> 1024 limit)
        cached_count = [f.funcname for f in frames_cached].count("recurse")
        no_cache_count = [f.funcname for f in frames_no_cache].count("recurse")

        self.assertGreater(cached_count, 1000, "Should have >1000 recurse frames")
        self.assertGreater(no_cache_count, 1000, "Should have >1000 recurse frames")

        # Both modes should produce same frame count
        self.assertEqual(len(frames_cached), len(frames_no_cache),
                         "Cache exhaustion should not affect stack completeness")


# Allow running this test file directly: python test_external_inspection.py
if __name__ == "__main__":
    unittest.main()
2 changes: 1 addition & 1 deletion Modules/Setup.stdlib.in
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@
@MODULE__PICKLE_TRUE@_pickle _pickle.c
@MODULE__QUEUE_TRUE@_queue _queuemodule.c
@MODULE__RANDOM_TRUE@_random _randommodule.c
@MODULE__REMOTE_DEBUGGING_TRUE@_remote_debugging _remote_debugging/module.c _remote_debugging/object_reading.c _remote_debugging/code_objects.c _remote_debugging/frames.c _remote_debugging/threads.c _remote_debugging/asyncio.c
@MODULE__REMOTE_DEBUGGING_TRUE@_remote_debugging _remote_debugging/module.c _remote_debugging/object_reading.c _remote_debugging/code_objects.c _remote_debugging/frames.c _remote_debugging/frame_cache.c _remote_debugging/threads.c _remote_debugging/asyncio.c
@MODULE__STRUCT_TRUE@_struct _struct.c

# build supports subinterpreters
Expand Down
23 changes: 20 additions & 3 deletions Modules/_remote_debugging/_remote_debugging.h
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,17 @@ typedef struct {
uintptr_t addr_code_adaptive;
} CachedCodeMetadata;

/* Frame cache constants and types.
 *
 * The unwinder keeps a fixed-size, preallocated cache of the last
 * sampled stack per thread so repeated profiling samples can reuse
 * the unchanged tail of a stack instead of re-reading remote memory.
 */
#define FRAME_CACHE_MAX_THREADS 32   /* max concurrently cached threads */
#define FRAME_CACHE_MAX_FRAMES 1024  /* max frame addresses cached per thread */

typedef struct {
    uint64_t thread_id;  // 0 = empty slot
    // Remote frame addresses, parallel to the first num_addrs elements
    // of frame_list (addrs[i] is the address of frame_list[i]).
    uintptr_t addrs[FRAME_CACHE_MAX_FRAMES];
    Py_ssize_t num_addrs;
    PyObject *frame_list;  // owned reference, NULL if empty
} FrameCacheEntry;

typedef struct {
PyTypeObject *RemoteDebugging_Type;
PyTypeObject *TaskInfo_Type;
Expand Down Expand Up @@ -196,8 +207,9 @@ typedef struct {
int native;
int gc;
int cache_frames;
uint32_t stale_invalidation_counter; // counter for throttling frame_cache_invalidate_stale
RemoteDebuggingState *cached_state;
PyObject *frame_cache; // dict: thread_id -> list of (addr, frame_info)
FrameCacheEntry *frame_cache; // preallocated array of FRAME_CACHE_MAX_THREADS entries
#ifdef Py_GIL_DISABLED
uint32_t tlbc_generation;
_Py_hashtable_t *tlbc_cache;
Expand Down Expand Up @@ -368,20 +380,25 @@ extern int process_frame_chain(
uintptr_t gc_frame,
uintptr_t last_profiled_frame,
int *stopped_at_cached_frame,
PyObject *frame_addresses // optional: list to receive frame addresses
uintptr_t *frame_addrs,
Py_ssize_t *num_addrs,
Py_ssize_t max_addrs
);

/* Frame cache functions (implemented in frame_cache.c). */

/* Allocate the per-unwinder cache array; 0 on success, -1 on OOM. */
extern int frame_cache_init(RemoteUnwinderObject *unwinder);
/* Release all cached frame lists and free the cache array. */
extern void frame_cache_cleanup(RemoteUnwinderObject *unwinder);
/* Return the entry for thread_id, or NULL if not cached. */
extern FrameCacheEntry *frame_cache_find(RemoteUnwinderObject *unwinder, uint64_t thread_id);
extern int clear_last_profiled_frames(RemoteUnwinderObject *unwinder);
/* Drop cache entries for threads absent from `result` (a sample result list). */
extern void frame_cache_invalidate_stale(RemoteUnwinderObject *unwinder, PyObject *result);
/* Extend frame_info (and optionally frame_addrs) with the cached
 * continuation below last_profiled_frame.  Returns 1 on cache hit,
 * 0 on miss, -1 on error. */
extern int frame_cache_lookup_and_extend(
    RemoteUnwinderObject *unwinder,
    uint64_t thread_id,
    uintptr_t last_profiled_frame,
    PyObject *frame_info,
    uintptr_t *frame_addrs,
    Py_ssize_t *num_addrs,
    Py_ssize_t max_addrs);
extern int frame_cache_store(
RemoteUnwinderObject *unwinder,
uint64_t thread_id,
Expand Down
225 changes: 225 additions & 0 deletions Modules/_remote_debugging/frame_cache.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,225 @@
/******************************************************************************
* Remote Debugging Module - Frame Cache
*
* This file contains functions for caching frame information to optimize
* repeated stack unwinding for profiling.
******************************************************************************/

#include "_remote_debugging.h"

/* ============================================================================
* FRAME CACHE - stores (address, frame_info) pairs per thread
* Uses preallocated fixed-size arrays for efficiency and bounded memory.
* ============================================================================ */

/*
 * Allocate and zero-initialize the per-unwinder frame cache.
 *
 * A zeroed entry (thread_id == 0) denotes an empty slot, so calloc
 * leaves every slot ready for use.
 *
 * Returns 0 on success, -1 if allocation fails.
 */
int
frame_cache_init(RemoteUnwinderObject *unwinder)
{
    FrameCacheEntry *cache = PyMem_Calloc(FRAME_CACHE_MAX_THREADS,
                                          sizeof(FrameCacheEntry));
    if (cache == NULL) {
        return -1;
    }
    unwinder->frame_cache = cache;
    return 0;
}

/*
 * Release every cached frame list and free the cache array itself.
 * Safe to call when the cache was never initialized; idempotent
 * because the pointer is cleared before freeing.
 */
void
frame_cache_cleanup(RemoteUnwinderObject *unwinder)
{
    FrameCacheEntry *cache = unwinder->frame_cache;
    if (cache == NULL) {
        return;
    }
    unwinder->frame_cache = NULL;
    for (int slot = 0; slot < FRAME_CACHE_MAX_THREADS; slot++) {
        Py_CLEAR(cache[slot].frame_list);
    }
    PyMem_Free(cache);
}

/*
 * Look up the cache entry for thread_id.
 *
 * thread_id 0 is the empty-slot sentinel and never matches.
 * Returns the entry on a hit, NULL on a miss or uninitialized cache.
 */
FrameCacheEntry *
frame_cache_find(RemoteUnwinderObject *unwinder, uint64_t thread_id)
{
    FrameCacheEntry *cache = unwinder->frame_cache;
    if (cache == NULL || thread_id == 0) {
        return NULL;
    }
    FrameCacheEntry *const end = cache + FRAME_CACHE_MAX_THREADS;
    for (FrameCacheEntry *entry = cache; entry < end; entry++) {
        if (entry->thread_id == thread_id) {
            return entry;
        }
    }
    return NULL;
}

// Allocate a cache slot for a thread.
//
// An existing entry for the thread always wins; otherwise the first
// empty slot (thread_id == 0) is returned.  Both searches are folded
// into a single pass over the fixed-size array.
//
// Returns NULL if the cache is full or uninitialized, or if thread_id
// is the empty-slot sentinel 0 (graceful degradation: callers simply
// skip caching).
static FrameCacheEntry *
frame_cache_alloc_slot(RemoteUnwinderObject *unwinder, uint64_t thread_id)
{
    if (!unwinder->frame_cache || thread_id == 0) {
        return NULL;
    }
    FrameCacheEntry *first_empty = NULL;
    for (int i = 0; i < FRAME_CACHE_MAX_THREADS; i++) {
        FrameCacheEntry *entry = &unwinder->frame_cache[i];
        if (entry->thread_id == thread_id) {
            // Thread already has an entry; reuse it.
            return entry;
        }
        if (first_empty == NULL && entry->thread_id == 0) {
            first_empty = entry;
        }
    }
    // first_empty is NULL when the cache is full -- graceful degradation.
    return first_empty;
}

// Remove cache entries for threads not seen in the result
// result structure: list of InterpreterInfo, where InterpreterInfo[1] is threads list,
// and ThreadInfo[0] is the thread_id
void
frame_cache_invalidate_stale(RemoteUnwinderObject *unwinder, PyObject *result)
{
    // Nothing to do without a cache or a well-formed result list.
    if (!unwinder->frame_cache || !result || !PyList_Check(result)) {
        return;
    }

    // Build array of seen thread IDs from result.
    // Capped at FRAME_CACHE_MAX_THREADS: with more live threads than
    // that, entries for threads beyond the cap may be invalidated even
    // though the thread is alive -- acceptable since the cache itself
    // holds at most FRAME_CACHE_MAX_THREADS entries.
    uint64_t seen_threads[FRAME_CACHE_MAX_THREADS];
    int num_seen = 0;

    Py_ssize_t num_interps = PyList_GET_SIZE(result);
    for (Py_ssize_t i = 0; i < num_interps && num_seen < FRAME_CACHE_MAX_THREADS; i++) {
        PyObject *interp_info = PyList_GET_ITEM(result, i);  // borrowed
        // Field 1 is assumed to be the interpreter's thread list --
        // NOTE(review): confirm against the InterpreterInfo
        // struct-sequence definition.
        PyObject *threads = PyStructSequence_GetItem(interp_info, 1);  // borrowed
        if (!threads || !PyList_Check(threads)) {
            continue;
        }
        Py_ssize_t num_threads = PyList_GET_SIZE(threads);
        for (Py_ssize_t j = 0; j < num_threads && num_seen < FRAME_CACHE_MAX_THREADS; j++) {
            PyObject *thread_info = PyList_GET_ITEM(threads, j);  // borrowed
            // Field 0 is assumed to be the thread id.
            PyObject *tid_obj = PyStructSequence_GetItem(thread_info, 0);  // borrowed
            if (tid_obj) {
                uint64_t tid = PyLong_AsUnsignedLongLong(tid_obj);
                // PyLong_AsUnsignedLongLong signals failure via an
                // exception; swallow conversion errors so one bad
                // entry cannot abort invalidation.
                if (!PyErr_Occurred()) {
                    seen_threads[num_seen++] = tid;
                } else {
                    PyErr_Clear();
                }
            }
        }
    }

    // Invalidate entries whose thread_id is not in the seen list.
    for (int i = 0; i < FRAME_CACHE_MAX_THREADS; i++) {
        if (unwinder->frame_cache[i].thread_id == 0) {
            continue;  // already empty
        }
        int found = 0;
        for (int j = 0; j < num_seen; j++) {
            if (unwinder->frame_cache[i].thread_id == seen_threads[j]) {
                found = 1;
                break;
            }
        }
        if (!found) {
            // Clear this entry: drop the owned frame list and reset the
            // slot to the empty state (thread_id == 0).
            Py_CLEAR(unwinder->frame_cache[i].frame_list);
            unwinder->frame_cache[i].thread_id = 0;
            unwinder->frame_cache[i].num_addrs = 0;
        }
    }
}

// Find last_profiled_frame in cache and extend frame_info with cached continuation
// If frame_addrs is provided (not NULL), also extends it with cached addresses
//
// Returns 1 on a cache hit (frame_info extended), 0 on a miss, and
// -1 on a Python-level error (allocation failure while slicing).
int
frame_cache_lookup_and_extend(
    RemoteUnwinderObject *unwinder,
    uint64_t thread_id,
    uintptr_t last_profiled_frame,
    PyObject *frame_info,
    uintptr_t *frame_addrs,
    Py_ssize_t *num_addrs,
    Py_ssize_t max_addrs)
{
    // A zero last_profiled_frame means there is no anchor to resume
    // from, so the cache cannot help.
    if (!unwinder->frame_cache || last_profiled_frame == 0) {
        return 0;
    }

    FrameCacheEntry *entry = frame_cache_find(unwinder, thread_id);
    if (!entry || !entry->frame_list) {
        return 0;
    }

    // Find the index where last_profiled_frame matches.  addrs[i] is
    // parallel to frame_list[i] for i < num_addrs (see FrameCacheEntry).
    Py_ssize_t start_idx = -1;
    for (Py_ssize_t i = 0; i < entry->num_addrs; i++) {
        if (entry->addrs[i] == last_profiled_frame) {
            start_idx = i;
            break;
        }
    }

    if (start_idx < 0) {
        return 0; // Not found
    }

    // frame_list may hold more frames than num_addrs when the address
    // array was clamped at store time; the slice still takes the full
    // cached tail.
    Py_ssize_t num_frames = PyList_GET_SIZE(entry->frame_list);

    // Extend frame_info with frames from start_idx onwards
    PyObject *slice = PyList_GetSlice(entry->frame_list, start_idx, num_frames);
    if (!slice) {
        return -1;
    }

    // Append in place: insert the slice at the end of frame_info.
    Py_ssize_t cur_size = PyList_GET_SIZE(frame_info);
    int result = PyList_SetSlice(frame_info, cur_size, cur_size, slice);
    Py_DECREF(slice);

    if (result < 0) {
        return -1;
    }

    // Also extend frame_addrs with cached addresses if provided.
    // Stops silently at max_addrs, so frame_addrs can end up shorter
    // than frame_info -- callers must tolerate that.
    if (frame_addrs) {
        for (Py_ssize_t i = start_idx; i < entry->num_addrs && *num_addrs < max_addrs; i++) {
            frame_addrs[(*num_addrs)++] = entry->addrs[i];
        }
    }

    return 1;
}

// Store frame list with addresses in cache.
//
// addrs[i] must correspond to frame_list[i]; only the first
// FRAME_CACHE_MAX_FRAMES addresses are kept (the frame list itself is
// stored whole, so lookups can still return the complete tail).
//
// Takes a new reference to frame_list.  Returns 0 in all non-error
// cases, including graceful degradation when the cache is full,
// uninitialized, or the inputs are unusable.
int
frame_cache_store(
    RemoteUnwinderObject *unwinder,
    uint64_t thread_id,
    PyObject *frame_list,
    const uintptr_t *addrs,
    Py_ssize_t num_addrs)
{
    // frame_list == NULL would crash Py_NewRef below; treat it like a
    // disabled cache instead.
    if (!unwinder->frame_cache || thread_id == 0 || frame_list == NULL) {
        return 0;
    }

    // Clamp to the fixed capacity; also normalize a negative count.
    if (num_addrs > FRAME_CACHE_MAX_FRAMES) {
        num_addrs = FRAME_CACHE_MAX_FRAMES;
    }
    if (num_addrs < 0 || addrs == NULL) {
        num_addrs = 0;
    }

    FrameCacheEntry *entry = frame_cache_alloc_slot(unwinder, thread_id);
    if (!entry) {
        // Cache full - graceful degradation
        return 0;
    }

    // Clear old frame_list if replacing
    Py_CLEAR(entry->frame_list);

    // Store data.  Guard memcpy: passing NULL (even with size 0) is
    // undefined behavior.
    entry->thread_id = thread_id;
    if (num_addrs > 0) {
        memcpy(entry->addrs, addrs, (size_t)num_addrs * sizeof(uintptr_t));
    }
    entry->num_addrs = num_addrs;
    entry->frame_list = Py_NewRef(frame_list);

    return 0;
}
Loading