From 51f0db11433793f839699e6afa0a4e90de9103fa Mon Sep 17 00:00:00 2001 From: Edd Barrett Date: Fri, 13 Mar 2026 12:01:41 +0000 Subject: [PATCH] First stab at having the GC scan the shadow stack. This comes with the caveat that each thread scans all other threads' shadow stacks, which may not be completely safe(?). With this, and recent ykllvm/yk changes, this fannkuch-redux benchmark now runs: https://benchmarksgame-team.pages.debian.net/benchmarksgame/program/fannkuchredux-python3-6.html (If you run with YKD_SERIALISE_COMPILATION=1, however, I get an unimplemented ptrtoint constant expression -- this isn't hard to implement.) --- ports/unix/main.c | 8 ++++++++ shared/runtime/gchelper_generic.c | 16 ++++++++++++++++ 2 files changed, 24 insertions(+) diff --git a/ports/unix/main.c b/ports/unix/main.c index 9e9704aa801db..3cb1e3bbdbb1c 100644 --- a/ports/unix/main.c +++ b/ports/unix/main.c @@ -57,6 +57,10 @@ #include "stack_size.h" #include "shared/runtime/pyexec.h" +#ifdef USE_YK +#include <yk.h> +#endif + // Command line options, with their defaults bool mp_compile_only = false; static uint emit_opt = MP_EMIT_OPT_NONE; @@ -443,6 +447,10 @@ int main(int argc, char **argv) { // Define a reasonable stack limit to detect stack overflow. mp_uint_t stack_size = 40000 * UNIX_STACK_MULTIPLIER; +#ifdef USE_YK + yk_init(); +#endif + // We should capture stack top ASAP after start, and it should be // captured guaranteedly before any other stack variables are allocated. 
// For this, actual main (renamed main_) should not be inlined into diff --git a/shared/runtime/gchelper_generic.c b/shared/runtime/gchelper_generic.c index 45b2e4f7d848a..11ba11bdb3d71 100644 --- a/shared/runtime/gchelper_generic.c +++ b/shared/runtime/gchelper_generic.c @@ -206,6 +206,12 @@ static void gc_helper_get_regs(gc_helper_regs_t arr) { #endif // MICROPY_GCREGS_SETJMP +#ifdef USE_YK +void gc_collect_shadowstack(void *start, void *end) { + gc_collect_root(start, ((intptr_t) end - (intptr_t) start) / sizeof(void *)); +} +#endif + // Explicitly mark this as noinline to make sure the regs variable // is effectively at the top of the stack: otherwise, in builds where // LTO is enabled and a lot of inlining takes place we risk a stack @@ -217,6 +223,16 @@ MP_NOINLINE void gc_helper_collect_regs_and_stack(void) { // GC stack (and regs because we captured them) void **regs_ptr = (void **)(void *)&regs; gc_collect_root(regs_ptr, ((uintptr_t)MP_STATE_THREAD(stack_top) - (uintptr_t)&regs) / sizeof(uintptr_t)); + +#ifdef USE_YK + // Now scan the shadow stack. + // + // FIXME: this will scan the shadow stacks of *all* threads, but we should + // probably only scan the current thread's shadow stack here. yk doesn't + // currently provide an API (e.g. a `yk_curthread_sstack_bounds()`) + // that could enable this. + yk_foreach_shadowstack(gc_collect_shadowstack); +#endif } #endif // MICROPY_ENABLE_GC