diff --git a/CMakeLists.txt b/CMakeLists.txt index 1160504..38ea1fb 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -18,6 +18,7 @@ option(ELIO_ENABLE_HTTP "Enable HTTP client/server support (requires TLS)" ${ELI option(ELIO_ENABLE_HTTP2 "Enable HTTP/2 support (requires nghttp2)" ${ELIO_IS_TOP_LEVEL}) option(ELIO_ENABLE_DEVELOPER_WARNINGS "Enable strict warning flags for Elio tests/examples" ${ELIO_IS_TOP_LEVEL}) option(ELIO_WARNINGS_AS_ERRORS "Treat warnings as errors for Elio tests/examples" ${ELIO_IS_TOP_LEVEL}) +option(ELIO_ENABLE_DEBUG_METADATA "Enable coroutine debug metadata" ON) # Platform check - Linux only if(NOT UNIX OR APPLE) @@ -34,11 +35,18 @@ set(CMAKE_EXPORT_COMPILE_COMMANDS ON) # Header-only library interface add_library(elio INTERFACE) -target_include_directories(elio INTERFACE +target_include_directories(elio INTERFACE $ $ ) +# Debug metadata option +if(ELIO_ENABLE_DEBUG_METADATA) + target_compile_definitions(elio INTERFACE ELIO_ENABLE_DEBUG_METADATA=1) +else() + target_compile_definitions(elio INTERFACE ELIO_ENABLE_DEBUG_METADATA=0) +endif() + # Dependencies via FetchContent include(FetchContent) diff --git a/include/elio/coro/frame_allocator.hpp b/include/elio/coro/frame_allocator.hpp index 88177d1..f98d93a 100644 --- a/include/elio/coro/frame_allocator.hpp +++ b/include/elio/coro/frame_allocator.hpp @@ -24,18 +24,21 @@ namespace elio::coro { /// Thread-local free-list based frame allocator for small coroutine frames /// Dramatically reduces allocation overhead for frequently created/destroyed coroutines /// -/// Design: Each allocated frame has a hidden header storing the source pool ID. +/// Design: Each allocated frame has a hidden header storing the source pool ID and size class. /// When deallocated on a different thread, the frame is returned via an MPSC queue /// to its source pool. This handles work-stealing scenarios where coroutines /// are allocated on thread A but deallocated on thread B. 
/// +/// Size Classes: Multiple pools for different frame sizes (32, 64, 128, 256 bytes) +/// reduce memory waste for small frames while maintaining allocation performance. +/// /// Note: Under sanitizers, pooling is disabled to allow proper leak/error detection. class frame_allocator { public: - // Support frames up to 256 bytes (covers most simple tasks) - // Actual allocation includes header, so user-visible size is MAX_FRAME_SIZE - static constexpr size_t MAX_FRAME_SIZE = 256; - static constexpr size_t POOL_SIZE = 1024; + // Size classes for different frame sizes + static constexpr size_t SIZE_CLASSES[] = {32, 64, 128, 256}; + static constexpr size_t NUM_SIZE_CLASSES = 4; + static constexpr size_t POOL_SIZE = 512; // Per size class static constexpr size_t REMOTE_QUEUE_BATCH = 64; // Process remote returns in batches // Detect sanitizers: GCC uses __SANITIZE_*, Clang uses __has_feature @@ -58,45 +61,48 @@ class frame_allocator { } #else static void* allocate(size_t size) { - if (size <= MAX_FRAME_SIZE) { + size_t sc = find_size_class(size); + if (sc < NUM_SIZE_CLASSES) { auto& alloc = instance(); // First try to reclaim remote returns periodically alloc.reclaim_remote_returns(); - if (alloc.free_count_ > 0) { - void* block = alloc.pool_[--alloc.free_count_]; + if (alloc.free_count_[sc] > 0) { + void* block = alloc.pool_[sc][--alloc.free_count_[sc]]; // Update header to reflect current pool ownership - // This is important because blocks may have been returned from remote threads auto* header = static_cast(block); header->source_pool_id = alloc.pool_id_; + header->size_class = static_cast(sc); return block_to_user(block); } // Allocate new block with header - void* block = ::operator new(ALLOC_BLOCK_SIZE); + void* block = ::operator new(alloc_block_size(sc)); auto* header = static_cast(block); header->source_pool_id = alloc.pool_id_; + header->size_class = static_cast(sc); header->next.store(nullptr, std::memory_order_relaxed); return block_to_user(block); } - // 
Fall back to standard allocation for large frames (no header) + // Fall back to standard allocation for large frames return ::operator new(size); } static void deallocate(void* ptr, size_t size) noexcept { - if (size <= MAX_FRAME_SIZE) { + size_t sc = find_size_class(size); + if (sc < NUM_SIZE_CLASSES) { void* block = user_to_block(ptr); auto* header = static_cast(block); auto& alloc = instance(); // Fast path: same thread - return directly to local pool if (header->source_pool_id == alloc.pool_id_) { - if (alloc.free_count_ < POOL_SIZE) { - alloc.pool_[alloc.free_count_++] = block; + if (alloc.free_count_[sc] < POOL_SIZE) { + alloc.pool_[sc][alloc.free_count_[sc]++] = block; return; } - // Pool full, delete the block (not the user pointer!) + // Pool full, delete the block ::operator delete(block); return; } else { @@ -119,13 +125,33 @@ class frame_allocator { private: // Block header stored before user data struct block_header { - uint32_t source_pool_id; // ID of the pool that allocated this block - std::atomic next; // For MPSC queue linkage + uint32_t source_pool_id; // ID of the pool that allocated this block + uint8_t size_class; // Size class index (0-3) + std::atomic next; // For MPSC queue linkage }; - // Total block size including header, aligned for user data + // Header size static constexpr size_t HEADER_SIZE = sizeof(block_header); - static constexpr size_t ALLOC_BLOCK_SIZE = HEADER_SIZE + MAX_FRAME_SIZE; + + // Find size class index for requested size + static size_t find_size_class(size_t size) noexcept { + for (size_t i = 0; i < NUM_SIZE_CLASSES; ++i) { + if (size <= SIZE_CLASSES[i]) { + return i; + } + } + return NUM_SIZE_CLASSES; // Not found (for sizes > 256) + } + + // Get actual size for a size class + static size_t size_class_size(size_t idx) noexcept { + return SIZE_CLASSES[idx]; + } + + // Total block size including header for a given size class + static size_t alloc_block_size(size_t size_class_idx) noexcept { + return HEADER_SIZE + 
SIZE_CLASSES[size_class_idx]; + } // Convert between block (with header) and user pointer static void* block_to_user(void* block) noexcept { @@ -137,10 +163,17 @@ class frame_allocator { } frame_allocator() - : free_count_(0) - , pool_id_(next_pool_id_.fetch_add(1, std::memory_order_relaxed)) - , remote_head_{0, {nullptr}} // Initialize dummy head: pool_id=0, next=nullptr + : pool_id_(next_pool_id_.fetch_add(1, std::memory_order_relaxed)) + , remote_head_{} , remote_tail_(&remote_head_) { + // Initialize remote_head_ fields after default construction + remote_head_.source_pool_id = 0; + remote_head_.size_class = 0; + remote_head_.next.store(nullptr, std::memory_order_relaxed); + // Initialize free counts to 0 + for (size_t i = 0; i < NUM_SIZE_CLASSES; ++i) { + free_count_[i] = 0; + } // Register this pool for cross-thread access register_pool(this); } @@ -153,8 +186,10 @@ class frame_allocator { reclaim_all_remote_returns(); // Free all cached frames when thread exits - for (size_t i = 0; i < free_count_; ++i) { - ::operator delete(pool_[i]); + for (size_t sc = 0; sc < NUM_SIZE_CLASSES; ++sc) { + for (size_t i = 0; i < free_count_[sc]; ++i) { + ::operator delete(pool_[sc][i]); + } } } @@ -168,14 +203,14 @@ class frame_allocator { prev->next.store(header, std::memory_order_release); } - // Called by owner thread to reclaim remote returns + // Called by owner thread to reclaim remote returns for all size classes void reclaim_remote_returns() noexcept { // Quick check without full synchronization block_header* head = remote_head_.next.load(std::memory_order_acquire); if (!head) return; size_t count = 0; - while (head && count < REMOTE_QUEUE_BATCH && free_count_ < POOL_SIZE) { + while (head && count < REMOTE_QUEUE_BATCH) { block_header* next = head->next.load(std::memory_order_acquire); // If next is null but tail points elsewhere, the producer is in the @@ -193,10 +228,21 @@ class frame_allocator { if (!next) break; } - pool_[free_count_++] = head; - 
remote_head_.next.store(next, std::memory_order_release); + // Add to appropriate size class pool + size_t sc = head->size_class; + if (sc < NUM_SIZE_CLASSES && free_count_[sc] < POOL_SIZE) { + pool_[sc][free_count_[sc]++] = head; + remote_head_.next.store(next, std::memory_order_release); + ++count; + } else if (sc >= NUM_SIZE_CLASSES) { + // Invalid size class - delete the block + ::operator delete(head); + remote_head_.next.store(next, std::memory_order_release); + } else { + // Pool full - leave it in the queue for later + break; + } head = next; - ++count; } } @@ -218,8 +264,9 @@ class frame_allocator { if (!next) break; } - if (free_count_ < POOL_SIZE) { - pool_[free_count_++] = head; + size_t sc = head->size_class; + if (sc < NUM_SIZE_CLASSES && free_count_[sc] < POOL_SIZE) { + pool_[sc][free_count_[sc]++] = head; } else { ::operator delete(head); } @@ -267,8 +314,8 @@ class frame_allocator { return nullptr; } - std::array pool_; - size_t free_count_; + std::array, NUM_SIZE_CLASSES> pool_; + std::array free_count_; uint32_t pool_id_; // MPSC queue for remote returns (dummy head node pattern) diff --git a/include/elio/coro/promise_base.hpp b/include/elio/coro/promise_base.hpp index 669744e..113c296 100644 --- a/include/elio/coro/promise_base.hpp +++ b/include/elio/coro/promise_base.hpp @@ -72,7 +72,7 @@ class id_allocator { /// Base class for all coroutine promise types /// Implements lightweight virtual stack tracking via thread-local intrusive list /// -/// Debug support: +/// Debug support (when ELIO_ENABLE_DEBUG_METADATA=1): /// - Each frame has a unique ID for identification /// - Source location can be set for debugging /// - State tracking (created/running/suspended/completed/failed) @@ -88,14 +88,16 @@ class promise_base { promise_base() noexcept : frame_magic_(FRAME_MAGIC) , parent_(current_frame_) +#if ELIO_ENABLE_DEBUG_METADATA , debug_state_(coroutine_state::created) , debug_worker_id_(static_cast(-1)) , debug_id_(0) // Lazy allocation - only 
allocated when id() is called +#endif , affinity_(NO_AFFINITY) { current_frame_ = this; } - + ~promise_base() noexcept { current_frame_ = parent_; } @@ -107,13 +109,15 @@ class promise_base { void unhandled_exception() noexcept { exception_ = std::current_exception(); +#if ELIO_ENABLE_DEBUG_METADATA debug_state_ = coroutine_state::failed; +#endif } - + [[nodiscard]] std::exception_ptr exception() const noexcept { return exception_; } - + [[nodiscard]] promise_base* parent() const noexcept { return parent_; } @@ -122,7 +126,8 @@ class promise_base { return current_frame_; } - // Debug accessors + // Debug accessors (available only when debug metadata is enabled) +#if ELIO_ENABLE_DEBUG_METADATA [[nodiscard]] uint64_t frame_magic() const noexcept { return frame_magic_; } [[nodiscard]] const debug_location& location() const noexcept { return debug_location_; } [[nodiscard]] coroutine_state state() const noexcept { return debug_state_; } @@ -149,39 +154,55 @@ class promise_base { void set_worker_id(uint32_t id) noexcept { debug_worker_id_ = id; } +#else + // Stub accessors when debug metadata is disabled + [[nodiscard]] uint64_t frame_magic() const noexcept { return frame_magic_; } + [[nodiscard]] uint64_t id() noexcept { return 0; } + [[nodiscard]] uint32_t worker_id() const noexcept { return static_cast(-1); } + [[nodiscard]] coroutine_state state() const noexcept { return coroutine_state::running; } + [[nodiscard]] const debug_location& location() const noexcept { + static const debug_location empty{}; + return empty; + } + void set_location(const char*, const char*, uint32_t) noexcept {} + void set_state(coroutine_state) noexcept {} + void set_worker_id(uint32_t) noexcept {} +#endif // Affinity accessors /// Get the current thread affinity for this vthread /// @return Worker ID this vthread is bound to, or NO_AFFINITY if unbound [[nodiscard]] size_t affinity() const noexcept { return affinity_; } - + /// Set thread affinity for this vthread /// @param worker_id 
Worker ID to bind to, or NO_AFFINITY to clear void set_affinity(size_t worker_id) noexcept { affinity_ = worker_id; } - + /// Check if this vthread has affinity set [[nodiscard]] bool has_affinity() const noexcept { return affinity_ != NO_AFFINITY; } - + /// Clear thread affinity, allowing this vthread to migrate freely void clear_affinity() noexcept { affinity_ = NO_AFFINITY; } private: // Magic number at start for debugger validation uint64_t frame_magic_; - + // Virtual stack tracking promise_base* parent_; std::exception_ptr exception_; - - // Debug metadata + +#if ELIO_ENABLE_DEBUG_METADATA + // Debug metadata (conditionally compiled) debug_location debug_location_; coroutine_state debug_state_; uint32_t debug_worker_id_; uint64_t debug_id_; - +#endif + // Thread affinity: NO_AFFINITY means can migrate freely size_t affinity_; - + static inline thread_local promise_base* current_frame_ = nullptr; }; diff --git a/include/elio/coro/task.hpp b/include/elio/coro/task.hpp index 212f593..ceff525 100644 --- a/include/elio/coro/task.hpp +++ b/include/elio/coro/task.hpp @@ -171,26 +171,19 @@ class join_handle { public: explicit join_handle(std::shared_ptr> state) noexcept : state_(std::move(state)) {} - + join_handle(join_handle&&) noexcept = default; join_handle& operator=(join_handle&&) noexcept = default; - + join_handle(const join_handle&) = delete; join_handle& operator=(const join_handle&) = delete; - + [[nodiscard]] bool await_ready() const noexcept { return state_->is_completed(); } bool await_suspend(std::coroutine_handle<> awaiter) noexcept { // Keep a local copy of the shared_ptr to prevent use-after-free. - // Without this, a race can occur: - // 1. set_waiter() stores the awaiter and checks completed_ - // 2. Meanwhile, complete() is called, which schedules the awaiter - // 3. The awaiter runs on another thread, finishes, and destroys this join_handle - // 4. The last shared_ptr ref is gone, join_state is destroyed - // 5. 
set_waiter() tries to access destroyed memory - // Holding a local shared_ptr ensures join_state outlives set_waiter(). auto state = state_; return state->set_waiter(awaiter); } @@ -198,7 +191,7 @@ class join_handle { T await_resume() { return state_->get_value(); } - + /// Check if the spawned task has completed [[nodiscard]] bool is_ready() const noexcept { return state_->is_completed(); @@ -252,6 +245,8 @@ class task { std::optional value_; std::coroutine_handle<> continuation_; bool detached_ = false; + // Join state for spawn() - only used when task is spawned + std::shared_ptr> join_state_; promise_type() noexcept = default; @@ -265,13 +260,25 @@ class task { template void return_value(U&& value) { value_.emplace(std::forward(value)); + // Notify join state if present + if (join_state_) { + join_state_->set_value(std::move(*value_)); + } + } + + void unhandled_exception() noexcept { + promise_base::unhandled_exception(); + // Notify join state if present + if (join_state_) { + join_state_->set_exception(exception()); + } } // Custom allocator for coroutine frames void* operator new(size_t size) { return frame_allocator::allocate(size); } - + void operator delete(void* ptr, size_t size) noexcept { frame_allocator::deallocate(ptr, size); } @@ -337,6 +344,8 @@ class task { struct promise_type : promise_base { std::coroutine_handle<> continuation_; bool detached_ = false; + // Join state for spawn() - only used when task is spawned + std::shared_ptr> join_state_; promise_type() noexcept = default; @@ -347,13 +356,26 @@ class task { [[nodiscard]] std::suspend_always initial_suspend() noexcept { return {}; } [[nodiscard]] detail::final_awaiter final_suspend() noexcept { return {}; } - void return_void() noexcept {} + void return_void() noexcept { + // Notify join state if present + if (join_state_) { + join_state_->set_value(); + } + } + + void unhandled_exception() noexcept { + promise_base::unhandled_exception(); + // Notify join state if present + if 
(join_state_) { + join_state_->set_exception(exception()); + } + } // Custom allocator for coroutine frames void* operator new(size_t size) { return frame_allocator::allocate(size); } - + void operator delete(void* ptr, size_t size) noexcept { frame_allocator::deallocate(ptr, size); } @@ -411,37 +433,21 @@ class task { handle_type handle_; }; -namespace detail { - -/// Wrapper task that forwards result to join_state -template -task join_wrapper(task t, std::shared_ptr> state) { - try { - if constexpr (std::is_void_v) { - co_await std::move(t); - state->set_value(); - } else { - T result = co_await std::move(t); - state->set_value(std::move(result)); - } - } catch (...) { - state->set_exception(std::current_exception()); - } -} - -} // namespace detail - // Out-of-line definitions for spawn() methods template join_handle task::spawn() { + // Create join state and attach to task's promise auto state = std::make_shared>(); - detail::join_wrapper(std::move(*this), state).go(); + handle_.promise().join_state_ = state; + // Release and schedule - the promise will notify join state on completion + runtime::schedule_handle(release()); return join_handle(std::move(state)); } inline join_handle task::spawn() { auto state = std::make_shared>(); - detail::join_wrapper(std::move(*this), state).go(); + handle_.promise().join_state_ = state; + runtime::schedule_handle(release()); return join_handle(std::move(state)); } diff --git a/tools/elio-gdb.py b/tools/elio-gdb.py index 155fe64..cf7f0fc 100755 --- a/tools/elio-gdb.py +++ b/tools/elio-gdb.py @@ -16,6 +16,9 @@ elio stats - Show scheduler statistics Works with both live processes and coredumps. + +Note: Debug metadata (location, state, worker_id, id) is only available +when ELIO_ENABLE_DEBUG_METADATA=1 (the CMake option defaults to ON). 
""" import gdb @@ -32,6 +35,16 @@ 4: "failed" } +# Check if debug metadata is enabled in the build +def is_debug_metadata_enabled(): + """Report whether debug metadata is enabled; currently always returns True.""" + try: + # TODO: look up the macro definition in compile commands or symbols + # This is a best-effort check - we'll also detect at runtime + return True # Default assume enabled, detect via missing data below + except: + return False + def read_atomic(val): """Read value from std::atomic.""" @@ -96,76 +109,100 @@ def read_cstring(addr): def get_frame_from_handle(handle_addr): - """Extract promise_base info from a coroutine handle address.""" + """Extract promise_base info from a coroutine handle address. + + Note: Debug metadata fields (location, state, worker_id, id) are only + available when ELIO_ENABLE_DEBUG_METADATA=1. Defaults are substituted and + has_debug_metadata is set to False only when reading these fields raises. + """ if handle_addr == 0: return None - + try: # Coroutine frame layout (typical): # - resume function pointer - # - destroy function pointer + # - destroy function pointer # - promise object - + # Read pointer size ptr_size = gdb.lookup_type("void").pointer().sizeof - + # Promise is typically at offset 2*ptr_size (after resume and destroy) promise_addr = handle_addr + 2 * ptr_size - + # Read magic to validate inferior = gdb.selected_inferior() magic_bytes = inferior.read_memory(promise_addr, 8) magic = int.from_bytes(bytes(magic_bytes), 'little') - + if magic != FRAME_MAGIC: return None - - # Read promise_base fields - # Layout: magic(8) + parent(8) + exception(16) + debug_location(24) + state(1) + pad(3) + worker_id(4) + debug_id(8) - + + # Read promise_base fields (layout varies based on ELIO_ENABLE_DEBUG_METADATA) + # Base layout (always present): magic(8) + parent(8) + exception(16) + # Debug layout (when enabled): + debug_location(24) + state(1) + pad(3) + worker_id(4) + debug_id(8) + # Affinity: size_t (8 bytes on 64-bit) + + # Read parent pointer (always present) parent_bytes = 
inferior.read_memory(promise_addr + 8, ptr_size) parent = int.from_bytes(bytes(parent_bytes), 'little') - - # debug_location starts at offset 8+8+16=32 - loc_offset = 8 + ptr_size + 16 # magic + parent + exception_ptr - file_ptr_bytes = inferior.read_memory(promise_addr + loc_offset, ptr_size) - file_ptr = int.from_bytes(bytes(file_ptr_bytes), 'little') - - func_ptr_bytes = inferior.read_memory(promise_addr + loc_offset + ptr_size, ptr_size) - func_ptr = int.from_bytes(bytes(func_ptr_bytes), 'little') - - line_bytes = inferior.read_memory(promise_addr + loc_offset + 2 * ptr_size, 4) - line = int.from_bytes(bytes(line_bytes), 'little') - - # state at loc_offset + 24 - state_offset = loc_offset + 2 * ptr_size + 4 - state_byte = inferior.read_memory(promise_addr + state_offset, 1) - state = int.from_bytes(bytes(state_byte), 'little') - - # worker_id at state_offset + 4 (after 3 bytes padding) - worker_id_bytes = inferior.read_memory(promise_addr + state_offset + 4, 4) - worker_id = int.from_bytes(bytes(worker_id_bytes), 'little') - - # debug_id at worker_id + 4 - debug_id_bytes = inferior.read_memory(promise_addr + state_offset + 8, 8) - debug_id = int.from_bytes(bytes(debug_id_bytes), 'little') - - # Read strings + + # Try to detect if debug metadata is present by checking if debug_state_ exists + # We'll try to read the debug fields and gracefully handle failures + + # First, try to read debug metadata (assume enabled by default) + has_debug_metadata = True + debug_location_offset = 8 + ptr_size + 16 # magic + parent + exception_ptr + + try: + # Try reading debug_location.file pointer + file_ptr_bytes = inferior.read_memory(promise_addr + debug_location_offset, ptr_size) + file_ptr = int.from_bytes(bytes(file_ptr_bytes), 'little') + + func_ptr_bytes = inferior.read_memory(promise_addr + debug_location_offset + ptr_size, ptr_size) + func_ptr = int.from_bytes(bytes(func_ptr_bytes), 'little') + + line_bytes = inferior.read_memory(promise_addr + debug_location_offset + 
2 * ptr_size, 4) + line = int.from_bytes(bytes(line_bytes), 'little') + + # state at debug_location_offset + 24 + state_offset = debug_location_offset + 2 * ptr_size + 4 + state_byte = inferior.read_memory(promise_addr + state_offset, 1) + state = int.from_bytes(bytes(state_byte), 'little') + + # worker_id at state_offset + 4 (after 3 bytes padding) + worker_id_bytes = inferior.read_memory(promise_addr + state_offset + 4, 4) + worker_id = int.from_bytes(bytes(worker_id_bytes), 'little') + + # debug_id at worker_id + 4 + debug_id_bytes = inferior.read_memory(promise_addr + state_offset + 8, 8) + debug_id = int.from_bytes(bytes(debug_id_bytes), 'little') + except Exception as e: + # Debug metadata not available - use defaults + has_debug_metadata = False + file_ptr = 0 + func_ptr = 0 + line = 0 + state = 1 # running + worker_id = 0xFFFFFFFF # Unknown + debug_id = 0 + + # Read strings (only if debug metadata enabled) file_str = None func_str = None - if file_ptr != 0: + if has_debug_metadata and file_ptr != 0: try: file_mem = inferior.read_memory(file_ptr, 256) file_str = bytes(file_mem).split(b'\x00')[0].decode('utf-8', errors='replace') except: pass - if func_ptr != 0: + if has_debug_metadata and func_ptr != 0: try: func_mem = inferior.read_memory(func_ptr, 256) func_str = bytes(func_mem).split(b'\x00')[0].decode('utf-8', errors='replace') except: pass - + return { "id": debug_id, "state": COROUTINE_STATES.get(state, "unknown"), @@ -175,7 +212,8 @@ def get_frame_from_handle(handle_addr): "function": func_str, "line": line, "address": handle_addr, - "promise_addr": promise_addr + "promise_addr": promise_addr, + "has_debug_metadata": has_debug_metadata } except Exception as e: return None @@ -185,22 +223,23 @@ def walk_virtual_stack(handle_addr): """Walk the virtual stack from a coroutine handle.""" stack = [] visited = set() - + info = get_frame_from_handle(handle_addr) if info: stack.append(info) visited.add(handle_addr) - + # Walk parent chain parent = 
info["parent"] while parent != 0 and parent not in visited: visited.add(parent) # Parent is a promise_base*, need to find the frame address # This is tricky - for now just note we have a parent - stack.append({"id": 0, "address": parent, "state": "parent", - "function": None, "file": None, "line": 0, "worker_id": 0xFFFFFFFF}) + stack.append({"id": 0, "address": parent, "state": "parent", + "function": None, "file": None, "line": 0, "worker_id": 0xFFFFFFFF, + "has_debug_metadata": False}) break - + return stack @@ -322,62 +361,67 @@ def invoke(self, arg, from_tty): class ElioListCommand(gdb.Command): """List all vthreads from worker queues.""" - + def __init__(self): super(ElioListCommand, self).__init__( "elio list", gdb.COMMAND_USER ) - + def invoke(self, arg, from_tty): scheduler = get_scheduler() if scheduler is None: print("Error: No active scheduler found") return - + print("-" * 80) print(f"{'ID':<8} {'State':<12} {'Worker':<8} {'Function':<30} {'Location'}") print("-" * 80) - + count = 0 + has_debug_metadata_warning = False for task_addr, worker_id in iterate_all_tasks(scheduler): info = get_frame_from_handle(task_addr) if info is None: continue - + count += 1 func = info["function"] or "" if len(func) > 28: func = func[:25] + "..." 
- + loc = "" - if info["file"]: + if info.get("has_debug_metadata") and info["file"]: loc = f"{info['file']}" if info["line"] > 0: loc += f":{info['line']}" - + elif info.get("has_debug_metadata") is False: + has_debug_metadata_warning = True + worker = str(worker_id) - + print(f"{info['id']:<8} {info['state']:<12} {worker:<8} {func:<30} {loc}") - + print(f"\nTotal queued coroutines: {count}") + if has_debug_metadata_warning: + print("(Note: Debug metadata is disabled - location info not available)") class ElioBtCommand(gdb.Command): """Show backtrace for vthread(s).""" - + def __init__(self): super(ElioBtCommand, self).__init__( "elio bt", gdb.COMMAND_USER ) - + def invoke(self, arg, from_tty): scheduler = get_scheduler() if scheduler is None: print("Error: No active scheduler found") return - + target_id = None if arg.strip(): try: @@ -385,31 +429,31 @@ def invoke(self, arg, from_tty): except ValueError: print(f"Error: Invalid vthread ID: {arg}") return - + found = False for task_addr, worker_id in iterate_all_tasks(scheduler): info = get_frame_from_handle(task_addr) if info is None: continue - + if target_id is not None and info["id"] != target_id: continue - + found = True print(f"vthread #{info['id']} [{info['state']}] (worker {worker_id})") - + stack = walk_virtual_stack(task_addr) for i, frame in enumerate(stack): func = frame["function"] or "" loc = "" - if frame.get("file"): + if frame.get("has_debug_metadata") and frame.get("file"): loc = f" at {frame['file']}" if frame["line"] > 0: loc += f":{frame['line']}" - + print(f" #{i:<3} 0x{frame['address']:016x} in {func}{loc}") print() - + if not found: if target_id is not None: print(f"Error: vthread #{target_id} not found in queues") @@ -467,64 +511,67 @@ def invoke(self, arg, from_tty): class ElioInfoCommand(gdb.Command): """Show detailed info for a specific vthread.""" - + def __init__(self): super(ElioInfoCommand, self).__init__( "elio info", gdb.COMMAND_USER ) - + def invoke(self, arg, from_tty): if not 
arg.strip(): print("Usage: elio info ") return - + try: target_id = int(arg.strip()) except ValueError: print(f"Error: Invalid vthread ID: {arg}") return - + scheduler = get_scheduler() if scheduler is None: print("Error: No active scheduler found") return - + for task_addr, worker_id in iterate_all_tasks(scheduler): info = get_frame_from_handle(task_addr) if info is None: continue - + if info["id"] != target_id: continue - + print(f"vthread #{info['id']}") print(f" State: {info['state']}") print(f" Worker: {worker_id}") print(f" Handle: 0x{info['address']:016x}") print(f" Promise: 0x{info['promise_addr']:016x}") - - if info["function"]: - print(f" Function: {info['function']}") - if info["file"]: - loc = info["file"] - if info["line"] > 0: - loc += f":{info['line']}" - print(f" Location: {loc}") - + + if info.get("has_debug_metadata"): + if info["function"]: + print(f" Function: {info['function']}") + if info["file"]: + loc = info["file"] + if info["line"] > 0: + loc += f":{info['line']}" + print(f" Location: {loc}") + else: + print(" (Debug metadata disabled - function/location not available)") + print(f"\n Virtual Call Stack:") stack = walk_virtual_stack(task_addr) for i, frame in enumerate(stack): func = frame["function"] or "" loc = "" - if frame.get("file"): + if frame.get("has_debug_metadata") and frame.get("file"): loc = f" at {frame['file']}" if frame["line"] > 0: loc += f":{frame['line']}" print(f" #{i:<3} {func}{loc}") - + return - + print(f"Error: vthread #{target_id} not found in queues") diff --git a/tools/elio-lldb.py b/tools/elio-lldb.py index 249acd8..d059b75 100755 --- a/tools/elio-lldb.py +++ b/tools/elio-lldb.py @@ -109,57 +109,82 @@ def get_scheduler(target, process): def get_frame_from_handle(process, handle_addr): - """Extract promise_base info from a coroutine handle address.""" + """Extract promise_base info from a coroutine handle address. 
+ + Note: Debug metadata fields (location, state, worker_id, id) are only + available when ELIO_ENABLE_DEBUG_METADATA=1. Defaults are substituted and + has_debug_metadata is set to False only when reading these fields raises. + """ if handle_addr == 0: return None - + try: ptr_size = process.GetAddressByteSize() - + # Coroutine frame layout (typical): # - resume function pointer - # - destroy function pointer + # - destroy function pointer # - promise object - + # Promise is typically at offset 2*ptr_size (after resume and destroy) promise_addr = handle_addr + 2 * ptr_size - + # Read magic to validate magic = read_uint64(process, promise_addr) if magic != FRAME_MAGIC: return None - - # Read promise_base fields - # Layout: magic(8) + parent(8) + exception(16) + debug_location(24) + state(1) + pad(3) + worker_id(4) + debug_id(8) - + + # Read promise_base fields (layout varies based on ELIO_ENABLE_DEBUG_METADATA) + # Base layout (always present): magic(8) + parent(8) + exception(16) + # Debug layout (when enabled): + debug_location(24) + state(1) + pad(3) + worker_id(4) + debug_id(8) + # Affinity: size_t (8 bytes on 64-bit) + + # Read parent pointer (always present) parent = read_pointer(process, promise_addr + 8) - + + # Try to detect if debug metadata is present + # We'll try to read the debug fields and gracefully handle failures + + 
# First, try to read debug metadata (assume enabled by default) + has_debug_metadata = True + debug_location_offset = 8 + ptr_size + 16 # magic + parent + exception_ptr + + try: + # Try reading debug_location.file pointer + file_ptr = read_pointer(process, promise_addr + debug_location_offset) + func_ptr = read_pointer(process, promise_addr + debug_location_offset + ptr_size) + line = read_uint32(process, promise_addr + debug_location_offset + 2 * ptr_size) + + # state at debug_location_offset + 24 + state_offset = debug_location_offset + 2 * ptr_size + 4 + state = read_uint8(process, promise_addr + state_offset) + + # worker_id at state_offset + 4 (after 3 bytes padding) + worker_id = read_uint32(process, promise_addr + state_offset + 4) + + # debug_id at worker_id + 4 + debug_id = read_uint64(process, promise_addr + state_offset + 8) + except Exception as e: + # Debug metadata not available - use defaults + has_debug_metadata = False + file_ptr = 0 + func_ptr = 0 + line = 0 + state = 1 # running + worker_id = 0xFFFFFFFF # Unknown + debug_id = 0 + return { "id": debug_id, "state": COROUTINE_STATES.get(state, "unknown"), "worker_id": worker_id, "parent": parent, - "file": read_cstring(process, file_ptr), - "function": read_cstring(process, func_ptr), - "line": line, + "file": read_cstring(process, file_ptr) if has_debug_metadata else None, + "function": read_cstring(process, func_ptr) if has_debug_metadata else None, + "line": line if has_debug_metadata else 0, "address": handle_addr, - "promise_addr": promise_addr + "promise_addr": promise_addr, + "has_debug_metadata": has_debug_metadata } except Exception as e: return None @@ -169,22 +194,23 @@ def walk_virtual_stack(process, handle_addr): """Walk the virtual stack from a coroutine handle.""" stack = [] visited = set() - + info = get_frame_from_handle(process, handle_addr) if info: stack.append(info) visited.add(handle_addr) - + # Walk parent chain parent = info["parent"] while parent != 0 and parent not in 
visited: visited.add(parent) # Parent is a promise_base*, need to find the frame address # For now just note we have a parent - stack.append({"id": 0, "address": parent, "state": "parent", - "function": None, "file": None, "line": 0, "worker_id": 0xFFFFFFFF}) + stack.append({"id": 0, "address": parent, "state": "parent", + "function": None, "file": None, "line": 0, "worker_id": 0xFFFFFFFF, + "has_debug_metadata": False}) break - + return stack @@ -355,38 +381,43 @@ def elio_list(debugger, command, result, internal_dict): """List all Elio vthreads from worker queues.""" target = debugger.GetSelectedTarget() process = target.GetProcess() - + if not process.IsValid(): result.AppendMessage("Error: No process") return - + sched_addr = get_scheduler(target, process) if sched_addr is None: result.AppendMessage("Error: No active scheduler found") return - + result.AppendMessage("-" * 80) result.AppendMessage(f"{'ID':<8} {'State':<12} {'Worker':<8} {'Function':<30} {'Location'}") result.AppendMessage("-" * 80) - + count = 0 + has_debug_metadata_warning = False for task_addr, info in get_all_frames(target, process): count += 1 func = info["function"] or "" if len(func) > 28: func = func[:25] + "..." 
- + loc = "" - if info["file"]: + if info.get("has_debug_metadata") and info["file"]: loc = f"{info['file']}" if info["line"] > 0: loc += f":{info['line']}" - + elif info.get("has_debug_metadata") is False: + has_debug_metadata_warning = True + worker = str(info.get("queue_worker_id", info["worker_id"])) - + result.AppendMessage(f"{info['id']:<8} {info['state']:<12} {worker:<8} {func:<30} {loc}") - + result.AppendMessage(f"\nTotal queued coroutines: {count}") + if has_debug_metadata_warning: + result.AppendMessage("(Note: Debug metadata is disabled - location info not available)") result.AppendMessage("Note: Only queued (not currently executing) coroutines are shown.") @@ -394,16 +425,16 @@ def elio_bt(debugger, command, result, internal_dict): """Show backtrace for Elio vthread(s).""" target = debugger.GetSelectedTarget() process = target.GetProcess() - + if not process.IsValid(): result.AppendMessage("Error: No process") return - + sched_addr = get_scheduler(target, process) if sched_addr is None: result.AppendMessage("Error: No active scheduler found") return - + target_id = None if command.strip(): try: @@ -411,29 +442,29 @@ def elio_bt(debugger, command, result, internal_dict): except ValueError: result.AppendMessage(f"Error: Invalid vthread ID: {command}") return - + found = False for task_addr, info in get_all_frames(target, process): if target_id is not None and info["id"] != target_id: continue - + found = True worker_id = info.get("queue_worker_id", info["worker_id"]) result.AppendMessage(f"vthread #{info['id']} [{info['state']}] (worker {worker_id})") - + stack = walk_virtual_stack(process, task_addr) for i, frame in enumerate(stack): func = frame["function"] or "" loc = "" - if frame.get("file"): + if frame.get("has_debug_metadata") and frame.get("file"): loc = f" at {frame['file']}" if frame["line"] > 0: loc += f":{frame['line']}" - + result.AppendMessage(f" #{i:<3} 0x{frame['address']:016x} in {func}{loc}") - + result.AppendMessage("") - + if not 
found: if target_id is not None: result.AppendMessage(f"Error: vthread #{target_id} not found in queues")