From 27b0175739963c828dee9887116f394949029ec0 Mon Sep 17 00:00:00 2001 From: Coldwings Date: Thu, 19 Mar 2026 10:30:33 +0800 Subject: [PATCH 1/4] VThread stack now contains faster frame allocation --- .gitignore | 2 +- CMakeLists.txt | 1 + README.md | 66 ++++- examples/debug_test.cpp | 3 +- examples/rpc_server_example.cpp | 26 +- examples/tcp_echo_client.cpp | 21 +- include/elio/coro/frame.hpp | 32 +-- include/elio/coro/frame_allocator.hpp | 75 ++++- include/elio/coro/promise_base.hpp | 150 +++++++++- include/elio/coro/task.hpp | 142 ++++++++- include/elio/coro/vthread_owner.hpp | 214 ++++++++++++++ include/elio/debug.hpp | 2 +- include/elio/elio.hpp | 1 + include/elio/http/client_base.hpp | 75 ++++- include/elio/http/http2_client.hpp | 86 +++--- include/elio/http/http_client.hpp | 41 ++- include/elio/http/sse_client.hpp | 7 +- include/elio/http/websocket_client.hpp | 8 +- include/elio/net/resolve.hpp | 382 +++++++++++++++++++++++++ include/elio/net/stream.hpp | 13 +- include/elio/net/tcp.hpp | 73 +---- include/elio/rpc/rpc_client.hpp | 126 ++++++-- include/elio/runtime/scheduler.hpp | 39 ++- include/elio/tls/tls_stream.hpp | 40 ++- tests/unit/test_io.cpp | 302 ++++++++++++++++++- tools/elio-gdb.py | 92 ++++-- tools/elio-lldb.py | 79 +++-- tools/elio_lldb.py | 29 ++ wiki/API-Reference.md | 6 + wiki/Core-Concepts.md | 26 +- wiki/Debugging.md | 58 ++-- wiki/Getting-Started.md | 16 +- wiki/Home.md | 2 +- wiki/Networking.md | 68 ++++- wiki/RPC-Framework.md | 28 +- wiki/WebSocket-SSE.md | 35 ++- 36 files changed, 2051 insertions(+), 315 deletions(-) create mode 100644 include/elio/coro/vthread_owner.hpp create mode 100644 include/elio/net/resolve.hpp mode change 100755 => 100644 tools/elio-gdb.py mode change 100755 => 100644 tools/elio-lldb.py create mode 100644 tools/elio_lldb.py diff --git a/.gitignore b/.gitignore index 87c2357..22d75a2 100644 --- a/.gitignore +++ b/.gitignore @@ -35,5 +35,5 @@ benchmark # Temporary files *.tmp .DS_Store -.qoder/ +.clangd __pycache__ 
diff --git a/CMakeLists.txt b/CMakeLists.txt index d654e56..1160504 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -216,6 +216,7 @@ install(PROGRAMS ) install(FILES tools/elio-gdb.py + tools/elio_lldb.py tools/elio-lldb.py DESTINATION share/elio ) diff --git a/README.md b/README.md index d10aefc..e2bfd81 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # Elio Coroutine Library -**Elio** is a modern, production-ready C++20 coroutine library for high-performance asynchronous programming on Linux. It provides stackless coroutines with virtual stack tracking, a multi-threaded work-stealing scheduler, and a foundation for efficient I/O operations. +**Elio** is a modern, production-ready C++20 coroutine library for high-performance asynchronous programming on Linux. It provides stackless coroutines with extended vthread tracking, a multi-threaded work-stealing scheduler, and a foundation for efficient I/O operations. [![CI](https://github.com/Coldwings/Elio/actions/workflows/ci.yml/badge.svg)](https://github.com/Coldwings/Elio/actions/workflows/ci.yml) [![C++20](https://img.shields.io/badge/C%2B%2B-20-blue.svg)](https://en.cppreference.com/w/cpp/20) @@ -10,7 +10,7 @@ - **C++20 Stackless Coroutines** with `task` type - **Ergonomic Task Spawning**: `go()` for fire-and-forget, `spawn()` for joinable tasks -- **Virtual Stack Tracking** for natural exception propagation +- **Extended VThread Tracking** covering construction lineage, first activation, and owner-domain semantics - **Work-Stealing Scheduler** with lock-free Chase-Lev deques - **Dynamic Thread Pool** with runtime adjustment - **Autoscaler** for automatic worker thread scaling under load @@ -142,9 +142,9 @@ elio:: ├── coro:: // Coroutine primitives │ ├── task // Primary coroutine type │ ├── join_handle // Handle for awaiting spawned tasks -│ ├── promise_base // Virtual stack base class +│ ├── promise_base // Vthread metadata base class │ ├── awaitable_base // CRTP awaitable base -│ └── frame utilities 
// Virtual stack inspection +│ └── frame utilities // Frame and owner inspection │ ├── runtime:: // Scheduler and execution │ ├── scheduler // Work-stealing scheduler @@ -188,15 +188,21 @@ elio:: tools/ // Debugging tools ├── elio-pstack // pstack-like CLI tool ├── elio-gdb.py // GDB Python extension -└── elio-lldb.py // LLDB Python extension +├── elio_lldb.py // LLDB import entrypoint +└── elio-lldb.py // LLDB implementation script ``` -### Virtual Stack +### Virtual Stack And VThreads -Elio implements a **virtual stack** by linking coroutine frames together. This enables: +Elio tracks coroutine relationships at three levels: +- construction lineage via `parent_` +- first-activation lineage via `activation_parent_` +- memory and execution ownership via `vthread_owner_` + +This enables: - Natural exception propagation through `co_await` chains -- Call chain inspection for debugging -- Automatic cleanup on coroutine completion +- Call chain inspection for debugging and postmortem analysis +- Explicit semantics for `spawn()`, `go()`, and worker migration ```cpp outer() -> middle() -> inner() @@ -255,7 +261,7 @@ make ## Debugging -Elio provides debugging tools to inspect coroutine states and virtual call stacks: +Elio provides debugging tools to inspect queued coroutine states, virtual call stacks, and vthread ownership metadata: ```bash # pstack-like tool for coroutines @@ -267,13 +273,17 @@ gdb -ex 'source tools/elio-gdb.py' ./myapp (gdb) elio list # List all vthreads (gdb) elio bt # Show all backtraces (gdb) elio bt 42 # Show backtrace for vthread #42 +(gdb) elio info 42 # Show owner/root/parent metadata -# LLDB extension -lldb -o 'command script import tools/elio-lldb.py' ./myapp +# LLDB extension (use entrypoint wrapper) +lldb -o 'command script import tools/elio_lldb.py' ./myapp (lldb) elio list (lldb) elio bt +(lldb) elio info 42 ``` +The debugger tools currently enumerate queued coroutines from worker queues. 
They prefer the activation-parent chain when it exists, and fall back to the construction-parent chain otherwise. + See the [Debugging wiki page](wiki/Debugging.md) for detailed documentation. ## API Reference @@ -349,6 +359,38 @@ if (handle.is_ready()) { } ``` +### Hostname Resolution Configuration + +Hostname resolution is explicit and configurable via `net::resolve_options`. +`tcp_connect` accepts concrete addresses only, so host+port callers should resolve first. + +```cpp +#include + +// Common options with cache enabled +auto opts = elio::net::default_cached_resolve_options(); +opts.positive_ttl = std::chrono::seconds(30); +opts.negative_ttl = std::chrono::seconds(2); + +// Resolve + connect (single best address) +auto addr = co_await elio::net::resolve_hostname("api.example.com", 443, opts); +if (addr) { + auto tcp = co_await elio::net::tcp_connect(*addr); +} + +// HTTP-family clients can set this in config directly +elio::http::client_config http_cfg; +http_cfg.resolve_options = opts; +http_cfg.rotate_resolved_addresses = true; + +elio::http::h2_client_config h2_cfg; +h2_cfg.resolve_options = opts; +h2_cfg.rotate_resolved_addresses = true; + +// RPC client explicit resolve configuration +auto rpc = co_await elio::rpc::tcp_rpc_client::connect("rpc.example.com", 9000, opts); +``` + ### Exception Handling ```cpp diff --git a/examples/debug_test.cpp b/examples/debug_test.cpp index 1168586..8b9fdde 100644 --- a/examples/debug_test.cpp +++ b/examples/debug_test.cpp @@ -4,7 +4,7 @@ /// coroutines that run concurrently and can be inspected using: /// - elio-pstack (command line) /// - GDB with elio-gdb.py -/// - LLDB with elio-lldb.py +/// - LLDB with elio_lldb.py (entrypoint wrapper) /// /// Usage: /// ./debug_test # Run normally @@ -135,6 +135,7 @@ coro::task async_main(int argc, char* argv[]) { std::cout << "Paused for debugger. 
Use one of:" << std::endl; std::cout << " elio-pstack " << getpid() << std::endl; std::cout << " gdb -p " << getpid() << " -ex 'source tools/elio-gdb.py' -ex 'elio bt'" << std::endl; + std::cout << " lldb -p " << getpid() << " -o 'command script import tools/elio_lldb.py' -o 'elio bt'" << std::endl; std::cout << std::endl; std::cout << "Press Ctrl+C to exit." << std::endl; std::cout << std::endl; diff --git a/examples/rpc_server_example.cpp b/examples/rpc_server_example.cpp index dddedef..ca33657 100644 --- a/examples/rpc_server_example.cpp +++ b/examples/rpc_server_example.cpp @@ -118,6 +118,11 @@ using Echo = ELIO_RPC_METHOD(5, EchoRequest, EchoResponse); // In-memory user store class UserStore { public: + struct UserListSnapshot { + std::vector users; + int32_t total_count; + }; + std::optional get_user(int32_t id) { std::lock_guard lock(mutex_); auto it = users_.find(id); @@ -135,22 +140,18 @@ class UserStore { return id; } - std::vector list_users(int32_t offset, int32_t limit) { + UserListSnapshot list_users_snapshot(int32_t offset, int32_t limit) { std::lock_guard lock(mutex_); - std::vector result; + UserListSnapshot snapshot; + snapshot.total_count = static_cast(users_.size()); int32_t count = 0; for (const auto& [id, user] : users_) { - if (count >= offset && result.size() < static_cast(limit)) { - result.push_back(user); + if (count >= offset && snapshot.users.size() < static_cast(limit)) { + snapshot.users.push_back(user); } ++count; } - return result; - } - - int32_t total_count() { - std::lock_guard lock(mutex_); - return static_cast(users_.size()); + return snapshot; } private: @@ -235,8 +236,9 @@ task server_main(uint16_t port, [[maybe_unused]] scheduler& sched) { server.register_method([](const ListUsersRequest& req) -> task { ListUsersResponse resp; - resp.users = g_user_store.list_users(req.offset, req.limit); - resp.total_count = g_user_store.total_count(); + auto snapshot = g_user_store.list_users_snapshot(req.offset, req.limit); + resp.users = 
std::move(snapshot.users); + resp.total_count = snapshot.total_count; co_return resp; }); diff --git a/examples/tcp_echo_client.cpp b/examples/tcp_echo_client.cpp index 7cd3471..f6c6c22 100644 --- a/examples/tcp_echo_client.cpp +++ b/examples/tcp_echo_client.cpp @@ -29,9 +29,14 @@ using namespace elio::net; /// Client coroutine - connects, sends messages, receives responses task client_main(std::string_view host, uint16_t port) { ELIO_LOG_INFO("Connecting to {}:{}...", host, port); - - // Connect to server - auto stream_result = co_await tcp_connect(host, port); + + auto resolved = co_await resolve_hostname(host, port); + if (!resolved) { + ELIO_LOG_ERROR("Resolve failed: {}", strerror(errno)); + co_return 1; + } + + auto stream_result = co_await tcp_connect(*resolved); if (!stream_result) { ELIO_LOG_ERROR("Connection failed: {}", strerror(errno)); @@ -79,8 +84,14 @@ task client_main(std::string_view host, uint16_t port) { /// Non-interactive benchmark mode task benchmark_main(std::string_view host, uint16_t port, int iterations) { ELIO_LOG_INFO("Connecting to {}:{} for benchmark...", host, port); - - auto stream_result = co_await tcp_connect(host, port); + + auto resolved = co_await resolve_hostname(host, port); + if (!resolved) { + ELIO_LOG_ERROR("Resolve failed: {}", strerror(errno)); + co_return 1; + } + + auto stream_result = co_await tcp_connect(*resolved); if (!stream_result) { ELIO_LOG_ERROR("Connection failed: {}", strerror(errno)); co_return 1; diff --git a/include/elio/coro/frame.hpp b/include/elio/coro/frame.hpp index c22b3f2..beeab58 100644 --- a/include/elio/coro/frame.hpp +++ b/include/elio/coro/frame.hpp @@ -1,9 +1,11 @@ #pragma once #include "promise_base.hpp" +#include "frame_allocator.hpp" +#include "vthread_owner.hpp" +#include #include #include -#include #include namespace elio::coro { @@ -49,21 +51,19 @@ inline void log_virtual_stack() { /// but is portable across GCC and Clang. 
The frame layout is: /// [resume_fn_ptr][destroy_fn_ptr][promise...] inline promise_base* get_promise_base(void* handle_addr) noexcept { - if (!handle_addr) return nullptr; - - // The coroutine frame layout has the promise after two function pointers - // (resume and destroy). This is consistent across GCC and Clang. - constexpr size_t promise_offset = 2 * sizeof(void*); - - auto* candidate = reinterpret_cast( - static_cast(handle_addr) + promise_offset); - - // Validate using the magic number - if (candidate->frame_magic() == promise_base::FRAME_MAGIC) { - return candidate; - } - - return nullptr; + return promise_base::from_handle_address(handle_addr); +} + +inline void ensure_vthread_owner(std::coroutine_handle<> handle) { + auto* promise = get_promise_base(handle.address()); + if (!promise) return; + if (promise->vthread_owner()) return; + + auto* owner = new vthread_owner(); + promise->bind_vthread_owner_once(owner); + promise->set_vthread_root(true); + frame_allocator::set_owner_metadata(handle.address(), owner, true); + promise_base::record_root_owner_creation(); } /// Check if a coroutine has affinity for a specific worker diff --git a/include/elio/coro/frame_allocator.hpp b/include/elio/coro/frame_allocator.hpp index 88177d1..7e3bcbc 100644 --- a/include/elio/coro/frame_allocator.hpp +++ b/include/elio/coro/frame_allocator.hpp @@ -32,11 +32,18 @@ namespace elio::coro { /// Note: Under sanitizers, pooling is disabled to allow proper leak/error detection. 
class frame_allocator { public: + struct owner_metadata { + void* owner = nullptr; + bool is_root = false; + bool found = false; + }; + // Support frames up to 256 bytes (covers most simple tasks) // Actual allocation includes header, so user-visible size is MAX_FRAME_SIZE static constexpr size_t MAX_FRAME_SIZE = 256; static constexpr size_t POOL_SIZE = 1024; static constexpr size_t REMOTE_QUEUE_BATCH = 64; // Process remote returns in batches + static constexpr uint32_t INVALID_POOL_ID = UINT32_MAX; // Detect sanitizers: GCC uses __SANITIZE_*, Clang uses __has_feature #if defined(__SANITIZE_ADDRESS__) || defined(__SANITIZE_THREAD__) @@ -48,13 +55,20 @@ class frame_allocator { #endif #ifdef ELIO_SANITIZER_ACTIVE - // Under sanitizers, bypass pooling entirely for accurate leak detection + // Under sanitizers, bypass pooling entirely, but still keep the hidden + // header so delete-path metadata inspection remains valid. static void* allocate(size_t size) { - return ::operator new(size); + void* block = ::operator new(HEADER_SIZE + size); + auto* header = static_cast(block); + header->source_pool_id = INVALID_POOL_ID; + header->next.store(nullptr, std::memory_order_relaxed); + header->owner = nullptr; + header->is_root = false; + return block_to_user(block); } static void deallocate(void* ptr, [[maybe_unused]] size_t size) noexcept { - ::operator delete(ptr); + delete_block(user_to_block(ptr)); } #else static void* allocate(size_t size) { @@ -70,6 +84,8 @@ class frame_allocator { // This is important because blocks may have been returned from remote threads auto* header = static_cast(block); header->source_pool_id = alloc.pool_id_; + header->owner = nullptr; + header->is_root = false; return block_to_user(block); } @@ -78,10 +94,19 @@ class frame_allocator { auto* header = static_cast(block); header->source_pool_id = alloc.pool_id_; header->next.store(nullptr, std::memory_order_relaxed); + header->owner = nullptr; + header->is_root = false; return 
block_to_user(block); } - // Fall back to standard allocation for large frames (no header) - return ::operator new(size); + // Large frames still carry a small header so owner metadata can be + // attached later without touching promise memory in operator delete. + void* block = ::operator new(HEADER_SIZE + size); + auto* header = static_cast(block); + header->source_pool_id = INVALID_POOL_ID; + header->next.store(nullptr, std::memory_order_relaxed); + header->owner = nullptr; + header->is_root = false; + return block_to_user(block); } static void deallocate(void* ptr, size_t size) noexcept { @@ -97,7 +122,7 @@ class frame_allocator { return; } // Pool full, delete the block (not the user pointer!) - ::operator delete(block); + delete_block(block); return; } else { // Cross-thread deallocation: push to source pool's remote queue @@ -107,20 +132,50 @@ class frame_allocator { return; } // Source pool no longer exists (thread exited), delete the block - ::operator delete(block); + delete_block(block); return; } } - // Large allocation - was allocated without header - ::operator delete(ptr); + // Large allocation - free the underlying block carrying the header + delete_block(user_to_block(ptr)); } #endif + static void set_owner_metadata(void* ptr, void* owner, bool is_root) noexcept { + if (!ptr) return; + auto* header = static_cast(user_to_block(ptr)); + header->owner = owner; + header->is_root = is_root; + } + + [[nodiscard]] static owner_metadata inspect_owner_metadata(void* ptr) noexcept { + if (!ptr) return {}; + auto* header = static_cast(user_to_block(ptr)); + return owner_metadata{ + .owner = header->owner, + .is_root = header->is_root, + .found = header->owner != nullptr, + }; + } + private: + static void delete_block(void* block) noexcept { +#if defined(__GNUC__) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wmismatched-new-delete" +#endif + ::operator delete(block); +#if defined(__GNUC__) +#pragma GCC diagnostic pop +#endif + } + // Block 
header stored before user data struct block_header { uint32_t source_pool_id; // ID of the pool that allocated this block std::atomic next; // For MPSC queue linkage + void* owner; // Owning vthread domain, if attached later + bool is_root; // Root frame responsible for owner lifetime }; // Total block size including header, aligned for user data @@ -139,7 +194,7 @@ class frame_allocator { frame_allocator() : free_count_(0) , pool_id_(next_pool_id_.fetch_add(1, std::memory_order_relaxed)) - , remote_head_{0, {nullptr}} // Initialize dummy head: pool_id=0, next=nullptr + , remote_head_{0, {nullptr}, nullptr, false} // Dummy head for remote queue , remote_tail_(&remote_head_) { // Register this pool for cross-thread access register_pool(this); diff --git a/include/elio/coro/promise_base.hpp b/include/elio/coro/promise_base.hpp index 669744e..bdb51ea 100644 --- a/include/elio/coro/promise_base.hpp +++ b/include/elio/coro/promise_base.hpp @@ -1,6 +1,8 @@ #pragma once +#include #include +#include #include #include #include @@ -88,10 +90,15 @@ class promise_base { promise_base() noexcept : frame_magic_(FRAME_MAGIC) , parent_(current_frame_) + , activation_parent_(nullptr) + , vthread_owner_(current_owner_) , debug_state_(coroutine_state::created) , debug_worker_id_(static_cast(-1)) , debug_id_(0) // Lazy allocation - only allocated when id() is called , affinity_(NO_AFFINITY) + , frame_size_(consume_next_frame_size()) + , started_(false) + , vthread_root_(false) { current_frame_ = this; } @@ -118,10 +125,128 @@ class promise_base { return parent_; } + // Construction-time parent relationship (legacy parent semantics) + [[nodiscard]] promise_base* construction_parent() const noexcept { + return parent_; + } + + // First-activation parent relationship (runtime await-chain semantics) + [[nodiscard]] promise_base* activation_parent() const noexcept { + return activation_parent_; + } + + void set_activation_parent(promise_base* parent) noexcept { + activation_parent_ = 
parent; + } + + bool bind_activation_parent_once(promise_base* parent) noexcept { + if (activation_parent_ == nullptr) { + activation_parent_ = parent; + activation_bindings_.fetch_add(1, std::memory_order_relaxed); + return true; + } + assert(activation_parent_ == parent && "activation_parent rebound inconsistently"); + return false; + } + + [[nodiscard]] void* vthread_owner() const noexcept { + return vthread_owner_; + } + + void set_vthread_owner(void* owner) noexcept { + vthread_owner_ = owner; + } + + bool bind_vthread_owner_once(void* owner) noexcept { + if (vthread_owner_ == nullptr) { + vthread_owner_ = owner; + owner_bindings_.fetch_add(1, std::memory_order_relaxed); + return true; + } + assert(vthread_owner_ == owner && "vthread_owner rebound inconsistently"); + return false; + } + + [[nodiscard]] size_t frame_size() const noexcept { + return frame_size_; + } + + [[nodiscard]] bool started() const noexcept { + return started_; + } + + void mark_started() noexcept { + started_ = true; + } + + [[nodiscard]] bool is_vthread_root() const noexcept { + return vthread_root_; + } + + void set_vthread_root(bool value) noexcept { + vthread_root_ = value; + } + [[nodiscard]] static promise_base* current_frame() noexcept { return current_frame_; } + [[nodiscard]] static void* current_owner() noexcept { + return current_owner_; + } + + static void set_current_owner(void* owner) noexcept { + current_owner_ = owner; + } + + static void set_next_frame_size(size_t size) noexcept { + next_frame_size_ = size; + } + + static void record_root_owner_creation() noexcept { + root_owner_creations_.fetch_add(1, std::memory_order_relaxed); + } + + static void record_owner_context_restore() noexcept { + owner_context_restores_.fetch_add(1, std::memory_order_relaxed); + } + + static void record_ownerless_resume() noexcept { + ownerless_resumes_.fetch_add(1, std::memory_order_relaxed); + } + + [[nodiscard]] static uint64_t owner_bindings() noexcept { + return 
owner_bindings_.load(std::memory_order_relaxed); + } + + [[nodiscard]] static uint64_t activation_bindings() noexcept { + return activation_bindings_.load(std::memory_order_relaxed); + } + + [[nodiscard]] static uint64_t root_owner_creations() noexcept { + return root_owner_creations_.load(std::memory_order_relaxed); + } + + [[nodiscard]] static uint64_t owner_context_restores() noexcept { + return owner_context_restores_.load(std::memory_order_relaxed); + } + + [[nodiscard]] static uint64_t ownerless_resumes() noexcept { + return ownerless_resumes_.load(std::memory_order_relaxed); + } + + [[nodiscard]] static promise_base* from_handle_address(void* handle_addr) noexcept { + if (!handle_addr) return nullptr; + + // GCC/Clang coroutine frame layout: + // [resume_fn_ptr][destroy_fn_ptr][promise...] + constexpr size_t promise_offset = 2 * sizeof(void*); + auto* candidate = reinterpret_cast( + static_cast(handle_addr) + promise_offset); + + return candidate->frame_magic() == FRAME_MAGIC ? candidate : nullptr; + } + // Debug accessors [[nodiscard]] uint64_t frame_magic() const noexcept { return frame_magic_; } [[nodiscard]] const debug_location& location() const noexcept { return debug_location_; } @@ -166,11 +291,21 @@ class promise_base { void clear_affinity() noexcept { affinity_ = NO_AFFINITY; } private: + static size_t consume_next_frame_size() noexcept { + size_t size = next_frame_size_; + next_frame_size_ = 0; + return size; + } + // Magic number at start for debugger validation uint64_t frame_magic_; - // Virtual stack tracking + // Construction-time stack tracking promise_base* parent_; + // Runtime activation relationship + promise_base* activation_parent_; + // Runtime vthread ownership context + void* vthread_owner_; std::exception_ptr exception_; // Debug metadata @@ -181,8 +316,21 @@ class promise_base { // Thread affinity: NO_AFFINITY means can migrate freely size_t affinity_; + + // Frame metadata + size_t frame_size_; + bool started_; + bool 
vthread_root_; static inline thread_local promise_base* current_frame_ = nullptr; + static inline thread_local void* current_owner_ = nullptr; + static inline thread_local size_t next_frame_size_ = 0; + + static inline std::atomic owner_bindings_{0}; + static inline std::atomic activation_bindings_{0}; + static inline std::atomic root_owner_creations_{0}; + static inline std::atomic owner_context_restores_{0}; + static inline std::atomic ownerless_resumes_{0}; }; } // namespace elio::coro diff --git a/include/elio/coro/task.hpp b/include/elio/coro/task.hpp index 212f593..95dac1c 100644 --- a/include/elio/coro/task.hpp +++ b/include/elio/coro/task.hpp @@ -2,7 +2,9 @@ #include "promise_base.hpp" #include "frame_allocator.hpp" +#include "vthread_owner.hpp" #include +#include #include #include #include @@ -18,6 +20,66 @@ void schedule_handle(std::coroutine_handle<> handle) noexcept; namespace elio::coro { +inline void* relocate_spawn_go_root_frame(void* source, size_t frame_size) { + if (!source || frame_size == 0) return source; + + auto* owner = new vthread_owner(); + void* destination = owner->allocate(frame_size); + if (!destination) { + delete owner; + return source; + } + + std::memcpy(destination, source, frame_size); + + // GCC stores the get_return_object() return value (task{h}) inside the + // coroutine frame. That stored task contains a coroutine_handle whose + // _M_fr_ptr == source (the old frame address). After memcpy this stale + // self-reference remains, causing final_awaiter::await_suspend to receive + // the OLD frame address via h. Fix up every pointer-sized slot that still + // holds the old address. 
+ auto* dst_bytes = static_cast(destination); + const auto old_val = reinterpret_cast(source); + const auto new_val = reinterpret_cast(destination); + for (size_t i = 0; i + sizeof(void*) <= frame_size; i += sizeof(void*)) { + uintptr_t slot; + std::memcpy(&slot, dst_bytes + i, sizeof(slot)); + if (slot == old_val) { + std::memcpy(dst_bytes + i, &new_val, sizeof(new_val)); + } + } + + auto* destination_promise = promise_base::from_handle_address(destination); + if (!destination_promise) { + delete owner; + return source; + } + + destination_promise->set_vthread_owner(owner); + destination_promise->set_activation_parent(nullptr); + destination_promise->set_vthread_root(true); + vthread_owner::mark_root_allocation(destination, true); + return destination; +} + +/// Free the backing allocation of a cold (pre-resume) coroutine frame +/// WITHOUT invoking C++ destructors. +/// +/// Used after memcpy relocation: the "live" state has been copied to a new address; +/// 'ptr' is the abandoned source. Running destructors here would double-destroy +/// objects already owned by the relocated frame (e.g. captured task handles, +/// shared_ptr ref counts would be incorrectly decremented). +inline void free_cold_frame_backing(void* ptr, size_t frame_size) noexcept { + if (!ptr) return; + // vthread_owner uses bump allocation; individual frees are no-ops. + // Memory is reclaimed when the owning domain is destroyed. + if (vthread_owner::inspect_allocation(ptr).found) { + return; + } + // frame_allocator: return the block to the pool without running any destructor. 
+ frame_allocator::deallocate(ptr, frame_size); +} + template class task; @@ -269,10 +331,28 @@ class task { // Custom allocator for coroutine frames void* operator new(size_t size) { + promise_base::set_next_frame_size(size); + if (auto* owner = static_cast<::elio::coro::vthread_owner*>(promise_base::current_owner())) { + if (void* ptr = owner->allocate(size)) { + return ptr; + } + } return frame_allocator::allocate(size); } void operator delete(void* ptr, size_t size) noexcept { + auto owner_alloc = ::elio::coro::vthread_owner::inspect_allocation(ptr); + if (owner_alloc.found) { + if (owner_alloc.is_root) { + delete static_cast<::elio::coro::vthread_owner*>(owner_alloc.owner); + } + return; + } + + auto frame_owner = frame_allocator::inspect_owner_metadata(ptr); + if (frame_owner.found && frame_owner.is_root) { + delete static_cast<::elio::coro::vthread_owner*>(frame_owner.owner); + } frame_allocator::deallocate(ptr, size); } }; @@ -304,6 +384,18 @@ class task { /// Spawn this task on the current scheduler (fire-and-forget) /// The task will run asynchronously and self-destruct when complete void go() { + if (handle_) { + const size_t frame_size = handle_.promise().frame_size(); + void* old_ptr = handle_.address(); + void* relocated = relocate_spawn_go_root_frame(old_ptr, frame_size); + if (relocated != old_ptr) { + handle_ = handle_type::from_address(relocated); + // Do NOT call old.destroy(): that would run destructors and + // double-destroy captured objects now owned by the relocated frame. + // Instead, free only the backing allocation. 
+ free_cold_frame_backing(old_ptr, frame_size); + } + } runtime::schedule_handle(release()); } @@ -314,7 +406,15 @@ class task { [[nodiscard]] bool await_ready() const noexcept { return false; } [[nodiscard]] std::coroutine_handle<> await_suspend(std::coroutine_handle<> awaiter) noexcept { - handle_.promise().continuation_ = awaiter; + auto& promise = handle_.promise(); + auto* activation_parent = promise_base::from_handle_address(awaiter.address()); + promise.bind_activation_parent_once(activation_parent); + + void* owner = promise_base::current_owner(); + if (owner) { + promise.bind_vthread_owner_once(owner); + } + promise.continuation_ = awaiter; return handle_; } @@ -351,10 +451,28 @@ class task { // Custom allocator for coroutine frames void* operator new(size_t size) { + promise_base::set_next_frame_size(size); + if (auto* owner = static_cast<::elio::coro::vthread_owner*>(promise_base::current_owner())) { + if (void* ptr = owner->allocate(size)) { + return ptr; + } + } return frame_allocator::allocate(size); } void operator delete(void* ptr, size_t size) noexcept { + auto owner_alloc = ::elio::coro::vthread_owner::inspect_allocation(ptr); + if (owner_alloc.found) { + if (owner_alloc.is_root) { + delete static_cast<::elio::coro::vthread_owner*>(owner_alloc.owner); + } + return; + } + + auto frame_owner = frame_allocator::inspect_owner_metadata(ptr); + if (frame_owner.found && frame_owner.is_root) { + delete static_cast<::elio::coro::vthread_owner*>(frame_owner.owner); + } frame_allocator::deallocate(ptr, size); } }; @@ -386,6 +504,18 @@ class task { /// Spawn this task on the current scheduler (fire-and-forget) /// The task will run asynchronously and self-destruct when complete void go() { + if (handle_) { + const size_t frame_size = handle_.promise().frame_size(); + void* old_ptr = handle_.address(); + void* relocated = relocate_spawn_go_root_frame(old_ptr, frame_size); + if (relocated != old_ptr) { + handle_ = handle_type::from_address(relocated); + // Do 
NOT call old.destroy(): that would run destructors and + // double-destroy captured objects now owned by the relocated frame. + // Instead, free only the backing allocation. + free_cold_frame_backing(old_ptr, frame_size); + } + } runtime::schedule_handle(release()); } @@ -396,7 +526,15 @@ class task { [[nodiscard]] bool await_ready() const noexcept { return false; } [[nodiscard]] std::coroutine_handle<> await_suspend(std::coroutine_handle<> awaiter) noexcept { - handle_.promise().continuation_ = awaiter; + auto& promise = handle_.promise(); + auto* activation_parent = promise_base::from_handle_address(awaiter.address()); + promise.bind_activation_parent_once(activation_parent); + + void* owner = promise_base::current_owner(); + if (owner) { + promise.bind_vthread_owner_once(owner); + } + promise.continuation_ = awaiter; return handle_; } diff --git a/include/elio/coro/vthread_owner.hpp b/include/elio/coro/vthread_owner.hpp new file mode 100644 index 0000000..337edd2 --- /dev/null +++ b/include/elio/coro/vthread_owner.hpp @@ -0,0 +1,214 @@ +#pragma once + +#include +#include +#include +#include +#include +#include + +namespace elio::coro { + +/// Segmented owner storage for coroutine frames within one vthread domain. +/// +/// This allocator prioritizes pointer stability and simple O(1) bump allocation +/// in the active segment. Individual frame deallocation is intentionally a no-op; +/// memory is reclaimed when the owner is destroyed. 
+class vthread_owner { +public: + static constexpr size_t INITIAL_SEGMENT_SIZE = 4096; + static constexpr uint64_t ALLOCATION_MAGIC = 0x564F574E4552464DULL; + + struct allocation_info { + vthread_owner* owner = nullptr; + bool is_root = false; + bool found = false; + }; + + vthread_owner() { + add_segment(INITIAL_SEGMENT_SIZE); + } + + ~vthread_owner() { + segment* seg = head_; + while (seg) { + segment* next = seg->next; + unregister_segment(seg->data, seg->capacity); + ::operator delete(seg->data); + delete seg; + seg = next; + } + } + + vthread_owner(const vthread_owner&) = delete; + vthread_owner& operator=(const vthread_owner&) = delete; + vthread_owner(vthread_owner&&) = delete; + vthread_owner& operator=(vthread_owner&&) = delete; + + [[nodiscard]] void* allocate(size_t size, + size_t alignment = alignof(std::max_align_t), + bool is_root = false) { + if (size == 0) return nullptr; + if (alignment == 0) alignment = alignof(std::max_align_t); + if (!current_) return nullptr; + + void* ptr = try_allocate_in_segment(current_, size, alignment, this, is_root); + if (ptr) return ptr; + + const size_t required = size + header_size(alignment) + alignment; + const size_t next_size = std::max(required, current_->capacity * 2); + add_segment(next_size); + return try_allocate_in_segment(current_, size, alignment, this, is_root); + } + + static void mark_root_allocation(void* ptr, bool is_root) noexcept { + auto* header = header_from_user(ptr); + if (!header || header->magic != ALLOCATION_MAGIC) return; + header->is_root = is_root; + } + + [[nodiscard]] static allocation_info inspect_allocation(const void* ptr) noexcept { + if (!is_in_registered_segment(ptr)) { + return {}; + } + + auto* header = header_from_user(ptr); + if (!header || header->magic != ALLOCATION_MAGIC) { + return {}; + } + + return allocation_info{ + .owner = header->owner, + .is_root = header->is_root, + .found = true, + }; + } + + [[nodiscard]] bool owns_address(const void* ptr) const noexcept { + auto 
addr = reinterpret_cast(ptr); + segment* seg = head_; + while (seg) { + auto begin = reinterpret_cast(seg->data); + auto end = begin + seg->capacity; + if (addr >= begin && addr < end) { + return true; + } + seg = seg->next; + } + return false; + } + +private: + struct segment_range { + uintptr_t begin; + uintptr_t end; + }; + + struct allocation_header { + uint64_t magic; + vthread_owner* owner; + bool is_root; + }; + + struct segment { + char* data; + size_t capacity; + size_t used; + segment* next; + }; + + static size_t align_up(size_t value, size_t alignment) noexcept { + const size_t mask = alignment - 1; + return (value + mask) & ~mask; + } + + static size_t header_size(size_t alignment) noexcept { + return align_up(sizeof(allocation_header), alignment); + } + + static allocation_header* header_from_user(const void* ptr) noexcept { + if (!ptr) return nullptr; + auto* bytes = static_cast(ptr); + auto* header = reinterpret_cast(bytes - sizeof(allocation_header)); + return const_cast(header); + } + + static bool is_in_registered_segment(const void* ptr) { + if (!ptr) return false; + const auto addr = reinterpret_cast(ptr); + std::lock_guard lock(segment_registry_mutex_); + for (const auto& range : segment_registry_) { + if (addr >= range.begin && addr < range.end) { + return true; + } + } + return false; + } + + static void register_segment(const char* data, size_t capacity) { + std::lock_guard lock(segment_registry_mutex_); + segment_registry_.push_back(segment_range{ + .begin = reinterpret_cast(data), + .end = reinterpret_cast(data) + capacity, + }); + } + + static void unregister_segment(const char* data, size_t capacity) { + const auto begin = reinterpret_cast(data); + const auto end = begin + capacity; + std::lock_guard lock(segment_registry_mutex_); + auto it = std::remove_if(segment_registry_.begin(), segment_registry_.end(), + [&](const segment_range& range) { + return range.begin == begin && range.end == end; + }); + segment_registry_.erase(it, 
segment_registry_.end()); + } + + static void* try_allocate_in_segment(segment* seg, + size_t size, + size_t alignment, + vthread_owner* owner, + bool is_root) noexcept { + const size_t header = header_size(alignment); + const size_t offset = align_up(seg->used + header, alignment); + if (offset + size > seg->capacity) { + return nullptr; + } + + auto* allocation = reinterpret_cast(seg->data + offset - sizeof(allocation_header)); + allocation->magic = ALLOCATION_MAGIC; + allocation->owner = owner; + allocation->is_root = is_root; + + void* ptr = seg->data + offset; + seg->used = offset + size; + return ptr; + } + + void add_segment(size_t capacity) { + auto* seg = new segment{ + .data = static_cast(::operator new(capacity)), + .capacity = capacity, + .used = 0, + .next = nullptr, + }; + register_segment(seg->data, seg->capacity); + + if (!head_) { + head_ = seg; + current_ = seg; + return; + } + + current_->next = seg; + current_ = seg; + } + + segment* head_ = nullptr; + segment* current_ = nullptr; + + static inline std::mutex segment_registry_mutex_{}; + static inline std::vector segment_registry_{}; +}; + +} // namespace elio::coro diff --git a/include/elio/debug.hpp b/include/elio/debug.hpp index a387dd4..0348956 100644 --- a/include/elio/debug.hpp +++ b/include/elio/debug.hpp @@ -17,7 +17,7 @@ /// elio list # List all vthreads /// elio workers # Show worker information /// -/// In LLDB: command script import /path/to/elio-lldb.py +/// In LLDB: command script import /path/to/elio_lldb.py /// elio bt # Show all vthread backtraces /// /// Command line: diff --git a/include/elio/elio.hpp b/include/elio/elio.hpp index 91938b1..86157a7 100644 --- a/include/elio/elio.hpp +++ b/include/elio/elio.hpp @@ -40,6 +40,7 @@ // Networking #include "net/tcp.hpp" +#include "net/resolve.hpp" #include "net/uds.hpp" // Timers diff --git a/include/elio/http/client_base.hpp b/include/elio/http/client_base.hpp index c422c2f..1ed335b 100644 --- a/include/elio/http/client_base.hpp +++ 
b/include/elio/http/client_base.hpp @@ -9,6 +9,7 @@ /// - Connection utility functions #include +#include #include #include #include @@ -16,9 +17,31 @@ #include #include +#include +#include namespace elio::http { +namespace detail { + +inline size_t next_rotation_offset(const std::string& host, uint16_t port, size_t count) { + if (count == 0) { + return 0; + } + + static std::mutex mutex; + static std::unordered_map state; + + std::lock_guard lock(mutex); + std::string key = host + ":" + std::to_string(port); + size_t& cursor = state[key]; + size_t offset = cursor % count; + cursor = (cursor + 1) % count; + return offset; +} + +} // namespace detail + /// Base configuration shared by all HTTP-based clients /// Can be embedded in more specific configuration structures struct base_client_config { @@ -27,6 +50,8 @@ struct base_client_config { size_t read_buffer_size = 8192; ///< Read buffer size std::string user_agent; ///< User-Agent header (empty = no header) bool verify_certificate = true; ///< Verify TLS certificates + net::resolve_options resolve_options = net::default_cached_resolve_options(); ///< DNS resolve/cache behavior + bool rotate_resolved_addresses = true; ///< Rotate start index across resolved addresses }; /// Initialize a TLS context for client use with default settings @@ -49,28 +74,56 @@ inline void init_client_tls_context(tls::tls_context& ctx, bool verify_certifica /// @return Connected stream or std::nullopt on error inline coro::task> client_connect(std::string_view host, uint16_t port, bool secure, - tls::tls_context* tls_ctx) { + tls::tls_context* tls_ctx, + net::resolve_options resolve_opts = net::default_cached_resolve_options(), + bool rotate_resolved_addresses = true) { + + auto addresses = co_await net::resolve_all(host, port, resolve_opts); + if (addresses.empty()) { + ELIO_LOG_ERROR("Failed to resolve {}:{}: {}", host, port, strerror(errno)); + co_return std::nullopt; + } + + size_t offset = rotate_resolved_addresses + ? 
detail::next_rotation_offset(std::string(host), port, addresses.size()) + : 0; + if (secure) { if (!tls_ctx) { ELIO_LOG_ERROR("TLS context required for secure connection to {}:{}", host, port); co_return std::nullopt; } - auto result = co_await tls::tls_connect(*tls_ctx, host, port); - if (!result) { - ELIO_LOG_ERROR("Failed to connect to {}:{}: {}", host, port, strerror(errno)); - co_return std::nullopt; + for (size_t i = 0; i < addresses.size(); ++i) { + const auto& addr = addresses[(offset + i) % addresses.size()]; + auto tcp = co_await net::tcp_connect(addr); + if (!tcp) { + continue; + } + + tls::tls_stream tls_stream(std::move(*tcp), *tls_ctx); + tls_stream.set_hostname(host); + auto hs = co_await tls_stream.handshake(); + if (!hs) { + continue; + } + + co_return net::stream(std::move(tls_stream)); } - co_return net::stream(std::move(*result)); + ELIO_LOG_ERROR("Failed to connect to {}:{}: {}", host, port, strerror(errno)); + co_return std::nullopt; } else { - auto result = co_await net::tcp_connect(host, port); - if (!result) { - ELIO_LOG_ERROR("Failed to connect to {}:{}: {}", host, port, strerror(errno)); - co_return std::nullopt; + for (size_t i = 0; i < addresses.size(); ++i) { + const auto& addr = addresses[(offset + i) % addresses.size()]; + auto result = co_await net::tcp_connect(addr); + if (result) { + co_return net::stream(std::move(*result)); + } } - co_return net::stream(std::move(*result)); + ELIO_LOG_ERROR("Failed to connect to {}:{}: {}", host, port, strerror(errno)); + co_return std::nullopt; } } diff --git a/include/elio/http/http2_client.hpp b/include/elio/http/http2_client.hpp index ddcf0e0..ac2e287 100644 --- a/include/elio/http/http2_client.hpp +++ b/include/elio/http/http2_client.hpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -16,6 +17,7 @@ #include #include #include +#include namespace elio::http { @@ -27,6 +29,8 @@ struct h2_client_config { uint32_t initial_window_size = 65535; ///< Initial flow 
control window size std::string user_agent = "elio-http2/1.0"; ///< User-Agent header bool enable_push = false; ///< Enable server push (rarely needed) + net::resolve_options resolve_options = net::default_cached_resolve_options(); ///< DNS resolve/cache behavior + bool rotate_resolved_addresses = true; ///< Rotate start index across resolved addresses }; /// HTTP/2 connection wrapper @@ -197,44 +201,58 @@ class h2_client { co_return std::move(conn); } - // Create new HTTP/2 connection - // First establish TCP connection - auto tcp_result = co_await net::tcp_connect(host, port); - if (!tcp_result) { - ELIO_LOG_ERROR("Failed to connect to {}:{}", host, port); + auto addresses = co_await net::resolve_all(host, port, config_.resolve_options); + if (addresses.empty()) { + ELIO_LOG_ERROR("Failed to resolve {}:{}", host, port); co_return std::nullopt; } - - // Create TLS stream with ALPN - tls::tls_stream tls_stream(std::move(*tcp_result), tls_ctx_); - tls_stream.set_hostname(host); - - // Perform TLS handshake - auto hs_result = co_await tls_stream.handshake(); - if (!hs_result) { - ELIO_LOG_ERROR("TLS handshake failed for {}:{}", host, port); - co_return std::nullopt; - } - - // Verify ALPN negotiated h2 - auto alpn = tls_stream.alpn_protocol(); - if (alpn != "h2") { - ELIO_LOG_ERROR("Server does not support HTTP/2 (ALPN: {})", - alpn.empty() ? 
"(none)" : std::string(alpn)); - co_return std::nullopt; + + size_t offset = 0; + if (config_.rotate_resolved_addresses) { + static std::mutex rotation_mutex; + static std::unordered_map rotation_cursor; + std::lock_guard lock(rotation_mutex); + size_t& cursor = rotation_cursor[key]; + offset = cursor % addresses.size(); + cursor = (cursor + 1) % addresses.size(); } - - ELIO_LOG_DEBUG("HTTP/2 connection established to {}:{}", host, port); - - h2_connection conn(std::move(tls_stream)); - - // Process initial frames (settings exchange) - if (!co_await conn.session()->process()) { - ELIO_LOG_ERROR("HTTP/2 session initialization failed"); - co_return std::nullopt; + + for (size_t i = 0; i < addresses.size(); ++i) { + const auto& addr = addresses[(offset + i) % addresses.size()]; + + auto tcp_result = co_await net::tcp_connect(addr); + if (!tcp_result) { + continue; + } + + tls::tls_stream tls_stream(std::move(*tcp_result), tls_ctx_); + tls_stream.set_hostname(host); + + auto hs_result = co_await tls_stream.handshake(); + if (!hs_result) { + continue; + } + + auto alpn = tls_stream.alpn_protocol(); + if (alpn != "h2") { + ELIO_LOG_ERROR("Server does not support HTTP/2 (ALPN: {})", + alpn.empty() ? 
"(none)" : std::string(alpn)); + continue; + } + + ELIO_LOG_DEBUG("HTTP/2 connection established to {}:{}", host, port); + + h2_connection conn(std::move(tls_stream)); + if (!co_await conn.session()->process()) { + ELIO_LOG_ERROR("HTTP/2 session initialization failed"); + continue; + } + + co_return std::move(conn); } - - co_return std::move(conn); + + ELIO_LOG_ERROR("Failed to connect to any resolved address for {}:{}", host, port); + co_return std::nullopt; } /// Return a connection to the pool diff --git a/include/elio/http/http_client.hpp b/include/elio/http/http_client.hpp index c4b11a6..0fd9aab 100644 --- a/include/elio/http/http_client.hpp +++ b/include/elio/http/http_client.hpp @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -39,6 +40,8 @@ using connection = net::stream; /// Connection pool for HTTP keep-alive class connection_pool { public: + static constexpr size_t shard_count = 16; + explicit connection_pool(client_config config = {}) : config_(config) {} @@ -48,12 +51,13 @@ class connection_pool { bool secure, tls::tls_context* tls_ctx = nullptr) { std::string key = make_key(host, port, secure); + auto& shard = shard_for(key); // Try to get an existing connection { - std::lock_guard lock(mutex_); - auto it = pools_.find(key); - if (it != pools_.end() && !it->second.empty()) { + std::lock_guard lock(shard.mutex); + auto it = shard.pools.find(key); + if (it != shard.pools.end() && !it->second.empty()) { auto conn = std::move(it->second.front()); it->second.pop_front(); @@ -68,7 +72,13 @@ class connection_pool { } // Create new connection using client_connect utility - auto result = co_await client_connect(host, port, secure, tls_ctx); + auto result = co_await client_connect( + host, + port, + secure, + tls_ctx, + config_.resolve_options, + config_.rotate_resolved_addresses); if (!result) { co_return std::nullopt; } @@ -79,9 +89,10 @@ class connection_pool { /// Return a connection to the pool void release(const 
std::string& host, uint16_t port, bool secure, connection conn) { std::string key = make_key(host, port, secure); + auto& shard = shard_for(key); - std::lock_guard lock(mutex_); - auto& pool = pools_[key]; + std::lock_guard lock(shard.mutex); + auto& pool = shard.pools[key]; if (pool.size() < config_.max_connections_per_host) { conn.touch(); @@ -92,18 +103,28 @@ class connection_pool { /// Clear all pooled connections void clear() { - std::lock_guard lock(mutex_); - pools_.clear(); + for (auto& shard : shards_) { + std::lock_guard lock(shard.mutex); + shard.pools.clear(); + } } private: static std::string make_key(const std::string& host, uint16_t port, bool secure) { return (secure ? "https://" : "http://") + host + ":" + std::to_string(port); } + + struct pool_shard { + std::mutex mutex; + std::unordered_map> pools; + }; + + pool_shard& shard_for(const std::string& key) noexcept { + return shards_[std::hash{}(key) % shard_count]; + } client_config config_; - std::mutex mutex_; - std::unordered_map> pools_; + std::array shards_; }; /// HTTP client diff --git a/include/elio/http/sse_client.hpp b/include/elio/http/sse_client.hpp index a42537a..0eed2ad 100644 --- a/include/elio/http/sse_client.hpp +++ b/include/elio/http/sse_client.hpp @@ -353,7 +353,12 @@ class sse_client { // Establish connection using shared utility auto conn_result = co_await http::client_connect( - url_.host, url_.effective_port(), url_.is_secure(), &tls_ctx_); + url_.host, + url_.effective_port(), + url_.is_secure(), + &tls_ctx_, + config_.resolve_options, + config_.rotate_resolved_addresses); if (!conn_result) { state_ = client_state::disconnected; co_return false; diff --git a/include/elio/http/websocket_client.hpp b/include/elio/http/websocket_client.hpp index 89d180f..1c1859d 100644 --- a/include/elio/http/websocket_client.hpp +++ b/include/elio/http/websocket_client.hpp @@ -248,7 +248,13 @@ class ws_client { } // Establish connection using shared utility - auto conn_result = co_await 
http::client_connect(host_, port, secure_, &tls_ctx_); + auto conn_result = co_await http::client_connect( + host_, + port, + secure_, + &tls_ctx_, + config_.resolve_options, + config_.rotate_resolved_addresses); if (!conn_result) { co_return false; } diff --git a/include/elio/net/resolve.hpp b/include/elio/net/resolve.hpp new file mode 100644 index 0000000..57e9de8 --- /dev/null +++ b/include/elio/net/resolve.hpp @@ -0,0 +1,382 @@ +#pragma once + +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace elio::net { + +struct resolve_cache_key { + std::string host; + uint16_t port = 0; + + bool operator==(const resolve_cache_key& other) const noexcept { + return host == other.host && port == other.port; + } +}; + +struct resolve_cache_key_hash { + size_t operator()(const resolve_cache_key& key) const noexcept { + size_t seed = std::hash{}(key.host); + seed ^= static_cast(key.port) + 0x9e3779b97f4a7c15ULL + (seed << 6U) + (seed >> 2U); + return seed; + } +}; + +struct resolve_cache_stats { + size_t cache_hits = 0; + size_t cache_misses = 0; + size_t cache_stores = 0; + size_t cache_invalidations = 0; +}; + +struct resolve_cache_entry { + std::vector addresses; + std::chrono::steady_clock::time_point expires_at{}; +}; + +class resolve_cache { +public: + static constexpr size_t shard_count = 16; + + bool try_get(const resolve_cache_key& key, std::vector& out) { + auto& shard = shard_for(key); + std::lock_guard lock(shard.mutex); + prune_expired_locked(shard.entries, std::chrono::steady_clock::now()); + + auto it = shard.entries.find(key); + if (it == shard.entries.end()) { + return false; + } + + out = it->second.addresses; + cache_hits_.fetch_add(1, std::memory_order_relaxed); + return true; + } + + void store(const resolve_cache_key& key, + std::vector addresses, + std::chrono::seconds ttl) { + auto& shard = shard_for(key); + 
std::lock_guard lock(shard.mutex); + resolve_cache_entry entry; + entry.addresses = std::move(addresses); + entry.expires_at = std::chrono::steady_clock::now() + ttl; + shard.entries[key] = std::move(entry); + cache_stores_.fetch_add(1, std::memory_order_relaxed); + } + + bool invalidate(const std::string_view host, uint16_t port) { + resolve_cache_key key{std::string(host), port}; + auto& shard = shard_for(key); + std::lock_guard lock(shard.mutex); + size_t erased = shard.entries.erase(key); + if (erased > 0) { + cache_invalidations_.fetch_add(erased, std::memory_order_relaxed); + return true; + } + return false; + } + + size_t invalidate_host(const std::string_view host) { + size_t removed = 0; + for (auto& shard : shards_) { + std::lock_guard lock(shard.mutex); + for (auto it = shard.entries.begin(); it != shard.entries.end();) { + if (it->first.host == host) { + it = shard.entries.erase(it); + ++removed; + } else { + ++it; + } + } + } + if (removed > 0) { + cache_invalidations_.fetch_add(removed, std::memory_order_relaxed); + } + return removed; + } + + void clear() { + size_t removed = 0; + for (auto& shard : shards_) { + std::lock_guard lock(shard.mutex); + removed += shard.entries.size(); + shard.entries.clear(); + } + cache_invalidations_.fetch_add(removed, std::memory_order_relaxed); + } + + resolve_cache_stats stats() const noexcept { + resolve_cache_stats out; + out.cache_hits = cache_hits_.load(std::memory_order_relaxed); + out.cache_misses = cache_misses_.load(std::memory_order_relaxed); + out.cache_stores = cache_stores_.load(std::memory_order_relaxed); + out.cache_invalidations = cache_invalidations_.load(std::memory_order_relaxed); + return out; + } + + void record_miss() { + cache_misses_.fetch_add(1, std::memory_order_relaxed); + } + +private: + struct cache_shard { + std::mutex mutex; + std::unordered_map entries; + }; + + cache_shard& shard_for(const resolve_cache_key& key) noexcept { + size_t idx = resolve_cache_key_hash{}(key) % shard_count; + 
return shards_[idx]; + } + + static void prune_expired_locked( + std::unordered_map& entries, + std::chrono::steady_clock::time_point now) { + for (auto it = entries.begin(); it != entries.end();) { + if (it->second.expires_at <= now) { + it = entries.erase(it); + } else { + ++it; + } + } + } + + std::array shards_; + std::atomic cache_hits_{0}; + std::atomic cache_misses_{0}; + std::atomic cache_stores_{0}; + std::atomic cache_invalidations_{0}; +}; + +inline resolve_cache& default_resolve_cache() { + static resolve_cache cache; + return cache; +} + +struct resolve_options { + bool use_cache = false; + resolve_cache* cache = nullptr; + std::chrono::seconds positive_ttl{60}; + std::chrono::seconds negative_ttl{5}; +}; + +inline resolve_options default_cached_resolve_options() { + resolve_options opts; + opts.use_cache = true; + opts.cache = &default_resolve_cache(); + return opts; +} + +struct resolve_waiter_state { + std::vector results; + int error = 0; + runtime::scheduler* scheduler = nullptr; + std::coroutine_handle<> handle; + size_t saved_affinity = coro::NO_AFFINITY; + void* handle_address = nullptr; + + void restore_affinity() const noexcept { + if (!handle_address) { + return; + } + auto* promise = coro::get_promise_base(handle_address); + if (!promise) { + return; + } + if (saved_affinity == coro::NO_AFFINITY) { + promise->clear_affinity(); + } else { + promise->set_affinity(saved_affinity); + } + } +}; + +inline bool try_parse_ipv4_literal(std::string_view host, uint16_t port, + std::vector& out) { + struct in_addr addr{}; + std::string host_str(host); + if (inet_pton(AF_INET, host_str.c_str(), &addr) != 1) { + return false; + } + + ipv4_address parsed; + parsed.addr = addr.s_addr; + parsed.port = port; + out.push_back(socket_address(parsed)); + return true; +} + +inline bool try_parse_ipv6_literal(std::string_view host, uint16_t port, + std::vector& out) { + std::string ip_str(host); + uint32_t scope_id = 0; + size_t scope_pos = ip_str.find('%'); + if 
(scope_pos != std::string::npos) { + std::string scope_name = ip_str.substr(scope_pos + 1); + ip_str = ip_str.substr(0, scope_pos); + scope_id = if_nametoindex(scope_name.c_str()); + } + + struct in6_addr addr{}; + if (inet_pton(AF_INET6, ip_str.c_str(), &addr) != 1) { + return false; + } + + ipv6_address parsed; + parsed.addr = addr; + parsed.port = port; + parsed.scope_id = scope_id; + out.push_back(socket_address(parsed)); + return true; +} + +class resolve_all_awaitable { +public: + resolve_all_awaitable(std::string_view host, uint16_t port, resolve_options options) + : host_(host) + , key_{std::string(host), port} + , options_(options) + , state_(std::make_shared()) { + if (host.empty() || host == "::" || host == "0.0.0.0") { + state_->results.push_back(socket_address(host, port)); + return; + } + + if (host.find(':') != std::string_view::npos) { + try_parse_ipv6_literal(host, port, state_->results); + return; + } + + try_parse_ipv4_literal(host, port, state_->results); + } + + bool await_ready() const noexcept { + if (!state_->results.empty()) { + return true; + } + + if (!options_.use_cache) { + return false; + } + + resolve_cache* cache = options_.cache ? 
options_.cache : &default_resolve_cache(); + if (cache->try_get(key_, state_->results)) { + return true; + } + + cache->record_miss(); + return false; + } + + template + bool await_suspend(std::coroutine_handle awaiter) { + state_->handle = awaiter; + state_->scheduler = runtime::scheduler::current(); + state_->handle_address = awaiter.address(); + + if constexpr (std::is_base_of_v) { + state_->saved_affinity = awaiter.promise().affinity(); + auto* worker = runtime::worker_thread::current(); + if (worker) { + awaiter.promise().set_affinity(worker->worker_id()); + } + } + + auto host = host_; + auto key = key_; + auto options = options_; + auto state = state_; + + std::thread([host = std::move(host), key = std::move(key), options, state]() mutable { + struct addrinfo hints{}; + struct addrinfo* result = nullptr; + hints.ai_family = AF_UNSPEC; + hints.ai_socktype = SOCK_STREAM; + + std::string service = std::to_string(key.port); + int rc = getaddrinfo(host.c_str(), service.c_str(), &hints, &result); + if (rc == 0 && result) { + for (auto* current = result; current != nullptr; current = current->ai_next) { + if (current->ai_family == AF_INET6) { + auto* sa = reinterpret_cast(current->ai_addr); + state->results.push_back(socket_address(ipv6_address(*sa))); + } else if (current->ai_family == AF_INET) { + auto* sa = reinterpret_cast(current->ai_addr); + state->results.push_back(socket_address(ipv4_address(*sa))); + } + } + freeaddrinfo(result); + } + + if (state->results.empty()) { + state->error = (rc == EAI_SYSTEM) ? errno : EHOSTUNREACH; + if (options.use_cache) { + resolve_cache* cache = options.cache ? options.cache : &default_resolve_cache(); + cache->store(key, {}, options.negative_ttl); + } + } else if (options.use_cache) { + resolve_cache* cache = options.cache ? 
options.cache : &default_resolve_cache(); + cache->store(key, state->results, options.positive_ttl); + } + + if (state->scheduler && state->scheduler->is_running()) { + state->scheduler->spawn(state->handle); + } else { + runtime::schedule_handle(state->handle); + } + }).detach(); + + return true; + } + + std::vector await_resume() { + state_->restore_affinity(); + if (state_->results.empty()) { + errno = state_->error; + } + return state_->results; + } + +private: + std::string host_; + resolve_cache_key key_; + resolve_options options_; + std::shared_ptr state_; +}; + +inline auto resolve_all(std::string_view host, + uint16_t port, + resolve_options options = {}) { + return resolve_all_awaitable(host, port, options); +} + +inline coro::task> resolve_hostname(std::string_view host, + uint16_t port, + resolve_options options = {}) { + auto results = co_await resolve_all(host, port, options); + if (results.empty()) { + co_return std::nullopt; + } + co_return results.front(); +} + +} // namespace elio::net diff --git a/include/elio/net/stream.hpp b/include/elio/net/stream.hpp index 8e15627..5263f53 100644 --- a/include/elio/net/stream.hpp +++ b/include/elio/net/stream.hpp @@ -8,6 +8,7 @@ /// duplication in HTTP, WebSocket, and SSE clients. 
#include +#include #include #include #include @@ -178,18 +179,24 @@ class stream { /// @return Connected stream on success, std::nullopt on error inline coro::task> connect(std::string_view host, uint16_t port, bool secure = false, - tls::tls_context* tls_ctx = nullptr) { + tls::tls_context* tls_ctx = nullptr, + resolve_options resolve_opts = default_cached_resolve_options()) { if (secure) { if (!tls_ctx) { co_return std::nullopt; } - auto result = co_await tls::tls_connect(*tls_ctx, host, port); + auto result = co_await tls::tls_connect(*tls_ctx, host, port, resolve_opts); if (!result) { co_return std::nullopt; } co_return stream(std::move(*result)); } else { - auto result = co_await tcp_connect(host, port); + auto resolved = co_await resolve_hostname(host, port, resolve_opts); + if (!resolved) { + co_return std::nullopt; + } + + auto result = co_await tcp_connect(*resolved); if (!result) { co_return std::nullopt; } diff --git a/include/elio/net/tcp.hpp b/include/elio/net/tcp.hpp index 126161c..ffff828 100644 --- a/include/elio/net/tcp.hpp +++ b/include/elio/net/tcp.hpp @@ -48,23 +48,10 @@ struct ipv4_address { if (ip.empty() || ip == "0.0.0.0") { addr = INADDR_ANY; } else { - // First try as numeric IP - if (inet_pton(AF_INET, std::string(ip).c_str(), &addr) != 1) { - // Not a numeric IP, try DNS resolution - struct addrinfo hints{}; - struct addrinfo* result = nullptr; - hints.ai_family = AF_INET; - hints.ai_socktype = SOCK_STREAM; - - std::string ip_str(ip); - if (getaddrinfo(ip_str.c_str(), nullptr, &hints, &result) == 0 && result) { - auto* sa = reinterpret_cast(result->ai_addr); - addr = sa->sin_addr.s_addr; - freeaddrinfo(result); - } else { - ELIO_LOG_ERROR("Failed to resolve hostname: {}", ip); - addr = INADDR_ANY; - } + std::string ip_str(ip); + if (inet_pton(AF_INET, ip_str.c_str(), &addr) != 1) { + ELIO_LOG_ERROR("ipv4_address only accepts numeric IPv4 literals: {}", ip); + addr = INADDR_ANY; } } } @@ -115,23 +102,9 @@ struct ipv6_address { scope_id = 
if_nametoindex(scope_name.c_str()); } - // First try as numeric IP if (inet_pton(AF_INET6, ip_str.c_str(), &addr) != 1) { - // Not a numeric IP, try DNS resolution - struct addrinfo hints{}; - struct addrinfo* result = nullptr; - hints.ai_family = AF_INET6; - hints.ai_socktype = SOCK_STREAM; - - if (getaddrinfo(ip_str.c_str(), nullptr, &hints, &result) == 0 && result) { - auto* sa = reinterpret_cast(result->ai_addr); - addr = sa->sin6_addr; - scope_id = sa->sin6_scope_id; - freeaddrinfo(result); - } else { - ELIO_LOG_ERROR("Failed to resolve IPv6 hostname: {}", ip); - addr = IN6ADDR_ANY_INIT; - } + ELIO_LOG_ERROR("ipv6_address only accepts numeric IPv6 literals: {}", ip); + addr = IN6ADDR_ANY_INIT; } } } @@ -201,32 +174,8 @@ class socket_address { data_ = ipv6_address(host, port); return; } - - // Try to resolve and prefer IPv6 - struct addrinfo hints{}; - struct addrinfo* result = nullptr; - hints.ai_family = AF_UNSPEC; - hints.ai_socktype = SOCK_STREAM; - - std::string host_str(host); - if (getaddrinfo(host_str.c_str(), nullptr, &hints, &result) == 0 && result) { - // Use the first result - if (result->ai_family == AF_INET6) { - auto* sa = reinterpret_cast(result->ai_addr); - ipv6_address addr(*sa); - addr.port = port; - data_ = addr; - } else { - auto* sa = reinterpret_cast(result->ai_addr); - ipv4_address addr(*sa); - addr.port = port; - data_ = addr; - } - freeaddrinfo(result); - } else { - // Fallback to IPv4 - data_ = ipv4_address(host, port); - } + + data_ = ipv4_address(host, port); } /// Construct from sockaddr_storage @@ -773,10 +722,4 @@ inline auto tcp_connect(const socket_address& addr, return tcp_connect_awaitable(addr, opts); } -/// Connect to a remote TCP server by host and port (auto-detects IPv4/IPv6) -inline auto tcp_connect(std::string_view host, uint16_t port, - const tcp_options& opts = {}) { - return tcp_connect_awaitable(socket_address(host, port), opts); -} - } // namespace elio::net diff --git a/include/elio/rpc/rpc_client.hpp 
b/include/elio/rpc/rpc_client.hpp index dcc0a72..7b2271f 100644 --- a/include/elio/rpc/rpc_client.hpp +++ b/include/elio/rpc/rpc_client.hpp @@ -26,11 +26,17 @@ #include #include #include +#include #include +#include +#include #include #include #include +#include +#include +#include namespace elio::rpc { @@ -73,6 +79,8 @@ class rpc_client : public std::enable_shared_from_this> { public: using stream_type = Stream; using ptr = std::shared_ptr; + + static constexpr size_t pending_shard_count = 16; /// Create a new RPC client from an existing stream static ptr create(Stream stream) { @@ -84,13 +92,54 @@ class rpc_client : public std::enable_shared_from_this> { static coro::task> connect(Args&&... args) requires std::is_same_v { - auto stream = co_await net::tcp_connect(std::forward(args)...); - if (!stream) { + if constexpr (requires { net::tcp_connect(std::forward(args)...); }) { + auto stream = co_await net::tcp_connect(std::forward(args)...); + if (!stream) { + co_return std::nullopt; + } + auto client = create(std::move(*stream)); + client->start_receive_loop(); + co_return client; + } else if constexpr ( + sizeof...(Args) == 2 && + std::is_convertible_v...>>, std::string_view> && + std::is_integral_v...>>>) { + auto forwarded = std::forward_as_tuple(std::forward(args)...); + std::string_view host = std::get<0>(forwarded); + uint16_t port = static_cast(std::get<1>(forwarded)); + + auto addresses = co_await net::resolve_all(host, port); + for (const auto& addr : addresses) { + auto stream = co_await net::tcp_connect(addr); + if (stream) { + auto client = create(std::move(*stream)); + client->start_receive_loop(); + co_return client; + } + } co_return std::nullopt; + } else { + static_assert(sizeof...(Args) == 0, + "rpc_client::connect arguments are not supported"); } - auto client = create(std::move(*stream)); - client->start_receive_loop(); - co_return client; + } + + /// Connect to a TCP server and create client with explicit resolve options + static coro::task> 
connect(std::string_view host, + uint16_t port, + net::resolve_options resolve_opts) + requires std::is_same_v + { + auto addresses = co_await net::resolve_all(host, port, resolve_opts); + for (const auto& addr : addresses) { + auto stream = co_await net::tcp_connect(addr); + if (stream) { + auto client = create(std::move(*stream)); + client->start_receive_loop(); + co_return client; + } + } + co_return std::nullopt; } /// Connect to a UDS server and create client @@ -128,15 +177,15 @@ class rpc_client : public std::enable_shared_from_this> { } // Cancel all pending requests - { - std::lock_guard lock(pending_mutex_); - for (auto& [id, req] : pending_requests_) { + for (auto& shard : pending_shards_) { + std::lock_guard lock(shard.mutex); + for (auto& [id, req] : shard.requests) { if (req->try_complete()) { req->error = rpc_error::connection_closed; req->completion_event.set(); } } - pending_requests_.clear(); + shard.requests.clear(); } } @@ -216,8 +265,9 @@ class rpc_client : public std::enable_shared_from_this> { auto pending = std::make_shared(); { - std::lock_guard lock(pending_mutex_); - pending_requests_[request_id] = pending; + auto& shard = pending_shard_for(request_id); + std::lock_guard lock(shard.mutex); + shard.requests[request_id] = pending; } // Register cancellation callback @@ -239,8 +289,9 @@ class rpc_client : public std::enable_shared_from_this> { bool sent = co_await write_frame(stream_, request_frame.first, request_frame.second); if (!sent) { - std::lock_guard lock(pending_mutex_); - pending_requests_.erase(request_id); + auto& shard = pending_shard_for(request_id); + std::lock_guard lock(shard.mutex); + shard.requests.erase(request_id); co_return rpc_result(rpc_error::connection_closed); } } @@ -279,8 +330,9 @@ class rpc_client : public std::enable_shared_from_this> { // Remove from pending { - std::lock_guard lock(pending_mutex_); - pending_requests_.erase(request_id); + auto& shard = pending_shard_for(request_id); + std::lock_guard 
lock(shard.mutex); + shard.requests.erase(request_id); } // Check result @@ -332,8 +384,9 @@ class rpc_client : public std::enable_shared_from_this> { auto pending = std::make_shared(); { - std::lock_guard lock(pending_mutex_); - pending_requests_[ping_id] = pending; + auto& shard = pending_shard_for(ping_id); + std::lock_guard lock(shard.mutex); + shard.requests[ping_id] = pending; } // Send ping @@ -345,8 +398,9 @@ class rpc_client : public std::enable_shared_from_this> { buffer_writer empty; bool sent = co_await write_frame(stream_, header, empty); if (!sent) { - std::lock_guard lock(pending_mutex_); - pending_requests_.erase(ping_id); + auto& shard = pending_shard_for(ping_id); + std::lock_guard lock(shard.mutex); + shard.requests.erase(ping_id); co_return false; } } @@ -374,8 +428,9 @@ class rpc_client : public std::enable_shared_from_this> { co_await pending->completion_event.wait(); { - std::lock_guard lock(pending_mutex_); - pending_requests_.erase(ping_id); + auto& shard = pending_shard_for(ping_id); + std::lock_guard lock(shard.mutex); + shard.requests.erase(ping_id); } co_return !pending->timed_out; @@ -449,9 +504,10 @@ class rpc_client : public std::enable_shared_from_this> { std::shared_ptr pending; { - std::lock_guard lock(pending_mutex_); - auto it = pending_requests_.find(header.request_id); - if (it == pending_requests_.end()) { + auto& shard = pending_shard_for(header.request_id); + std::lock_guard lock(shard.mutex); + auto it = shard.requests.find(header.request_id); + if (it == shard.requests.end()) { ELIO_LOG_WARNING("RPC client: received response for unknown request {}", header.request_id); return; @@ -472,9 +528,10 @@ class rpc_client : public std::enable_shared_from_this> { std::shared_ptr pending; { - std::lock_guard lock(pending_mutex_); - auto it = pending_requests_.find(request_id); - if (it == pending_requests_.end()) { + auto& shard = pending_shard_for(request_id); + std::lock_guard lock(shard.mutex); + auto it = 
shard.requests.find(request_id); + if (it == shard.requests.end()) { return; } pending = it->second; @@ -488,10 +545,17 @@ class rpc_client : public std::enable_shared_from_this> { Stream stream_; std::atomic closed_{false}; request_id_generator id_generator_; - - // Pending requests map - std::mutex pending_mutex_; - std::unordered_map> pending_requests_; + + struct pending_shard { + std::mutex mutex; + std::unordered_map> requests; + }; + + pending_shard& pending_shard_for(uint32_t request_id) noexcept { + return pending_shards_[request_id % pending_shard_count]; + } + + std::array pending_shards_; // Send mutex for serializing writes sync::mutex send_mutex_; diff --git a/include/elio/runtime/scheduler.hpp b/include/elio/runtime/scheduler.hpp index b060c69..22b0aaf 100644 --- a/include/elio/runtime/scheduler.hpp +++ b/include/elio/runtime/scheduler.hpp @@ -296,7 +296,24 @@ inline void schedule_handle(std::coroutine_handle<> handle) noexcept { sched->spawn(handle); } else { // No scheduler - run synchronously. Task self-destructs via final_suspend. 
- if (!handle.done()) handle.resume(); + if (!handle.done()) { + coro::ensure_vthread_owner(handle); + auto* frame = coro::get_promise_base(handle.address()); + void* previous_owner = nullptr; + if (frame) { + previous_owner = coro::promise_base::current_owner(); + coro::promise_base::set_current_owner(frame->vthread_owner()); + frame->mark_started(); + if (!frame->vthread_owner()) { + coro::promise_base::record_ownerless_resume(); + } + } + handle.resume(); + if (frame) { + coro::promise_base::set_current_owner(previous_owner); + coro::promise_base::record_owner_context_restore(); + } + } } } @@ -433,8 +450,28 @@ inline void worker_thread::run_task(std::coroutine_handle<> handle) noexcept { } if (!handle || handle.done()) [[unlikely]] return; + + coro::ensure_vthread_owner(handle); + auto* frame = coro::get_promise_base(handle.address()); + void* previous_owner = nullptr; + if (frame) { + previous_owner = coro::promise_base::current_owner(); + coro::promise_base::set_current_owner(frame->vthread_owner()); + frame->mark_started(); + frame->set_worker_id(static_cast(worker_id_)); + frame->set_state(coro::coroutine_state::running); + if (!frame->vthread_owner()) { + coro::promise_base::record_ownerless_resume(); + } + } handle.resume(); + + if (frame) { + coro::promise_base::set_current_owner(previous_owner); + coro::promise_base::record_owner_context_restore(); + } + tasks_executed_.fetch_add(1, std::memory_order_relaxed); update_last_task_time(); diff --git a/include/elio/tls/tls_stream.hpp b/include/elio/tls/tls_stream.hpp index 9bf427f..af50e59 100644 --- a/include/elio/tls/tls_stream.hpp +++ b/include/elio/tls/tls_stream.hpp @@ -2,6 +2,7 @@ #include #include +#include #include #include #include @@ -337,24 +338,33 @@ class tls_stream { /// @param port Port to connect to /// @return TLS stream on success, std::nullopt on error (check errno) inline coro::task> -tls_connect(tls_context& ctx, std::string_view host, uint16_t port) { - // First establish TCP connection 
- auto tcp_result = co_await net::tcp_connect(host, port); - if (!tcp_result) { +tls_connect(tls_context& ctx, + std::string_view host, + uint16_t port, + net::resolve_options resolve_opts = net::default_cached_resolve_options()) { + auto resolved = co_await net::resolve_all(host, port, resolve_opts); + if (resolved.empty()) { co_return std::nullopt; } - - // Create TLS stream - tls_stream stream(std::move(*tcp_result), ctx); - stream.set_hostname(host); - - // Perform handshake - auto hs_result = co_await stream.handshake(); - if (!hs_result) { - co_return std::nullopt; + + for (const auto& addr : resolved) { + auto tcp_result = co_await net::tcp_connect(addr); + if (!tcp_result) { + continue; + } + + tls_stream stream(std::move(*tcp_result), ctx); + stream.set_hostname(host); + + auto hs_result = co_await stream.handshake(); + if (!hs_result) { + continue; + } + + co_return std::move(stream); } - - co_return std::move(stream); + + co_return std::nullopt; } /// TLS listener for accepting secure connections diff --git a/tests/unit/test_io.cpp b/tests/unit/test_io.cpp index cc3bc11..0bcd8d3 100644 --- a/tests/unit/test_io.cpp +++ b/tests/unit/test_io.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include @@ -1011,6 +1012,75 @@ static task tcp_connect_regression_attempt( co_return; } +static task accept_n_connections( + tcp_listener& listener, + int count, + std::atomic& accepted) { + for (int i = 0; i < count; ++i) { + auto stream = co_await listener.accept(); + if (stream) { + accepted.fetch_add(1, std::memory_order_relaxed); + } + } + co_return; +} + +static task tcp_connect_hostname_attempt( + std::string host, + uint16_t port, + std::atomic& connected, + std::atomic& failed, + std::atomic& first_error) { + resolve_options options; + options.use_cache = true; + + auto addresses = co_await resolve_all(host, port, options); + if (addresses.empty()) { + failed.fetch_add(1, std::memory_order_relaxed); + int err = errno ? 
errno : EHOSTUNREACH; + int expected = 0; + first_error.compare_exchange_strong(expected, err); + co_return; + } + + int last_error = 0; + for (const auto& addr : addresses) { + auto stream = co_await tcp_connect(addr); + if (stream) { + connected.fetch_add(1, std::memory_order_relaxed); + co_return; + } + last_error = errno; + } + + failed.fetch_add(1, std::memory_order_relaxed); + int err = last_error ? last_error : EHOSTUNREACH; + int expected = 0; + first_error.compare_exchange_strong(expected, err); + co_return; +} + +static task resolve_hostname_attempt( + std::string host, + uint16_t port, + std::optional& resolved, + std::atomic& done) { + resolved = co_await resolve_hostname(host, port); + done.store(true, std::memory_order_relaxed); + co_return; +} + +static task resolve_all_attempt_with_options( + std::string host, + uint16_t port, + resolve_options options, + std::vector& resolved, + std::atomic& done) { + resolved = co_await resolve_all(host, port, options); + done.store(true, std::memory_order_relaxed); + co_return; +} + TEST_CASE("ipv4_address basic operations", "[tcp][address][ipv4]") { SECTION("default constructor") { ipv4_address addr; @@ -1247,13 +1317,229 @@ TEST_CASE("TCP connect regression avoids double connect", "[tcp][connect][regres REQUIRE(failed == 0); } -TEST_CASE("socket_address with hostname resolution", "[tcp][address][dns]") { - // Test that socket_address can be constructed from "localhost" - // This tests the DNS resolution path - SECTION("localhost resolves") { - socket_address addr("localhost", 80); - // Should resolve to either IPv4 or IPv6 - REQUIRE((addr.is_v4() || addr.is_v6())); - REQUIRE(addr.port() == 80); +TEST_CASE("explicit hostname resolution", "[tcp][address][dns]") { + SECTION("localhost resolves asynchronously") { + scheduler sched(1); + sched.start(); + + std::optional resolved; + std::atomic done{false}; + + auto task = resolve_hostname_attempt("localhost", 80, resolved, done); + sched.spawn(task.release()); + + 
for (int i = 0; i < 200 && !done.load(std::memory_order_relaxed); ++i) { + std::this_thread::sleep_for(std::chrono::milliseconds(10)); + } + + sched.shutdown(); + + REQUIRE(done.load(std::memory_order_relaxed)); + REQUIRE(resolved.has_value()); + REQUIRE((resolved->is_v4() || resolved->is_v6())); + REQUIRE(resolved->port() == 80); + } +} + +TEST_CASE("tcp_connect hostname resolution uses cache", "[tcp][connect][dns][cache]") { + default_resolve_cache().clear(); + + auto listener = tcp_listener::bind(socket_address(0)); + REQUIRE(listener.has_value()); + + const uint16_t port = listener->local_address().port(); + REQUIRE(port > 0); + + std::atomic accepted{0}; + std::atomic connected{0}; + std::atomic failed{0}; + std::atomic first_error{0}; + + scheduler sched(2); + sched.start(); + + auto stats_before = default_resolve_cache().stats(); + + auto accept_task = accept_n_connections(*listener, 2, accepted); + sched.spawn(accept_task.release()); + + auto first_task = tcp_connect_hostname_attempt("localhost", port, connected, failed, first_error); + sched.spawn(first_task.release()); + + for (int i = 0; i < 300 && connected.load(std::memory_order_relaxed) < 1; ++i) { + std::this_thread::sleep_for(std::chrono::milliseconds(10)); } + + auto stats_after_first = default_resolve_cache().stats(); + + auto second_task = tcp_connect_hostname_attempt("localhost", port, connected, failed, first_error); + sched.spawn(second_task.release()); + + for (int i = 0; i < 300 && (accepted.load(std::memory_order_relaxed) < 2 + || connected.load(std::memory_order_relaxed) < 2 + || failed.load(std::memory_order_relaxed) != 0); ++i) { + std::this_thread::sleep_for(std::chrono::milliseconds(10)); + } + + sched.shutdown(); + + auto stats_after_second = default_resolve_cache().stats(); + + INFO("connected=" << connected.load() << ", failed=" << failed.load() + << ", first errno=" << first_error.load()); + INFO("before stats: hits=" << stats_before.cache_hits + << ", misses=" << 
stats_before.cache_misses + << ", stores=" << stats_before.cache_stores + << ", invalidations=" << stats_before.cache_invalidations); + INFO("first stats: hits=" << stats_after_first.cache_hits + << ", misses=" << stats_after_first.cache_misses + << ", stores=" << stats_after_first.cache_stores + << ", invalidations=" << stats_after_first.cache_invalidations); + INFO("second stats: hits=" << stats_after_second.cache_hits + << ", misses=" << stats_after_second.cache_misses + << ", stores=" << stats_after_second.cache_stores + << ", invalidations=" << stats_after_second.cache_invalidations); + + REQUIRE(accepted == 2); + REQUIRE(connected == 2); + REQUIRE(failed == 0); + REQUIRE(stats_after_first.cache_misses >= (stats_before.cache_misses + 1)); + REQUIRE(stats_after_first.cache_stores >= (stats_before.cache_stores + 1)); + REQUIRE(stats_after_second.cache_hits >= (stats_after_first.cache_hits + 1)); + REQUIRE(stats_after_second.cache_misses == stats_after_first.cache_misses); +} + +TEST_CASE("resolve_options can disable cache", "[tcp][dns][cache][config]") { + default_resolve_cache().clear(); + auto stats_before = default_resolve_cache().stats(); + + resolve_options options; + options.use_cache = false; + + scheduler sched(1); + sched.start(); + + std::vector resolved_first; + std::vector resolved_second; + std::atomic done_first{false}; + std::atomic done_second{false}; + + auto first = resolve_all_attempt_with_options( + "localhost", 80, options, resolved_first, done_first); + sched.spawn(first.release()); + + for (int i = 0; i < 200 && !done_first.load(std::memory_order_relaxed); ++i) { + std::this_thread::sleep_for(std::chrono::milliseconds(10)); + } + + auto second = resolve_all_attempt_with_options( + "localhost", 80, options, resolved_second, done_second); + sched.spawn(second.release()); + + for (int i = 0; i < 200 && !done_second.load(std::memory_order_relaxed); ++i) { + std::this_thread::sleep_for(std::chrono::milliseconds(10)); + } + + sched.shutdown(); + 
+ auto stats_after = default_resolve_cache().stats(); + REQUIRE(done_first.load(std::memory_order_relaxed)); + REQUIRE(done_second.load(std::memory_order_relaxed)); + REQUIRE_FALSE(resolved_first.empty()); + REQUIRE_FALSE(resolved_second.empty()); + REQUIRE(stats_after.cache_hits == stats_before.cache_hits); + REQUIRE(stats_after.cache_misses == stats_before.cache_misses); + REQUIRE(stats_after.cache_stores == stats_before.cache_stores); +} + +TEST_CASE("resolve_options can use custom cache instance", "[tcp][dns][cache][config]") { + default_resolve_cache().clear(); + auto default_before = default_resolve_cache().stats(); + + resolve_cache custom_cache; + resolve_options options; + options.use_cache = true; + options.cache = &custom_cache; + + scheduler sched(1); + sched.start(); + + std::vector resolved_first; + std::vector resolved_second; + std::atomic done_first{false}; + std::atomic done_second{false}; + + auto first = resolve_all_attempt_with_options( + "localhost", 80, options, resolved_first, done_first); + sched.spawn(first.release()); + + for (int i = 0; i < 200 && !done_first.load(std::memory_order_relaxed); ++i) { + std::this_thread::sleep_for(std::chrono::milliseconds(10)); + } + + auto second = resolve_all_attempt_with_options( + "localhost", 80, options, resolved_second, done_second); + sched.spawn(second.release()); + + for (int i = 0; i < 200 && !done_second.load(std::memory_order_relaxed); ++i) { + std::this_thread::sleep_for(std::chrono::milliseconds(10)); + } + + sched.shutdown(); + + auto custom_after = custom_cache.stats(); + auto default_after = default_resolve_cache().stats(); + + REQUIRE(done_first.load(std::memory_order_relaxed)); + REQUIRE(done_second.load(std::memory_order_relaxed)); + REQUIRE_FALSE(resolved_first.empty()); + REQUIRE_FALSE(resolved_second.empty()); + REQUIRE(custom_after.cache_misses >= 1); + REQUIRE(custom_after.cache_stores >= 1); + REQUIRE(custom_after.cache_hits >= 1); + REQUIRE(default_after.cache_hits == 
default_before.cache_hits); + REQUIRE(default_after.cache_misses == default_before.cache_misses); + REQUIRE(default_after.cache_stores == default_before.cache_stores); +} + +TEST_CASE("resolve_options ttl controls cache expiry", "[tcp][dns][cache][config]") { + resolve_cache cache; + resolve_options options; + options.use_cache = true; + options.cache = &cache; + options.positive_ttl = std::chrono::seconds(0); + + scheduler sched(1); + sched.start(); + + std::vector resolved_first; + std::vector resolved_second; + std::atomic done_first{false}; + std::atomic done_second{false}; + + auto first = resolve_all_attempt_with_options( + "localhost", 80, options, resolved_first, done_first); + sched.spawn(first.release()); + + for (int i = 0; i < 200 && !done_first.load(std::memory_order_relaxed); ++i) { + std::this_thread::sleep_for(std::chrono::milliseconds(10)); + } + + auto second = resolve_all_attempt_with_options( + "localhost", 80, options, resolved_second, done_second); + sched.spawn(second.release()); + + for (int i = 0; i < 200 && !done_second.load(std::memory_order_relaxed); ++i) { + std::this_thread::sleep_for(std::chrono::milliseconds(10)); + } + + sched.shutdown(); + + auto stats = cache.stats(); + REQUIRE(done_first.load(std::memory_order_relaxed)); + REQUIRE(done_second.load(std::memory_order_relaxed)); + REQUIRE_FALSE(resolved_first.empty()); + REQUIRE_FALSE(resolved_second.empty()); + REQUIRE(stats.cache_misses >= 2); + REQUIRE(stats.cache_hits == 0); } diff --git a/tools/elio-gdb.py b/tools/elio-gdb.py old mode 100755 new mode 100644 index 155fe64..95f0dd8 --- a/tools/elio-gdb.py +++ b/tools/elio-gdb.py @@ -32,6 +32,8 @@ 4: "failed" } +EXCEPTION_PTR_SIZE = 8 + def read_atomic(val): """Read value from std::atomic.""" @@ -77,6 +79,13 @@ def read_cstring(addr): addr_val = int(addr) except: return None + + +def promise_to_handle_addr(promise_addr, ptr_size): + """Convert a promise_base* back to the coroutine handle address.""" + if promise_addr == 0: + 
return 0 + return promise_addr - 2 * ptr_size if addr_val == 0: return None @@ -120,14 +129,22 @@ def get_frame_from_handle(handle_addr): if magic != FRAME_MAGIC: return None - # Read promise_base fields - # Layout: magic(8) + parent(8) + exception(16) + debug_location(24) + state(1) + pad(3) + worker_id(4) + debug_id(8) - + # Read promise_base fields. + # Current layout begins with: + # magic(8) + parent(ptr) + activation_parent(ptr) + vthread_owner(ptr) + # + exception_ptr + debug_location + debug_state + debug_worker_id + debug_id + parent_bytes = inferior.read_memory(promise_addr + 8, ptr_size) parent = int.from_bytes(bytes(parent_bytes), 'little') + + activation_parent_bytes = inferior.read_memory(promise_addr + 8 + ptr_size, ptr_size) + activation_parent = int.from_bytes(bytes(activation_parent_bytes), 'little') + + owner_bytes = inferior.read_memory(promise_addr + 8 + 2 * ptr_size, ptr_size) + owner = int.from_bytes(bytes(owner_bytes), 'little') - # debug_location starts at offset 8+8+16=32 - loc_offset = 8 + ptr_size + 16 # magic + parent + exception_ptr + # debug_location follows magic + 3 pointers + exception_ptr + loc_offset = 8 + 3 * ptr_size + EXCEPTION_PTR_SIZE file_ptr_bytes = inferior.read_memory(promise_addr + loc_offset, ptr_size) file_ptr = int.from_bytes(bytes(file_ptr_bytes), 'little') @@ -137,8 +154,8 @@ def get_frame_from_handle(handle_addr): line_bytes = inferior.read_memory(promise_addr + loc_offset + 2 * ptr_size, 4) line = int.from_bytes(bytes(line_bytes), 'little') - # state at loc_offset + 24 - state_offset = loc_offset + 2 * ptr_size + 4 + # debug_location = file(ptr) + function(ptr) + line(u32) + padding(4) + state_offset = loc_offset + 2 * ptr_size + 8 state_byte = inferior.read_memory(promise_addr + state_offset, 1) state = int.from_bytes(bytes(state_byte), 'little') @@ -149,6 +166,18 @@ def get_frame_from_handle(handle_addr): # debug_id at worker_id + 4 debug_id_bytes = inferior.read_memory(promise_addr + state_offset + 8, 8) 
debug_id = int.from_bytes(bytes(debug_id_bytes), 'little') + + affinity_bytes = inferior.read_memory(promise_addr + state_offset + 16, ptr_size) + affinity = int.from_bytes(bytes(affinity_bytes), 'little') + + frame_size_bytes = inferior.read_memory(promise_addr + state_offset + 16 + ptr_size, ptr_size) + frame_size = int.from_bytes(bytes(frame_size_bytes), 'little') + + started_byte = inferior.read_memory(promise_addr + state_offset + 16 + 2 * ptr_size, 1) + started = int.from_bytes(bytes(started_byte), 'little') != 0 + + root_byte = inferior.read_memory(promise_addr + state_offset + 16 + 2 * ptr_size + 1, 1) + is_root = int.from_bytes(bytes(root_byte), 'little') != 0 # Read strings file_str = None @@ -171,9 +200,15 @@ def get_frame_from_handle(handle_addr): "state": COROUTINE_STATES.get(state, "unknown"), "worker_id": worker_id, "parent": parent, + "activation_parent": activation_parent, + "owner": owner, "file": file_str, "function": func_str, "line": line, + "affinity": affinity, + "frame_size": frame_size, + "started": started, + "is_root": is_root, "address": handle_addr, "promise_addr": promise_addr } @@ -185,21 +220,36 @@ def walk_virtual_stack(handle_addr): """Walk the virtual stack from a coroutine handle.""" stack = [] visited = set() + ptr_size = gdb.lookup_type("void").pointer().sizeof info = get_frame_from_handle(handle_addr) if info: + relation = "activation_parent" if info["activation_parent"] != 0 else "parent" + info["edge"] = "self" stack.append(info) - visited.add(handle_addr) + visited.add(info["promise_addr"]) - # Walk parent chain - parent = info["parent"] + parent = info[relation] while parent != 0 and parent not in visited: visited.add(parent) - # Parent is a promise_base*, need to find the frame address - # This is tricky - for now just note we have a parent - stack.append({"id": 0, "address": parent, "state": "parent", - "function": None, "file": None, "line": 0, "worker_id": 0xFFFFFFFF}) - break + parent_handle = 
promise_to_handle_addr(parent, ptr_size) + parent_info = get_frame_from_handle(parent_handle) + if parent_info is None: + stack.append({ + "id": 0, + "address": parent_handle, + "promise_addr": parent, + "state": relation, + "function": None, + "file": None, + "line": 0, + "worker_id": 0xFFFFFFFF, + "edge": relation, + }) + break + parent_info["edge"] = relation + stack.append(parent_info) + parent = parent_info[relation] return stack @@ -503,6 +553,12 @@ def invoke(self, arg, from_tty): print(f" Worker: {worker_id}") print(f" Handle: 0x{info['address']:016x}") print(f" Promise: 0x{info['promise_addr']:016x}") + print(f" Owner: 0x{info['owner']:016x}") + print(f" Root: {'yes' if info['is_root'] else 'no'}") + print(f" Started: {'yes' if info['started'] else 'no'}") + print(f" FrameSz: {info['frame_size']}") + print(f" Parent: 0x{info['parent']:016x}") + print(f" ActParent:0x{info['activation_parent']:016x}") if info["function"]: print(f" Function: {info['function']}") @@ -512,7 +568,8 @@ def invoke(self, arg, from_tty): loc += f":{info['line']}" print(f" Location: {loc}") - print(f"\n Virtual Call Stack:") + chain_name = "activation" if info["activation_parent"] != 0 else "construction" + print(f"\n Virtual Call Stack ({chain_name} chain):") stack = walk_virtual_stack(task_addr) for i, frame in enumerate(stack): func = frame["function"] or "" @@ -521,7 +578,8 @@ def invoke(self, arg, from_tty): loc = f" at {frame['file']}" if frame["line"] > 0: loc += f":{frame['line']}" - print(f" #{i:<3} {func}{loc}") + edge = frame.get("edge", "self") + print(f" #{i:<3} [{edge}] {func}{loc}") return diff --git a/tools/elio-lldb.py b/tools/elio-lldb.py old mode 100755 new mode 100644 index 6724998..d26e84a --- a/tools/elio-lldb.py +++ b/tools/elio-lldb.py @@ -32,6 +32,8 @@ 4: "failed" } +EXCEPTION_PTR_SIZE = 8 + def read_cstring(process, addr): """Read a null-terminated string from memory.""" @@ -76,6 +78,13 @@ def read_pointer(process, addr): return 
process.ReadUnsignedFromMemory(addr, ptr_size, error) +def promise_to_handle_addr(promise_addr, ptr_size): + """Convert a promise_base* back to a coroutine handle address.""" + if promise_addr == 0: + return 0 + return promise_addr - 2 * ptr_size + + def get_scheduler(target, process): """Find the current scheduler.""" # Try to find scheduler::current() @@ -128,19 +137,21 @@ def get_frame_from_handle(process, handle_addr): if magic != FRAME_MAGIC: return None - # Read promise_base fields - # Layout: magic(8) + parent(8) + exception(16) + debug_location(24) + state(1) + pad(3) + worker_id(4) + debug_id(8) - + # Read promise_base fields. + # Current layout begins with: + # magic(8) + parent(ptr) + activation_parent(ptr) + vthread_owner(ptr) + # + exception_ptr + debug_location + debug_state + debug_worker_id + debug_id + parent = read_pointer(process, promise_addr + 8) + activation_parent = read_pointer(process, promise_addr + 8 + ptr_size) + owner = read_pointer(process, promise_addr + 8 + 2 * ptr_size) - # debug_location starts at offset 8+8+16=32 - loc_offset = 8 + ptr_size + 16 # magic + parent + exception_ptr + loc_offset = 8 + 3 * ptr_size + EXCEPTION_PTR_SIZE file_ptr = read_pointer(process, promise_addr + loc_offset) func_ptr = read_pointer(process, promise_addr + loc_offset + ptr_size) line = read_uint32(process, promise_addr + loc_offset + 2 * ptr_size) - # state at loc_offset + 24 - state_offset = loc_offset + 2 * ptr_size + 4 + state_offset = loc_offset + 2 * ptr_size + 8 state = read_uint8(process, promise_addr + state_offset) # worker_id at state_offset + 4 (after 3 bytes padding) @@ -148,15 +159,26 @@ def get_frame_from_handle(process, handle_addr): # debug_id at worker_id + 4 debug_id = read_uint64(process, promise_addr + state_offset + 8) + + affinity = read_pointer(process, promise_addr + state_offset + 16) + frame_size = read_pointer(process, promise_addr + state_offset + 16 + ptr_size) + started = read_uint8(process, promise_addr + state_offset + 
16 + 2 * ptr_size) != 0 + is_root = read_uint8(process, promise_addr + state_offset + 16 + 2 * ptr_size + 1) != 0 return { "id": debug_id, "state": COROUTINE_STATES.get(state, "unknown"), "worker_id": worker_id, "parent": parent, + "activation_parent": activation_parent, + "owner": owner, "file": read_cstring(process, file_ptr), "function": read_cstring(process, func_ptr), "line": line, + "affinity": affinity, + "frame_size": frame_size, + "started": started, + "is_root": is_root, "address": handle_addr, "promise_addr": promise_addr } @@ -171,18 +193,33 @@ def walk_virtual_stack(process, handle_addr): info = get_frame_from_handle(process, handle_addr) if info: + ptr_size = process.GetAddressByteSize() + relation = "activation_parent" if info["activation_parent"] != 0 else "parent" + info["edge"] = "self" stack.append(info) - visited.add(handle_addr) + visited.add(info["promise_addr"]) - # Walk parent chain - parent = info["parent"] + parent = info[relation] while parent != 0 and parent not in visited: visited.add(parent) - # Parent is a promise_base*, need to find the frame address - # For now just note we have a parent - stack.append({"id": 0, "address": parent, "state": "parent", - "function": None, "file": None, "line": 0, "worker_id": 0xFFFFFFFF}) - break + parent_handle = promise_to_handle_addr(parent, ptr_size) + parent_info = get_frame_from_handle(process, parent_handle) + if parent_info is None: + stack.append({ + "id": 0, + "address": parent_handle, + "promise_addr": parent, + "state": relation, + "function": None, + "file": None, + "line": 0, + "worker_id": 0xFFFFFFFF, + "edge": relation, + }) + break + parent_info["edge"] = relation + stack.append(parent_info) + parent = parent_info[relation] return stack @@ -469,6 +506,12 @@ def elio_info(debugger, command, result, internal_dict): result.AppendMessage(f" Worker: {worker}") result.AppendMessage(f" Handle: 0x{info['address']:016x}") result.AppendMessage(f" Promise: 0x{info['promise_addr']:016x}") + 
result.AppendMessage(f" Owner: 0x{info['owner']:016x}") + result.AppendMessage(f" Root: {'yes' if info['is_root'] else 'no'}") + result.AppendMessage(f" Started: {'yes' if info['started'] else 'no'}") + result.AppendMessage(f" FrameSz: {info['frame_size']}") + result.AppendMessage(f" Parent: 0x{info['parent']:016x}") + result.AppendMessage(f" ActParent:0x{info['activation_parent']:016x}") if info["function"]: result.AppendMessage(f" Function: {info['function']}") @@ -478,7 +521,8 @@ def elio_info(debugger, command, result, internal_dict): loc += f":{info['line']}" result.AppendMessage(f" Location: {loc}") - result.AppendMessage(f"\n Virtual Call Stack:") + chain_name = "activation" if info["activation_parent"] != 0 else "construction" + result.AppendMessage(f"\n Virtual Call Stack ({chain_name} chain):") stack = walk_virtual_stack(process, task_addr) for i, frame in enumerate(stack): func = frame["function"] or "" @@ -487,7 +531,8 @@ def elio_info(debugger, command, result, internal_dict): loc = f" at {frame['file']}" if frame["line"] > 0: loc += f":{frame['line']}" - result.AppendMessage(f" #{i:<3} {func}{loc}") + edge = frame.get("edge", "self") + result.AppendMessage(f" #{i:<3} [{edge}] {func}{loc}") return diff --git a/tools/elio_lldb.py b/tools/elio_lldb.py new file mode 100644 index 0000000..9625892 --- /dev/null +++ b/tools/elio_lldb.py @@ -0,0 +1,29 @@ +#!/usr/bin/env python3 +"""LLDB import entrypoint that loads the hyphen-named implementation. 
+ +Use this file with: + command script import /path/to/tools/elio_lldb.py +""" + +import importlib.util +import pathlib + + +def _load_impl(): + script_path = pathlib.Path(__file__).with_name("elio-lldb.py") + spec = importlib.util.spec_from_file_location("elio_lldb_impl", script_path) + module = importlib.util.module_from_spec(spec) + assert spec.loader is not None + spec.loader.exec_module(module) + return module + + +_impl = _load_impl() + + +def __getattr__(name): + return getattr(_impl, name) + + +def __lldb_init_module(debugger, internal_dict): + return _impl.__lldb_init_module(debugger, internal_dict) diff --git a/wiki/API-Reference.md b/wiki/API-Reference.md index a4cc311..07b1fe1 100644 --- a/wiki/API-Reference.md +++ b/wiki/API-Reference.md @@ -74,6 +74,12 @@ int b = co_await h2; int c = co_await h3; ``` +**Semantics Notes:** + +- Direct `co_await child` usually keeps `child` in the caller's current vthread and does not require relocation. +- `spawn()` and `go()` always establish a new vthread root. The root frame is attached to a fresh owner domain before first execution. +- Work stealing may move execution to another worker thread, but does not by itself change frame ownership. + ### `join_handle` Handle for awaiting spawned tasks. Returned by `task::spawn()`. diff --git a/wiki/Core-Concepts.md b/wiki/Core-Concepts.md index 11aba17..3d6ae1c 100644 --- a/wiki/Core-Concepts.md +++ b/wiki/Core-Concepts.md @@ -244,20 +244,38 @@ All three functions are also available in the `elio` namespace as convenience al **Why MPSC inbox for external submissions.** Cross-thread task submissions (e.g., spawning a task onto a specific worker from another thread) go through a bounded MPSC ring buffer rather than directly into the Chase-Lev deque. This separation keeps the deque's invariants simple -- only the owner ever pushes -- and the bounded capacity with cache-line aligned slots (`alignas(64)`) eliminates false sharing between producers and the consumer. 
-## Virtual Stack +## Virtual Stack And VThread Ownership C++20 stackless coroutines do not maintain a call stack in the traditional sense. When a coroutine suspends, the compiler-generated frame is stored on the heap, but the chain of callers that led to that suspension point is lost. This makes debugging difficult -- tools like `gdb bt` show the scheduler's dispatch loop rather than the logical call chain of coroutines. -Elio reconstructs this information through a **virtual stack**: an intrusive linked list of `promise_base` objects connected by `parent_` pointers. Each `promise_base` constructor links itself to the current frame via the `current_frame_` thread-local, and the destructor restores the previous frame. This gives every coroutine a pointer to the coroutine that `co_await`ed it. +Elio now tracks three related but different concepts: -The overhead is minimal -- one pointer per coroutine frame, set during construction and cleared during destruction. +- **Construction parent**: recorded in `parent_` when the promise is constructed from `current_frame_`. This is cheap and still useful for debugging, but it reflects construction nesting, not necessarily the true await chain. +- **Activation parent**: recorded when a cold coroutine is first activated via direct `co_await` or by a detached/joinable spawn boundary. This reflects the logical runtime relationship more accurately than construction order. +- **Vthread owner**: the ownership domain that is responsible for the frame's memory and for propagating vthread-local execution context across suspension and resumption. + +This distinction matters because construction order and first execution order can differ. In an expression such as `co_await bar(foo())`, `foo()` is constructed before `bar(...)`, but `bar` executes first and only later activates `foo`. A single parent pointer is therefore not enough to describe both debug lineage and runtime ownership. 
+ +### Attachment And Relocation Rules + +- **Direct `co_await child`**: the common fast path. If the child is first activated inside the caller's current vthread, no relocation is needed. +- **`spawn()` / `go()`**: these create a new vthread boundary. The spawned root frame is cold-relocated exactly once into a fresh owner domain before first resume. +- **Work stealing**: no relocation is needed. A suspended leaf may migrate to another worker, but frame ownership remains unchanged; only the executing worker changes. + +The overhead remains small: metadata is still stored in `promise_base`, while owner-backed roots use segmented storage and cold-frame relocation only when ownership boundaries require it. ### What it enables - **`elio-pstack`**: A CLI tool that attaches to a running process (or reads a coredump) and walks the virtual stack chains to print coroutine backtraces, similar to `pstack` for threads. -- **Debugger extensions**: `elio-gdb.py` and `elio-lldb.py` use the same frame linkage to implement `elio bt` (backtrace) and `elio list` (list active coroutines). +- **Debugger extensions**: `elio-gdb.py` and the LLDB entrypoint `elio_lldb.py` (loading `elio-lldb.py`) use the same frame linkage to implement `elio bt` (backtrace) and `elio list` (list active coroutines). - **Exception propagation**: When a coroutine throws, `unhandled_exception()` captures it in the promise. The parent coroutine can then rethrow the exception when it `co_await`s the child's result, propagating errors up the logical call chain. +### Current limits + +- Construction-parent traversal is still the most stable always-available chain for debugger tooling. +- Activation-parent and vthread-owner metadata are the runtime-accurate semantics for ownership and first activation. +- A nested direct-`co_await` expression may still have different construction order and execution order; this is expected and documented behavior. 
+ ### Frame metadata Each `promise_base` also carries debug metadata: diff --git a/wiki/Debugging.md b/wiki/Debugging.md index 916799b..e60e584 100644 --- a/wiki/Debugging.md +++ b/wiki/Debugging.md @@ -9,23 +9,35 @@ Elio coroutines maintain debug metadata in each frame: - State (created, running, suspended, completed, failed) - Source location (file, function, line) - Worker thread assignment -- Parent pointer for virtual stack traversal +- Construction parent, activation parent, and vthread ownership metadata The debugger extensions find coroutine frames by traversing the scheduler's worker queues (Chase-Lev deque and MPSC inbox). This approach has **zero runtime overhead** - no global registry or synchronization is required. +Important limitation: today these tools primarily see **queued** coroutines. A coroutine that is currently running on a worker thread is not guaranteed to appear until it suspends or is re-enqueued. + ## Virtual Stack -C++20 stackless coroutines allocate each frame independently on the heap. When a coroutine suspends, the native call stack unwinds completely, so traditional stack traces cannot show the logical call chain. Elio reconstructs this information through an intrusive virtual stack built into every coroutine frame. +C++20 stackless coroutines allocate each frame independently on the heap. When a coroutine suspends, the native call stack unwinds completely, so traditional stack traces cannot show the logical call chain. Elio reconstructs this information through an intrusive virtual-stack and vthread metadata model built into every coroutine frame. ### How It Works -Each coroutine's promise type inherits from `promise_base`, which contains a `parent_` pointer. When coroutine A `co_await`s coroutine B, B's promise stores a pointer back to A's promise. This forms a singly-linked list from the innermost frame to the outermost caller, mirroring what a native call stack would look like if the coroutines were regular functions. 
+Each coroutine's promise type inherits from `promise_base`, which now carries multiple relationships: + +- `parent_`: the construction-time parent captured from `current_frame_` +- `activation_parent_`: the coroutine that first activates a cold child +- `vthread_owner_`: the owner domain responsible for memory ownership and resume context + +`current_frame_` is still used to capture low-cost construction lineage. Runtime execution, however, is determined later: -The thread-local `current_frame_` tracks which frame is currently executing. When a new coroutine starts, it reads `current_frame_` to set its `parent_`, then installs itself as the new `current_frame_`. On completion or suspension, the previous frame is restored. +- direct `co_await` binds activation-parent metadata when the child is first awaited +- `spawn()` and `go()` create a fresh vthread owner and cold-relocate the spawned root before first resume +- work stealing resumes an already-owned suspended leaf on a different worker without changing frame ownership + +This means the debugger-visible construction chain and the runtime ownership chain are related, but not identical. In particular, nested expressions such as `co_await bar(foo())` may have construction order different from first execution order. ### Frame Validation -Each `promise_base` contains a `frame_magic_` field set to `0x454C494F46524D45` (the ASCII string "ELIOFRMR"). The debugger tools check this magic value when traversing memory to distinguish valid Elio coroutine frames from arbitrary data. This is especially important during coredump analysis, where the debugger walks raw memory without type information. +Each `promise_base` contains a `frame_magic_` field set to `0x454C494F46524D45` (the ASCII string "ELIOFRME"). The debugger tools check this magic value when traversing memory to distinguish valid Elio coroutine frames from arbitrary data. 
This is especially important during coredump analysis, where the debugger walks raw memory without type information. ### Debug Metadata @@ -33,14 +45,16 @@ Every frame carries the following debug metadata with no additional allocation: | Field | Description | |-------|-------------| -| `id_` | Unique monotonic identifier assigned at creation | -| `state_` | Current state: created, running, suspended, completed, or failed | -| `worker_id_` | Index of the worker thread the frame is assigned to (or -1 if unassigned) | -| `file_`, `function_`, `line_` | Source location captured via `std::source_location` or manual `set_location()` | -| `parent_` | Pointer to the calling frame's promise, forming the virtual stack chain | +| `debug_id_` | Unique monotonic identifier assigned on demand | +| `debug_state_` | Current state: created, running, suspended, completed, or failed | +| `debug_worker_id_` | Index of the worker thread the frame is assigned to (or -1 if unassigned) | +| `debug_location_` | Source file, function name, and line number | +| `parent_` | Construction-time parent pointer used for low-overhead lineage/debug traversal | +| `activation_parent_` | Runtime first-activation parent for direct await / detached activation semantics | +| `vthread_owner_` | Owner domain used for frame ownership and vthread context restoration | | `frame_magic_` | Magic number for frame integrity validation | -The debugger tools (`elio-pstack`, `elio-gdb.py`, `elio-lldb.py`) use this metadata to present coroutine state in a format familiar to anyone who has used `pstack` or `thread apply all bt`. +The debugger tools (`elio-pstack`, `elio-gdb.py`, `elio_lldb.py`) use this metadata to present coroutine state in a format familiar to anyone who has used `pstack` or `thread apply all bt`. They now prefer the activation-parent chain when it is present, and fall back to the construction-parent chain otherwise. Owner metadata is shown separately in `elio info` style output. 
## Tools @@ -48,7 +62,7 @@ The debugger tools (`elio-pstack`, `elio-gdb.py`, `elio-lldb.py`) use this metad |------|-------------| | `elio-pstack` | Command-line tool similar to `pstack` | | `elio-gdb.py` | GDB Python extension | -| `elio-lldb.py` | LLDB Python extension | +| `elio_lldb.py` | LLDB Python entrypoint | ## elio-pstack @@ -146,12 +160,18 @@ vthread #1 Worker: 0 Handle: 0x00007f1234567890 Promise: 0x00007f12345678a0 + Owner: 0x00007f1234500000 + Root: no + Started: yes + FrameSz: 160 + Parent: 0x00007f1234567000 + ActParent:0x00007f1234567000 Function: worker_task Location: debug_test.cpp:84 - Virtual Call Stack: - #0 worker_task at debug_test.cpp:84 - #1 async_main at debug_test.cpp:112 + Virtual Call Stack (activation chain): + #0 [self] worker_task at debug_test.cpp:84 + #1 [activation_parent] async_main at debug_test.cpp:112 (gdb) elio workers Scheduler: running @@ -177,13 +197,13 @@ Total tasks executed: 4780 ```bash # From LLDB command line -lldb -o 'command script import /path/to/tools/elio-lldb.py' ./myapp +lldb -o 'command script import /path/to/tools/elio_lldb.py' ./myapp # Or in LLDB session -(lldb) command script import /path/to/tools/elio-lldb.py +(lldb) command script import /path/to/tools/elio_lldb.py # Or add to ~/.lldbinit -command script import /path/to/tools/elio-lldb.py +command script import /path/to/tools/elio_lldb.py ``` ### Commands @@ -199,6 +219,8 @@ The LLDB extension provides the same commands as GDB: | `elio workers` | Show worker thread information | | `elio stats` | Show scheduler statistics | +`elio info` in both GDB and LLDB now prints owner/root/started metadata in addition to source location and queue worker information. 
+ ## Setting Debug Location For more accurate debugging information, you can manually set the debug location in your coroutines: diff --git a/wiki/Getting-Started.md b/wiki/Getting-Started.md index 7ec3076..db3209a 100644 --- a/wiki/Getting-Started.md +++ b/wiki/Getting-Started.md @@ -204,8 +204,8 @@ include/elio/ ├── elio.hpp # Main include ├── coro/ # Coroutine primitives │ ├── task.hpp # task -│ ├── promise_base.hpp # Virtual stack base -│ ├── frame.hpp # Stack introspection +│ ├── promise_base.hpp # Vthread metadata base +│ ├── frame.hpp # Frame and owner introspection │ ├── frame_allocator.hpp # Frame memory pool │ ├── cancel_token.hpp # Cooperative cancellation │ └── awaitable_base.hpp # Awaitable interface @@ -257,6 +257,18 @@ include/elio/ The repository also contains `examples/` with runnable programs, `tests/` with Catch2 tests, and `tools/` with debugging utilities (`elio-pstack`, GDB/LLDB extensions). +## VThread Model At A Glance + +Elio's coroutine runtime distinguishes three relationships that are easy to conflate if you only think in terms of a single parent pointer: + +- **Construction parent**: captured when the promise is created +- **Activation parent**: captured when a cold coroutine is first activated +- **Vthread owner**: the owner domain responsible for frame memory and resume context + +In the common direct-`co_await` case, these relationships often line up and no relocation is needed. In `spawn()` and `go()` cases, however, Elio creates a new vthread boundary and moves the cold root frame into a fresh owner domain before first execution. + +The debugging tools expose this distinction explicitly: `elio info` reports owner/root/parent metadata, while stack-style output prefers the activation-parent chain when it is available. 
+ ## Next Steps - Read [[Core Concepts]] to understand how Elio works diff --git a/wiki/Home.md b/wiki/Home.md index 6582d3d..9b70b1c 100644 --- a/wiki/Home.md +++ b/wiki/Home.md @@ -60,7 +60,7 @@ Elio is built around a few key technical decisions: - **Linux-native**: Deep integration with io_uring and signalfd enables optimal performance on modern Linux kernels. epoll provides a fallback for older systems. - **Per-worker I/O**: Each scheduler thread owns its I/O backend (io_uring or epoll), eliminating I/O-related locking entirely. Cross-thread communication uses lock-free MPSC queues. - **Work-stealing**: The Chase-Lev deque provides lock-free local operations with a global load balancing fallback. Tasks with thread affinity are respected during stealing. -- **Virtual stack tracking**: C++20 stackless coroutines lose stack information at suspension points. Elio's intrusive virtual stack enables production debugging via `elio-pstack` and GDB/LLDB extensions. +- **Extended vthread tracking**: Elio distinguishes construction-time parentage, first-activation parentage, and vthread ownership. This preserves low-overhead virtual stack debugging while making `co_await`, `spawn()`, `go()`, and work stealing semantics explicit. 
## Wiki Contents diff --git a/wiki/Networking.md b/wiki/Networking.md index 4b7c2fb..ef64172 100644 --- a/wiki/Networking.md +++ b/wiki/Networking.md @@ -72,8 +72,14 @@ coro::task server(uint16_t port) { ```cpp coro::task client(const std::string& host, uint16_t port) { - // Connect to server (hostname is resolved automatically) - auto stream = co_await tcp_connect(ipv4_address(host, port)); + // Resolve host to concrete address, then connect + auto resolved = co_await resolve_hostname(host, port); + if (!resolved) { + ELIO_LOG_ERROR("Resolve failed: {}", strerror(errno)); + co_return; + } + + auto stream = co_await tcp_connect(*resolved); if (!stream) { ELIO_LOG_ERROR("Connect failed: {}", strerror(errno)); co_return; @@ -97,26 +103,33 @@ coro::task client(const std::string& host, uint16_t port) { Elio provides three address types for TCP networking: `ipv4_address`, `ipv6_address`, and `socket_address` (a variant wrapper that holds either). +These types are value objects for already-parsed socket addresses. Their string constructors accept numeric IP literals only and do not perform DNS. For hostname resolution, use `co_await resolve_hostname(host, port)` (single best address) or `co_await resolve_all(host, port)` (all candidate addresses), then call `tcp_connect()` with a concrete address. 
+ ```cpp // IPv4 address with port ipv4_address addr1(8080); // 0.0.0.0:8080 ipv4_address addr2("192.168.1.1", 8080); // 192.168.1.1:8080 -ipv4_address addr3("example.com", 80); // DNS resolved // IPv6 address with port ipv6_address addr4(8080); // [::]:8080 ipv6_address addr5("::1", 8080); // [::1]:8080 ipv6_address addr6("fe80::1%eth0", 8080); // Link-local with scope ID -ipv6_address addr7("example.com", 443); // DNS resolved (AAAA) // Generic socket_address (variant of ipv4_address | ipv6_address) socket_address sa1(ipv4_address(8080)); // From IPv4 socket_address sa2(ipv6_address("::1", 8080)); // From IPv6 -socket_address sa3("example.com", 443); // Auto-detects v4/v6 +socket_address sa3("127.0.0.1", 443); // From literal IPv4 + +// Explicit async hostname resolution +auto resolved = co_await resolve_hostname("example.com", 443); +if (resolved && resolved->is_v6()) { + const auto& v6 = resolved->as_v6(); + ELIO_LOG_INFO("IPv6: {}", v6.to_string()); +} // Inspect address type -if (sa3.is_v6()) { - const auto& v6 = sa3.as_v6(); +if (sa2.is_v6()) { + const auto& v6 = sa2.as_v6(); ELIO_LOG_INFO("IPv6: {}", v6.to_string()); } @@ -337,6 +350,40 @@ coro::task advanced_client() { } ``` +### Hostname Resolve Configuration (HTTP/HTTP2/WS/SSE) + +All HTTP-family clients now expose DNS resolve/cache behavior via client config. 
+
+```cpp
+#include <elio/net/resolve.hpp>
+
+// HTTP/1.1
+http::client_config http_cfg;
+http_cfg.resolve_options.use_cache = true;
+http_cfg.resolve_options.positive_ttl = std::chrono::seconds(30);
+http_cfg.resolve_options.negative_ttl = std::chrono::seconds(2);
+http_cfg.rotate_resolved_addresses = true; // round-robin start address
+http::client http_client(http_cfg);
+
+// HTTP/2
+http::h2_client_config h2_cfg;
+h2_cfg.resolve_options = net::default_cached_resolve_options();
+h2_cfg.rotate_resolved_addresses = true;
+http::h2_client h2_client(h2_cfg);
+
+// WebSocket
+http::websocket::client_config ws_cfg;
+ws_cfg.resolve_options.use_cache = false; // always resolve fresh
+ws_cfg.rotate_resolved_addresses = false; // deterministic order
+http::websocket::ws_client ws(ws_cfg);
+
+// SSE
+http::sse::client_config sse_cfg;
+sse_cfg.resolve_options.use_cache = true;
+sse_cfg.resolve_options.positive_ttl = std::chrono::seconds(10);
+http::sse::sse_client sse(sse_cfg);
+```
+
 ### HTTP Server
 
 ```cpp
@@ -467,8 +514,11 @@ coro::task secure_connection() {
 tls_ctx.use_default_verify_paths();
 tls_ctx.set_verify_mode(true);
 
- // Connect TCP
- auto tcp = co_await tcp_connect(ipv4_address("example.com", 443));
+ // Resolve hostname, then connect using a concrete socket address
+ auto resolved = co_await resolve_hostname("example.com", 443);
+ if (!resolved) co_return;
+
+ auto tcp = co_await tcp_connect(*resolved);
 if (!tcp) co_return;
 
 // Wrap with TLS
diff --git a/wiki/RPC-Framework.md b/wiki/RPC-Framework.md
index 728e53b..482fa8c 100644
--- a/wiki/RPC-Framework.md
+++ b/wiki/RPC-Framework.md
@@ -102,10 +102,8 @@
 
 ```cpp
 coro::task run_client(const char* host, uint16_t port) {
- auto& ctx = io::default_io_context();
-
 // Connect to server
- auto client = co_await tcp_rpc_client::connect(ctx, host, port);
+ auto client = co_await tcp_rpc_client::connect(host, port);
 if (!client) {
 ELIO_LOG_ERROR("Failed to connect");
 co_return;
@@ -124,6 +122,30 @@
coro::task run_client(const char* host, uint16_t port) { } ``` +### Client Resolve/Cache Configuration + +```cpp +#include + +coro::task run_client_with_resolver(std::string_view host, uint16_t port) { + net::resolve_options opts = net::default_cached_resolve_options(); + opts.positive_ttl = std::chrono::seconds(30); + opts.negative_ttl = std::chrono::seconds(2); + + auto client = co_await tcp_rpc_client::connect(host, port, opts); + if (!client) { + ELIO_LOG_ERROR("Failed to connect"); + co_return; + } + + GetUserRequest req{42}; + auto result = co_await (*client)->call(req, std::chrono::seconds(5)); + if (!result.ok()) { + ELIO_LOG_ERROR("RPC failed: {}", result.error_message()); + } +} +``` + ## Supported Types ### Primitive Types diff --git a/wiki/WebSocket-SSE.md b/wiki/WebSocket-SSE.md index b65dc77..a08f2e5 100644 --- a/wiki/WebSocket-SSE.md +++ b/wiki/WebSocket-SSE.md @@ -109,6 +109,21 @@ coro::task connect_example() { } ``` +### WebSocket Resolve/Cache Configuration + +```cpp +#include + +websocket::client_config cfg; +cfg.resolve_options = net::default_cached_resolve_options(); +cfg.resolve_options.positive_ttl = std::chrono::seconds(20); +cfg.resolve_options.negative_ttl = std::chrono::seconds(3); +cfg.rotate_resolved_addresses = true; + +websocket::ws_client client(cfg); +co_await client.connect("wss://example.com/ws"); +``` + ### WebSocket Frame Types | Opcode | Name | Description | @@ -198,14 +213,15 @@ using namespace elio; using namespace elio::http::sse; coro::task listen_events() { - auto& ctx = io::default_io_context(); - // Configure client client_config config; config.auto_reconnect = true; config.default_retry_ms = 3000; + config.resolve_options.use_cache = true; + config.resolve_options.positive_ttl = std::chrono::seconds(15); + config.rotate_resolved_addresses = true; - sse_client client(ctx, config); + sse_client client(config); // Connect if (!co_await client.connect("http://localhost:8080/events")) { @@ -226,6 +242,19 @@ coro::task 
listen_events() {
 }
 ```
 
+### SSE Resolve/Cache Configuration
+
+```cpp
+#include <elio/net/resolve.hpp>
+
+sse::client_config cfg;
+cfg.resolve_options.use_cache = false; // disable cache for dynamic DNS targets
+cfg.rotate_resolved_addresses = false; // always try first resolved address first
+
+sse::sse_client client(cfg);
+co_await client.connect("https://example.com/events");
+```
+
 ### SSE Event Format
 
 SSE events are formatted as text with specific fields:

From d3f85f978cfbfe264a390c516a0c6fcc48243d10 Mon Sep 17 00:00:00 2001
From: Coldwings
Date: Thu, 19 Mar 2026 10:39:06 +0800
Subject: [PATCH 2/4] minor fix

---
 include/elio/coro/promise_base.hpp | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/include/elio/coro/promise_base.hpp b/include/elio/coro/promise_base.hpp
index bdb51ea..7d61e44 100644
--- a/include/elio/coro/promise_base.hpp
+++ b/include/elio/coro/promise_base.hpp
@@ -163,6 +163,15 @@ class promise_base {
 owner_bindings_.fetch_add(1, std::memory_order_relaxed);
 return true;
 }
+ // Construction-time owner can differ from first-activation owner.
+ // For cold frames (not started yet), allow one-way owner transfer
+ // during first activation binding (e.g. task created in A, first
+ // awaited in B via spawn/join wrapper).
+ if (!started_ && vthread_owner_ != owner) { + vthread_owner_ = owner; + owner_bindings_.fetch_add(1, std::memory_order_relaxed); + return true; + } assert(vthread_owner_ == owner && "vthread_owner rebound inconsistently"); return false; } From 70a0e9bb4a6826b2a3c2bafada62e2e21e6fd2cc Mon Sep 17 00:00:00 2001 From: Coldwings Date: Thu, 19 Mar 2026 11:51:04 +0800 Subject: [PATCH 3/4] seems like vthread stack is useless in most cases --- README.md | 61 ++------ include/elio/coro/frame.hpp | 32 ++-- include/elio/coro/frame_allocator.hpp | 75 ++------- include/elio/coro/promise_base.hpp | 159 +------------------ include/elio/coro/task.hpp | 142 +---------------- include/elio/coro/vthread_owner.hpp | 214 -------------------------- include/elio/rpc/rpc_client.hpp | 1 - include/elio/runtime/scheduler.hpp | 39 +---- tools/elio-gdb.py | 92 ++--------- tools/elio-lldb.py | 82 +++------- wiki/API-Reference.md | 6 - wiki/Core-Concepts.md | 26 +--- wiki/Debugging.md | 52 ++----- wiki/Getting-Started.md | 16 +- wiki/Home.md | 2 +- wiki/Networking.md | 68 ++------ wiki/RPC-Framework.md | 28 +--- wiki/WebSocket-SSE.md | 35 +---- 18 files changed, 113 insertions(+), 1017 deletions(-) delete mode 100644 include/elio/coro/vthread_owner.hpp mode change 100644 => 100755 tools/elio-gdb.py mode change 100644 => 100755 tools/elio-lldb.py diff --git a/README.md b/README.md index e2bfd81..fe7bd36 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # Elio Coroutine Library -**Elio** is a modern, production-ready C++20 coroutine library for high-performance asynchronous programming on Linux. It provides stackless coroutines with extended vthread tracking, a multi-threaded work-stealing scheduler, and a foundation for efficient I/O operations. +**Elio** is a modern, production-ready C++20 coroutine library for high-performance asynchronous programming on Linux. 
It provides stackless coroutines with virtual stack tracking, a multi-threaded work-stealing scheduler, and a foundation for efficient I/O operations. [![CI](https://github.com/Coldwings/Elio/actions/workflows/ci.yml/badge.svg)](https://github.com/Coldwings/Elio/actions/workflows/ci.yml) [![C++20](https://img.shields.io/badge/C%2B%2B-20-blue.svg)](https://en.cppreference.com/w/cpp/20) @@ -10,7 +10,7 @@ - **C++20 Stackless Coroutines** with `task` type - **Ergonomic Task Spawning**: `go()` for fire-and-forget, `spawn()` for joinable tasks -- **Extended VThread Tracking** covering construction lineage, first activation, and owner-domain semantics +- **Virtual Stack Tracking** for natural exception propagation - **Work-Stealing Scheduler** with lock-free Chase-Lev deques - **Dynamic Thread Pool** with runtime adjustment - **Autoscaler** for automatic worker thread scaling under load @@ -142,9 +142,9 @@ elio:: ├── coro:: // Coroutine primitives │ ├── task // Primary coroutine type │ ├── join_handle // Handle for awaiting spawned tasks -│ ├── promise_base // Vthread metadata base class +│ ├── promise_base // Virtual stack base class │ ├── awaitable_base // CRTP awaitable base -│ └── frame utilities // Frame and owner inspection +│ └── frame utilities // Virtual stack inspection │ ├── runtime:: // Scheduler and execution │ ├── scheduler // Work-stealing scheduler @@ -192,17 +192,12 @@ tools/ // Debugging tools └── elio-lldb.py // LLDB implementation script ``` -### Virtual Stack And VThreads +### Virtual Stack -Elio tracks coroutine relationships at three levels: -- construction lineage via `parent_` -- first-activation lineage via `activation_parent_` -- memory and execution ownership via `vthread_owner_` - -This enables: +Elio implements a **virtual stack** by linking coroutine frames together. 
This enables: - Natural exception propagation through `co_await` chains -- Call chain inspection for debugging and postmortem analysis -- Explicit semantics for `spawn()`, `go()`, and worker migration +- Call chain inspection for debugging +- Automatic cleanup on coroutine completion ```cpp outer() -> middle() -> inner() @@ -261,7 +256,7 @@ make ## Debugging -Elio provides debugging tools to inspect queued coroutine states, virtual call stacks, and vthread ownership metadata: +Elio provides debugging tools to inspect coroutine states and virtual call stacks: ```bash # pstack-like tool for coroutines @@ -273,17 +268,13 @@ gdb -ex 'source tools/elio-gdb.py' ./myapp (gdb) elio list # List all vthreads (gdb) elio bt # Show all backtraces (gdb) elio bt 42 # Show backtrace for vthread #42 -(gdb) elio info 42 # Show owner/root/parent metadata -# LLDB extension (use entrypoint wrapper) +# LLDB extension lldb -o 'command script import tools/elio_lldb.py' ./myapp (lldb) elio list (lldb) elio bt -(lldb) elio info 42 ``` -The debugger tools currently enumerate queued coroutines from worker queues. They prefer the activation-parent chain when it exists, and fall back to the construction-parent chain otherwise. - See the [Debugging wiki page](wiki/Debugging.md) for detailed documentation. ## API Reference @@ -359,38 +350,6 @@ if (handle.is_ready()) { } ``` -### Hostname Resolution Configuration - -Hostname resolution is explicit and configurable via `net::resolve_options`. -`tcp_connect` accepts concrete addresses only, so host+port callers should resolve first. 
- -```cpp -#include - -// Common options with cache enabled -auto opts = elio::net::default_cached_resolve_options(); -opts.positive_ttl = std::chrono::seconds(30); -opts.negative_ttl = std::chrono::seconds(2); - -// Resolve + connect (single best address) -auto addr = co_await elio::net::resolve_hostname("api.example.com", 443, opts); -if (addr) { - auto tcp = co_await elio::net::tcp_connect(*addr); -} - -// HTTP-family clients can set this in config directly -elio::http::client_config http_cfg; -http_cfg.resolve_options = opts; -http_cfg.rotate_resolved_addresses = true; - -elio::http::h2_client_config h2_cfg; -h2_cfg.resolve_options = opts; -h2_cfg.rotate_resolved_addresses = true; - -// RPC client explicit resolve configuration -auto rpc = co_await elio::rpc::tcp_rpc_client::connect("rpc.example.com", 9000, opts); -``` - ### Exception Handling ```cpp diff --git a/include/elio/coro/frame.hpp b/include/elio/coro/frame.hpp index beeab58..c22b3f2 100644 --- a/include/elio/coro/frame.hpp +++ b/include/elio/coro/frame.hpp @@ -1,11 +1,9 @@ #pragma once #include "promise_base.hpp" -#include "frame_allocator.hpp" -#include "vthread_owner.hpp" -#include #include #include +#include #include namespace elio::coro { @@ -51,19 +49,21 @@ inline void log_virtual_stack() { /// but is portable across GCC and Clang. The frame layout is: /// [resume_fn_ptr][destroy_fn_ptr][promise...] 
inline promise_base* get_promise_base(void* handle_addr) noexcept {
- return promise_base::from_handle_address(handle_addr);
-}
-
-inline void ensure_vthread_owner(std::coroutine_handle<> handle) {
- auto* promise = get_promise_base(handle.address());
- if (!promise) return;
- if (promise->vthread_owner()) return;
-
- auto* owner = new vthread_owner();
- promise->bind_vthread_owner_once(owner);
- promise->set_vthread_root(true);
- frame_allocator::set_owner_metadata(handle.address(), owner, true);
- promise_base::record_root_owner_creation();
+ if (!handle_addr) return nullptr;
+
+ // The coroutine frame layout has the promise after two function pointers
+ // (resume and destroy). This is consistent across GCC and Clang.
+ constexpr size_t promise_offset = 2 * sizeof(void*);
+
+ auto* candidate = reinterpret_cast<promise_base*>(
+ static_cast<char*>(handle_addr) + promise_offset);
+
+ // Validate using the magic number
+ if (candidate->frame_magic() == promise_base::FRAME_MAGIC) {
+ return candidate;
+ }
+
+ return nullptr;
 }
 
 /// Check if a coroutine has affinity for a specific worker
diff --git a/include/elio/coro/frame_allocator.hpp b/include/elio/coro/frame_allocator.hpp
index 7e3bcbc..88177d1 100644
--- a/include/elio/coro/frame_allocator.hpp
+++ b/include/elio/coro/frame_allocator.hpp
@@ -32,18 +32,11 @@ namespace elio::coro {
 /// Note: Under sanitizers, pooling is disabled to allow proper leak/error detection.
class frame_allocator { public: - struct owner_metadata { - void* owner = nullptr; - bool is_root = false; - bool found = false; - }; - // Support frames up to 256 bytes (covers most simple tasks) // Actual allocation includes header, so user-visible size is MAX_FRAME_SIZE static constexpr size_t MAX_FRAME_SIZE = 256; static constexpr size_t POOL_SIZE = 1024; static constexpr size_t REMOTE_QUEUE_BATCH = 64; // Process remote returns in batches - static constexpr uint32_t INVALID_POOL_ID = UINT32_MAX; // Detect sanitizers: GCC uses __SANITIZE_*, Clang uses __has_feature #if defined(__SANITIZE_ADDRESS__) || defined(__SANITIZE_THREAD__) @@ -55,20 +48,13 @@ class frame_allocator { #endif #ifdef ELIO_SANITIZER_ACTIVE - // Under sanitizers, bypass pooling entirely, but still keep the hidden - // header so delete-path metadata inspection remains valid. + // Under sanitizers, bypass pooling entirely for accurate leak detection static void* allocate(size_t size) { - void* block = ::operator new(HEADER_SIZE + size); - auto* header = static_cast(block); - header->source_pool_id = INVALID_POOL_ID; - header->next.store(nullptr, std::memory_order_relaxed); - header->owner = nullptr; - header->is_root = false; - return block_to_user(block); + return ::operator new(size); } static void deallocate(void* ptr, [[maybe_unused]] size_t size) noexcept { - delete_block(user_to_block(ptr)); + ::operator delete(ptr); } #else static void* allocate(size_t size) { @@ -84,8 +70,6 @@ class frame_allocator { // This is important because blocks may have been returned from remote threads auto* header = static_cast(block); header->source_pool_id = alloc.pool_id_; - header->owner = nullptr; - header->is_root = false; return block_to_user(block); } @@ -94,19 +78,10 @@ class frame_allocator { auto* header = static_cast(block); header->source_pool_id = alloc.pool_id_; header->next.store(nullptr, std::memory_order_relaxed); - header->owner = nullptr; - header->is_root = false; return 
block_to_user(block); } - // Large frames still carry a small header so owner metadata can be - // attached later without touching promise memory in operator delete. - void* block = ::operator new(HEADER_SIZE + size); - auto* header = static_cast(block); - header->source_pool_id = INVALID_POOL_ID; - header->next.store(nullptr, std::memory_order_relaxed); - header->owner = nullptr; - header->is_root = false; - return block_to_user(block); + // Fall back to standard allocation for large frames (no header) + return ::operator new(size); } static void deallocate(void* ptr, size_t size) noexcept { @@ -122,7 +97,7 @@ class frame_allocator { return; } // Pool full, delete the block (not the user pointer!) - delete_block(block); + ::operator delete(block); return; } else { // Cross-thread deallocation: push to source pool's remote queue @@ -132,50 +107,20 @@ class frame_allocator { return; } // Source pool no longer exists (thread exited), delete the block - delete_block(block); + ::operator delete(block); return; } } - // Large allocation - free the underlying block carrying the header - delete_block(user_to_block(ptr)); + // Large allocation - was allocated without header + ::operator delete(ptr); } #endif - static void set_owner_metadata(void* ptr, void* owner, bool is_root) noexcept { - if (!ptr) return; - auto* header = static_cast(user_to_block(ptr)); - header->owner = owner; - header->is_root = is_root; - } - - [[nodiscard]] static owner_metadata inspect_owner_metadata(void* ptr) noexcept { - if (!ptr) return {}; - auto* header = static_cast(user_to_block(ptr)); - return owner_metadata{ - .owner = header->owner, - .is_root = header->is_root, - .found = header->owner != nullptr, - }; - } - private: - static void delete_block(void* block) noexcept { -#if defined(__GNUC__) -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wmismatched-new-delete" -#endif - ::operator delete(block); -#if defined(__GNUC__) -#pragma GCC diagnostic pop -#endif - } - // Block 
header stored before user data struct block_header { uint32_t source_pool_id; // ID of the pool that allocated this block std::atomic next; // For MPSC queue linkage - void* owner; // Owning vthread domain, if attached later - bool is_root; // Root frame responsible for owner lifetime }; // Total block size including header, aligned for user data @@ -194,7 +139,7 @@ class frame_allocator { frame_allocator() : free_count_(0) , pool_id_(next_pool_id_.fetch_add(1, std::memory_order_relaxed)) - , remote_head_{0, {nullptr}, nullptr, false} // Dummy head for remote queue + , remote_head_{0, {nullptr}} // Initialize dummy head: pool_id=0, next=nullptr , remote_tail_(&remote_head_) { // Register this pool for cross-thread access register_pool(this); diff --git a/include/elio/coro/promise_base.hpp b/include/elio/coro/promise_base.hpp index 7d61e44..669744e 100644 --- a/include/elio/coro/promise_base.hpp +++ b/include/elio/coro/promise_base.hpp @@ -1,8 +1,6 @@ #pragma once -#include #include -#include #include #include #include @@ -90,15 +88,10 @@ class promise_base { promise_base() noexcept : frame_magic_(FRAME_MAGIC) , parent_(current_frame_) - , activation_parent_(nullptr) - , vthread_owner_(current_owner_) , debug_state_(coroutine_state::created) , debug_worker_id_(static_cast(-1)) , debug_id_(0) // Lazy allocation - only allocated when id() is called , affinity_(NO_AFFINITY) - , frame_size_(consume_next_frame_size()) - , started_(false) - , vthread_root_(false) { current_frame_ = this; } @@ -125,137 +118,10 @@ class promise_base { return parent_; } - // Construction-time parent relationship (legacy parent semantics) - [[nodiscard]] promise_base* construction_parent() const noexcept { - return parent_; - } - - // First-activation parent relationship (runtime await-chain semantics) - [[nodiscard]] promise_base* activation_parent() const noexcept { - return activation_parent_; - } - - void set_activation_parent(promise_base* parent) noexcept { - activation_parent_ = 
parent; - } - - bool bind_activation_parent_once(promise_base* parent) noexcept { - if (activation_parent_ == nullptr) { - activation_parent_ = parent; - activation_bindings_.fetch_add(1, std::memory_order_relaxed); - return true; - } - assert(activation_parent_ == parent && "activation_parent rebound inconsistently"); - return false; - } - - [[nodiscard]] void* vthread_owner() const noexcept { - return vthread_owner_; - } - - void set_vthread_owner(void* owner) noexcept { - vthread_owner_ = owner; - } - - bool bind_vthread_owner_once(void* owner) noexcept { - if (vthread_owner_ == nullptr) { - vthread_owner_ = owner; - owner_bindings_.fetch_add(1, std::memory_order_relaxed); - return true; - } - // Construction-time owner can differ from first-activation owner. - // For cold frames (not started yet), allow one-way owner transfer - // during first activation binding (e.g. task created in A, first - // awaited in B via spawn/join wrapper). - if (!started_ && vthread_owner_ != owner) { - vthread_owner_ = owner; - owner_bindings_.fetch_add(1, std::memory_order_relaxed); - return true; - } - assert(vthread_owner_ == owner && "vthread_owner rebound inconsistently"); - return false; - } - - [[nodiscard]] size_t frame_size() const noexcept { - return frame_size_; - } - - [[nodiscard]] bool started() const noexcept { - return started_; - } - - void mark_started() noexcept { - started_ = true; - } - - [[nodiscard]] bool is_vthread_root() const noexcept { - return vthread_root_; - } - - void set_vthread_root(bool value) noexcept { - vthread_root_ = value; - } - [[nodiscard]] static promise_base* current_frame() noexcept { return current_frame_; } - [[nodiscard]] static void* current_owner() noexcept { - return current_owner_; - } - - static void set_current_owner(void* owner) noexcept { - current_owner_ = owner; - } - - static void set_next_frame_size(size_t size) noexcept { - next_frame_size_ = size; - } - - static void record_root_owner_creation() noexcept { - 
root_owner_creations_.fetch_add(1, std::memory_order_relaxed); - } - - static void record_owner_context_restore() noexcept { - owner_context_restores_.fetch_add(1, std::memory_order_relaxed); - } - - static void record_ownerless_resume() noexcept { - ownerless_resumes_.fetch_add(1, std::memory_order_relaxed); - } - - [[nodiscard]] static uint64_t owner_bindings() noexcept { - return owner_bindings_.load(std::memory_order_relaxed); - } - - [[nodiscard]] static uint64_t activation_bindings() noexcept { - return activation_bindings_.load(std::memory_order_relaxed); - } - - [[nodiscard]] static uint64_t root_owner_creations() noexcept { - return root_owner_creations_.load(std::memory_order_relaxed); - } - - [[nodiscard]] static uint64_t owner_context_restores() noexcept { - return owner_context_restores_.load(std::memory_order_relaxed); - } - - [[nodiscard]] static uint64_t ownerless_resumes() noexcept { - return ownerless_resumes_.load(std::memory_order_relaxed); - } - - [[nodiscard]] static promise_base* from_handle_address(void* handle_addr) noexcept { - if (!handle_addr) return nullptr; - - // GCC/Clang coroutine frame layout: - // [resume_fn_ptr][destroy_fn_ptr][promise...] - constexpr size_t promise_offset = 2 * sizeof(void*); - auto* candidate = reinterpret_cast( - static_cast(handle_addr) + promise_offset); - - return candidate->frame_magic() == FRAME_MAGIC ? 
candidate : nullptr; - } - // Debug accessors [[nodiscard]] uint64_t frame_magic() const noexcept { return frame_magic_; } [[nodiscard]] const debug_location& location() const noexcept { return debug_location_; } @@ -300,21 +166,11 @@ class promise_base { void clear_affinity() noexcept { affinity_ = NO_AFFINITY; } private: - static size_t consume_next_frame_size() noexcept { - size_t size = next_frame_size_; - next_frame_size_ = 0; - return size; - } - // Magic number at start for debugger validation uint64_t frame_magic_; - // Construction-time stack tracking + // Virtual stack tracking promise_base* parent_; - // Runtime activation relationship - promise_base* activation_parent_; - // Runtime vthread ownership context - void* vthread_owner_; std::exception_ptr exception_; // Debug metadata @@ -325,21 +181,8 @@ class promise_base { // Thread affinity: NO_AFFINITY means can migrate freely size_t affinity_; - - // Frame metadata - size_t frame_size_; - bool started_; - bool vthread_root_; static inline thread_local promise_base* current_frame_ = nullptr; - static inline thread_local void* current_owner_ = nullptr; - static inline thread_local size_t next_frame_size_ = 0; - - static inline std::atomic owner_bindings_{0}; - static inline std::atomic activation_bindings_{0}; - static inline std::atomic root_owner_creations_{0}; - static inline std::atomic owner_context_restores_{0}; - static inline std::atomic ownerless_resumes_{0}; }; } // namespace elio::coro diff --git a/include/elio/coro/task.hpp b/include/elio/coro/task.hpp index 95dac1c..212f593 100644 --- a/include/elio/coro/task.hpp +++ b/include/elio/coro/task.hpp @@ -2,9 +2,7 @@ #include "promise_base.hpp" #include "frame_allocator.hpp" -#include "vthread_owner.hpp" #include -#include #include #include #include @@ -20,66 +18,6 @@ void schedule_handle(std::coroutine_handle<> handle) noexcept; namespace elio::coro { -inline void* relocate_spawn_go_root_frame(void* source, size_t frame_size) { - if (!source || 
frame_size == 0) return source; - - auto* owner = new vthread_owner(); - void* destination = owner->allocate(frame_size); - if (!destination) { - delete owner; - return source; - } - - std::memcpy(destination, source, frame_size); - - // GCC stores the get_return_object() return value (task{h}) inside the - // coroutine frame. That stored task contains a coroutine_handle whose - // _M_fr_ptr == source (the old frame address). After memcpy this stale - // self-reference remains, causing final_awaiter::await_suspend to receive - // the OLD frame address via h. Fix up every pointer-sized slot that still - // holds the old address. - auto* dst_bytes = static_cast(destination); - const auto old_val = reinterpret_cast(source); - const auto new_val = reinterpret_cast(destination); - for (size_t i = 0; i + sizeof(void*) <= frame_size; i += sizeof(void*)) { - uintptr_t slot; - std::memcpy(&slot, dst_bytes + i, sizeof(slot)); - if (slot == old_val) { - std::memcpy(dst_bytes + i, &new_val, sizeof(new_val)); - } - } - - auto* destination_promise = promise_base::from_handle_address(destination); - if (!destination_promise) { - delete owner; - return source; - } - - destination_promise->set_vthread_owner(owner); - destination_promise->set_activation_parent(nullptr); - destination_promise->set_vthread_root(true); - vthread_owner::mark_root_allocation(destination, true); - return destination; -} - -/// Free the backing allocation of a cold (pre-resume) coroutine frame -/// WITHOUT invoking C++ destructors. -/// -/// Used after memcpy relocation: the "live" state has been copied to a new address; -/// 'ptr' is the abandoned source. Running destructors here would double-destroy -/// objects already owned by the relocated frame (e.g. captured task handles, -/// shared_ptr ref counts would be incorrectly decremented). -inline void free_cold_frame_backing(void* ptr, size_t frame_size) noexcept { - if (!ptr) return; - // vthread_owner uses bump allocation; individual frees are no-ops. 
- // Memory is reclaimed when the owning domain is destroyed. - if (vthread_owner::inspect_allocation(ptr).found) { - return; - } - // frame_allocator: return the block to the pool without running any destructor. - frame_allocator::deallocate(ptr, frame_size); -} - template class task; @@ -331,28 +269,10 @@ class task { // Custom allocator for coroutine frames void* operator new(size_t size) { - promise_base::set_next_frame_size(size); - if (auto* owner = static_cast<::elio::coro::vthread_owner*>(promise_base::current_owner())) { - if (void* ptr = owner->allocate(size)) { - return ptr; - } - } return frame_allocator::allocate(size); } void operator delete(void* ptr, size_t size) noexcept { - auto owner_alloc = ::elio::coro::vthread_owner::inspect_allocation(ptr); - if (owner_alloc.found) { - if (owner_alloc.is_root) { - delete static_cast<::elio::coro::vthread_owner*>(owner_alloc.owner); - } - return; - } - - auto frame_owner = frame_allocator::inspect_owner_metadata(ptr); - if (frame_owner.found && frame_owner.is_root) { - delete static_cast<::elio::coro::vthread_owner*>(frame_owner.owner); - } frame_allocator::deallocate(ptr, size); } }; @@ -384,18 +304,6 @@ class task { /// Spawn this task on the current scheduler (fire-and-forget) /// The task will run asynchronously and self-destruct when complete void go() { - if (handle_) { - const size_t frame_size = handle_.promise().frame_size(); - void* old_ptr = handle_.address(); - void* relocated = relocate_spawn_go_root_frame(old_ptr, frame_size); - if (relocated != old_ptr) { - handle_ = handle_type::from_address(relocated); - // Do NOT call old.destroy(): that would run destructors and - // double-destroy captured objects now owned by the relocated frame. - // Instead, free only the backing allocation. 
- free_cold_frame_backing(old_ptr, frame_size); - } - } runtime::schedule_handle(release()); } @@ -406,15 +314,7 @@ class task { [[nodiscard]] bool await_ready() const noexcept { return false; } [[nodiscard]] std::coroutine_handle<> await_suspend(std::coroutine_handle<> awaiter) noexcept { - auto& promise = handle_.promise(); - auto* activation_parent = promise_base::from_handle_address(awaiter.address()); - promise.bind_activation_parent_once(activation_parent); - - void* owner = promise_base::current_owner(); - if (owner) { - promise.bind_vthread_owner_once(owner); - } - promise.continuation_ = awaiter; + handle_.promise().continuation_ = awaiter; return handle_; } @@ -451,28 +351,10 @@ class task { // Custom allocator for coroutine frames void* operator new(size_t size) { - promise_base::set_next_frame_size(size); - if (auto* owner = static_cast<::elio::coro::vthread_owner*>(promise_base::current_owner())) { - if (void* ptr = owner->allocate(size)) { - return ptr; - } - } return frame_allocator::allocate(size); } void operator delete(void* ptr, size_t size) noexcept { - auto owner_alloc = ::elio::coro::vthread_owner::inspect_allocation(ptr); - if (owner_alloc.found) { - if (owner_alloc.is_root) { - delete static_cast<::elio::coro::vthread_owner*>(owner_alloc.owner); - } - return; - } - - auto frame_owner = frame_allocator::inspect_owner_metadata(ptr); - if (frame_owner.found && frame_owner.is_root) { - delete static_cast<::elio::coro::vthread_owner*>(frame_owner.owner); - } frame_allocator::deallocate(ptr, size); } }; @@ -504,18 +386,6 @@ class task { /// Spawn this task on the current scheduler (fire-and-forget) /// The task will run asynchronously and self-destruct when complete void go() { - if (handle_) { - const size_t frame_size = handle_.promise().frame_size(); - void* old_ptr = handle_.address(); - void* relocated = relocate_spawn_go_root_frame(old_ptr, frame_size); - if (relocated != old_ptr) { - handle_ = handle_type::from_address(relocated); - // Do 
NOT call old.destroy(): that would run destructors and - // double-destroy captured objects now owned by the relocated frame. - // Instead, free only the backing allocation. - free_cold_frame_backing(old_ptr, frame_size); - } - } runtime::schedule_handle(release()); } @@ -526,15 +396,7 @@ class task { [[nodiscard]] bool await_ready() const noexcept { return false; } [[nodiscard]] std::coroutine_handle<> await_suspend(std::coroutine_handle<> awaiter) noexcept { - auto& promise = handle_.promise(); - auto* activation_parent = promise_base::from_handle_address(awaiter.address()); - promise.bind_activation_parent_once(activation_parent); - - void* owner = promise_base::current_owner(); - if (owner) { - promise.bind_vthread_owner_once(owner); - } - promise.continuation_ = awaiter; + handle_.promise().continuation_ = awaiter; return handle_; } diff --git a/include/elio/coro/vthread_owner.hpp b/include/elio/coro/vthread_owner.hpp deleted file mode 100644 index 337edd2..0000000 --- a/include/elio/coro/vthread_owner.hpp +++ /dev/null @@ -1,214 +0,0 @@ -#pragma once - -#include -#include -#include -#include -#include -#include - -namespace elio::coro { - -/// Segmented owner storage for coroutine frames within one vthread domain. -/// -/// This allocator prioritizes pointer stability and simple O(1) bump allocation -/// in the active segment. Individual frame deallocation is intentionally a no-op; -/// memory is reclaimed when the owner is destroyed. 
-class vthread_owner { -public: - static constexpr size_t INITIAL_SEGMENT_SIZE = 4096; - static constexpr uint64_t ALLOCATION_MAGIC = 0x564F574E4552464DULL; - - struct allocation_info { - vthread_owner* owner = nullptr; - bool is_root = false; - bool found = false; - }; - - vthread_owner() { - add_segment(INITIAL_SEGMENT_SIZE); - } - - ~vthread_owner() { - segment* seg = head_; - while (seg) { - segment* next = seg->next; - unregister_segment(seg->data, seg->capacity); - ::operator delete(seg->data); - delete seg; - seg = next; - } - } - - vthread_owner(const vthread_owner&) = delete; - vthread_owner& operator=(const vthread_owner&) = delete; - vthread_owner(vthread_owner&&) = delete; - vthread_owner& operator=(vthread_owner&&) = delete; - - [[nodiscard]] void* allocate(size_t size, - size_t alignment = alignof(std::max_align_t), - bool is_root = false) { - if (size == 0) return nullptr; - if (alignment == 0) alignment = alignof(std::max_align_t); - if (!current_) return nullptr; - - void* ptr = try_allocate_in_segment(current_, size, alignment, this, is_root); - if (ptr) return ptr; - - const size_t required = size + header_size(alignment) + alignment; - const size_t next_size = std::max(required, current_->capacity * 2); - add_segment(next_size); - return try_allocate_in_segment(current_, size, alignment, this, is_root); - } - - static void mark_root_allocation(void* ptr, bool is_root) noexcept { - auto* header = header_from_user(ptr); - if (!header || header->magic != ALLOCATION_MAGIC) return; - header->is_root = is_root; - } - - [[nodiscard]] static allocation_info inspect_allocation(const void* ptr) noexcept { - if (!is_in_registered_segment(ptr)) { - return {}; - } - - auto* header = header_from_user(ptr); - if (!header || header->magic != ALLOCATION_MAGIC) { - return {}; - } - - return allocation_info{ - .owner = header->owner, - .is_root = header->is_root, - .found = true, - }; - } - - [[nodiscard]] bool owns_address(const void* ptr) const noexcept { - auto 
addr = reinterpret_cast(ptr); - segment* seg = head_; - while (seg) { - auto begin = reinterpret_cast(seg->data); - auto end = begin + seg->capacity; - if (addr >= begin && addr < end) { - return true; - } - seg = seg->next; - } - return false; - } - -private: - struct segment_range { - uintptr_t begin; - uintptr_t end; - }; - - struct allocation_header { - uint64_t magic; - vthread_owner* owner; - bool is_root; - }; - - struct segment { - char* data; - size_t capacity; - size_t used; - segment* next; - }; - - static size_t align_up(size_t value, size_t alignment) noexcept { - const size_t mask = alignment - 1; - return (value + mask) & ~mask; - } - - static size_t header_size(size_t alignment) noexcept { - return align_up(sizeof(allocation_header), alignment); - } - - static allocation_header* header_from_user(const void* ptr) noexcept { - if (!ptr) return nullptr; - auto* bytes = static_cast(ptr); - auto* header = reinterpret_cast(bytes - sizeof(allocation_header)); - return const_cast(header); - } - - static bool is_in_registered_segment(const void* ptr) { - if (!ptr) return false; - const auto addr = reinterpret_cast(ptr); - std::lock_guard lock(segment_registry_mutex_); - for (const auto& range : segment_registry_) { - if (addr >= range.begin && addr < range.end) { - return true; - } - } - return false; - } - - static void register_segment(const char* data, size_t capacity) { - std::lock_guard lock(segment_registry_mutex_); - segment_registry_.push_back(segment_range{ - .begin = reinterpret_cast(data), - .end = reinterpret_cast(data) + capacity, - }); - } - - static void unregister_segment(const char* data, size_t capacity) { - const auto begin = reinterpret_cast(data); - const auto end = begin + capacity; - std::lock_guard lock(segment_registry_mutex_); - auto it = std::remove_if(segment_registry_.begin(), segment_registry_.end(), - [&](const segment_range& range) { - return range.begin == begin && range.end == end; - }); - segment_registry_.erase(it, 
segment_registry_.end()); - } - - static void* try_allocate_in_segment(segment* seg, - size_t size, - size_t alignment, - vthread_owner* owner, - bool is_root) noexcept { - const size_t header = header_size(alignment); - const size_t offset = align_up(seg->used + header, alignment); - if (offset + size > seg->capacity) { - return nullptr; - } - - auto* allocation = reinterpret_cast(seg->data + offset - sizeof(allocation_header)); - allocation->magic = ALLOCATION_MAGIC; - allocation->owner = owner; - allocation->is_root = is_root; - - void* ptr = seg->data + offset; - seg->used = offset + size; - return ptr; - } - - void add_segment(size_t capacity) { - auto* seg = new segment{ - .data = static_cast(::operator new(capacity)), - .capacity = capacity, - .used = 0, - .next = nullptr, - }; - register_segment(seg->data, seg->capacity); - - if (!head_) { - head_ = seg; - current_ = seg; - return; - } - - current_->next = seg; - current_ = seg; - } - - segment* head_ = nullptr; - segment* current_ = nullptr; - - static inline std::mutex segment_registry_mutex_{}; - static inline std::vector segment_registry_{}; -}; - -} // namespace elio::coro diff --git a/include/elio/rpc/rpc_client.hpp b/include/elio/rpc/rpc_client.hpp index 7b2271f..e0aabc6 100644 --- a/include/elio/rpc/rpc_client.hpp +++ b/include/elio/rpc/rpc_client.hpp @@ -33,7 +33,6 @@ #include #include #include -#include #include #include #include diff --git a/include/elio/runtime/scheduler.hpp b/include/elio/runtime/scheduler.hpp index 22b0aaf..b060c69 100644 --- a/include/elio/runtime/scheduler.hpp +++ b/include/elio/runtime/scheduler.hpp @@ -296,24 +296,7 @@ inline void schedule_handle(std::coroutine_handle<> handle) noexcept { sched->spawn(handle); } else { // No scheduler - run synchronously. Task self-destructs via final_suspend. 
- if (!handle.done()) { - coro::ensure_vthread_owner(handle); - auto* frame = coro::get_promise_base(handle.address()); - void* previous_owner = nullptr; - if (frame) { - previous_owner = coro::promise_base::current_owner(); - coro::promise_base::set_current_owner(frame->vthread_owner()); - frame->mark_started(); - if (!frame->vthread_owner()) { - coro::promise_base::record_ownerless_resume(); - } - } - handle.resume(); - if (frame) { - coro::promise_base::set_current_owner(previous_owner); - coro::promise_base::record_owner_context_restore(); - } - } + if (!handle.done()) handle.resume(); } } @@ -450,28 +433,8 @@ inline void worker_thread::run_task(std::coroutine_handle<> handle) noexcept { } if (!handle || handle.done()) [[unlikely]] return; - - coro::ensure_vthread_owner(handle); - auto* frame = coro::get_promise_base(handle.address()); - void* previous_owner = nullptr; - if (frame) { - previous_owner = coro::promise_base::current_owner(); - coro::promise_base::set_current_owner(frame->vthread_owner()); - frame->mark_started(); - frame->set_worker_id(static_cast(worker_id_)); - frame->set_state(coro::coroutine_state::running); - if (!frame->vthread_owner()) { - coro::promise_base::record_ownerless_resume(); - } - } handle.resume(); - - if (frame) { - coro::promise_base::set_current_owner(previous_owner); - coro::promise_base::record_owner_context_restore(); - } - tasks_executed_.fetch_add(1, std::memory_order_relaxed); update_last_task_time(); diff --git a/tools/elio-gdb.py b/tools/elio-gdb.py old mode 100644 new mode 100755 index 95f0dd8..155fe64 --- a/tools/elio-gdb.py +++ b/tools/elio-gdb.py @@ -32,8 +32,6 @@ 4: "failed" } -EXCEPTION_PTR_SIZE = 8 - def read_atomic(val): """Read value from std::atomic.""" @@ -79,13 +77,6 @@ def read_cstring(addr): addr_val = int(addr) except: return None - - -def promise_to_handle_addr(promise_addr, ptr_size): - """Convert a promise_base* back to the coroutine handle address.""" - if promise_addr == 0: - return 0 - return 
promise_addr - 2 * ptr_size if addr_val == 0: return None @@ -129,22 +120,14 @@ def get_frame_from_handle(handle_addr): if magic != FRAME_MAGIC: return None - # Read promise_base fields. - # Current layout begins with: - # magic(8) + parent(ptr) + activation_parent(ptr) + vthread_owner(ptr) - # + exception_ptr + debug_location + debug_state + debug_worker_id + debug_id - + # Read promise_base fields + # Layout: magic(8) + parent(8) + exception(16) + debug_location(24) + state(1) + pad(3) + worker_id(4) + debug_id(8) + parent_bytes = inferior.read_memory(promise_addr + 8, ptr_size) parent = int.from_bytes(bytes(parent_bytes), 'little') - - activation_parent_bytes = inferior.read_memory(promise_addr + 8 + ptr_size, ptr_size) - activation_parent = int.from_bytes(bytes(activation_parent_bytes), 'little') - - owner_bytes = inferior.read_memory(promise_addr + 8 + 2 * ptr_size, ptr_size) - owner = int.from_bytes(bytes(owner_bytes), 'little') - # debug_location follows magic + 3 pointers + exception_ptr - loc_offset = 8 + 3 * ptr_size + EXCEPTION_PTR_SIZE + # debug_location starts at offset 8+8+16=32 + loc_offset = 8 + ptr_size + 16 # magic + parent + exception_ptr file_ptr_bytes = inferior.read_memory(promise_addr + loc_offset, ptr_size) file_ptr = int.from_bytes(bytes(file_ptr_bytes), 'little') @@ -154,8 +137,8 @@ def get_frame_from_handle(handle_addr): line_bytes = inferior.read_memory(promise_addr + loc_offset + 2 * ptr_size, 4) line = int.from_bytes(bytes(line_bytes), 'little') - # debug_location = file(ptr) + function(ptr) + line(u32) + padding(4) - state_offset = loc_offset + 2 * ptr_size + 8 + # state at loc_offset + 24 + state_offset = loc_offset + 2 * ptr_size + 4 state_byte = inferior.read_memory(promise_addr + state_offset, 1) state = int.from_bytes(bytes(state_byte), 'little') @@ -166,18 +149,6 @@ def get_frame_from_handle(handle_addr): # debug_id at worker_id + 4 debug_id_bytes = inferior.read_memory(promise_addr + state_offset + 8, 8) debug_id = 
int.from_bytes(bytes(debug_id_bytes), 'little') - - affinity_bytes = inferior.read_memory(promise_addr + state_offset + 16, ptr_size) - affinity = int.from_bytes(bytes(affinity_bytes), 'little') - - frame_size_bytes = inferior.read_memory(promise_addr + state_offset + 16 + ptr_size, ptr_size) - frame_size = int.from_bytes(bytes(frame_size_bytes), 'little') - - started_byte = inferior.read_memory(promise_addr + state_offset + 16 + 2 * ptr_size, 1) - started = int.from_bytes(bytes(started_byte), 'little') != 0 - - root_byte = inferior.read_memory(promise_addr + state_offset + 16 + 2 * ptr_size + 1, 1) - is_root = int.from_bytes(bytes(root_byte), 'little') != 0 # Read strings file_str = None @@ -200,15 +171,9 @@ def get_frame_from_handle(handle_addr): "state": COROUTINE_STATES.get(state, "unknown"), "worker_id": worker_id, "parent": parent, - "activation_parent": activation_parent, - "owner": owner, "file": file_str, "function": func_str, "line": line, - "affinity": affinity, - "frame_size": frame_size, - "started": started, - "is_root": is_root, "address": handle_addr, "promise_addr": promise_addr } @@ -220,36 +185,21 @@ def walk_virtual_stack(handle_addr): """Walk the virtual stack from a coroutine handle.""" stack = [] visited = set() - ptr_size = gdb.lookup_type("void").pointer().sizeof info = get_frame_from_handle(handle_addr) if info: - relation = "activation_parent" if info["activation_parent"] != 0 else "parent" - info["edge"] = "self" stack.append(info) - visited.add(info["promise_addr"]) + visited.add(handle_addr) - parent = info[relation] + # Walk parent chain + parent = info["parent"] while parent != 0 and parent not in visited: visited.add(parent) - parent_handle = promise_to_handle_addr(parent, ptr_size) - parent_info = get_frame_from_handle(parent_handle) - if parent_info is None: - stack.append({ - "id": 0, - "address": parent_handle, - "promise_addr": parent, - "state": relation, - "function": None, - "file": None, - "line": 0, - "worker_id": 
0xFFFFFFFF, - "edge": relation, - }) - break - parent_info["edge"] = relation - stack.append(parent_info) - parent = parent_info[relation] + # Parent is a promise_base*, need to find the frame address + # This is tricky - for now just note we have a parent + stack.append({"id": 0, "address": parent, "state": "parent", + "function": None, "file": None, "line": 0, "worker_id": 0xFFFFFFFF}) + break return stack @@ -553,12 +503,6 @@ def invoke(self, arg, from_tty): print(f" Worker: {worker_id}") print(f" Handle: 0x{info['address']:016x}") print(f" Promise: 0x{info['promise_addr']:016x}") - print(f" Owner: 0x{info['owner']:016x}") - print(f" Root: {'yes' if info['is_root'] else 'no'}") - print(f" Started: {'yes' if info['started'] else 'no'}") - print(f" FrameSz: {info['frame_size']}") - print(f" Parent: 0x{info['parent']:016x}") - print(f" ActParent:0x{info['activation_parent']:016x}") if info["function"]: print(f" Function: {info['function']}") @@ -568,8 +512,7 @@ def invoke(self, arg, from_tty): loc += f":{info['line']}" print(f" Location: {loc}") - chain_name = "activation" if info["activation_parent"] != 0 else "construction" - print(f"\n Virtual Call Stack ({chain_name} chain):") + print(f"\n Virtual Call Stack:") stack = walk_virtual_stack(task_addr) for i, frame in enumerate(stack): func = frame["function"] or "" @@ -578,8 +521,7 @@ def invoke(self, arg, from_tty): loc = f" at {frame['file']}" if frame["line"] > 0: loc += f":{frame['line']}" - edge = frame.get("edge", "self") - print(f" #{i:<3} [{edge}] {func}{loc}") + print(f" #{i:<3} {func}{loc}") return diff --git a/tools/elio-lldb.py b/tools/elio-lldb.py old mode 100644 new mode 100755 index d26e84a..249acd8 --- a/tools/elio-lldb.py +++ b/tools/elio-lldb.py @@ -6,7 +6,8 @@ It finds coroutine frames by traversing the scheduler's worker queues. Usage: - In LLDB: command script import /path/to/elio-lldb.py + Preferred: command script import /path/to/elio_lldb.py + (This file remains the implementation module.) 
Commands: elio list - List all vthreads from worker queues @@ -32,8 +33,6 @@ 4: "failed" } -EXCEPTION_PTR_SIZE = 8 - def read_cstring(process, addr): """Read a null-terminated string from memory.""" @@ -78,13 +77,6 @@ def read_pointer(process, addr): return process.ReadUnsignedFromMemory(addr, ptr_size, error) -def promise_to_handle_addr(promise_addr, ptr_size): - """Convert a promise_base* back to a coroutine handle address.""" - if promise_addr == 0: - return 0 - return promise_addr - 2 * ptr_size - - def get_scheduler(target, process): """Find the current scheduler.""" # Try to find scheduler::current() @@ -137,21 +129,19 @@ def get_frame_from_handle(process, handle_addr): if magic != FRAME_MAGIC: return None - # Read promise_base fields. - # Current layout begins with: - # magic(8) + parent(ptr) + activation_parent(ptr) + vthread_owner(ptr) - # + exception_ptr + debug_location + debug_state + debug_worker_id + debug_id - + # Read promise_base fields + # Layout: magic(8) + parent(8) + exception(16) + debug_location(24) + state(1) + pad(3) + worker_id(4) + debug_id(8) + parent = read_pointer(process, promise_addr + 8) - activation_parent = read_pointer(process, promise_addr + 8 + ptr_size) - owner = read_pointer(process, promise_addr + 8 + 2 * ptr_size) - loc_offset = 8 + 3 * ptr_size + EXCEPTION_PTR_SIZE + # debug_location starts at offset 8+8+16=32 + loc_offset = 8 + ptr_size + 16 # magic + parent + exception_ptr file_ptr = read_pointer(process, promise_addr + loc_offset) func_ptr = read_pointer(process, promise_addr + loc_offset + ptr_size) line = read_uint32(process, promise_addr + loc_offset + 2 * ptr_size) - state_offset = loc_offset + 2 * ptr_size + 8 + # state at loc_offset + 24 + state_offset = loc_offset + 2 * ptr_size + 4 state = read_uint8(process, promise_addr + state_offset) # worker_id at state_offset + 4 (after 3 bytes padding) @@ -159,26 +149,15 @@ def get_frame_from_handle(process, handle_addr): # debug_id at worker_id + 4 debug_id = 
read_uint64(process, promise_addr + state_offset + 8) - - affinity = read_pointer(process, promise_addr + state_offset + 16) - frame_size = read_pointer(process, promise_addr + state_offset + 16 + ptr_size) - started = read_uint8(process, promise_addr + state_offset + 16 + 2 * ptr_size) != 0 - is_root = read_uint8(process, promise_addr + state_offset + 16 + 2 * ptr_size + 1) != 0 return { "id": debug_id, "state": COROUTINE_STATES.get(state, "unknown"), "worker_id": worker_id, "parent": parent, - "activation_parent": activation_parent, - "owner": owner, "file": read_cstring(process, file_ptr), "function": read_cstring(process, func_ptr), "line": line, - "affinity": affinity, - "frame_size": frame_size, - "started": started, - "is_root": is_root, "address": handle_addr, "promise_addr": promise_addr } @@ -193,33 +172,18 @@ def walk_virtual_stack(process, handle_addr): info = get_frame_from_handle(process, handle_addr) if info: - ptr_size = process.GetAddressByteSize() - relation = "activation_parent" if info["activation_parent"] != 0 else "parent" - info["edge"] = "self" stack.append(info) - visited.add(info["promise_addr"]) + visited.add(handle_addr) - parent = info[relation] + # Walk parent chain + parent = info["parent"] while parent != 0 and parent not in visited: visited.add(parent) - parent_handle = promise_to_handle_addr(parent, ptr_size) - parent_info = get_frame_from_handle(process, parent_handle) - if parent_info is None: - stack.append({ - "id": 0, - "address": parent_handle, - "promise_addr": parent, - "state": relation, - "function": None, - "file": None, - "line": 0, - "worker_id": 0xFFFFFFFF, - "edge": relation, - }) - break - parent_info["edge"] = relation - stack.append(parent_info) - parent = parent_info[relation] + # Parent is a promise_base*, need to find the frame address + # For now just note we have a parent + stack.append({"id": 0, "address": parent, "state": "parent", + "function": None, "file": None, "line": 0, "worker_id": 0xFFFFFFFF}) + 
break return stack @@ -506,12 +470,6 @@ def elio_info(debugger, command, result, internal_dict): result.AppendMessage(f" Worker: {worker}") result.AppendMessage(f" Handle: 0x{info['address']:016x}") result.AppendMessage(f" Promise: 0x{info['promise_addr']:016x}") - result.AppendMessage(f" Owner: 0x{info['owner']:016x}") - result.AppendMessage(f" Root: {'yes' if info['is_root'] else 'no'}") - result.AppendMessage(f" Started: {'yes' if info['started'] else 'no'}") - result.AppendMessage(f" FrameSz: {info['frame_size']}") - result.AppendMessage(f" Parent: 0x{info['parent']:016x}") - result.AppendMessage(f" ActParent:0x{info['activation_parent']:016x}") if info["function"]: result.AppendMessage(f" Function: {info['function']}") @@ -521,8 +479,7 @@ def elio_info(debugger, command, result, internal_dict): loc += f":{info['line']}" result.AppendMessage(f" Location: {loc}") - chain_name = "activation" if info["activation_parent"] != 0 else "construction" - result.AppendMessage(f"\n Virtual Call Stack ({chain_name} chain):") + result.AppendMessage(f"\n Virtual Call Stack:") stack = walk_virtual_stack(process, task_addr) for i, frame in enumerate(stack): func = frame["function"] or "" @@ -531,8 +488,7 @@ def elio_info(debugger, command, result, internal_dict): loc = f" at {frame['file']}" if frame["line"] > 0: loc += f":{frame['line']}" - edge = frame.get("edge", "self") - result.AppendMessage(f" #{i:<3} [{edge}] {func}{loc}") + result.AppendMessage(f" #{i:<3} {func}{loc}") return diff --git a/wiki/API-Reference.md b/wiki/API-Reference.md index 07b1fe1..a4cc311 100644 --- a/wiki/API-Reference.md +++ b/wiki/API-Reference.md @@ -74,12 +74,6 @@ int b = co_await h2; int c = co_await h3; ``` -**Semantics Notes:** - -- Direct `co_await child` usually keeps `child` in the caller's current vthread and does not require relocation. -- `spawn()` and `go()` always establish a new vthread root. The root frame is attached to a fresh owner domain before first execution. 
-- Work stealing may move execution to another worker thread, but does not by itself change frame ownership. - ### `join_handle` Handle for awaiting spawned tasks. Returned by `task::spawn()`. diff --git a/wiki/Core-Concepts.md b/wiki/Core-Concepts.md index 3d6ae1c..692469f 100644 --- a/wiki/Core-Concepts.md +++ b/wiki/Core-Concepts.md @@ -244,38 +244,20 @@ All three functions are also available in the `elio` namespace as convenience al **Why MPSC inbox for external submissions.** Cross-thread task submissions (e.g., spawning a task onto a specific worker from another thread) go through a bounded MPSC ring buffer rather than directly into the Chase-Lev deque. This separation keeps the deque's invariants simple -- only the owner ever pushes -- and the bounded capacity with cache-line aligned slots (`alignas(64)`) eliminates false sharing between producers and the consumer. -## Virtual Stack And VThread Ownership +## Virtual Stack C++20 stackless coroutines do not maintain a call stack in the traditional sense. When a coroutine suspends, the compiler-generated frame is stored on the heap, but the chain of callers that led to that suspension point is lost. This makes debugging difficult -- tools like `gdb bt` show the scheduler's dispatch loop rather than the logical call chain of coroutines. -Elio now tracks three related but different concepts: +Elio reconstructs this information through a **virtual stack**: an intrusive linked list of `promise_base` objects connected by `parent_` pointers. Each `promise_base` constructor links itself to the current frame via the `current_frame_` thread-local, and the destructor restores the previous frame. This gives every coroutine a pointer to the coroutine that `co_await`ed it. -- **Construction parent**: recorded in `parent_` when the promise is constructed from `current_frame_`. This is cheap and still useful for debugging, but it reflects construction nesting, not necessarily the true await chain. 
-- **Activation parent**: recorded when a cold coroutine is first activated via direct `co_await` or by a detached/joinable spawn boundary. This reflects the logical runtime relationship more accurately than construction order. -- **Vthread owner**: the ownership domain that is responsible for the frame's memory and for propagating vthread-local execution context across suspension and resumption. - -This distinction matters because construction order and first execution order can differ. In an expression such as `co_await bar(foo())`, `foo()` is constructed before `bar(...)`, but `bar` executes first and only later activates `foo`. A single parent pointer is therefore not enough to describe both debug lineage and runtime ownership. - -### Attachment And Relocation Rules - -- **Direct `co_await child`**: the common fast path. If the child is first activated inside the caller's current vthread, no relocation is needed. -- **`spawn()` / `go()`**: these create a new vthread boundary. The spawned root frame is cold-relocated exactly once into a fresh owner domain before first resume. -- **Work stealing**: no relocation is needed. A suspended leaf may migrate to another worker, but frame ownership remains unchanged; only the executing worker changes. - -The overhead remains small: metadata is still stored in `promise_base`, while owner-backed roots use segmented storage and cold-frame relocation only when ownership boundaries require it. +The overhead is minimal -- one pointer per coroutine frame, set during construction and cleared during destruction. ### What it enables - **`elio-pstack`**: A CLI tool that attaches to a running process (or reads a coredump) and walks the virtual stack chains to print coroutine backtraces, similar to `pstack` for threads. -- **Debugger extensions**: `elio-gdb.py` and the LLDB entrypoint `elio_lldb.py` (loading `elio-lldb.py`) use the same frame linkage to implement `elio bt` (backtrace) and `elio list` (list active coroutines). 
+- **Debugger extensions**: `elio-gdb.py` and `elio_lldb.py` use the same frame linkage to implement `elio bt` (backtrace) and `elio list` (list active coroutines). - **Exception propagation**: When a coroutine throws, `unhandled_exception()` captures it in the promise. The parent coroutine can then rethrow the exception when it `co_await`s the child's result, propagating errors up the logical call chain. -### Current limits - -- Construction-parent traversal is still the most stable always-available chain for debugger tooling. -- Activation-parent and vthread-owner metadata are the runtime-accurate semantics for ownership and first activation. -- A nested direct-`co_await` expression may still have different construction order and execution order; this is expected and documented behavior. - ### Frame metadata Each `promise_base` also carries debug metadata: diff --git a/wiki/Debugging.md b/wiki/Debugging.md index e60e584..a822884 100644 --- a/wiki/Debugging.md +++ b/wiki/Debugging.md @@ -9,35 +9,23 @@ Elio coroutines maintain debug metadata in each frame: - State (created, running, suspended, completed, failed) - Source location (file, function, line) - Worker thread assignment -- Construction parent, activation parent, and vthread ownership metadata +- Parent pointer for virtual stack traversal The debugger extensions find coroutine frames by traversing the scheduler's worker queues (Chase-Lev deque and MPSC inbox). This approach has **zero runtime overhead** - no global registry or synchronization is required. -Important limitation: today these tools primarily see **queued** coroutines. A coroutine that is currently running on a worker thread is not guaranteed to appear until it suspends or is re-enqueued. - ## Virtual Stack -C++20 stackless coroutines allocate each frame independently on the heap. When a coroutine suspends, the native call stack unwinds completely, so traditional stack traces cannot show the logical call chain. 
Elio reconstructs this information through an intrusive virtual-stack and vthread metadata model built into every coroutine frame. +C++20 stackless coroutines allocate each frame independently on the heap. When a coroutine suspends, the native call stack unwinds completely, so traditional stack traces cannot show the logical call chain. Elio reconstructs this information through an intrusive virtual stack built into every coroutine frame. ### How It Works -Each coroutine's promise type inherits from `promise_base`, which now carries multiple relationships: - -- `parent_`: the construction-time parent captured from `current_frame_` -- `activation_parent_`: the coroutine that first activates a cold child -- `vthread_owner_`: the owner domain responsible for memory ownership and resume context - -`current_frame_` is still used to capture low-cost construction lineage. Runtime execution, however, is determined later: +Each coroutine's promise type inherits from `promise_base`, which contains a `parent_` pointer. When coroutine A `co_await`s coroutine B, B's promise stores a pointer back to A's promise. This forms a singly-linked list from the innermost frame to the outermost caller, mirroring what a native call stack would look like if the coroutines were regular functions. -- direct `co_await` binds activation-parent metadata when the child is first awaited -- `spawn()` and `go()` create a fresh vthread owner and cold-relocate the spawned root before first resume -- work stealing resumes an already-owned suspended leaf on a different worker without changing frame ownership - -This means the debugger-visible construction chain and the runtime ownership chain are related, but not identical. In particular, nested expressions such as `co_await bar(foo())` may have construction order different from first execution order. +The thread-local `current_frame_` tracks which frame is currently executing. 
When a new coroutine starts, it reads `current_frame_` to set its `parent_`, then installs itself as the new `current_frame_`. On completion or suspension, the previous frame is restored.
 
 ### Frame Validation
 
-Each `promise_base` contains a `frame_magic_` field set to `0x454C494F46524D45` (the ASCII string "ELIOFRME"). The debugger tools check this magic value when traversing memory to distinguish valid Elio coroutine frames from arbitrary data. This is especially important during coredump analysis, where the debugger walks raw memory without type information.
+Each `promise_base` contains a `frame_magic_` field set to `0x454C494F46524D45` (the ASCII string "ELIOFRME"). The debugger tools check this magic value when traversing memory to distinguish valid Elio coroutine frames from arbitrary data. This is especially important during coredump analysis, where the debugger walks raw memory without type information.
 
 ### Debug Metadata
 
@@ -45,16 +33,14 @@ Every frame carries the following debug metadata with no additional allocation:
 
 | Field | Description |
 |-------|-------------|
-| `debug_id_` | Unique monotonic identifier assigned on demand |
-| `debug_state_` | Current state: created, running, suspended, completed, or failed |
-| `debug_worker_id_` | Index of the worker thread the frame is assigned to (or -1 if unassigned) |
-| `debug_location_` | Source file, function name, and line number |
-| `parent_` | Construction-time parent pointer used for low-overhead lineage/debug traversal |
-| `activation_parent_` | Runtime first-activation parent for direct await / detached activation semantics |
-| `vthread_owner_` | Owner domain used for frame ownership and vthread context restoration |
+| `id_` | Unique monotonic identifier assigned at creation |
+| `state_` | Current state: created, running, suspended, completed, or failed |
+| `worker_id_` | Index of the worker thread the frame is assigned to (or -1 if unassigned) |
+| `file_`, `function_`, `line_` | Source 
location captured via `std::source_location` or manual `set_location()` | +| `parent_` | Pointer to the calling frame's promise, forming the virtual stack chain | | `frame_magic_` | Magic number for frame integrity validation | -The debugger tools (`elio-pstack`, `elio-gdb.py`, `elio_lldb.py`) use this metadata to present coroutine state in a format familiar to anyone who has used `pstack` or `thread apply all bt`. They now prefer the activation-parent chain when it is present, and fall back to the construction-parent chain otherwise. Owner metadata is shown separately in `elio info` style output. +The debugger tools (`elio-pstack`, `elio-gdb.py`, `elio_lldb.py`) use this metadata to present coroutine state in a format familiar to anyone who has used `pstack` or `thread apply all bt`. ## Tools @@ -62,7 +48,7 @@ The debugger tools (`elio-pstack`, `elio-gdb.py`, `elio_lldb.py`) use this metad |------|-------------| | `elio-pstack` | Command-line tool similar to `pstack` | | `elio-gdb.py` | GDB Python extension | -| `elio_lldb.py` | LLDB Python entrypoint | +| `elio_lldb.py` | LLDB import entrypoint (loads `elio-lldb.py`) | ## elio-pstack @@ -160,18 +146,12 @@ vthread #1 Worker: 0 Handle: 0x00007f1234567890 Promise: 0x00007f12345678a0 - Owner: 0x00007f1234500000 - Root: no - Started: yes - FrameSz: 160 - Parent: 0x00007f1234567000 - ActParent:0x00007f1234567000 Function: worker_task Location: debug_test.cpp:84 - Virtual Call Stack (activation chain): - #0 [self] worker_task at debug_test.cpp:84 - #1 [activation_parent] async_main at debug_test.cpp:112 + Virtual Call Stack: + #0 worker_task at debug_test.cpp:84 + #1 async_main at debug_test.cpp:112 (gdb) elio workers Scheduler: running @@ -219,8 +199,6 @@ The LLDB extension provides the same commands as GDB: | `elio workers` | Show worker thread information | | `elio stats` | Show scheduler statistics | -`elio info` in both GDB and LLDB now prints owner/root/started metadata in addition to source location and queue 
worker information. - ## Setting Debug Location For more accurate debugging information, you can manually set the debug location in your coroutines: diff --git a/wiki/Getting-Started.md b/wiki/Getting-Started.md index db3209a..7ec3076 100644 --- a/wiki/Getting-Started.md +++ b/wiki/Getting-Started.md @@ -204,8 +204,8 @@ include/elio/ ├── elio.hpp # Main include ├── coro/ # Coroutine primitives │ ├── task.hpp # task -│ ├── promise_base.hpp # Vthread metadata base -│ ├── frame.hpp # Frame and owner introspection +│ ├── promise_base.hpp # Virtual stack base +│ ├── frame.hpp # Stack introspection │ ├── frame_allocator.hpp # Frame memory pool │ ├── cancel_token.hpp # Cooperative cancellation │ └── awaitable_base.hpp # Awaitable interface @@ -257,18 +257,6 @@ include/elio/ The repository also contains `examples/` with runnable programs, `tests/` with Catch2 tests, and `tools/` with debugging utilities (`elio-pstack`, GDB/LLDB extensions). -## VThread Model At A Glance - -Elio's coroutine runtime distinguishes three relationships that are easy to conflate if you only think in terms of a single parent pointer: - -- **Construction parent**: captured when the promise is created -- **Activation parent**: captured when a cold coroutine is first activated -- **Vthread owner**: the owner domain responsible for frame memory and resume context - -In the common direct-`co_await` case, these relationships often line up and no relocation is needed. In `spawn()` and `go()` cases, however, Elio creates a new vthread boundary and moves the cold root frame into a fresh owner domain before first execution. - -The debugging tools expose this distinction explicitly: `elio info` reports owner/root/parent metadata, while stack-style output prefers the activation-parent chain when it is available. 
- ## Next Steps - Read [[Core Concepts]] to understand how Elio works diff --git a/wiki/Home.md b/wiki/Home.md index 9b70b1c..6582d3d 100644 --- a/wiki/Home.md +++ b/wiki/Home.md @@ -60,7 +60,7 @@ Elio is built around a few key technical decisions: - **Linux-native**: Deep integration with io_uring and signalfd enables optimal performance on modern Linux kernels. epoll provides a fallback for older systems. - **Per-worker I/O**: Each scheduler thread owns its I/O backend (io_uring or epoll), eliminating I/O-related locking entirely. Cross-thread communication uses lock-free MPSC queues. - **Work-stealing**: The Chase-Lev deque provides lock-free local operations with a global load balancing fallback. Tasks with thread affinity are respected during stealing. -- **Extended vthread tracking**: Elio distinguishes construction-time parentage, first-activation parentage, and vthread ownership. This preserves low-overhead virtual stack debugging while making `co_await`, `spawn()`, `go()`, and work stealing semantics explicit. +- **Virtual stack tracking**: C++20 stackless coroutines lose stack information at suspension points. Elio's intrusive virtual stack enables production debugging via `elio-pstack` and GDB/LLDB extensions. 
## Wiki Contents diff --git a/wiki/Networking.md b/wiki/Networking.md index ef64172..4b7c2fb 100644 --- a/wiki/Networking.md +++ b/wiki/Networking.md @@ -72,14 +72,8 @@ coro::task server(uint16_t port) { ```cpp coro::task client(const std::string& host, uint16_t port) { - // Resolve host to concrete address, then connect - auto resolved = co_await resolve_hostname(host, port); - if (!resolved) { - ELIO_LOG_ERROR("Resolve failed: {}", strerror(errno)); - co_return; - } - - auto stream = co_await tcp_connect(*resolved); + // Connect to server (hostname is resolved automatically) + auto stream = co_await tcp_connect(ipv4_address(host, port)); if (!stream) { ELIO_LOG_ERROR("Connect failed: {}", strerror(errno)); co_return; @@ -103,33 +97,26 @@ coro::task client(const std::string& host, uint16_t port) { Elio provides three address types for TCP networking: `ipv4_address`, `ipv6_address`, and `socket_address` (a variant wrapper that holds either). -These types are value objects for already-parsed socket addresses. Their string constructors accept numeric IP literals only and do not perform DNS. For hostname resolution, use `co_await resolve_hostname(host, port)` (single best address) or `co_await resolve_all(host, port)` (all candidate addresses), then call `tcp_connect()` with a concrete address. 
- ```cpp // IPv4 address with port ipv4_address addr1(8080); // 0.0.0.0:8080 ipv4_address addr2("192.168.1.1", 8080); // 192.168.1.1:8080 +ipv4_address addr3("example.com", 80); // DNS resolved // IPv6 address with port ipv6_address addr4(8080); // [::]:8080 ipv6_address addr5("::1", 8080); // [::1]:8080 ipv6_address addr6("fe80::1%eth0", 8080); // Link-local with scope ID +ipv6_address addr7("example.com", 443); // DNS resolved (AAAA) // Generic socket_address (variant of ipv4_address | ipv6_address) socket_address sa1(ipv4_address(8080)); // From IPv4 socket_address sa2(ipv6_address("::1", 8080)); // From IPv6 -socket_address sa3("127.0.0.1", 443); // From literal IPv4 - -// Explicit async hostname resolution -auto resolved = co_await resolve_hostname("example.com", 443); -if (resolved && resolved->is_v6()) { - const auto& v6 = resolved->as_v6(); - ELIO_LOG_INFO("IPv6: {}", v6.to_string()); -} +socket_address sa3("example.com", 443); // Auto-detects v4/v6 // Inspect address type -if (sa2.is_v6()) { - const auto& v6 = sa2.as_v6(); +if (sa3.is_v6()) { + const auto& v6 = sa3.as_v6(); ELIO_LOG_INFO("IPv6: {}", v6.to_string()); } @@ -350,40 +337,6 @@ coro::task advanced_client() { } ``` -### Hostname Resolve Configuration (HTTP/HTTP2/WS/SSE) - -All HTTP-family clients now expose DNS resolve/cache behavior via client config. 
- -```cpp -#include - -// HTTP/1.1 -http::client_config http_cfg; -http_cfg.resolve_options.use_cache = true; -http_cfg.resolve_options.positive_ttl = std::chrono::seconds(30); -http_cfg.resolve_options.negative_ttl = std::chrono::seconds(2); -http_cfg.rotate_resolved_addresses = true; // round-robin start address -http::client http_client(http_cfg); - -// HTTP/2 -http::h2_client_config h2_cfg; -h2_cfg.resolve_options = net::default_cached_resolve_options(); -h2_cfg.rotate_resolved_addresses = true; -http::h2_client h2_client(h2_cfg); - -// WebSocket -http::websocket::client_config ws_cfg; -ws_cfg.resolve_options.use_cache = false; // always resolve fresh -ws_cfg.rotate_resolved_addresses = false; // deterministic order -http::websocket::ws_client ws(ws_cfg); - -// SSE -http::sse::client_config sse_cfg; -sse_cfg.resolve_options.use_cache = true; -sse_cfg.resolve_options.positive_ttl = std::chrono::seconds(10); -http::sse::sse_client sse(sse_cfg); -``` - ### HTTP Server ```cpp @@ -514,11 +467,8 @@ coro::task secure_connection() { tls_ctx.use_default_verify_paths(); tls_ctx.set_verify_mode(true); - // Resolve hostname, then connect using a concrete socket address - auto resolved = co_await resolve_hostname("example.com", 443); - if (!resolved) co_return; - - auto tcp = co_await tcp_connect(*resolved); + // Connect TCP + auto tcp = co_await tcp_connect(ipv4_address("example.com", 443)); if (!tcp) co_return; // Wrap with TLS diff --git a/wiki/RPC-Framework.md b/wiki/RPC-Framework.md index 482fa8c..728e53b 100644 --- a/wiki/RPC-Framework.md +++ b/wiki/RPC-Framework.md @@ -102,8 +102,10 @@ coro::task run_server(uint16_t port) { ```cpp coro::task run_client(const char* host, uint16_t port) { + auto& ctx = io::default_io_context(); + // Connect to server - auto client = co_await tcp_rpc_client::connect(host, port); + auto client = co_await tcp_rpc_client::connect(ctx, host, port); if (!client) { ELIO_LOG_ERROR("Failed to connect"); co_return; @@ -122,30 +124,6 @@ 
coro::task run_client(const char* host, uint16_t port) { } ``` -### Client Resolve/Cache Configuration - -```cpp -#include - -coro::task run_client_with_resolver(std::string_view host, uint16_t port) { - net::resolve_options opts = net::default_cached_resolve_options(); - opts.positive_ttl = std::chrono::seconds(30); - opts.negative_ttl = std::chrono::seconds(2); - - auto client = co_await tcp_rpc_client::connect(host, port, opts); - if (!client) { - ELIO_LOG_ERROR("Failed to connect"); - co_return; - } - - GetUserRequest req{42}; - auto result = co_await (*client)->call(req, std::chrono::seconds(5)); - if (!result.ok()) { - ELIO_LOG_ERROR("RPC failed: {}", result.error_message()); - } -} -``` - ## Supported Types ### Primitive Types diff --git a/wiki/WebSocket-SSE.md b/wiki/WebSocket-SSE.md index a08f2e5..b65dc77 100644 --- a/wiki/WebSocket-SSE.md +++ b/wiki/WebSocket-SSE.md @@ -109,21 +109,6 @@ coro::task connect_example() { } ``` -### WebSocket Resolve/Cache Configuration - -```cpp -#include - -websocket::client_config cfg; -cfg.resolve_options = net::default_cached_resolve_options(); -cfg.resolve_options.positive_ttl = std::chrono::seconds(20); -cfg.resolve_options.negative_ttl = std::chrono::seconds(3); -cfg.rotate_resolved_addresses = true; - -websocket::ws_client client(cfg); -co_await client.connect("wss://example.com/ws"); -``` - ### WebSocket Frame Types | Opcode | Name | Description | @@ -213,15 +198,14 @@ using namespace elio; using namespace elio::http::sse; coro::task listen_events() { + auto& ctx = io::default_io_context(); + // Configure client client_config config; config.auto_reconnect = true; config.default_retry_ms = 3000; - config.resolve_options.use_cache = true; - config.resolve_options.positive_ttl = std::chrono::seconds(15); - config.rotate_resolved_addresses = true; - sse_client client(config); + sse_client client(ctx, config); // Connect if (!co_await client.connect("http://localhost:8080/events")) { @@ -242,19 +226,6 @@ coro::task 
listen_events() { } ``` -### SSE Resolve/Cache Configuration - -```cpp -#include - -sse::client_config cfg; -cfg.resolve_options.use_cache = false; // disable cache for dynamic DNS targets -cfg.rotate_resolved_addresses = false; // always try first resolved address first - -sse::sse_client client(cfg); -co_await client.connect("https://example.com/events"); -``` - ### SSE Event Format SSE events are formatted as text with specific fields: From 09699aab0ed76a902ce4962cc4aa317d597d9a48 Mon Sep 17 00:00:00 2001 From: Coldwings Date: Thu, 19 Mar 2026 12:35:58 +0800 Subject: [PATCH 4/4] minor fix --- include/elio/runtime/scheduler.hpp | 1 - 1 file changed, 1 deletion(-) diff --git a/include/elio/runtime/scheduler.hpp b/include/elio/runtime/scheduler.hpp index b060c69..9208c7a 100644 --- a/include/elio/runtime/scheduler.hpp +++ b/include/elio/runtime/scheduler.hpp @@ -9,7 +9,6 @@ #include #include #include -#include namespace elio::runtime {