From a23acdc022512c218d7e8959588488bb33347b4a Mon Sep 17 00:00:00 2001 From: Coldwings Date: Thu, 26 Mar 2026 14:19:24 +0800 Subject: [PATCH] Huge change for vthread-stack --- examples/async_file_io.cpp | 12 +- examples/autoscaler_example.cpp | 4 +- examples/benchmark.cpp | 90 +-- examples/debug_test.cpp | 42 +- examples/dynamic_threads.cpp | 3 +- examples/http_server.cpp | 4 +- examples/io_benchmark.cpp | 5 +- examples/microbench.cpp | 42 +- examples/parallel_tasks.cpp | 3 +- examples/quick_benchmark.cpp | 9 +- examples/rpc_client_example.cpp | 6 +- examples/rpc_server_example.cpp | 6 +- examples/scalability_test.cpp | 3 +- examples/signal_handling.cpp | 9 +- examples/sse_server.cpp | 7 +- examples/tcp_echo_client.cpp | 3 +- examples/tcp_echo_server.cpp | 11 +- examples/thread_affinity.cpp | 6 +- examples/uds_echo_client.cpp | 3 +- examples/uds_echo_server.cpp | 11 +- examples/websocket_server.cpp | 2 +- include/elio/coro/frame_allocator.hpp | 329 ----------- include/elio/coro/promise_base.hpp | 71 ++- include/elio/coro/task.hpp | 208 +++---- include/elio/coro/task_handle.hpp | 119 ++-- include/elio/coro/vthread_stack.hpp | 171 ++++++ include/elio/elio.hpp | 4 + include/elio/http/http_server.hpp | 10 +- include/elio/http/websocket_server.hpp | 10 +- include/elio/net/resolve.hpp | 197 +++---- include/elio/rpc/rpc_client.hpp | 64 +-- include/elio/rpc/rpc_server.hpp | 6 +- include/elio/runtime/async_main.hpp | 90 ++- include/elio/runtime/blocking_pool.hpp | 89 +++ include/elio/runtime/scheduler.hpp | 148 ++++- include/elio/runtime/serve.hpp | 50 +- include/elio/runtime/spawn.hpp | 95 +++ include/elio/runtime/spawn_blocking.hpp | 107 ++++ include/elio/sync/primitives.hpp | 26 +- tests/CMakeLists.txt | 1 + tests/integration/test_dynamic_threads.cpp | 39 +- .../test_exception_propagation.cpp | 24 +- tests/integration/test_parallel_tasks.cpp | 24 +- .../test_scheduler_integration.cpp | 18 +- tests/unit/test_affinity.cpp | 54 +- tests/unit/test_awaitable_base.cpp | 30 +- 
tests/unit/test_frame_allocator.cpp | 93 --- tests/unit/test_io.cpp | 132 ++--- tests/unit/test_scheduler.cpp | 26 +- tests/unit/test_signalfd.cpp | 16 +- tests/unit/test_sync.cpp | 109 ++-- tests/unit/test_task.cpp | 168 +++--- tests/unit/test_timer.cpp | 36 +- tests/unit/test_vthread_stack.cpp | 543 ++++++++++++++++++ 54 files changed, 2063 insertions(+), 1325 deletions(-) delete mode 100644 include/elio/coro/frame_allocator.hpp create mode 100644 include/elio/coro/vthread_stack.hpp create mode 100644 include/elio/runtime/blocking_pool.hpp create mode 100644 include/elio/runtime/spawn.hpp create mode 100644 include/elio/runtime/spawn_blocking.hpp delete mode 100644 tests/unit/test_frame_allocator.cpp create mode 100644 tests/unit/test_vthread_stack.cpp diff --git a/examples/async_file_io.cpp b/examples/async_file_io.cpp index e797300..9efd73b 100644 --- a/examples/async_file_io.cpp +++ b/examples/async_file_io.cpp @@ -270,8 +270,10 @@ int main(int argc, char* argv[]) { done = true; }; + coro::detail::heap_alloc_guard guard; auto t = run(); - sched.spawn(t.release()); + auto handle = coro::detail::task_access::release(t); + sched.spawn(handle); } else if (mode == "--read") { std::vector files; for (int i = 2; i < argc; ++i) { @@ -283,8 +285,10 @@ int main(int argc, char* argv[]) { done = true; }; + coro::detail::heap_alloc_guard guard; auto t = run(); - sched.spawn(t.release()); + auto handle = coro::detail::task_access::release(t); + sched.spawn(handle); } else if (argc >= 3) { // File copy mode std::string src = argv[1]; @@ -296,8 +300,10 @@ int main(int argc, char* argv[]) { done = true; }; + coro::detail::heap_alloc_guard guard; auto t = run(); - sched.spawn(t.release()); + auto handle = coro::detail::task_access::release(t); + sched.spawn(handle); } else { std::cerr << "Invalid arguments" << std::endl; return 1; diff --git a/examples/autoscaler_example.cpp b/examples/autoscaler_example.cpp index 26208cb..0e3e0a9 100644 --- a/examples/autoscaler_example.cpp 
+++ b/examples/autoscaler_example.cpp @@ -51,7 +51,7 @@ int main() { // Submit heavy workload for (int i = 0; i < 2000; ++i) { - sched.spawn(workload_task(completed).release()); + sched.go([&completed]() { return workload_task(completed); }); } std::cout << "Phase 1: High load - expecting scale-up..." << std::endl; @@ -80,7 +80,7 @@ int main() { // Submit even heavier workload for (int i = 0; i < 3000; ++i) { - sched.spawn(workload_task(completed2).release()); + sched.go([&completed2]() { return workload_task(completed2); }); } std::cout << "Phase 2: Higher load - expecting more scale-up..." << std::endl; diff --git a/examples/benchmark.cpp b/examples/benchmark.cpp index 323fc64..687b731 100644 --- a/examples/benchmark.cpp +++ b/examples/benchmark.cpp @@ -70,16 +70,22 @@ void benchmark_spawn_overhead() { while (duration_cast(high_resolution_clock::now() - bench_start) < MIN_BENCH_DURATION) { runtime::scheduler sched(4); sched.start(); - + + std::atomic completed(0); + auto batch_start = high_resolution_clock::now(); - + + auto taskdef = [&completed]() -> coro::task { + completed.fetch_add(1, std::memory_order_release); + co_return; + }; + for (int i = 0; i < batch_size; ++i) { - auto t = empty_task(); - sched.spawn(t.release()); + sched.go(taskdef); } - + // Wait for all to complete - while (sched.pending_tasks() > 0) { + while (completed.load(std::memory_order_acquire) < batch_size) { std::this_thread::sleep_for(microseconds(1)); } @@ -127,24 +133,23 @@ void benchmark_context_switch() { runtime::scheduler sched(4); sched.start(); - std::atomic completed{0}; - - auto task_with_await = [&]() -> coro::task { - for (int i = 0; i < awaits_per_task; ++i) { - int value = co_await compute_task(i); + std::atomic completed(0); + + auto taskdef = [&completed]() -> coro::task { + for (int j = 0; j < awaits_per_task; ++j) { + int value = co_await compute_task(j); (void)value; } completed.fetch_add(1, std::memory_order_relaxed); co_return; }; - + auto batch_start = 
high_resolution_clock::now(); - + for (int i = 0; i < batch_size; ++i) { - auto t = task_with_await(); - sched.spawn(t.release()); + sched.go(taskdef); } - + while (completed.load(std::memory_order_relaxed) < batch_size) { std::this_thread::sleep_for(microseconds(1)); } @@ -199,12 +204,11 @@ void benchmark_yield() { runtime::scheduler sched(1); // Single worker thread sched.start(); - std::atomic completed{0}; - std::atomic end_time_ns{0}; // Last task records end timestamp - - // Each vthread yields multiple times - auto yield_task = [&]() -> coro::task { - for (int i = 0; i < yields_per_vthread; ++i) { + std::atomic completed(0); + std::atomic end_time_ns(0); // Last task records end timestamp + + auto taskdef = [&completed, &end_time_ns, num_vthreads]() -> coro::task { + for (int j = 0; j < yields_per_vthread; ++j) { co_await time::yield(); } // Last task to complete records the end timestamp @@ -215,17 +219,17 @@ void benchmark_yield() { } co_return; }; - + + // Capture start time in main thread auto start_time_ns = duration_cast( steady_clock::now().time_since_epoch()).count(); - + // Spawn all vthreads for (int i = 0; i < num_vthreads; ++i) { - auto t = yield_task(); - sched.spawn(t.release()); + sched.go(taskdef); } - + // Wait for end_time_ns to be set (spin-wait for accuracy) while (end_time_ns.load(std::memory_order_acquire) == 0) { // Spin without yielding for accurate measurement @@ -272,32 +276,31 @@ void benchmark_work_stealing() { runtime::scheduler sched(4); sched.start(); - std::atomic completed{0}; - + std::atomic completed(0); + // Record initial per-worker task counts std::vector initial_counts(4); for (size_t i = 0; i < 4; ++i) { initial_counts[i] = sched.worker_tasks_executed(i); } - - auto heavy_task = [&]() -> coro::task { + + auto taskdef = [&completed]() -> coro::task { volatile int sum = 0; - for (int i = 0; i < 10000; ++i) { - sum = sum + i * i; + for (int j = 0; j < 10000; ++j) { + sum = sum + j * j; } (void)sum; completed.fetch_add(1, 
std::memory_order_relaxed); co_return; }; - + auto batch_start = high_resolution_clock::now(); - + // Spawn ALL tasks to worker 0 to test work stealing for (int i = 0; i < batch_size; ++i) { - auto t = heavy_task(); - sched.spawn_to(0, t.release()); + sched.go_to(0, taskdef); } - + while (completed.load(std::memory_order_relaxed) < batch_size) { std::this_thread::sleep_for(microseconds(1)); } @@ -364,13 +367,13 @@ void benchmark_scalability() { runtime::scheduler sched(num_threads); sched.start(); - std::atomic completed{0}; + std::atomic completed(0); - auto task_func = [&]() -> coro::task { + auto taskdef = [&completed]() -> coro::task { // Larger CPU-bound work to minimize scheduling overhead ratio volatile int sum = 0; - for (int i = 0; i < work_iterations; ++i) { - sum = sum + i * i; + for (int j = 0; j < work_iterations; ++j) { + sum = sum + j * j; } (void)sum; completed.fetch_add(1, std::memory_order_relaxed); @@ -381,8 +384,7 @@ void benchmark_scalability() { // Distribute tasks evenly across workers for true parallel scaling test for (int i = 0; i < batch_size; ++i) { - auto t = task_func(); - sched.spawn(t.release()); // Round-robin distribution + sched.go(taskdef); } while (completed.load(std::memory_order_relaxed) < batch_size) { diff --git a/examples/debug_test.cpp b/examples/debug_test.cpp index 8b9fdde..a3b9810 100644 --- a/examples/debug_test.cpp +++ b/examples/debug_test.cpp @@ -45,7 +45,7 @@ coro::task signal_handler_task() { } while(0) // Helper awaitable to get promise reference -namespace detail { +namespace debug_detail { struct get_promise { bool await_ready() const noexcept { return false; } @@ -66,7 +66,7 @@ struct get_promise { // Level 3: Leaf coroutine that does some work coro::task compute_value(int x) { // Set debug location - auto& p = co_await detail::get_promise{}; + auto& p = co_await debug_detail::get_promise{}; p.set_location(__FILE__, "compute_value", __LINE__); p.set_state(coro::coroutine_state::running); @@ -79,7 +79,7 @@ 
coro::task compute_value(int x) { // Level 2: Middle coroutine coro::task process_data(int id) { - auto& p = co_await detail::get_promise{}; + auto& p = co_await debug_detail::get_promise{}; p.set_location(__FILE__, "process_data", __LINE__); p.set_state(coro::coroutine_state::running); @@ -90,7 +90,7 @@ coro::task process_data(int id) { // Level 1: Outer coroutine (worker) coro::task worker_task(int worker_id) { - auto& p = co_await detail::get_promise{}; + auto& p = co_await debug_detail::get_promise{}; p.set_location(__FILE__, "worker_task", __LINE__); p.set_state(coro::coroutine_state::running); @@ -107,7 +107,7 @@ coro::task worker_task(int worker_id) { // Long-running task for debugging coro::task long_running_task([[maybe_unused]] int id) { - auto& p = co_await detail::get_promise{}; + auto& p = co_await debug_detail::get_promise{}; p.set_location(__FILE__, "long_running_task", __LINE__); p.set_state(coro::coroutine_state::running); @@ -141,19 +141,7 @@ coro::task async_main(int argc, char* argv[]) { std::cout << std::endl; } - // Spawn some worker tasks - std::vector> workers; - for (int i = 0; i < 4; ++i) { - workers.push_back(worker_task(i)); - } - - // Spawn long-running tasks for debugging - std::vector> long_tasks; - for (int i = 0; i < 2; ++i) { - long_tasks.push_back(long_running_task(i)); - } - - // Get scheduler and spawn tasks + // Get scheduler auto* sched = runtime::scheduler::current(); if (!sched) { std::cerr << "Error: No scheduler" << std::endl; @@ -161,17 +149,19 @@ coro::task async_main(int argc, char* argv[]) { } // Spawn signal handler coroutine - auto sig_handler = signal_handler_task(); - sched->spawn(sig_handler.release()); + sched->go(signal_handler_task); - for (auto& w : workers) { - sched->spawn(w.release()); + // Spawn some worker tasks + for (int i = 0; i < 4; ++i) { + sched->go([i]() { return worker_task(i); }); } - for (auto& t : long_tasks) { - sched->spawn(t.release()); + + // Spawn long-running tasks for debugging + for 
(int i = 0; i < 2; ++i) { + sched->go([i]() { return long_running_task(i); }); } - std::cout << "Spawned " << workers.size() + long_tasks.size() << " tasks" << std::endl; + std::cout << "Spawned " << 4 + 2 << " tasks" << std::endl; std::cout << std::endl; if (pause_mode) { @@ -198,5 +188,5 @@ int main(int argc, char* argv[]) { sigs.block_all_threads(); // Use elio::run() with the async_main coroutine - return elio::run(async_main(argc, argv)); + return elio::run([&]() { return async_main(argc, argv); }); } diff --git a/examples/dynamic_threads.cpp b/examples/dynamic_threads.cpp index 512f722..35fe153 100644 --- a/examples/dynamic_threads.cpp +++ b/examples/dynamic_threads.cpp @@ -21,8 +21,7 @@ void run_batch(runtime::scheduler& sched, int num_tasks, const std::string& labe // Spawn tasks for (int i = 0; i < num_tasks; ++i) { - auto t = simple_task(completed); - sched.spawn(t.release()); + sched.go([&completed]() { return simple_task(completed); }); } // Wait for completion diff --git a/examples/http_server.cpp b/examples/http_server.cpp index 954f74b..a83f55a 100644 --- a/examples/http_server.cpp +++ b/examples/http_server.cpp @@ -231,14 +231,14 @@ coro::task async_main(int argc, char* argv[]) { try { auto tls_ctx = tls::tls_context::make_server(cert_file, key_file); ELIO_LOG_INFO("Starting HTTPS server on {}", bind_addr.to_string()); - co_await elio::serve(srv, srv.listen_tls(bind_addr, tls_ctx, opts)); + co_await elio::serve(srv, [&]() { return srv.listen_tls(bind_addr, tls_ctx, opts); }); } catch (const std::exception& e) { ELIO_LOG_ERROR("Failed to start HTTPS server: {}", e.what()); co_return 1; } } else { ELIO_LOG_INFO("Starting HTTP server on {}", bind_addr.to_string()); - co_await elio::serve(srv, srv.listen(bind_addr, opts)); + co_await elio::serve(srv, [&]() { return srv.listen(bind_addr, opts); }); } co_return 0; diff --git a/examples/io_benchmark.cpp b/examples/io_benchmark.cpp index 0c5611d..133f7dd 100644 --- a/examples/io_benchmark.cpp +++ 
b/examples/io_benchmark.cpp @@ -1,4 +1,5 @@ #include +#include #include #include #include @@ -48,7 +49,7 @@ void benchmark_file_io() { }; auto start = high_resolution_clock::now(); - io_task().go(); + elio::go(io_task); while (completed.load(std::memory_order_acquire) == 0) { std::this_thread::sleep_for(microseconds(100)); @@ -104,7 +105,7 @@ void benchmark_concurrent_file_io() { auto start = high_resolution_clock::now(); for (int i = 0; i < NUM_TASKS; ++i) { - io_task().go(); + elio::go(io_task); } while (completed.load(std::memory_order_acquire) < NUM_TASKS) { diff --git a/examples/microbench.cpp b/examples/microbench.cpp index 09c09d0..a557a0f 100644 --- a/examples/microbench.cpp +++ b/examples/microbench.cpp @@ -19,6 +19,10 @@ int main() { constexpr int N = 100000; + // NOTE: Tests 1-2 below use detail::task_access to directly measure + // coroutine frame allocation overhead. This is intentional for low-level + // performance analysis and requires manual handle management. + // 1. Measure coroutine frame allocation (cold - first time) { std::vector> handles; @@ -27,7 +31,7 @@ int main() { auto start = high_resolution_clock::now(); for (int i = 0; i < N; ++i) { auto t = empty_task(); - handles.push_back(t.release()); + handles.push_back(coro::detail::task_access::release(t)); } auto end = high_resolution_clock::now(); auto ns = duration_cast(end - start).count(); @@ -46,7 +50,7 @@ int main() { auto start = high_resolution_clock::now(); for (int i = 0; i < N; ++i) { auto t = empty_task(); - handles.push_back(t.release()); + handles.push_back(coro::detail::task_access::release(t)); } auto end = high_resolution_clock::now(); auto ns = duration_cast(end - start).count(); @@ -119,23 +123,14 @@ int main() { close(fd); } - // 7. Full spawn path (with running scheduler) - cold + // 7. 
Full spawn path (with running scheduler) - includes alloc + spawn { runtime::scheduler sched(4); sched.start(); - std::vector> handles; - handles.reserve(N); - - // Pre-create tasks - for (int i = 0; i < N; ++i) { - auto t = empty_task(); - handles.push_back(t.release()); - } - auto start = high_resolution_clock::now(); - for (auto h : handles) { - sched.spawn(h); + for (int i = 0; i < N; ++i) { + sched.go(empty_task); } auto end = high_resolution_clock::now(); @@ -145,12 +140,12 @@ int main() { } auto ns = duration_cast(end - start).count(); - std::cout << "spawn() only (pre-alloc): " << (ns / N) << " ns/spawn" << std::endl; + std::cout << "sched.go() full path: " << (ns / N) << " ns/go" << std::endl; sched.shutdown(); } - // 8. Measure idle worker overhead + // 8. Measure warmed-up worker overhead { runtime::scheduler sched(4); sched.start(); @@ -158,18 +153,9 @@ int main() { // Let workers warm up std::this_thread::sleep_for(std::chrono::milliseconds(50)); - std::vector> handles; - handles.reserve(N); - - // Pre-create tasks - for (int i = 0; i < N; ++i) { - auto t = empty_task(); - handles.push_back(t.release()); - } - auto start = high_resolution_clock::now(); - for (auto h : handles) { - sched.spawn(h); + for (int i = 0; i < N; ++i) { + sched.go(empty_task); } auto end = high_resolution_clock::now(); @@ -179,7 +165,7 @@ int main() { } auto ns = duration_cast(end - start).count(); - std::cout << "spawn() only (workers idle): " << (ns / N) << " ns/spawn" << std::endl; + std::cout << "sched.go() (workers warmed): " << (ns / N) << " ns/go" << std::endl; sched.shutdown(); } diff --git a/examples/parallel_tasks.cpp b/examples/parallel_tasks.cpp index 35a55d5..6671392 100644 --- a/examples/parallel_tasks.cpp +++ b/examples/parallel_tasks.cpp @@ -64,8 +64,7 @@ coro::task async_main([[maybe_unused]] int argc, [[maybe_unused]] char* arg for (int i = 0; i < num_tasks; ++i) { // Vary work amount: some tasks do more work than others int work_amount = 10 + (i % 20); - auto 
t = worker_task(i, work_amount, completed); - sched->spawn(t.release()); + sched->go([i, work_amount, &completed]() { return worker_task(i, work_amount, completed); }); } // Monitor progress using yield diff --git a/examples/quick_benchmark.cpp b/examples/quick_benchmark.cpp index 5b7fff6..9ed7fd1 100644 --- a/examples/quick_benchmark.cpp +++ b/examples/quick_benchmark.cpp @@ -69,8 +69,7 @@ void benchmark_spawn_overhead() { auto batch_start = high_resolution_clock::now(); for (int i = 0; i < batch_size; ++i) { - auto t = empty_task(); - sched.spawn(t.release()); + sched.go(empty_task); } while (sched.pending_tasks() > 0) { @@ -125,8 +124,7 @@ void benchmark_context_switch() { auto batch_start = high_resolution_clock::now(); for (int i = 0; i < batch_size; ++i) { - auto t = task_with_await(); - sched.spawn(t.release()); + sched.go(task_with_await); } while (completed.load(std::memory_order_relaxed) < batch_size) { @@ -188,8 +186,7 @@ void benchmark_yield() { steady_clock::now().time_since_epoch()).count(); for (int i = 0; i < num_vthreads; ++i) { - auto t = yield_task(); - sched.spawn(t.release()); + sched.go(yield_task); } while (end_time_ns.load(std::memory_order_acquire) == 0) {} diff --git a/examples/rpc_client_example.cpp b/examples/rpc_client_example.cpp index c1c8ffb..e1b31e6 100644 --- a/examples/rpc_client_example.cpp +++ b/examples/rpc_client_example.cpp @@ -268,8 +268,7 @@ task run_demo(tcp_rpc_client::ptr client) { auto* sched = scheduler::current(); if (sched) { - auto t = call_task(); - sched->spawn(t.release()); + sched->go(call_task); } } @@ -369,8 +368,7 @@ int main(int argc, char* argv[]) { sched.start(); // Run client - auto client = client_main(host, port); - sched.spawn(client.release()); + sched.go([&]() { return client_main(host, port); }); // Wait for completion std::this_thread::sleep_for(std::chrono::seconds(5)); diff --git a/examples/rpc_server_example.cpp b/examples/rpc_server_example.cpp index ca33657..d6a704d 100644 --- 
a/examples/rpc_server_example.cpp +++ b/examples/rpc_server_example.cpp @@ -321,12 +321,10 @@ int main(int argc, char* argv[]) { sched.start(); // Spawn signal handler coroutine - auto sig_handler = signal_handler_task(); - sched.spawn(sig_handler.release()); + sched.go(signal_handler_task); // Run server - auto server = server_main(port, sched); - sched.spawn(server.release()); + sched.go([port, &sched]() { return server_main(port, sched); }); // Wait for shutdown while (g_running) { diff --git a/examples/scalability_test.cpp b/examples/scalability_test.cpp index 1596dee..afadf46 100644 --- a/examples/scalability_test.cpp +++ b/examples/scalability_test.cpp @@ -52,8 +52,7 @@ int main() { // Distribute tasks evenly via spawn (round-robin) for (int i = 0; i < batch_size; ++i) { - auto t = task_func(); - sched.spawn(t.release()); + sched.go(task_func); } while (completed.load(std::memory_order_relaxed) < batch_size) { diff --git a/examples/signal_handling.cpp b/examples/signal_handling.cpp index dfba213..162567f 100644 --- a/examples/signal_handling.cpp +++ b/examples/signal_handling.cpp @@ -105,14 +105,12 @@ task main_task(scheduler& sched) { ELIO_LOG_INFO("Starting application with PID {}", getpid()); // Spawn the signal handler - auto sig_handler = signal_handler_task(sched); - sched.spawn(sig_handler.release()); + sched.go([&sched]() { return signal_handler_task(sched); }); // Spawn some worker coroutines constexpr int num_workers = 3; for (int i = 0; i < num_workers; ++i) { - auto worker = worker_task(i); - sched.spawn(worker.release()); + sched.go([i]() { return worker_task(i); }); } ELIO_LOG_INFO("All workers started"); @@ -142,8 +140,7 @@ int main() { sched.start(); // Spawn main task - auto main = main_task(sched); - sched.spawn(main.release()); + sched.go([&sched]() { return main_task(sched); }); // Run until shutdown while (g_running) { diff --git a/examples/sse_server.cpp b/examples/sse_server.cpp index 70723cf..6ae36a1 100644 --- 
a/examples/sse_server.cpp +++ b/examples/sse_server.cpp @@ -129,8 +129,9 @@ class sse_http_server { continue; } - auto handler = handle_connection(std::move(*stream_result)); - sched->spawn(handler.release()); + sched->go([this, stream = std::move(*stream_result)]() mutable { + return handle_connection(std::move(stream)); + }); } } @@ -367,7 +368,7 @@ coro::task async_main(int argc, char* argv[]) { // Start server and wait for shutdown signal // elio::serve() handles signal waiting and graceful shutdown automatically - co_await elio::serve(srv, srv.listen(bind_addr)); + co_await elio::serve(srv, [&]() { return srv.listen(bind_addr); }); co_return 0; } diff --git a/examples/tcp_echo_client.cpp b/examples/tcp_echo_client.cpp index f6c6c22..f6131b8 100644 --- a/examples/tcp_echo_client.cpp +++ b/examples/tcp_echo_client.cpp @@ -209,8 +209,7 @@ int main(int argc, char* argv[]) { done = true; }; - auto client = run_client(); - sched.spawn(client.release()); + sched.go(run_client); // Wait for completion while (!done) { diff --git a/examples/tcp_echo_server.cpp b/examples/tcp_echo_server.cpp index 0cb25db..bbaea48 100644 --- a/examples/tcp_echo_server.cpp +++ b/examples/tcp_echo_server.cpp @@ -133,8 +133,9 @@ task server_main(const socket_address& bind_addr, const tcp_options& opts, // Spawn handler coroutine for this client int client_id = ++client_counter; - auto handler = handle_client(std::move(*stream_result), client_id); - sched.spawn(handler.release()); + sched.go([stream = std::move(*stream_result), client_id]() mutable { + return handle_client(std::move(stream), client_id); + }); } ELIO_LOG_INFO("Server shutting down..."); @@ -199,12 +200,10 @@ int main(int argc, char* argv[]) { sched.start(); // Spawn signal handler coroutine - auto sig_handler = signal_handler_task(); - sched.spawn(sig_handler.release()); + sched.go(signal_handler_task); // Run server - auto server = server_main(bind_addr, opts, sched); - sched.spawn(server.release()); + sched.go([&bind_addr, 
&opts, &sched]() { return server_main(bind_addr, opts, sched); }); // Wait until interrupted while (g_running) { diff --git a/examples/thread_affinity.cpp b/examples/thread_affinity.cpp index c173f6c..81d8617 100644 --- a/examples/thread_affinity.cpp +++ b/examples/thread_affinity.cpp @@ -106,8 +106,7 @@ coro::task thread_local_state_example() { // Spawn multiple tasks bound to different workers for (int i = 0; i < 8; ++i) { size_t target = i % std::min(num_workers, size_t(2)); // Distribute across 2 workers - auto t = thread_local_state_task(i, target); - sched->spawn(t.release()); + sched->go([i, target]() { return thread_local_state_task(i, target); }); } // Give tasks time to complete @@ -175,8 +174,7 @@ coro::task multi_worker_example() { std::cout << "Spawning " << num_workers << " tasks, one per worker..." << std::endl; for (size_t i = 0; i < num_workers; ++i) { - auto t = worker_task(i, counters[i]); - sched->spawn(t.release()); + sched->go([i, &counters]() { return worker_task(i, counters[i]); }); } // Wait for completion diff --git a/examples/uds_echo_client.cpp b/examples/uds_echo_client.cpp index 45a3436..4ae05a9 100644 --- a/examples/uds_echo_client.cpp +++ b/examples/uds_echo_client.cpp @@ -188,8 +188,7 @@ int main(int argc, char* argv[]) { done = true; }; - auto client = run_client(); - sched.spawn(client.release()); + sched.go(run_client); // Wait for completion while (!done) { diff --git a/examples/uds_echo_server.cpp b/examples/uds_echo_server.cpp index 994fff8..c2b7c2f 100644 --- a/examples/uds_echo_server.cpp +++ b/examples/uds_echo_server.cpp @@ -126,8 +126,9 @@ task server_main(const unix_address& addr, scheduler& sched) { // Spawn handler coroutine for this client int client_id = ++client_counter; - auto handler = handle_client(std::move(*stream_result), client_id); - sched.spawn(handler.release()); + sched.go([stream = std::move(*stream_result), client_id]() mutable { + return handle_client(std::move(stream), client_id); + }); } 
ELIO_LOG_INFO("Server shutting down..."); @@ -162,12 +163,10 @@ int main(int argc, char* argv[]) { sched.start(); // Spawn signal handler coroutine - auto sig_handler = signal_handler_task(); - sched.spawn(sig_handler.release()); + sched.go(signal_handler_task); // Run server - auto server = server_main(addr, sched); - sched.spawn(server.release()); + sched.go([&addr, &sched]() { return server_main(addr, sched); }); // Wait until interrupted while (g_running) { diff --git a/examples/websocket_server.cpp b/examples/websocket_server.cpp index 01ada1a..7a4583a 100644 --- a/examples/websocket_server.cpp +++ b/examples/websocket_server.cpp @@ -297,7 +297,7 @@ coro::task async_main(int argc, char* argv[]) { // Start server and wait for shutdown signal // elio::serve() handles signal waiting and graceful shutdown automatically - co_await elio::serve(srv, srv.listen(bind_addr)); + co_await elio::serve(srv, [&]() { return srv.listen(bind_addr); }); co_return 0; } diff --git a/include/elio/coro/frame_allocator.hpp b/include/elio/coro/frame_allocator.hpp deleted file mode 100644 index f98d93a..0000000 --- a/include/elio/coro/frame_allocator.hpp +++ /dev/null @@ -1,329 +0,0 @@ -#pragma once - -#include -#include -#include -#include -#include -#include -#include - -// Architecture-specific CPU pause/yield hint for tight spin loops. -// Reduces power consumption and allows the HT sibling to run. -#if defined(__x86_64__) || defined(__i386__) -# define ELIO_CPU_PAUSE() __builtin_ia32_pause() -#elif defined(__aarch64__) || defined(__arm__) -# define ELIO_CPU_PAUSE() __asm__ __volatile__("yield" ::: "memory") -#else -# include -# define ELIO_CPU_PAUSE() std::this_thread::yield() -#endif - -namespace elio::coro { - -/// Thread-local free-list based frame allocator for small coroutine frames -/// Dramatically reduces allocation overhead for frequently created/destroyed coroutines -/// -/// Design: Each allocated frame has a hidden header storing the source pool ID and size class. 
-/// When deallocated on a different thread, the frame is returned via an MPSC queue -/// to its source pool. This handles work-stealing scenarios where coroutines -/// are allocated on thread A but deallocated on thread B. -/// -/// Size Classes: Multiple pools for different frame sizes (32, 64, 128, 256 bytes) -/// reduce memory waste for small frames while maintaining allocation performance. -/// -/// Note: Under sanitizers, pooling is disabled to allow proper leak/error detection. -class frame_allocator { -public: - // Size classes for different frame sizes - static constexpr size_t SIZE_CLASSES[] = {32, 64, 128, 256}; - static constexpr size_t NUM_SIZE_CLASSES = 4; - static constexpr size_t POOL_SIZE = 512; // Per size class - static constexpr size_t REMOTE_QUEUE_BATCH = 64; // Process remote returns in batches - -// Detect sanitizers: GCC uses __SANITIZE_*, Clang uses __has_feature -#if defined(__SANITIZE_ADDRESS__) || defined(__SANITIZE_THREAD__) -#define ELIO_SANITIZER_ACTIVE 1 -#elif defined(__has_feature) -#if __has_feature(address_sanitizer) || __has_feature(thread_sanitizer) -#define ELIO_SANITIZER_ACTIVE 1 -#endif -#endif - -#ifdef ELIO_SANITIZER_ACTIVE - // Under sanitizers, bypass pooling entirely for accurate leak detection - static void* allocate(size_t size) { - return ::operator new(size); - } - - static void deallocate(void* ptr, [[maybe_unused]] size_t size) noexcept { - ::operator delete(ptr); - } -#else - static void* allocate(size_t size) { - size_t sc = find_size_class(size); - if (sc < NUM_SIZE_CLASSES) { - auto& alloc = instance(); - - // First try to reclaim remote returns periodically - alloc.reclaim_remote_returns(); - - if (alloc.free_count_[sc] > 0) { - void* block = alloc.pool_[sc][--alloc.free_count_[sc]]; - // Update header to reflect current pool ownership - auto* header = static_cast(block); - header->source_pool_id = alloc.pool_id_; - header->size_class = static_cast(sc); - return block_to_user(block); - } - - // Allocate new 
block with header - void* block = ::operator new(alloc_block_size(sc)); - auto* header = static_cast(block); - header->source_pool_id = alloc.pool_id_; - header->size_class = static_cast(sc); - header->next.store(nullptr, std::memory_order_relaxed); - return block_to_user(block); - } - // Fall back to standard allocation for large frames - return ::operator new(size); - } - - static void deallocate(void* ptr, size_t size) noexcept { - size_t sc = find_size_class(size); - if (sc < NUM_SIZE_CLASSES) { - void* block = user_to_block(ptr); - auto* header = static_cast(block); - auto& alloc = instance(); - - // Fast path: same thread - return directly to local pool - if (header->source_pool_id == alloc.pool_id_) { - if (alloc.free_count_[sc] < POOL_SIZE) { - alloc.pool_[sc][alloc.free_count_[sc]++] = block; - return; - } - // Pool full, delete the block - ::operator delete(block); - return; - } else { - // Cross-thread deallocation: push to source pool's remote queue - frame_allocator* source = get_pool_by_id(header->source_pool_id); - if (source) { - source->push_remote_return(block); - return; - } - // Source pool no longer exists (thread exited), delete the block - ::operator delete(block); - return; - } - } - // Large allocation - was allocated without header - ::operator delete(ptr); - } -#endif - -private: - // Block header stored before user data - struct block_header { - uint32_t source_pool_id; // ID of the pool that allocated this block - uint8_t size_class; // Size class index (0-3) - std::atomic next; // For MPSC queue linkage - }; - - // Header size - static constexpr size_t HEADER_SIZE = sizeof(block_header); - - // Find size class index for requested size - static size_t find_size_class(size_t size) noexcept { - for (size_t i = 0; i < NUM_SIZE_CLASSES; ++i) { - if (size <= SIZE_CLASSES[i]) { - return i; - } - } - return NUM_SIZE_CLASSES; // Not found (for sizes > 256) - } - - // Get actual size for a size class - static size_t size_class_size(size_t idx) 
noexcept { - return SIZE_CLASSES[idx]; - } - - // Total block size including header for a given size class - static size_t alloc_block_size(size_t size_class_idx) noexcept { - return HEADER_SIZE + SIZE_CLASSES[size_class_idx]; - } - - // Convert between block (with header) and user pointer - static void* block_to_user(void* block) noexcept { - return static_cast(block) + HEADER_SIZE; - } - - static void* user_to_block(void* user) noexcept { - return static_cast(user) - HEADER_SIZE; - } - - frame_allocator() - : pool_id_(next_pool_id_.fetch_add(1, std::memory_order_relaxed)) - , remote_head_{} - , remote_tail_(&remote_head_) { - // Initialize remote_head_ fields after default construction - remote_head_.source_pool_id = 0; - remote_head_.size_class = 0; - remote_head_.next.store(nullptr, std::memory_order_relaxed); - // Initialize free counts to 0 - for (size_t i = 0; i < NUM_SIZE_CLASSES; ++i) { - free_count_[i] = 0; - } - // Register this pool for cross-thread access - register_pool(this); - } - - ~frame_allocator() { - // Unregister before cleanup - unregister_pool(this); - - // Reclaim any remaining remote returns - reclaim_all_remote_returns(); - - // Free all cached frames when thread exits - for (size_t sc = 0; sc < NUM_SIZE_CLASSES; ++sc) { - for (size_t i = 0; i < free_count_[sc]; ++i) { - ::operator delete(pool_[sc][i]); - } - } - } - - // MPSC queue: push from any thread (producers), pop from owner only (consumer) - void push_remote_return(void* block) noexcept { - auto* header = static_cast(block); - header->next.store(nullptr, std::memory_order_relaxed); - - // Atomic push to MPSC queue (lock-free) - block_header* prev = remote_tail_.exchange(header, std::memory_order_acq_rel); - prev->next.store(header, std::memory_order_release); - } - - // Called by owner thread to reclaim remote returns for all size classes - void reclaim_remote_returns() noexcept { - // Quick check without full synchronization - block_header* head = 
remote_head_.next.load(std::memory_order_acquire); - if (!head) return; - - size_t count = 0; - while (head && count < REMOTE_QUEUE_BATCH) { - block_header* next = head->next.load(std::memory_order_acquire); - - // If next is null but tail points elsewhere, the producer is in the - // middle of push() (has done the tail exchange but not yet written - // prev->next). Spin briefly with a CPU pause hint. - if (!next && remote_tail_.load(std::memory_order_acquire) != head) { - for (int i = 0; i < 16; ++i) { - ELIO_CPU_PAUSE(); - next = head->next.load(std::memory_order_acquire); - if (next) break; - } - // If the link still isn't ready, stop without consuming 'head'. - // Consuming it would leave the queue in a broken state because - // the producer would later write through a recycled pointer. - if (!next) break; - } - - // Add to appropriate size class pool - size_t sc = head->size_class; - if (sc < NUM_SIZE_CLASSES && free_count_[sc] < POOL_SIZE) { - pool_[sc][free_count_[sc]++] = head; - remote_head_.next.store(next, std::memory_order_release); - ++count; - } else if (sc >= NUM_SIZE_CLASSES) { - // Invalid size class - delete the block - ::operator delete(head); - remote_head_.next.store(next, std::memory_order_release); - } else { - // Pool full - leave it in the queue for later - break; - } - head = next; - } - } - - // Called during destruction to reclaim all - void reclaim_all_remote_returns() noexcept { - block_header* head = remote_head_.next.load(std::memory_order_acquire); - while (head) { - block_header* next = head->next.load(std::memory_order_acquire); - - // Same safe spin pattern as reclaim_remote_returns(), but with more - // retries because we're in teardown and really want to drain the queue. 
- if (!next && remote_tail_.load(std::memory_order_acquire) != head) { - for (int i = 0; i < 32; ++i) { - ELIO_CPU_PAUSE(); - next = head->next.load(std::memory_order_acquire); - if (next) break; - } - // Stop safely rather than risk corrupting a partially-linked node. - if (!next) break; - } - - size_t sc = head->size_class; - if (sc < NUM_SIZE_CLASSES && free_count_[sc] < POOL_SIZE) { - pool_[sc][free_count_[sc]++] = head; - } else { - ::operator delete(head); - } - head = next; - } - remote_head_.next.store(nullptr, std::memory_order_release); - remote_tail_.store(&remote_head_, std::memory_order_release); - } - - static frame_allocator& instance() { - static thread_local frame_allocator alloc; - return alloc; - } - - // Pool registry for cross-thread access - static constexpr size_t MAX_POOLS = 256; - - // Registry entries - atomic for lock-free reads, protected by mutex for writes - static inline std::atomic pool_registry_[MAX_POOLS]{}; - static inline std::mutex registry_mutex_; // Protects unregister operations - - static void register_pool(frame_allocator* pool) noexcept { - uint32_t id = pool->pool_id_; - if (id < MAX_POOLS) { - pool_registry_[id].store(pool, std::memory_order_release); - } - } - - static void unregister_pool(frame_allocator* pool) noexcept { - uint32_t id = pool->pool_id_; - if (id < MAX_POOLS) { - // Use mutex to ensure no concurrent lookups during unregister - // This prevents the race where a lookup sees a valid pointer - // but the pool is being destroyed - std::lock_guard lock(registry_mutex_); - pool_registry_[id].store(nullptr, std::memory_order_release); - } - } - - // Get pool by ID - returns nullptr if pool was unregistered - static frame_allocator* get_pool_by_id(uint32_t id) noexcept { - if (id < MAX_POOLS) { - return pool_registry_[id].load(std::memory_order_acquire); - } - return nullptr; - } - - std::array, NUM_SIZE_CLASSES> pool_; - std::array free_count_; - uint32_t pool_id_; - - // MPSC queue for remote returns (dummy 
head node pattern) - block_header remote_head_; // Dummy node - next points to actual head - std::atomic remote_tail_; - - // Global pool ID counter - static inline std::atomic next_pool_id_{0}; -}; - -} // namespace elio::coro diff --git a/include/elio/coro/promise_base.hpp b/include/elio/coro/promise_base.hpp index 113c296..5b317b5 100644 --- a/include/elio/coro/promise_base.hpp +++ b/include/elio/coro/promise_base.hpp @@ -5,6 +5,8 @@ #include #include +#include "vthread_stack.hpp" + namespace elio::coro { /// Constant indicating no affinity (vthread can migrate freely) @@ -94,12 +96,38 @@ class promise_base { , debug_id_(0) // Lazy allocation - only allocated when id() is called #endif , affinity_(NO_AFFINITY) + , vstack_(current_frame_ ? current_frame_->vstack() : nullptr) + , owns_vstack_(false) { current_frame_ = this; } ~promise_base() noexcept { current_frame_ = parent_; + if (owns_vstack_) { + // Clear current_ before deleting vstack. When operator delete later + // calls tagged_dealloc() -> vthread_stack::deallocate(), it will find + // current_ is nullptr and correctly no-op (memory already freed by vstack). + auto* vs = vstack_.exchange(nullptr, std::memory_order_acq_rel); + if (vthread_stack::current() == vs) { + vthread_stack::set_current(nullptr); + } + delete vs; + } + } + + /// Detach this frame from the current thread's frame chain. + /// Call this before spawning a coroutine to another thread to avoid + /// use-after-free when the original thread creates another coroutine. + void detach_from_parent() noexcept { + if (current_frame_ == this) { + // Set to nullptr instead of parent_ to avoid use-after-free. + // parent_ may have been spawned to another thread and destroyed. 
+ current_frame_ = nullptr; + } + parent_ = nullptr; + // Ensure all writes before detach are visible to the thread that will execute this coroutine + std::atomic_thread_fence(std::memory_order_release); } promise_base(const promise_base&) = delete; @@ -126,6 +154,10 @@ class promise_base { return current_frame_; } + static void set_current_frame(promise_base* frame) noexcept { + current_frame_ = frame; + } + // Debug accessors (available only when debug metadata is enabled) #if ELIO_ENABLE_DEBUG_METADATA [[nodiscard]] uint64_t frame_magic() const noexcept { return frame_magic_; } @@ -172,17 +204,41 @@ class promise_base { // Affinity accessors /// Get the current thread affinity for this vthread /// @return Worker ID this vthread is bound to, or NO_AFFINITY if unbound - [[nodiscard]] size_t affinity() const noexcept { return affinity_; } + [[nodiscard]] size_t affinity() const noexcept { + return affinity_.load(std::memory_order_acquire); + } /// Set thread affinity for this vthread /// @param worker_id Worker ID to bind to, or NO_AFFINITY to clear - void set_affinity(size_t worker_id) noexcept { affinity_ = worker_id; } + void set_affinity(size_t worker_id) noexcept { + affinity_.store(worker_id, std::memory_order_release); + } /// Check if this vthread has affinity set - [[nodiscard]] bool has_affinity() const noexcept { return affinity_ != NO_AFFINITY; } + [[nodiscard]] bool has_affinity() const noexcept { + return affinity_.load(std::memory_order_acquire) != NO_AFFINITY; + } /// Clear thread affinity, allowing this vthread to migrate freely - void clear_affinity() noexcept { affinity_ = NO_AFFINITY; } + void clear_affinity() noexcept { + affinity_.store(NO_AFFINITY, std::memory_order_release); + } + + // vthread_stack accessors + [[nodiscard]] vthread_stack* vstack() const noexcept { + return vstack_.load(std::memory_order_acquire); + } + + void set_vstack(vthread_stack* vs) noexcept { + vstack_.store(vs, std::memory_order_release); + } + + void 
set_vstack_owner(vthread_stack* vs) noexcept { + vstack_.store(vs, std::memory_order_release); + owns_vstack_ = true; + } + + [[nodiscard]] bool owns_vstack() const noexcept { return owns_vstack_; } private: // Magic number at start for debugger validation @@ -201,7 +257,12 @@ class promise_base { #endif // Thread affinity: NO_AFFINITY means can migrate freely - size_t affinity_; + // Must be atomic to avoid data races in work-stealing scenarios + std::atomic affinity_; + + // vthread_stack support + std::atomic vstack_{nullptr}; + bool owns_vstack_ = false; static inline thread_local promise_base* current_frame_ = nullptr; }; diff --git a/include/elio/coro/task.hpp b/include/elio/coro/task.hpp index ceff525..f5b0a10 100644 --- a/include/elio/coro/task.hpp +++ b/include/elio/coro/task.hpp @@ -1,7 +1,7 @@ #pragma once #include "promise_base.hpp" -#include "frame_allocator.hpp" +#include "vthread_stack.hpp" #include #include #include @@ -47,6 +47,54 @@ struct final_awaiter { void await_resume() const noexcept {} }; +// Allocation modes +enum class alloc_mode : uint8_t { stack = 0, heap = 1 }; +inline thread_local alloc_mode current_alloc_mode_ = alloc_mode::stack; + +// RAII guard: temporarily switch to heap allocation +struct heap_alloc_guard { + heap_alloc_guard() noexcept { current_alloc_mode_ = alloc_mode::heap; } + ~heap_alloc_guard() noexcept { current_alloc_mode_ = alloc_mode::stack; } + heap_alloc_guard(const heap_alloc_guard&) = delete; + heap_alloc_guard& operator=(const heap_alloc_guard&) = delete; +}; + +// Tagged allocation +static constexpr size_t TAG_OFFSET = alignof(std::max_align_t); + +inline void* tagged_alloc(size_t size, alloc_mode tag) { + void* raw = (tag == alloc_mode::heap) + ? 
::operator new(size + TAG_OFFSET) + : vthread_stack::allocate(size + TAG_OFFSET); + *static_cast(raw) = tag; + return static_cast(raw) + TAG_OFFSET; +} + +inline void tagged_dealloc(void* ptr, size_t size) noexcept { + void* raw = static_cast(ptr) - TAG_OFFSET; + auto tag = *static_cast(raw); + if (tag == alloc_mode::heap) + ::operator delete(raw); + else + vthread_stack::deallocate(raw, size + TAG_OFFSET); +} + +// Friend accessor: extract handle from immovable task +struct task_access { + template + static auto release(TaskT& t) noexcept { + if (t.handle_) { + t.handle_.promise().detached_ = true; + } + return std::exchange(t.handle_, nullptr); + } + // Get handle without transferring ownership (for testing) + template + static auto handle(TaskT& t) noexcept { + return t.handle_; + } +}; + /// Shared state for join_handle - stores result and waiter template struct join_state { @@ -240,96 +288,62 @@ class join_handle { /// Primary template for task where T is not void template class task { + friend struct detail::task_access; public: + using value_type = T; + struct promise_type : promise_base { std::optional value_; std::coroutine_handle<> continuation_; bool detached_ = false; - // Join state for spawn() - only used when task is spawned std::shared_ptr> join_state_; - promise_type() noexcept = default; + void* operator new(size_t size) { + return detail::tagged_alloc(size, detail::current_alloc_mode_); + } + void operator delete(void* ptr, size_t size) noexcept { + detail::tagged_dealloc(ptr, size); + } [[nodiscard]] task get_return_object() noexcept { return task{std::coroutine_handle::from_promise(*this)}; } - [[nodiscard]] std::suspend_always initial_suspend() noexcept { return {}; } [[nodiscard]] detail::final_awaiter final_suspend() noexcept { return {}; } template void return_value(U&& value) { value_.emplace(std::forward(value)); - // Notify join state if present - if (join_state_) { - join_state_->set_value(std::move(*value_)); - } + if (join_state_) 
join_state_->set_value(std::move(*value_)); } void unhandled_exception() noexcept { promise_base::unhandled_exception(); - // Notify join state if present - if (join_state_) { - join_state_->set_exception(exception()); - } - } - - // Custom allocator for coroutine frames - void* operator new(size_t size) { - return frame_allocator::allocate(size); - } - - void operator delete(void* ptr, size_t size) noexcept { - frame_allocator::deallocate(ptr, size); + if (join_state_) join_state_->set_exception(exception()); } }; using handle_type = std::coroutine_handle; - explicit task(handle_type handle) noexcept : handle_(handle) {} - task(task&& other) noexcept : handle_(std::exchange(other.handle_, nullptr)) {} - - task& operator=(task&& other) noexcept { - if (this != &other) { - if (handle_) handle_.destroy(); - handle_ = std::exchange(other.handle_, nullptr); - } - return *this; - } - - ~task() { if (handle_) handle_.destroy(); } + explicit task(handle_type h) noexcept : handle_(h) {} + // Non-copyable, non-movable task(const task&) = delete; task& operator=(const task&) = delete; + task(task&&) = delete; + task& operator=(task&&) = delete; - [[nodiscard]] handle_type handle() const noexcept { return handle_; } - [[nodiscard]] handle_type release() noexcept { - if (handle_) handle_.promise().detached_ = true; - return std::exchange(handle_, nullptr); - } - - /// Spawn this task on the current scheduler (fire-and-forget) - /// The task will run asynchronously and self-destruct when complete - void go() { - runtime::schedule_handle(release()); - } - - /// Spawn this task and return a join_handle for awaiting the result - /// Usage: auto handle = some_task().spawn(); T result = co_await handle; - [[nodiscard]] join_handle spawn(); + ~task() { if (handle_) handle_.destroy(); } + // co_await interface [[nodiscard]] bool await_ready() const noexcept { return false; } - [[nodiscard]] std::coroutine_handle<> await_suspend(std::coroutine_handle<> awaiter) noexcept { 
handle_.promise().continuation_ = awaiter; return handle_; } - T await_resume() { auto& promise = handle_.promise(); - if (promise.exception()) { - std::rethrow_exception(promise.exception()); - } + if (promise.exception()) std::rethrow_exception(promise.exception()); return std::move(*promise.value_); } @@ -340,115 +354,63 @@ class task { /// Specialization for task template<> class task { + friend struct detail::task_access; public: + using value_type = void; + struct promise_type : promise_base { std::coroutine_handle<> continuation_; bool detached_ = false; - // Join state for spawn() - only used when task is spawned std::shared_ptr> join_state_; - promise_type() noexcept = default; + void* operator new(size_t size) { + return detail::tagged_alloc(size, detail::current_alloc_mode_); + } + void operator delete(void* ptr, size_t size) noexcept { + detail::tagged_dealloc(ptr, size); + } [[nodiscard]] task get_return_object() noexcept { return task{std::coroutine_handle::from_promise(*this)}; } - [[nodiscard]] std::suspend_always initial_suspend() noexcept { return {}; } [[nodiscard]] detail::final_awaiter final_suspend() noexcept { return {}; } void return_void() noexcept { - // Notify join state if present - if (join_state_) { - join_state_->set_value(); - } + if (join_state_) join_state_->set_value(); } void unhandled_exception() noexcept { promise_base::unhandled_exception(); - // Notify join state if present - if (join_state_) { - join_state_->set_exception(exception()); - } - } - - // Custom allocator for coroutine frames - void* operator new(size_t size) { - return frame_allocator::allocate(size); - } - - void operator delete(void* ptr, size_t size) noexcept { - frame_allocator::deallocate(ptr, size); + if (join_state_) join_state_->set_exception(exception()); } }; using handle_type = std::coroutine_handle; - explicit task(handle_type handle) noexcept : handle_(handle) {} - task(task&& other) noexcept : handle_(std::exchange(other.handle_, nullptr)) {} - - 
task& operator=(task&& other) noexcept { - if (this != &other) { - if (handle_) handle_.destroy(); - handle_ = std::exchange(other.handle_, nullptr); - } - return *this; - } - - ~task() { if (handle_) handle_.destroy(); } + explicit task(handle_type h) noexcept : handle_(h) {} + // Non-copyable, non-movable task(const task&) = delete; task& operator=(const task&) = delete; + task(task&&) = delete; + task& operator=(task&&) = delete; - [[nodiscard]] handle_type handle() const noexcept { return handle_; } - [[nodiscard]] handle_type release() noexcept { - if (handle_) handle_.promise().detached_ = true; - return std::exchange(handle_, nullptr); - } - - /// Spawn this task on the current scheduler (fire-and-forget) - /// The task will run asynchronously and self-destruct when complete - void go() { - runtime::schedule_handle(release()); - } - - /// Spawn this task and return a join_handle for awaiting completion - /// Usage: auto handle = some_task().spawn(); co_await handle; - [[nodiscard]] join_handle spawn(); + ~task() { if (handle_) handle_.destroy(); } + // co_await interface [[nodiscard]] bool await_ready() const noexcept { return false; } - [[nodiscard]] std::coroutine_handle<> await_suspend(std::coroutine_handle<> awaiter) noexcept { handle_.promise().continuation_ = awaiter; return handle_; } - void await_resume() { auto& promise = handle_.promise(); - if (promise.exception()) { - std::rethrow_exception(promise.exception()); - } + if (promise.exception()) std::rethrow_exception(promise.exception()); } private: handle_type handle_; }; -// Out-of-line definitions for spawn() methods -template -join_handle task::spawn() { - // Create join state and attach to task's promise - auto state = std::make_shared>(); - handle_.promise().join_state_ = state; - // Release and schedule - the promise will notify join state on completion - runtime::schedule_handle(release()); - return join_handle(std::move(state)); -} - -inline join_handle task::spawn() { - auto state = 
std::make_shared>(); - handle_.promise().join_state_ = state; - runtime::schedule_handle(release()); - return join_handle(std::move(state)); -} - } // namespace elio::coro diff --git a/include/elio/coro/task_handle.hpp b/include/elio/coro/task_handle.hpp index 33743ce..c7fc6c1 100644 --- a/include/elio/coro/task_handle.hpp +++ b/include/elio/coro/task_handle.hpp @@ -2,7 +2,6 @@ #include "promise_base.hpp" #include "cancel_token.hpp" -#include "frame_allocator.hpp" #include #include #include @@ -23,28 +22,28 @@ void schedule_handle(std::coroutine_handle<> handle) noexcept; namespace elio::coro { -/// 任务执行状态 +/// Task execution status enum class task_status { - pending, ///< 尚未开始或正在执行 - completed, ///< 正常完成(成功) - logic_failed, ///< 业务失败(显式失败,非异常) - exception, ///< 异常失败(抛出异常) - cancelled ///< 被取消 + pending, ///< Not started or currently executing + completed, ///< Normal completion (success) + logic_failed, ///< Business failure (explicit failure, not exception) + exception, ///< Exception failure (exception thrown) + cancelled ///< Cancelled }; -/// 任务结果状态(用于 task_result / awaitable_result) +/// Task result status (for task_result / awaitable_result) enum class result_status { - completed, ///< 正常完成 - logic_failed, ///< 业务失败 - timeout, ///< 超时 - cancelled, ///< 被取消 - exception ///< 异常失败 + completed, ///< Normal completion + logic_failed, ///< Business failure + timeout, ///< Timeout + cancelled, ///< Cancelled + exception ///< Exception failure }; -/// 失败信息(业务失败,非异常) +/// Failure info (business failure, not exception) struct failure { - int code = 0; ///< 错误码 - std::string message; ///< 错误信息 + int code = 0; ///< Error code + std::string message; ///< Error message failure() = default; failure(int c, std::string msg) : code(c), message(std::move(msg)) {} @@ -52,8 +51,8 @@ struct failure { explicit failure(std::string msg) : code(0), message(std::move(msg)) {} }; -/// 辅助函数:创建 failure(用于 co_return,仅适用于非 void task) -/// 用法: co_return coro::fail(404, "not found"); +/// 
Helper function: create failure (for co_return, only for non-void task) +/// Usage: co_return coro::fail(404, "not found"); inline failure fail(int code, std::string message) { return failure{code, std::move(message)}; } @@ -64,20 +63,20 @@ inline failure fail(std::string message) { namespace detail { -/// 内部共享状态 +/// Internal shared state template struct task_state { - // 状态与结果存储 + // State and result storage std::atomic status_{task_status::pending}; std::optional value_; failure failure_; std::exception_ptr exception_; - // 等待者管理 + // Waiter management std::atomic waiter_{nullptr}; std::mutex mutex_; - // 取消控制 + // Cancel control std::atomic cancel_requested_{false}; void set_value(T&& val) { @@ -158,7 +157,7 @@ struct task_state { } }; -/// void 特化 +/// void specialization template<> struct task_state { std::atomic status_{task_status::pending}; @@ -248,7 +247,7 @@ struct task_state { } // namespace detail // ============================================================================ -// task_result - 结果包装器 +// task_result - Result wrapper // ============================================================================ template @@ -258,34 +257,34 @@ class task_result { task_result() = default; - /// 构造成功结果 + /// Construct success result explicit task_result(T value) : status_(result_status::completed) , value_(std::move(value)) {} - /// 构造业务失败结果 + /// Construct business failure result explicit task_result(result_status status, failure f) : status_(status) , failure_(std::move(f)) {} - /// 构造异常结果 + /// Construct exception result explicit task_result(result_status status, std::exception_ptr ep) : status_(status) , exception_(std::move(ep)) {} - /// 构造 timeout/cancelled 结果 + /// Construct timeout/cancelled result explicit task_result(result_status status) : status_(status) {} - // 移动语义 + // Move semantics task_result(task_result&&) = default; task_result& operator=(task_result&&) = default; - // 不支持拷贝 + // No copy support task_result(const task_result&) = delete; 
task_result& operator=(const task_result&) = delete; - // ===== 状态查询 ===== + // ===== Status query ===== [[nodiscard]] bool has_value() const noexcept { return status_ == result_status::completed; } @@ -314,7 +313,7 @@ class task_result { return status_ == result_status::logic_failed; } - // ===== 值访问 ===== + // ===== Value access ===== T& value() & { return *value_; } @@ -337,7 +336,7 @@ class task_result { return has_value() ? std::move(value()) : static_cast(std::forward(default_value)); } - // ===== 结果访问 ===== + // ===== Result access ===== const failure& failure_info() const { return failure_; } @@ -360,7 +359,7 @@ class task_result { } } - // ===== 隐式转换 ===== + // ===== Implicit conversion ===== explicit operator bool() const noexcept { return has_value(); } @@ -372,7 +371,7 @@ class task_result { std::exception_ptr exception_; }; -// ===== void 特化 ===== +// ===== void specialization ===== template<> class task_result { public: @@ -380,28 +379,28 @@ class task_result { task_result() = default; - /// 构造成功/timeout/cancelled 结果 + /// Construct success/timeout/cancelled result explicit task_result(result_status status) : status_(status) {} - /// 构造业务失败结果 + /// Construct business failure result explicit task_result(result_status status, failure f) : status_(status) , failure_(std::move(f)) {} - /// 构造异常结果 + /// Construct exception result explicit task_result(result_status status, std::exception_ptr ep) : status_(status) , exception_(std::move(ep)) {} - // 移动语义 + // Move semantics task_result(task_result&&) = default; task_result& operator=(task_result&&) = default; task_result(const task_result&) = delete; task_result& operator=(const task_result&) = delete; - // ===== 状态查询 ===== + // ===== Status query ===== [[nodiscard]] bool has_value() const noexcept { return status_ == result_status::completed; } @@ -430,7 +429,7 @@ class task_result { return status_ == result_status::logic_failed; } - // ===== 结果访问 ===== + // ===== Result access ===== const failure& 
failure_info() const { return failure_; } @@ -464,7 +463,7 @@ class task_result { }; // ============================================================================ -// task_handle - 任务句柄 +// task_handle - Task handle // ============================================================================ template @@ -479,15 +478,15 @@ class task_handle { ~task_handle() = default; - // 移动语义 + // Move semantics task_handle(task_handle&&) noexcept = default; task_handle& operator=(task_handle&&) noexcept = default; - // 不支持拷贝 + // No copy support task_handle(const task_handle&) = delete; task_handle& operator=(const task_handle&) = delete; - // ===== 有效性检查 ===== + // ===== Validity check ===== [[nodiscard]] bool valid() const noexcept { return state_ != nullptr; } @@ -496,7 +495,7 @@ class task_handle { return valid(); } - // ===== 状态查询 ===== + // ===== Status query ===== [[nodiscard]] task_status status() const noexcept { if (!state_) return task_status::exception; return state_->status_.load(std::memory_order_acquire); @@ -527,7 +526,7 @@ class task_handle { return status() == task_status::pending; } - // ===== 显式结果获取 ===== + // ===== Explicit result retrieval ===== bool try_get(T& out) const { if (!state_) return false; std::lock_guard lock(state_->mutex_); @@ -569,7 +568,7 @@ class task_handle { return static_cast(std::forward(default_value)); } - // ===== 获取完整结果 ===== + // ===== Get complete result ===== task_result get_result() const { if (!state_) { return task_result(result_status::exception, @@ -598,7 +597,7 @@ class task_handle { } } - // ===== 同步等待 ===== + // ===== Synchronous wait ===== task_status wait() { if (!state_) return task_status::exception; @@ -619,7 +618,7 @@ class task_handle { std::unique_lock lock(state_->mutex_); while (!state_->is_done()) { if (std::chrono::steady_clock::now() >= deadline) { - return status(); // 可能仍为 pending + return status(); // May still be pending } lock.unlock(); std::this_thread::yield(); @@ -628,7 +627,7 @@ class task_handle 
{ return status(); } - // ===== 取消控制 ===== + // ===== Cancel control ===== void request_cancel() { if (!state_) return; state_->request_cancel(); @@ -639,7 +638,7 @@ class task_handle { return state_->is_cancellation_requested(); } - // ===== 协程等待(返回 task_result,不抛异常)===== + // ===== Coroutine await (returns task_result, no exception thrown) ===== auto operator co_await() const { struct awaiter { std::shared_ptr> state; @@ -688,7 +687,7 @@ class task_handle { }; // ============================================================================ -// task_handle - void 特化 +// task_handle - void specialization // ============================================================================ template<> @@ -709,7 +708,7 @@ class task_handle { task_handle(const task_handle&) = delete; task_handle& operator=(const task_handle&) = delete; - // ===== 有效性检查 ===== + // ===== Validity check ===== [[nodiscard]] bool valid() const noexcept { return state_ != nullptr; } @@ -718,7 +717,7 @@ class task_handle { return valid(); } - // ===== 状态查询 ===== + // ===== Status query ===== [[nodiscard]] task_status status() const noexcept { if (!state_) return task_status::exception; return state_->status_.load(std::memory_order_acquire); @@ -749,7 +748,7 @@ class task_handle { return status() == task_status::pending; } - // ===== 显式结果获取 ===== + // ===== Explicit result retrieval ===== bool try_get(failure& out) const { if (!state_) return false; std::lock_guard lock(state_->mutex_); @@ -770,7 +769,7 @@ class task_handle { return false; } - // ===== 获取完整结果 ===== + // ===== Get complete result ===== task_result get_result() const { if (!state_) { return task_result(result_status::exception, @@ -795,7 +794,7 @@ class task_handle { } } - // ===== 同步等待 ===== + // ===== Synchronous wait ===== task_status wait() { if (!state_) return task_status::exception; @@ -825,7 +824,7 @@ class task_handle { return status(); } - // ===== 取消控制 ===== + // ===== Cancel control ===== void request_cancel() { if 
(!state_) return; state_->request_cancel(); @@ -836,7 +835,7 @@ class task_handle { return state_->is_cancellation_requested(); } - // ===== 协程等待(返回 task_result,不抛异常)===== + // ===== Coroutine await (returns task_result, no exception thrown) ===== auto operator co_await() const { struct awaiter { std::shared_ptr> state; diff --git a/include/elio/coro/vthread_stack.hpp b/include/elio/coro/vthread_stack.hpp new file mode 100644 index 0000000..ce4ec7a --- /dev/null +++ b/include/elio/coro/vthread_stack.hpp @@ -0,0 +1,171 @@ +#pragma once + +#include +#include +#include +#include + +// Sanitizer detection for vthread_stack +#ifndef ELIO_SANITIZER_ACTIVE +#if defined(__SANITIZE_ADDRESS__) || defined(__SANITIZE_THREAD__) +#define ELIO_SANITIZER_ACTIVE 1 +#elif defined(__has_feature) +#if __has_feature(address_sanitizer) || __has_feature(thread_sanitizer) +#define ELIO_SANITIZER_ACTIVE 1 +#endif +#endif +#endif + +namespace elio::coro { + +/// Segmented bump-pointer stack allocator for vthread coroutine frames. +/// +/// Each vthread maintains its own stack allocator. Coroutine frames are +/// allocated in LIFO order within stack segments. When a segment is exhausted, +/// a new segment is allocated and linked. When all frames in a segment are +/// freed, the segment is released. +/// +/// This allocator provides significant performance improvements over +/// general-purpose allocation for coroutines that follow strict LIFO +/// allocation/deallocation patterns (which is natural for nested coroutines). 
+class vthread_stack { +public: + // Static interface — for promise_type::operator new/delete +#ifdef ELIO_SANITIZER_ACTIVE + static void* allocate(size_t size) { + return ::operator new(size); + } + + static void deallocate(void* ptr, [[maybe_unused]] size_t size) noexcept { + ::operator delete(ptr); + } +#else + static void* allocate(size_t size) { + if (current_ != nullptr) { + return current_->push(size); + } + // No vthread context, use global new directly + return ::operator new(size); + } + + static void deallocate(void* ptr, size_t size) noexcept { + if (current_ != nullptr) { + current_->pop(ptr, size); + } else { + // When current_ is nullptr and we get here via tagged_dealloc() with + // a vstack tag, it means the vstack that owned this memory has been + // deleted (its destructor clears current_ and frees all segments). + // The memory pointed to by ptr is now invalid (already freed by vstack's + // destructor), so we must NOT try to free it again. + // + // This is a no-op: the memory was already freed when the vstack was deleted. + // Note: calling ::operator delete(ptr) here would be wrong + // because the memory was already freed by the owning vthread_stack. + // + // This situation occurs when: + // 1. A coroutine owns its vstack (owns_vstack_ = true) + // 2. The coroutine completes and its promise destructor runs + // 3. Promise destructor deletes the vstack (freeing all segment memory) + // 4. Then operator delete calls tagged_dealloc() -> vthread_stack::deallocate() + // 5. 
 But current_ is now nullptr because the vstack was just deleted + } + } +#endif + + // thread-local current vthread_stack management + static vthread_stack* current() noexcept { + return current_; + } + + static void set_current(vthread_stack* s) noexcept { + current_ = s; + } + + // Instance lifecycle + vthread_stack() = default; + + ~vthread_stack() { + free_segments(); + } + + vthread_stack(const vthread_stack&) = delete; + vthread_stack& operator=(const vthread_stack&) = delete; + + // Instance allocation interface + void* push(size_t size) { + size_t aligned_size = align_up(size); + + // Check if current segment has enough space + if (current_segment_ == nullptr || + current_segment_->used + aligned_size > current_segment_->capacity) { + allocate_segment(aligned_size); + } + + void* ptr = current_segment_->data() + current_segment_->used; + current_segment_->used += aligned_size; + return ptr; + } + + void pop([[maybe_unused]] void* ptr, size_t size) noexcept { + size_t aligned_size = align_up(size); + + assert(current_segment_ != nullptr && "pop called with no segment"); + assert(current_segment_->used >= aligned_size && "pop size exceeds used"); + assert(ptr == current_segment_->data() + current_segment_->used - aligned_size && + "pop ptr does not match expected position"); + + current_segment_->used -= aligned_size; + + // If current segment is empty and has a previous segment, free current segment and backtrack + if (current_segment_->used == 0 && current_segment_->prev != nullptr) { + segment* old = current_segment_; + current_segment_ = current_segment_->prev; + ::operator delete(old); + } + } + +private: + struct segment { + segment* prev; + size_t capacity; + size_t used; + + // Flexible array member workaround: compute data pointer from end of struct + char* data() noexcept { + return reinterpret_cast<char*>(this + 1); + } + const char* data() const noexcept { + return reinterpret_cast<const char*>(this + 1); + } + }; + + segment* current_segment_ = nullptr; + static
constexpr size_t DEFAULT_SEGMENT_SIZE = 16384; // 16KB + static constexpr size_t ALIGNMENT = alignof(std::max_align_t); + + static constexpr size_t align_up(size_t n) noexcept { + return (n + ALIGNMENT - 1) & ~(ALIGNMENT - 1); + } + + void allocate_segment(size_t min_payload) { + size_t payload = min_payload > DEFAULT_SEGMENT_SIZE ? min_payload : DEFAULT_SEGMENT_SIZE; + void* mem = ::operator new(sizeof(segment) + payload); + segment* seg = static_cast(mem); + seg->prev = current_segment_; + seg->capacity = payload; + seg->used = 0; + current_segment_ = seg; + } + + void free_segments() { + while (current_segment_ != nullptr) { + segment* prev = current_segment_->prev; + ::operator delete(current_segment_); + current_segment_ = prev; + } + } + + static inline thread_local vthread_stack* current_ = nullptr; +}; + +} // namespace elio::coro diff --git a/include/elio/elio.hpp b/include/elio/elio.hpp index 86157a7..2496f07 100644 --- a/include/elio/elio.hpp +++ b/include/elio/elio.hpp @@ -18,6 +18,7 @@ #include "coro/awaitable_base.hpp" #include "coro/frame.hpp" #include "coro/cancel_token.hpp" +#include "coro/vthread_stack.hpp" // Runtime scheduler #include "runtime/scheduler.hpp" @@ -26,6 +27,9 @@ #include "runtime/async_main.hpp" #include "runtime/affinity.hpp" #include "runtime/serve.hpp" +#include "runtime/spawn.hpp" +#include "runtime/blocking_pool.hpp" +#include "runtime/spawn_blocking.hpp" #include "runtime/autoscaler_config.hpp" #include "runtime/autoscaler_triggers.hpp" #include "runtime/autoscaler_actions.hpp" diff --git a/include/elio/http/http_server.hpp b/include/elio/http/http_server.hpp index 552a303..fe84ff6 100644 --- a/include/elio/http/http_server.hpp +++ b/include/elio/http/http_server.hpp @@ -270,8 +270,9 @@ class server { } // Spawn connection handler - auto handler = handle_connection(std::move(*stream_result)); - sched->spawn(handler.release()); + sched->go([this, s = std::move(*stream_result)]() mutable { + return 
handle_connection(std::move(s)); + }); } } @@ -306,8 +307,9 @@ class server { } // Create TLS stream and spawn handler - auto handler = handle_tls_connection(std::move(*stream_result), tls_ctx); - sched->spawn(handler.release()); + sched->go([this, s = std::move(*stream_result), &tls_ctx]() mutable { + return handle_tls_connection(std::move(s), tls_ctx); + }); } } diff --git a/include/elio/http/websocket_server.hpp b/include/elio/http/websocket_server.hpp index b0b88bd..997e102 100644 --- a/include/elio/http/websocket_server.hpp +++ b/include/elio/http/websocket_server.hpp @@ -455,8 +455,9 @@ class ws_server { } // Spawn connection handler - auto handler = handle_connection(std::move(*stream_result)); - sched->spawn(handler.release()); + sched->go([this, s = std::move(*stream_result)]() mutable { + return handle_connection(std::move(s)); + }); } } @@ -490,8 +491,9 @@ class ws_server { } // Spawn TLS connection handler - auto handler = handle_tls_connection(std::move(*stream_result), tls_ctx); - sched->spawn(handler.release()); + sched->go([this, s = std::move(*stream_result), &tls_ctx]() mutable { + return handle_tls_connection(std::move(s), tls_ctx); + }); } } diff --git a/include/elio/net/resolve.hpp b/include/elio/net/resolve.hpp index 57e9de8..1570856 100644 --- a/include/elio/net/resolve.hpp +++ b/include/elio/net/resolve.hpp @@ -2,7 +2,7 @@ #include #include -#include +#include #include #include @@ -11,13 +11,10 @@ #include #include #include -#include -#include #include #include #include #include -#include #include #include @@ -186,30 +183,6 @@ inline resolve_options default_cached_resolve_options() { return opts; } -struct resolve_waiter_state { - std::vector results; - int error = 0; - runtime::scheduler* scheduler = nullptr; - std::coroutine_handle<> handle; - size_t saved_affinity = coro::NO_AFFINITY; - void* handle_address = nullptr; - - void restore_affinity() const noexcept { - if (!handle_address) { - return; - } - auto* promise = 
coro::get_promise_base(handle_address); - if (!promise) { - return; - } - if (saved_affinity == coro::NO_AFFINITY) { - promise->clear_affinity(); - } else { - promise->set_affinity(saved_affinity); - } - } -}; - inline bool try_parse_ipv4_literal(std::string_view host, uint16_t port, std::vector& out) { struct in_addr addr{}; @@ -249,124 +222,92 @@ inline bool try_parse_ipv6_literal(std::string_view host, uint16_t port, return true; } -class resolve_all_awaitable { -public: - resolve_all_awaitable(std::string_view host, uint16_t port, resolve_options options) - : host_(host) - , key_{std::string(host), port} - , options_(options) - , state_(std::make_shared()) { - if (host.empty() || host == "::" || host == "0.0.0.0") { - state_->results.push_back(socket_address(host, port)); - return; - } +inline coro::task> resolve_all( + std::string_view host, + uint16_t port, + resolve_options options = {}) { - if (host.find(':') != std::string_view::npos) { - try_parse_ipv6_literal(host, port, state_->results); - return; - } + std::vector results; - try_parse_ipv4_literal(host, port, state_->results); + // Handle empty host or wildcard addresses + if (host.empty() || host == "::" || host == "0.0.0.0") { + results.push_back(socket_address(host, port)); + co_return results; } - bool await_ready() const noexcept { - if (!state_->results.empty()) { - return true; + // Try parsing as IPv6 literal + if (host.find(':') != std::string_view::npos) { + if (try_parse_ipv6_literal(host, port, results)) { + co_return results; } + } - if (!options_.use_cache) { - return false; - } + // Try parsing as IPv4 literal + if (try_parse_ipv4_literal(host, port, results)) { + co_return results; + } - resolve_cache* cache = options_.cache ? options_.cache : &default_resolve_cache(); - if (cache->try_get(key_, state_->results)) { - return true; + // Check cache if enabled + resolve_cache_key key{std::string(host), port}; + if (options.use_cache) { + resolve_cache* cache = options.cache ? 
options.cache : &default_resolve_cache(); + if (cache->try_get(key, results)) { + co_return results; } - cache->record_miss(); - return false; } - template - bool await_suspend(std::coroutine_handle awaiter) { - state_->handle = awaiter; - state_->scheduler = runtime::scheduler::current(); - state_->handle_address = awaiter.address(); - - if constexpr (std::is_base_of_v) { - state_->saved_affinity = awaiter.promise().affinity(); - auto* worker = runtime::worker_thread::current(); - if (worker) { - awaiter.promise().set_affinity(worker->worker_id()); - } - } - - auto host = host_; - auto key = key_; - auto options = options_; - auto state = state_; - - std::thread([host = std::move(host), key = std::move(key), options, state]() mutable { - struct addrinfo hints{}; - struct addrinfo* result = nullptr; - hints.ai_family = AF_UNSPEC; - hints.ai_socktype = SOCK_STREAM; - - std::string service = std::to_string(key.port); - int rc = getaddrinfo(host.c_str(), service.c_str(), &hints, &result); - if (rc == 0 && result) { - for (auto* current = result; current != nullptr; current = current->ai_next) { - if (current->ai_family == AF_INET6) { - auto* sa = reinterpret_cast(current->ai_addr); - state->results.push_back(socket_address(ipv6_address(*sa))); - } else if (current->ai_family == AF_INET) { - auto* sa = reinterpret_cast(current->ai_addr); - state->results.push_back(socket_address(ipv4_address(*sa))); - } - } - freeaddrinfo(result); - } - - if (state->results.empty()) { - state->error = (rc == EAI_SYSTEM) ? errno : EHOSTUNREACH; - if (options.use_cache) { - resolve_cache* cache = options.cache ? 
options.cache : &default_resolve_cache(); - cache->store(key, {}, options.negative_ttl); + // Perform blocking DNS resolution via spawn_blocking + std::string host_str(host); + auto dns_result = co_await elio::spawn_blocking([host_str, port]() { + struct resolve_result { + std::vector addresses; + int error = 0; + }; + + resolve_result result; + struct addrinfo hints{}; + struct addrinfo* ai_result = nullptr; + hints.ai_family = AF_UNSPEC; + hints.ai_socktype = SOCK_STREAM; + + std::string service = std::to_string(port); + int rc = getaddrinfo(host_str.c_str(), service.c_str(), &hints, &ai_result); + if (rc == 0 && ai_result) { + for (auto* current = ai_result; current != nullptr; current = current->ai_next) { + if (current->ai_family == AF_INET6) { + auto* sa = reinterpret_cast(current->ai_addr); + result.addresses.push_back(socket_address(ipv6_address(*sa))); + } else if (current->ai_family == AF_INET) { + auto* sa = reinterpret_cast(current->ai_addr); + result.addresses.push_back(socket_address(ipv4_address(*sa))); } - } else if (options.use_cache) { - resolve_cache* cache = options.cache ? options.cache : &default_resolve_cache(); - cache->store(key, state->results, options.positive_ttl); - } - - if (state->scheduler && state->scheduler->is_running()) { - state->scheduler->spawn(state->handle); - } else { - runtime::schedule_handle(state->handle); } - }).detach(); - - return true; - } + freeaddrinfo(ai_result); + } - std::vector await_resume() { - state_->restore_affinity(); - if (state_->results.empty()) { - errno = state_->error; + if (result.addresses.empty()) { + result.error = (rc == EAI_SYSTEM) ? errno : EHOSTUNREACH; + } + return result; + }); + + // Update cache based on result + if (options.use_cache) { + resolve_cache* cache = options.cache ? 
options.cache : &default_resolve_cache(); + if (dns_result.addresses.empty()) { + cache->store(key, {}, options.negative_ttl); + } else { + cache->store(key, dns_result.addresses, options.positive_ttl); } - return state_->results; } -private: - std::string host_; - resolve_cache_key key_; - resolve_options options_; - std::shared_ptr state_; -}; + // Set errno on failure + if (dns_result.addresses.empty()) { + errno = dns_result.error; + } -inline auto resolve_all(std::string_view host, - uint16_t port, - resolve_options options = {}) { - return resolve_all_awaitable(host, port, options); + co_return dns_result.addresses; } inline coro::task> resolve_hostname(std::string_view host, diff --git a/include/elio/rpc/rpc_client.hpp b/include/elio/rpc/rpc_client.hpp index e0aabc6..97635de 100644 --- a/include/elio/rpc/rpc_client.hpp +++ b/include/elio/rpc/rpc_client.hpp @@ -296,29 +296,26 @@ class rpc_client : public std::enable_shared_from_this> { } // Wait for response with timeout - // Start a timeout coroutine - auto self = this->shared_from_this(); - auto timeout_task = [](std::chrono::milliseconds ms, - std::shared_ptr pending, - coro::cancel_token tok) - -> coro::task - { - auto result = co_await time::sleep_for(ms, tok); - - // Only timeout if sleep completed normally (not cancelled) - if (result == coro::cancel_result::completed && pending->try_complete()) { - pending->timed_out = true; - pending->error = rpc_error::timeout; - pending->completion_event.set(); - } - }; - // Spawn timeout watcher auto* sched = runtime::scheduler::current(); if (sched) { - auto task = timeout_task( - std::chrono::duration_cast(timeout), pending, token); - sched->spawn(task.release()); + sched->go([ms = std::chrono::duration_cast(timeout), + p = pending, tok = token]() mutable { + return [](std::chrono::milliseconds ms, + std::shared_ptr pending, + coro::cancel_token tok) + -> coro::task + { + auto result = co_await time::sleep_for(ms, tok); + + // Only timeout if sleep completed 
normally (not cancelled) + if (result == coro::cancel_result::completed && pending->try_complete()) { + pending->timed_out = true; + pending->error = rpc_error::timeout; + pending->completion_event.set(); + } + }(ms, p, std::move(tok)); + }); } // Wait for completion (either response, timeout, or cancellation) @@ -405,22 +402,18 @@ class rpc_client : public std::enable_shared_from_this> { } // Setup timeout - auto self = this->shared_from_this(); - auto timeout_task = [](std::chrono::milliseconds ms, - std::shared_ptr pending) - -> coro::task - { - co_await time::sleep_for(ms); - if (pending->try_complete()) { - pending->timed_out = true; - pending->completion_event.set(); - } - }; - auto* sched = runtime::scheduler::current(); if (sched) { - auto task = timeout_task(timeout, pending); - sched->spawn(task.release()); + sched->go([ms = timeout, p = pending]() { + return [](std::chrono::milliseconds ms, std::shared_ptr p) + -> coro::task { + co_await time::sleep_for(ms); + if (p->try_complete()) { + p->timed_out = true; + p->completion_event.set(); + } + }(ms, p); + }); } // Wait for pong @@ -448,8 +441,7 @@ class rpc_client : public std::enable_shared_from_this> { auto self = this->shared_from_this(); auto* sched = runtime::scheduler::current(); if (sched) { - auto task = receive_loop(self); - sched->spawn(task.release()); + sched->go([s = self]() { return receive_loop(s); }); } } diff --git a/include/elio/rpc/rpc_server.hpp b/include/elio/rpc/rpc_server.hpp index 0f3282e..dc886ef 100644 --- a/include/elio/rpc/rpc_server.hpp +++ b/include/elio/rpc/rpc_server.hpp @@ -445,8 +445,7 @@ class rpc_server { // Spawn session handler auto* sched = runtime::scheduler::current(); if (sched) { - auto task = run_session(session); - sched->spawn(task.release()); + sched->go([this, s = session]() { return run_session(s); }); } } @@ -482,8 +481,7 @@ class rpc_server { // Spawn session handler auto* sched = runtime::scheduler::current(); if (sched) { - auto task = 
run_session(session); - sched->spawn(task.release()); + sched->go([this, s = session]() { return run_session(s); }); } } diff --git a/include/elio/runtime/async_main.hpp b/include/elio/runtime/async_main.hpp index d41b232..c88ba05 100644 --- a/include/elio/runtime/async_main.hpp +++ b/include/elio/runtime/async_main.hpp @@ -2,8 +2,10 @@ #include "scheduler.hpp" #include +#include #include #include +#include #include #include #include @@ -17,10 +19,15 @@ namespace elio::runtime { struct run_config { /// Number of worker threads (0 = hardware concurrency) size_t num_threads = 0; + /// Blocking thread pool size (0 = fallback to std::thread per task) + size_t blocking_threads = 4; }; namespace detail { +/// Type alias using definitions from scheduler.hpp +template using task_value_t = typename task_value::type; + /// Completion signal for async_main template struct completion_signal { @@ -84,15 +91,14 @@ struct completion_signal { }; /// Wrapper task that signals completion -template -coro::task completion_wrapper(coro::task inner, completion_signal* signal) { +template +coro::task completion_wrapper(F f, completion_signal* signal) { try { if constexpr (std::is_void_v) { - co_await std::move(inner); + co_await std::invoke(std::move(f)); signal->set_result(); } else { - T result = co_await std::move(inner); - signal->set_result(std::move(result)); + signal->set_result(co_await std::invoke(std::move(f))); } } catch (...) { signal->set_exception(std::current_exception()); @@ -101,13 +107,13 @@ coro::task completion_wrapper(coro::task inner, completion_signal* s } // namespace detail -/// Run a coroutine task to completion and return its result +/// Run a callable that returns a coroutine task to completion /// /// This function creates a scheduler, runs the given task, waits for /// completion, and returns the result. It's the recommended way to /// run async code from a synchronous context (like main()). 
/// -/// @param task The coroutine task to run +/// @param f The callable that returns a coroutine task /// @param config Configuration (threads) /// @return The result of the task /// @@ -119,27 +125,41 @@ coro::task completion_wrapper(coro::task inner, completion_signal* s /// } /// /// int main() { -/// return elio::run(async_main()); +/// return elio::run(async_main); /// } /// @endcode -template -T run(coro::task task, const run_config& config = {}) { + +/// Overload 1: no-arg callable + optional config +template + requires (std::invocable && detail::is_task_v>) +auto run(F&& f, const run_config& config = {}) + -> detail::task_value_t> +{ + using T = detail::task_value_t>; detail::completion_signal signal; - + size_t threads = config.num_threads; if (threads == 0) { threads = std::thread::hardware_concurrency(); if (threads == 0) threads = 1; } - - scheduler sched(threads); + + scheduler sched(threads, wait_strategy::blocking(), + config.blocking_threads); sched.start(); - - // Create wrapper that signals completion - auto wrapper = detail::completion_wrapper(std::move(task), &signal); - sched.spawn(wrapper.release()); - - // Wait for completion + + // Wrap user function + auto bound = [&f]() { return std::invoke(std::forward(f)); }; + + { + coro::detail::heap_alloc_guard guard; + auto wrapper = detail::completion_wrapper(std::move(bound), &signal); + auto handle = coro::detail::task_access::release(wrapper); + auto* root_vstack = new coro::vthread_stack(); + handle.promise().set_vstack_owner(root_vstack); + sched.spawn(handle); + } + if constexpr (std::is_void_v) { signal.wait(); sched.shutdown(); @@ -150,10 +170,26 @@ T run(coro::task task, const run_config& config = {}) { } } -/// Run a coroutine task with specified number of threads -template -T run(coro::task task, size_t num_threads) { - return run(std::move(task), run_config{.num_threads = num_threads}); +/// Overload 2: (func, args...) 
with config first +template + requires (sizeof...(Args) > 0 && std::invocable && detail::is_task_v>) +auto run(const run_config& config, F&& f, Args&&... args) + -> detail::task_value_t> +{ + auto bound = [f = std::forward(f), + ...args = std::forward(args)]() mutable { + return std::invoke(std::move(f), std::move(args)...); + }; + return run(std::move(bound), config); +} + +/// Overload 3: (func, args...) without config +template + requires (!std::is_same_v, run_config> && std::invocable && detail::is_task_v>) +auto run(F&& f, Arg0&& arg0, Args&&... args) + -> detail::task_value_t> +{ + return run(run_config{}, std::forward(f), std::forward(arg0), std::forward(args)...); } } // namespace elio::runtime @@ -188,7 +224,7 @@ using runtime::run_config; /// @endcode #define ELIO_ASYNC_MAIN(async_main_func) \ int main(int argc, char* argv[]) { \ - return elio::run(async_main_func(argc, argv)); \ + return elio::run(async_main_func, argc, argv); \ } /// Macro for async_main that returns void (exits with 0) @@ -197,7 +233,7 @@ using runtime::run_config; /// coro::task async_main(int argc, char* argv[]) #define ELIO_ASYNC_MAIN_VOID(async_main_func) \ int main(int argc, char* argv[]) { \ - elio::run(async_main_func(argc, argv)); \ + elio::run(async_main_func, argc, argv); \ return 0; \ } @@ -207,7 +243,7 @@ using runtime::run_config; /// coro::task async_main() #define ELIO_ASYNC_MAIN_NOARGS(async_main_func) \ int main() { \ - return elio::run(async_main_func()); \ + return elio::run(async_main_func); \ } /// Macro for async_main without arguments, returning void @@ -216,6 +252,6 @@ using runtime::run_config; /// coro::task async_main() #define ELIO_ASYNC_MAIN_VOID_NOARGS(async_main_func) \ int main() { \ - elio::run(async_main_func()); \ + elio::run(async_main_func); \ return 0; \ } diff --git a/include/elio/runtime/blocking_pool.hpp b/include/elio/runtime/blocking_pool.hpp new file mode 100644 index 0000000..2a5bc6c --- /dev/null +++ b/include/elio/runtime/blocking_pool.hpp 
@@ -0,0 +1,89 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +namespace elio::runtime { + +// A simple thread pool for executing blocking tasks. +// Supports both pooled mode (fixed threads) and non-pooled mode (spawn per task). +class blocking_pool { +public: + // num_threads: pool size. 0 means no pooling, each submit spawns a new thread. + explicit blocking_pool(size_t num_threads) + : num_threads_(num_threads) { + threads_.reserve(num_threads); + for (size_t i = 0; i < num_threads; ++i) { + threads_.emplace_back([this] { worker_loop(); }); + } + } + + ~blocking_pool() { + shutdown(); + } + + blocking_pool(const blocking_pool&) = delete; + blocking_pool& operator=(const blocking_pool&) = delete; + + // Submit a task for execution. Thread-safe. + // If num_threads == 0, spawns a detached thread directly. + // Otherwise enqueues and wakes one worker. + void submit(std::function task) { + if (num_threads_ == 0) { + std::thread(std::move(task)).detach(); + return; + } + { + std::lock_guard lock(mutex_); + queue_.push_back(std::move(task)); + } + cv_.notify_one(); + } + + // Graceful shutdown: signals stop, wakes all workers, joins threads. + // Note: each worker that wakes with a non-empty queue may still run one + // already-dequeued task before its loop exits; any remaining queued tasks + // are discarded.
+ void shutdown() { + if (stopped_.exchange(true)) return; // idempotent + cv_.notify_all(); + for (auto& t : threads_) { + if (t.joinable()) t.join(); + } + } + +private: + void worker_loop() { + while (!stopped_.load(std::memory_order_relaxed)) { + std::function task; + + // Block until task available or stopped + std::unique_lock lock(mutex_); + cv_.wait(lock, [this] { + return stopped_.load(std::memory_order_relaxed) || !queue_.empty(); + }); + if (stopped_.load(std::memory_order_relaxed) && queue_.empty()) return; + if (!queue_.empty()) { + task = std::move(queue_.front()); + queue_.pop_front(); + } + + lock.unlock(); + if (task) { + task(); + } + } + } + + std::vector threads_; + std::deque> queue_; + std::mutex mutex_; + std::condition_variable cv_; + std::atomic stopped_{false}; + size_t num_threads_; +}; + +} // namespace elio::runtime diff --git a/include/elio/runtime/scheduler.hpp b/include/elio/runtime/scheduler.hpp index 9208c7a..dc84304 100644 --- a/include/elio/runtime/scheduler.hpp +++ b/include/elio/runtime/scheduler.hpp @@ -1,17 +1,32 @@ #pragma once #include "worker_thread.hpp" +#include "blocking_pool.hpp" #include #include +#include +#include #include #include #include #include #include #include +#include namespace elio::runtime { +namespace detail { + // Type traits for task + template struct task_value; + template struct task_value> { using type = T; }; + template using task_value_t = typename task_value::type; + + template struct is_task : std::false_type {}; + template struct is_task> : std::true_type {}; + template inline constexpr bool is_task_v = is_task::value; +} // namespace detail + /// Work-stealing scheduler for coroutines class scheduler { friend class worker_thread; // Allow workers to set current_scheduler_ @@ -20,12 +35,14 @@ class scheduler { static constexpr size_t MAX_THREADS = 256; explicit scheduler(size_t num_threads = std::thread::hardware_concurrency(), - wait_strategy strategy = wait_strategy::blocking()) + 
wait_strategy strategy = wait_strategy::blocking(), + size_t blocking_threads = 4) : num_threads_(num_threads == 0 ? 1 : num_threads) , running_(false) , paused_(false) , spawn_index_(0) - , wait_strategy_(strategy) { + , wait_strategy_(strategy) + , blocking_pool_(std::make_unique(blocking_threads)) { size_t n = num_threads_.load(std::memory_order_relaxed); // Pre-reserve to MAX_THREADS to prevent reallocation during runtime @@ -66,7 +83,12 @@ class scheduler { return; } - // First stop all workers (sets running_=false and joins threads) + // First shutdown blocking pool (before stopping workers) + if (blocking_pool_) { + blocking_pool_->shutdown(); + } + + // Then stop all workers (sets running_=false and joins threads) for (auto& worker : workers_) { worker->stop(); } @@ -91,6 +113,12 @@ class scheduler { handle.destroy(); return; } + // Detach from current thread's frame chain before spawning to another thread + // to avoid use-after-free when this thread creates another coroutine. + auto* promise = coro::get_promise_base(handle.address()); + if (promise) { + promise->detach_from_parent(); + } do_spawn(handle); } @@ -102,6 +130,67 @@ class scheduler { spawn(std::forward(t).release()); } + /// High-level API: fire-and-forget, spawn to this scheduler + /// @param f Callable that returns a task + /// @param args Arguments to forward to the callable + template + requires (std::invocable && detail::is_task_v>) + void go(F&& f, Args&&... args) { + coro::detail::heap_alloc_guard guard; + auto t = std::invoke(std::forward(f), std::forward(args)...); + auto handle = coro::detail::task_access::release(t); + handle.promise().detached_ = true; + auto* vstack = new coro::vthread_stack(); + handle.promise().set_vstack_owner(vstack); + // Detach from current thread's frame chain before spawning to another thread + // to avoid use-after-free when this thread creates another coroutine. 
+ handle.promise().detach_from_parent(); + do_spawn(handle); + } + + /// High-level API: fire-and-forget, spawn to specific worker + /// @param worker_id Target worker index + /// @param f Callable that returns a task + /// @param args Arguments to forward to the callable + template + requires (std::invocable && detail::is_task_v>) + void go_to(size_t worker_id, F&& f, Args&&... args) { + coro::detail::heap_alloc_guard guard; + auto t = std::invoke(std::forward(f), std::forward(args)...); + auto handle = coro::detail::task_access::release(t); + handle.promise().detached_ = true; + auto* vstack = new coro::vthread_stack(); + handle.promise().set_vstack_owner(vstack); + // Detach from current thread's frame chain before spawning to another thread + // to avoid use-after-free when this thread creates another coroutine. + handle.promise().detach_from_parent(); + spawn_to(worker_id, handle); + } + + /// High-level API: spawn + join, spawn to this scheduler + /// @param f Callable that returns a task + /// @param args Arguments to forward to the callable + /// @return join_handle that can be awaited to get the result + template + requires (std::invocable && detail::is_task_v>) + auto go_joinable(F&& f, Args&&... args) + -> coro::join_handle>> + { + using T = detail::task_value_t>; + coro::detail::heap_alloc_guard guard; + auto t = std::invoke(std::forward(f), std::forward(args)...); + auto handle = coro::detail::task_access::release(t); + auto state = std::make_shared>(); + handle.promise().join_state_ = state; + auto* vstack = new coro::vthread_stack(); + handle.promise().set_vstack_owner(vstack); + // Detach from current thread's frame chain before spawning to another thread + // to avoid use-after-free when this thread creates another coroutine. 
+ handle.promise().detach_from_parent(); + do_spawn(handle); + return coro::join_handle(std::move(state)); + } + void spawn_to(size_t worker_id, std::coroutine_handle<> handle) { if (!handle) [[unlikely]] return; if (!running_.load(std::memory_order_relaxed)) [[unlikely]] { @@ -109,6 +198,13 @@ class scheduler { return; } + // Detach from current thread's frame chain before spawning to another thread + // to avoid use-after-free when this thread creates another coroutine. + auto* promise = coro::get_promise_base(handle.address()); + if (promise) { + promise->detach_from_parent(); + } + size_t n = num_threads_.load(std::memory_order_acquire); workers_[worker_id % n]->schedule(handle); } @@ -214,6 +310,11 @@ class scheduler { return wait_strategy_; } + /// Get the blocking pool for spawn_blocking operations + [[nodiscard]] blocking_pool* get_blocking_pool() noexcept { + return blocking_pool_.get(); + } + private: void do_spawn(std::coroutine_handle<> handle) { // Release fence ensures all writes to the coroutine frame (including @@ -276,6 +377,9 @@ class scheduler { alignas(64) mutable std::mutex workers_mutex_; wait_strategy wait_strategy_; + // Blocking pool for spawn_blocking operations + std::unique_ptr blocking_pool_; + static inline thread_local scheduler* current_scheduler_ = nullptr; }; @@ -313,14 +417,15 @@ inline void worker_thread::stop() { if (thread_.joinable()) thread_.join(); } -/// Drain and destroy remaining tasks - only call after ALL workers have stopped +/// Final cleanup for any orphaned tasks - only call after ALL workers have stopped. +/// This is a safety net for edge cases where tasks might still exist after drain phase. 
inline void worker_thread::drain_remaining_tasks() noexcept { // First drain inbox to deque void* addr; while ((addr = inbox_->pop()) != nullptr) { queue_->push(addr); } - // Then destroy all tasks in the deque + // Destroy any remaining tasks (should be rare after drain phase in run()) while ((addr = queue_->pop()) != nullptr) { auto handle = std::coroutine_handle<>::from_address(addr); if (handle) { @@ -377,12 +482,25 @@ inline void worker_thread::run() { } } + // Drain phase: after running_ becomes false, continue executing all + // remaining tasks until both local queue and inbox are empty. + // This ensures shutdown() returns only when all submitted tasks have + // fully completed (including coroutine cleanup and lambda destruction). + while (true) { + drain_inbox(); + void* addr = queue_->pop_local(false); // No concurrent stealers, workers are stopping + if (!addr) break; + + auto handle = std::coroutine_handle<>::from_address(addr); + if (handle && !handle.done()) { + needs_sync_ = true; // Conservatively ensure memory visibility for drained tasks + run_task(handle); + } + } + // Clear the references when done scheduler::current_scheduler_ = nullptr; current_worker_ = nullptr; - - // Note: Cleanup of remaining tasks is handled in stop() AFTER join - // to avoid race conditions with work stealing } inline std::coroutine_handle<> worker_thread::get_next_task() noexcept { @@ -432,8 +550,20 @@ inline void worker_thread::run_task(std::coroutine_handle<> handle) noexcept { } if (!handle || handle.done()) [[unlikely]] return; - + + // Context switch: set vstack and current_frame before resume, restore after + auto* promise = coro::get_promise_base(handle.address()); + auto* prev_vstack = coro::vthread_stack::current(); + auto* prev_frame = coro::promise_base::current_frame(); + if (promise) { + coro::vthread_stack::set_current(promise->vstack()); + coro::promise_base::set_current_frame(promise); + } + handle.resume(); + + 
coro::vthread_stack::set_current(prev_vstack); + coro::promise_base::set_current_frame(prev_frame); tasks_executed_.fetch_add(1, std::memory_order_relaxed); update_last_task_time(); diff --git a/include/elio/runtime/serve.hpp b/include/elio/runtime/serve.hpp index e91476c..b170d4e 100644 --- a/include/elio/runtime/serve.hpp +++ b/include/elio/runtime/serve.hpp @@ -23,6 +23,7 @@ #include #include +#include #include #include @@ -75,9 +76,9 @@ inline coro::task wait_shutdown_signal( /// task to complete. /// /// @tparam Server Server type (must have stop() method) -/// @tparam ListenTask The awaitable returned by server.listen() +/// @tparam ListenFunc Callable that returns a listen coroutine task /// @param server Reference to the server (used to call stop()) -/// @param listen_task The listen coroutine task +/// @param listen_func Function that returns the listen coroutine task /// @param signals Signals to wait for shutdown (defaults to SIGINT, SIGTERM) /// /// Example: @@ -87,23 +88,31 @@ inline coro::task wait_shutdown_signal( /// r.get("/", handler); /// /// http::server srv(r); -/// co_await serve(srv, srv.listen(net::ipv4_address(8080))); +/// co_await serve(srv, [&]() { return srv.listen(net::ipv4_address(8080)); }); /// /// co_return 0; /// } /// /// ELIO_ASYNC_MAIN(async_main) /// @endcode -template -coro::task serve(Server& server, ListenTask listen_task, +template + requires std::invocable +coro::task serve(Server& server, ListenFunc listen_func, std::initializer_list signals = default_shutdown_signals) { // Set up signal handling signal::signal_set sigs(signals); signal::signal_fd sigfd(sigs); - // Spawn the listen task - auto listen_handle = std::move(listen_task).spawn(); + // Get the scheduler + auto* sched = runtime::scheduler::current(); + if (!sched) { + ELIO_LOG_ERROR("serve() must be called within a scheduler context"); + co_return; + } + + // Spawn the listen task as a joinable coroutine + auto listen_handle = 
sched->go_joinable(std::move(listen_func)); // Wait for shutdown signal auto info = co_await sigfd.wait(); @@ -153,9 +162,9 @@ coro::task serve(Server& server, ListenTask listen_task, /// When signal is received, stops all servers. /// /// @tparam Servers Variadic server types -/// @tparam ListenTasks Variadic listen task types +/// @tparam ListenFuncs Variadic listen function types /// @param servers Tuple of server references -/// @param listen_tasks Tuple of listen tasks +/// @param listen_funcs Tuple of listen functions (each returning a task) /// @param signals Signals to wait for shutdown /// /// Example: @@ -167,26 +176,33 @@ coro::task serve(Server& server, ListenTask listen_task, /// co_await serve_all( /// std::tie(http_srv, ws_srv), /// std::make_tuple( -/// http_srv.listen(addr1), -/// ws_srv.listen(addr2) +/// [&]() { return http_srv.listen(addr1); }, +/// [&]() { return ws_srv.listen(addr2); } /// ) /// ); /// } /// @endcode -template +template coro::task serve_all(std::tuple servers, - std::tuple listen_tasks, + std::tuple listen_funcs, std::initializer_list signals = default_shutdown_signals) { // Set up signal handling signal::signal_set sigs(signals); signal::signal_fd sigfd(sigs); - // Spawn all listen tasks - auto spawn_tasks = [](auto&&... tasks) { - return std::make_tuple(std::move(tasks).spawn()...); + // Get the scheduler + auto* sched = runtime::scheduler::current(); + if (!sched) { + ELIO_LOG_ERROR("serve_all() must be called within a scheduler context"); + co_return; + } + + // Spawn all listen tasks as joinable coroutines + auto spawn_tasks = [sched](auto&&... 
funcs) { + return std::make_tuple(sched->go_joinable(std::move(funcs))...); }; - auto handles = std::apply(spawn_tasks, std::move(listen_tasks)); + auto handles = std::apply(spawn_tasks, std::move(listen_funcs)); // Wait for shutdown signal auto info = co_await sigfd.wait(); diff --git a/include/elio/runtime/spawn.hpp b/include/elio/runtime/spawn.hpp new file mode 100644 index 0000000..a60bcc3 --- /dev/null +++ b/include/elio/runtime/spawn.hpp @@ -0,0 +1,95 @@ +#pragma once + +#include +#include +#include +#include "../coro/task.hpp" +#include "../coro/vthread_stack.hpp" +#include "scheduler.hpp" + +namespace elio { + +namespace detail { + // Type traits for task + template struct task_value; + template struct task_value> { using type = T; }; + template using task_value_t = typename task_value::type; + + template struct is_task : std::false_type {}; + template struct is_task> : std::true_type {}; + template inline constexpr bool is_task_v = is_task::value; +} // namespace detail + +/// Fire-and-forget: spawn a coroutine without awaiting its result. +/// The coroutine runs independently and self-destructs on completion. +/// +/// @tparam F Callable type that returns a task +/// @tparam Args Argument types +/// @param f Callable to invoke (must return a task) +/// @param args Arguments to forward to the callable +/// +/// Example: +/// elio::go(async_work); +/// elio::go(async_work_with_args, 1, 2, 3); +/// elio::go([&]() -> coro::task { co_await some_async_op(); }); +template + requires (std::invocable && detail::is_task_v>) +void go(F&& f, Args&&... 
args) { + coro::detail::heap_alloc_guard guard; + auto t = std::invoke(std::forward(f), std::forward(args)...); + + auto handle = coro::detail::task_access::release(t); + handle.promise().detached_ = true; + auto* vstack = new coro::vthread_stack(); + handle.promise().set_vstack_owner(vstack); + // Detach from current thread's frame chain before spawning to another thread + // to avoid use-after-free when this thread creates another coroutine. + handle.promise().detach_from_parent(); + runtime::schedule_handle(handle); +} + +/// Spawn a coroutine and return a join_handle to await its result. +/// The coroutine runs concurrently and the result can be retrieved via co_await. +/// +/// @tparam F Callable type that returns a task +/// @tparam Args Argument types +/// @param f Callable to invoke (must return a task) +/// @param args Arguments to forward to the callable +/// @return join_handle that can be awaited to get the result +/// +/// Example: +/// auto handle = elio::spawn(compute_async, input); +/// auto result = co_await handle; +template + requires (std::invocable && detail::is_task_v>) +auto spawn(F&& f, Args&&... args) + -> coro::join_handle>> +{ + using T = detail::task_value_t>; + coro::detail::heap_alloc_guard guard; + auto t = std::invoke(std::forward(f), std::forward(args)...); + + auto handle = coro::detail::task_access::release(t); + auto state = std::make_shared>(); + handle.promise().join_state_ = state; + auto* vstack = new coro::vthread_stack(); + handle.promise().set_vstack_owner(vstack); + // Detach from current thread's frame chain before spawning to another thread + // to avoid use-after-free when this thread creates another coroutine. 
+ handle.promise().detach_from_parent(); + runtime::schedule_handle(handle); + return coro::join_handle(std::move(state)); +} + +} // namespace elio + +// Macros — syntactic sugar for inline lambda coroutines +// These capture by reference and wrap the expression in a lambda returning task + +/// Fire-and-forget macro for inline coroutine expressions +/// Usage: ELIO_GO(some_async_operation()) +#define ELIO_GO(...) elio::go([&]() { return __VA_ARGS__; }) + +/// Spawn macro for inline coroutine expressions, returns join_handle +/// Usage: auto h = ELIO_SPAWN(compute_async()); auto result = co_await h; +#define ELIO_SPAWN(...) elio::spawn([&]() { return __VA_ARGS__; }) diff --git a/include/elio/runtime/spawn_blocking.hpp b/include/elio/runtime/spawn_blocking.hpp new file mode 100644 index 0000000..92bde1d --- /dev/null +++ b/include/elio/runtime/spawn_blocking.hpp @@ -0,0 +1,107 @@ +#pragma once + +#include "scheduler.hpp" +#include "blocking_pool.hpp" +#include +#include +#include +#include +#include + +namespace elio { +namespace detail { + +// State for non-void results +template +struct blocking_state { + std::optional result; + std::exception_ptr exception; +}; + +// State for void results (avoid std::optional) +template<> +struct blocking_state { + bool completed = false; + std::exception_ptr exception; +}; + +template +class blocking_awaitable { +public: + explicit blocking_awaitable(F&& f) : func_(std::forward(f)) {} + blocking_awaitable(blocking_awaitable&&) = default; + blocking_awaitable(const blocking_awaitable&) = delete; + blocking_awaitable& operator=(const blocking_awaitable&) = delete; + + bool await_ready() const noexcept { return false; } + + void await_suspend(std::coroutine_handle<> caller) { + auto* state = &state_; + // Capture scheduler pointer to ensure we resume on the right scheduler, + // not directly on the blocking pool thread. 
+ auto* sched = runtime::get_current_scheduler(); + auto work = [state, caller, sched, f = std::move(func_)]() mutable { + try { + if constexpr (std::is_void_v) { + f(); + state->completed = true; + } else { + state->result.emplace(f()); + } + } catch (...) { + state->exception = std::current_exception(); + } + // Resume caller via scheduler to ensure it runs on the right thread. + // If no scheduler, fall back to direct resume (single-threaded case). + if (sched && sched->is_running()) { + sched->spawn(caller); + } else if (caller && !caller.done()) { + caller.resume(); + } + }; + + // Try blocking pool first, fallback to detached thread + if (sched && sched->is_running()) { + if (auto* pool = sched->get_blocking_pool()) { + pool->submit(std::move(work)); + return; + } + } + std::thread(std::move(work)).detach(); + } + + T await_resume() { + if (state_.exception) { + std::rethrow_exception(state_.exception); + } + if constexpr (std::is_void_v) { + return; + } else { + return std::move(*state_.result); + } + } + +private: + F func_; + blocking_state state_; +}; + +} // namespace detail + +/// Spawn a blocking operation on a dedicated thread pool. +/// The calling coroutine suspends until the operation completes. +/// Any exception thrown by f() is propagated to the awaiting coroutine. +/// +/// Example: +/// int fd = co_await elio::spawn_blocking([&] { +/// return ::open("/path/to/file", O_RDONLY); +/// }); +template +auto spawn_blocking(F&& f) { + using R = std::invoke_result_t>; + static_assert(!std::is_reference_v, + "spawn_blocking does not support callables returning references"); + return detail::blocking_awaitable>(std::forward(f)); +} + +} // namespace elio diff --git a/include/elio/sync/primitives.hpp b/include/elio/sync/primitives.hpp index d65f6ac..f12a1f6 100644 --- a/include/elio/sync/primitives.hpp +++ b/include/elio/sync/primitives.hpp @@ -52,7 +52,12 @@ class mutex { /// mutex's intrusive waiter list. 
class lock_awaitable { public: - explicit lock_awaitable(mutex& m) noexcept : mutex_(m) {} + explicit lock_awaitable(mutex& m) noexcept : mutex_(m) { + // Use release stores to ensure writes are visible to other threads + // This also helps TSAN understand the synchronization + next_.store(nullptr, std::memory_order_release); + handle_.store(nullptr, std::memory_order_release); + } bool await_ready() const noexcept { return mutex_.try_lock(); @@ -63,7 +68,7 @@ class mutex { /// true (suspend). Loops until one of these two outcomes is achieved /// via lock-free CAS. bool await_suspend(std::coroutine_handle<> h) noexcept { - handle_ = h; + handle_.store(h.address(), std::memory_order_relaxed); void* old_state = mutex_.state_.load(std::memory_order_acquire); while (true) { if (old_state == nullptr) { @@ -77,9 +82,10 @@ class mutex { // CAS failed, old_state refreshed — retry } else { // Locked — push this awaitable onto the LIFO stack - next_ = (old_state == mutex_.locked_no_waiters()) + next_.store((old_state == mutex_.locked_no_waiters()) ? nullptr - : static_cast(old_state); + : static_cast(old_state), + std::memory_order_relaxed); if (mutex_.state_.compare_exchange_weak( old_state, this, std::memory_order_release, @@ -96,8 +102,8 @@ class mutex { private: friend class mutex; mutex& mutex_; - lock_awaitable* next_{nullptr}; // intrusive LIFO linkage - std::coroutine_handle<> handle_; // handle to resume on unlock + std::atomic next_; // intrusive LIFO linkage + std::atomic handle_; // handle to resume on unlock }; /// Acquire the mutex @@ -130,13 +136,15 @@ class mutex { // Pop head waiter and transfer lock ownership to it (LIFO) auto* head = static_cast(state); - void* next_state = (head->next_ == nullptr) + auto* next = head->next_.load(std::memory_order_acquire); + void* next_state = (next == nullptr) ? 
locked_no_waiters() - : static_cast(head->next_); + : static_cast(next); state_.store(next_state, std::memory_order_release); // Schedule the waiter — it now holds the lock - runtime::schedule_handle(head->handle_); + auto handle_addr = head->handle_.load(std::memory_order_acquire); + runtime::schedule_handle(std::coroutine_handle<>::from_address(handle_addr)); } /// Check if mutex is currently locked diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 9922f40..a498df5 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -10,6 +10,7 @@ set(TEST_SOURCES test_main.cpp unit/test_logger.cpp unit/test_virtual_stack.cpp + unit/test_vthread_stack.cpp unit/test_task.cpp unit/test_awaitable_base.cpp unit/test_chase_lev_deque.cpp diff --git a/tests/integration/test_dynamic_threads.cpp b/tests/integration/test_dynamic_threads.cpp index 016b417..fbfb736 100644 --- a/tests/integration/test_dynamic_threads.cpp +++ b/tests/integration/test_dynamic_threads.cpp @@ -29,8 +29,7 @@ TEST_CASE("Dynamic thread pool growth under load", "[dynamic_threads]") { // Spawn initial batch for (int i = 0; i < 50; ++i) { - auto t = task_func(); - sched.spawn(t.release()); + sched.go(task_func); } std::this_thread::sleep_for(scaled_ms(100)); @@ -41,8 +40,7 @@ TEST_CASE("Dynamic thread pool growth under load", "[dynamic_threads]") { // Spawn more tasks for (int i = 50; i < num_tasks; ++i) { - auto t = task_func(); - sched.spawn(t.release()); + sched.go(task_func); } std::this_thread::sleep_for(scaled_ms(1000)); @@ -72,8 +70,7 @@ TEST_CASE("Dynamic thread pool shrink under load", "[dynamic_threads]") { // Spawn initial batch for (int i = 0; i < 50; ++i) { - auto t = task_func(); - sched.spawn(t.release()); + sched.go(task_func); } std::this_thread::sleep_for(scaled_ms(100)); @@ -84,8 +81,7 @@ TEST_CASE("Dynamic thread pool shrink under load", "[dynamic_threads]") { // Spawn more tasks for (int i = 50; i < num_tasks; ++i) { - auto t = task_func(); - sched.spawn(t.release()); + 
sched.go(task_func); } std::this_thread::sleep_for(scaled_ms(1000)); @@ -114,8 +110,7 @@ TEST_CASE("Multiple thread pool adjustments", "[dynamic_threads]") { // Start with 2 threads for (int i = 0; i < 20; ++i) { - auto t = task_func(); - sched.spawn(t.release()); + sched.go(task_func); } std::this_thread::sleep_for(scaled_ms(50)); @@ -125,8 +120,7 @@ TEST_CASE("Multiple thread pool adjustments", "[dynamic_threads]") { REQUIRE(sched.num_threads() == 4); for (int i = 0; i < 20; ++i) { - auto t = task_func(); - sched.spawn(t.release()); + sched.go(task_func); } std::this_thread::sleep_for(scaled_ms(50)); @@ -136,8 +130,7 @@ TEST_CASE("Multiple thread pool adjustments", "[dynamic_threads]") { REQUIRE(sched.num_threads() == 8); for (int i = 0; i < 20; ++i) { - auto t = task_func(); - sched.spawn(t.release()); + sched.go(task_func); } std::this_thread::sleep_for(scaled_ms(50)); @@ -147,8 +140,7 @@ TEST_CASE("Multiple thread pool adjustments", "[dynamic_threads]") { REQUIRE(sched.num_threads() == 4); for (int i = 0; i < 20; ++i) { - auto t = task_func(); - sched.spawn(t.release()); + sched.go(task_func); } std::this_thread::sleep_for(scaled_ms(50)); @@ -158,8 +150,7 @@ TEST_CASE("Multiple thread pool adjustments", "[dynamic_threads]") { REQUIRE(sched.num_threads() == 2); for (int i = 0; i < 20; ++i) { - auto t = task_func(); - sched.spawn(t.release()); + sched.go(task_func); } // Active wait for completion with timeout @@ -191,8 +182,7 @@ TEST_CASE("Thread pool growth from 1 to many", "[dynamic_threads]") { // With 1 thread, tasks execute slowly for (int i = 0; i < 50; ++i) { - auto t = task_func(); - sched.spawn(t.release()); + sched.go(task_func); } std::this_thread::sleep_for(scaled_ms(100)); @@ -229,8 +219,7 @@ TEST_CASE("Thread pool maintains correctness during resize", "[dynamic_threads]" // Spawn tasks while resizing std::thread spawner([&]() { for (int i = 0; i < num_tasks; ++i) { - auto t = task_func(); - sched.spawn(t.release()); + sched.go(task_func); // 
Resize periodically if (i % 10 == 0) { @@ -269,8 +258,7 @@ TEST_CASE("Thread pool resize to 0 treated as 1", "[dynamic_threads]") { co_return; }; - auto t = task_func(); - sched.spawn(t.release()); + sched.go(task_func); std::this_thread::sleep_for(scaled_ms(100)); @@ -299,8 +287,7 @@ TEST_CASE("Rapid thread pool adjustments", "[dynamic_threads]") { // Spawn some tasks for (int i = 0; i < 10; ++i) { - auto t = task_func(); - sched.spawn(t.release()); + sched.go(task_func); } std::this_thread::sleep_for(scaled_ms(20)); diff --git a/tests/integration/test_exception_propagation.cpp b/tests/integration/test_exception_propagation.cpp index e414a0d..b6a4b38 100644 --- a/tests/integration/test_exception_propagation.cpp +++ b/tests/integration/test_exception_propagation.cpp @@ -33,8 +33,7 @@ TEST_CASE("Exception propagation through single level", "[exception]") { co_return; }; - auto t = catcher(); - sched.spawn(t.release()); + sched.go(catcher); std::this_thread::sleep_for(scaled_ms(200)); @@ -79,8 +78,7 @@ TEST_CASE("Exception propagation through multiple levels", "[exception]") { co_return; }; - auto t = level1(); - sched.spawn(t.release()); + sched.go(level1); std::this_thread::sleep_for(scaled_ms(300)); @@ -111,8 +109,7 @@ TEST_CASE("Exception propagation with void tasks", "[exception]") { co_return; }; - auto t = catcher(); - sched.spawn(t.release()); + sched.go(catcher); std::this_thread::sleep_for(scaled_ms(200)); @@ -143,8 +140,7 @@ TEST_CASE("Multiple exceptions in different coroutines", "[exception]") { const int num_tasks = 10; for (int i = 0; i < num_tasks; ++i) { - auto t = catcher(i); - sched.spawn(t.release()); + sched.go([&, i]() { return catcher(i); }); } // Active wait for completion with timeout @@ -193,8 +189,7 @@ TEST_CASE("Exception in middle of chain", "[exception]") { co_return; }; - auto t = level1(); - sched.spawn(t.release()); + sched.go(level1); std::this_thread::sleep_for(scaled_ms(300)); @@ -232,8 +227,7 @@ TEST_CASE("Exception with custom 
exception type", "[exception]") { co_return; }; - auto t = catcher(); - sched.spawn(t.release()); + sched.go(catcher); std::this_thread::sleep_for(scaled_ms(200)); @@ -251,8 +245,7 @@ TEST_CASE("Uncaught exception in coroutine", "[exception]") { co_return; }; - auto t = thrower(); - sched.spawn(t.release()); + sched.go(thrower); // Should not crash the scheduler std::this_thread::sleep_for(scaled_ms(200)); @@ -291,8 +284,7 @@ TEST_CASE("Exception propagation preserves exception message", "[exception]") { co_return; }; - auto t = level1(); - sched.spawn(t.release()); + sched.go(level1); std::this_thread::sleep_for(scaled_ms(300)); diff --git a/tests/integration/test_parallel_tasks.cpp b/tests/integration/test_parallel_tasks.cpp index 1de83e5..5e060bf 100644 --- a/tests/integration/test_parallel_tasks.cpp +++ b/tests/integration/test_parallel_tasks.cpp @@ -25,8 +25,7 @@ TEST_CASE("Parallel task execution stress test", "[parallel]") { }; for (int i = 0; i < num_tasks; ++i) { - auto t = task_func(); - sched.spawn(t.release()); + sched.go(task_func); } // Wait for completion with scaled timeout @@ -68,13 +67,11 @@ TEST_CASE("Parallel tasks with varying workloads", "[parallel]") { // Mix of light and heavy tasks for (int i = 0; i < 100; ++i) { - auto t = light_task(); - sched.spawn(t.release()); + sched.go(light_task); } for (int i = 0; i < 20; ++i) { - auto t = heavy_task(); - sched.spawn(t.release()); + sched.go(heavy_task); } std::this_thread::sleep_for(scaled_ms(1000)); @@ -107,8 +104,7 @@ TEST_CASE("Parallel tasks with dependencies", "[parallel]") { const int num_chains = 50; for (int i = 0; i < num_chains; ++i) { - auto t = stage2_task(); - sched.spawn(t.release()); + sched.go(stage2_task); } // Active wait for completion with timeout @@ -147,8 +143,7 @@ TEST_CASE("Work stealing under heavy load", "[parallel]") { // Spawn all tasks quickly for (int i = 0; i < num_tasks; ++i) { - auto t = task_func(); - sched.spawn(t.release()); + sched.go(task_func); } 
std::this_thread::sleep_for(scaled_ms(1500)); @@ -179,8 +174,7 @@ TEST_CASE("Concurrent spawn and execution", "[parallel]") { for (int i = 0; i < spawner_threads; ++i) { spawners.emplace_back([&]() { for (int j = 0; j < tasks_per_thread; ++j) { - auto t = task_func(); - sched.spawn(t.release()); + sched.go(task_func); std::this_thread::yield(); } }); @@ -213,8 +207,7 @@ TEST_CASE("Parallel tasks with shared atomic counter", "[parallel]") { }; for (int i = 0; i < num_tasks; ++i) { - auto t = increment_task(); - sched.spawn(t.release()); + sched.go(increment_task); } std::this_thread::sleep_for(scaled_ms(800)); @@ -250,8 +243,7 @@ TEST_CASE("Nested parallel tasks", "[parallel]") { const int num_outer = 20; for (int i = 0; i < num_outer; ++i) { - auto t = outer_task(); - sched.spawn(t.release()); + sched.go(outer_task); } std::this_thread::sleep_for(scaled_ms(800)); diff --git a/tests/integration/test_scheduler_integration.cpp b/tests/integration/test_scheduler_integration.cpp index 183e572..e846a49 100644 --- a/tests/integration/test_scheduler_integration.cpp +++ b/tests/integration/test_scheduler_integration.cpp @@ -33,8 +33,7 @@ TEST_CASE("Chained coroutines integration", "[integration]") { co_return; }; - auto t = outer(); - sched.spawn(t.release()); + sched.go(outer); // Wait for completion std::this_thread::sleep_for(scaled_ms(200)); @@ -67,8 +66,7 @@ TEST_CASE("Deep coroutine chain", "[integration]") { co_return; }; - auto t = level1(); - sched.spawn(t.release()); + sched.go(level1); std::this_thread::sleep_for(scaled_ms(300)); @@ -103,8 +101,7 @@ TEST_CASE("Parallel independent coroutines", "[integration]") { // Spawn all tasks for (int i = 0; i < num_tasks; ++i) { - auto t = task_func(i); - sched.spawn(t.release()); + sched.go([&, i]() { return task_func(i); }); } // Wait for all to complete @@ -143,8 +140,7 @@ TEST_CASE("Mixed chain and parallel coroutines", "[integration]") { co_return; }; - auto t = aggregator(); - sched.spawn(t.release()); + 
sched.go(aggregator); std::this_thread::sleep_for(scaled_ms(300)); @@ -179,8 +175,7 @@ TEST_CASE("Virtual stack tracking in scheduler", "[integration]") { co_return; }; - auto t = outer(); - sched.spawn(t.release()); + sched.go(outer); std::this_thread::sleep_for(scaled_ms(200)); @@ -208,8 +203,7 @@ TEST_CASE("Scheduler load distribution", "[integration]") { // Spawn all tasks at once for (int i = 0; i < num_tasks; ++i) { - auto t = heavy_task(); - sched.spawn(t.release()); + sched.go(heavy_task); } // Wait for completion diff --git a/tests/unit/test_affinity.cpp b/tests/unit/test_affinity.cpp index 55d89da..4b6c754 100644 --- a/tests/unit/test_affinity.cpp +++ b/tests/unit/test_affinity.cpp @@ -9,6 +9,28 @@ using namespace elio::runtime; using namespace elio::coro; using namespace elio::test; +// Helper to access handle from task +template +auto get_handle(task& t) { + return elio::coro::detail::task_access::handle(t); +} + +// Helper to spawn a task to scheduler +template +void spawn_task(scheduler& sched, task& t) { + elio::coro::detail::heap_alloc_guard guard; + auto handle = elio::coro::detail::task_access::release(t); + sched.spawn(handle); +} + +// Helper to spawn a task to specific worker +template +void spawn_task_to(scheduler& sched, size_t worker_id, task& t) { + elio::coro::detail::heap_alloc_guard guard; + auto handle = elio::coro::detail::task_access::release(t); + sched.spawn_to(worker_id, handle); +} + TEST_CASE("Affinity constants", "[affinity]") { REQUIRE(NO_AFFINITY == std::numeric_limits::max()); } @@ -19,7 +41,7 @@ TEST_CASE("Promise base affinity default", "[affinity]") { }; auto t = coro(); - auto& promise = t.handle().promise(); + auto& promise = get_handle(t).promise(); // Default should be NO_AFFINITY REQUIRE(promise.affinity() == NO_AFFINITY); @@ -32,7 +54,7 @@ TEST_CASE("Promise base affinity set/get/clear", "[affinity]") { }; auto t = coro(); - auto& promise = t.handle().promise(); + auto& promise = get_handle(t).promise(); // Set 
affinity promise.set_affinity(2); @@ -64,7 +86,7 @@ TEST_CASE("current_worker_id inside scheduler", "[affinity]") { }; auto t = coro(); - sched.spawn(t.release()); + spawn_task(sched, t); // Wait for execution auto start = std::chrono::steady_clock::now(); @@ -97,7 +119,7 @@ TEST_CASE("set_affinity awaitable binds to worker", "[affinity]") { }; auto t = coro(); - sched.spawn(t.release()); + spawn_task(sched, t); // Wait for execution auto start = std::chrono::steady_clock::now(); @@ -133,7 +155,7 @@ TEST_CASE("set_affinity without migration", "[affinity]") { }; auto t = coro(); - sched.spawn(t.release()); + spawn_task(sched, t); // Wait for execution auto start = std::chrono::steady_clock::now(); @@ -177,7 +199,7 @@ TEST_CASE("clear_affinity allows migration", "[affinity]") { }; auto t = coro(); - sched.spawn(t.release()); + spawn_task(sched, t); // Wait for execution auto start = std::chrono::steady_clock::now(); @@ -223,7 +245,7 @@ TEST_CASE("bind_to_current_worker pins to current", "[affinity]") { }; auto t = coro(); - sched.spawn(t.release()); + spawn_task(sched, t); // Wait for execution auto start = std::chrono::steady_clock::now(); @@ -275,7 +297,7 @@ TEST_CASE("Affinity prevents work stealing", "[affinity]") { // Spawn many tasks for (int i = 0; i < num_iterations; ++i) { auto t = coro(); - sched.spawn(t.release()); + spawn_task(sched, t); } // Wait for all to complete @@ -307,13 +329,13 @@ TEST_CASE("Affinity with spawn_to respects binding", "[affinity]") { auto t = coro(); // Explicitly spawn to worker 2 - sched.spawn_to(2, t.release()); + spawn_task_to(sched, 2, t); // Wait for execution - auto start = std::chrono::steady_clock::now(); + auto start2 = std::chrono::steady_clock::now(); while (!completed.load()) { std::this_thread::sleep_for(std::chrono::milliseconds(10)); - if (std::chrono::steady_clock::now() - start > scaled_sec(5)) break; + if (std::chrono::steady_clock::now() - start2 > scaled_sec(5)) break; } REQUIRE(completed.load()); @@ -328,7 
+350,7 @@ TEST_CASE("get_promise_base from handle address", "[affinity]") { }; auto t = coro(); - void* addr = t.handle().address(); + void* addr = get_handle(t).address(); // Should be able to extract promise_base auto* promise = get_promise_base(addr); @@ -348,7 +370,7 @@ TEST_CASE("check_affinity_allows with NO_AFFINITY", "[affinity]") { }; auto t = coro(); - void* addr = t.handle().address(); + void* addr = get_handle(t).address(); // With NO_AFFINITY, any worker should be allowed REQUIRE(check_affinity_allows(addr, 0)); @@ -374,14 +396,14 @@ TEST_CASE("Multiple tasks with different affinities", "[affinity]") { // Spawn tasks with different affinities for (size_t i = 0; i < 4; ++i) { auto t = make_coro(i); - sched.spawn(t.release()); + spawn_task(sched, t); } // Wait for all to complete - auto start = std::chrono::steady_clock::now(); + auto start3 = std::chrono::steady_clock::now(); while (completed.load() < 4) { std::this_thread::sleep_for(std::chrono::milliseconds(10)); - if (std::chrono::steady_clock::now() - start > scaled_sec(5)) break; + if (std::chrono::steady_clock::now() - start3 > scaled_sec(5)) break; } REQUIRE(completed.load() == 4); diff --git a/tests/unit/test_awaitable_base.cpp b/tests/unit/test_awaitable_base.cpp index 7452ae2..852b26f 100644 --- a/tests/unit/test_awaitable_base.cpp +++ b/tests/unit/test_awaitable_base.cpp @@ -5,6 +5,18 @@ using namespace elio::coro; +// Helper to access handle from task +template +auto get_handle(task& t) { + return elio::coro::detail::task_access::handle(t); +} + +// Helper to access promise value from task +template +auto& get_promise(task& t) { + return get_handle(t).promise(); +} + // Test awaitable that returns an int class test_awaitable : public awaitable_base { public: @@ -68,7 +80,7 @@ TEST_CASE("awaitable_base forwards await_suspend", "[awaitable_base]") { }; auto t = coro(); - t.handle().resume(); + get_handle(t).resume(); REQUIRE(suspended == true); } @@ -82,9 +94,9 @@ TEST_CASE("awaitable_base 
forwards await_resume with return value", "[awaitable_ }; auto t = coro(); - t.handle().resume(); + get_handle(t).resume(); - REQUIRE(t.handle().promise().value_.value() == 123); + REQUIRE(get_promise(t).value_.value() == 123); } TEST_CASE("awaitable_base works with void return", "[awaitable_base]") { @@ -96,9 +108,9 @@ TEST_CASE("awaitable_base works with void return", "[awaitable_base]") { }; auto t = coro(); - t.handle().resume(); + get_handle(t).resume(); - REQUIRE(t.handle().done()); + REQUIRE(get_handle(t).done()); } TEST_CASE("awaitable_base in nested coroutines", "[awaitable_base]") { @@ -114,10 +126,10 @@ TEST_CASE("awaitable_base in nested coroutines", "[awaitable_base]") { }; auto t = outer(); - t.handle().resume(); + get_handle(t).resume(); // Should be (50 * 2) + 10 = 110 - REQUIRE(t.handle().promise().value_.value() == 110); + REQUIRE(get_promise(t).value_.value() == 110); } // Test awaitable with symmetric transfer @@ -150,7 +162,7 @@ TEST_CASE("awaitable_base supports symmetric transfer", "[awaitable_base]") { }; auto t = coro(); - t.handle().resume(); + get_handle(t).resume(); - REQUIRE(t.handle().promise().value_.value() == 999); + REQUIRE(get_promise(t).value_.value() == 999); } diff --git a/tests/unit/test_frame_allocator.cpp b/tests/unit/test_frame_allocator.cpp deleted file mode 100644 index aaf60c4..0000000 --- a/tests/unit/test_frame_allocator.cpp +++ /dev/null @@ -1,93 +0,0 @@ -#include -#include -#include -#include -#include - -using namespace elio::coro; - -TEST_CASE("Frame allocator basic allocation/deallocation", "[frame_allocator]") { - // Test basic allocation - void* ptr = frame_allocator::allocate(128); - REQUIRE(ptr != nullptr); - - // Test deallocation (same thread) - frame_allocator::deallocate(ptr, 128); -} - -TEST_CASE("Frame allocator cross-thread deallocation", "[frame_allocator]") { - // This test verifies the lookup-then-push race is fixed by holding - // the registry mutex during the entire operation - - std::atomic 
allocated_ptr{nullptr}; - std::atomic ready{false}; - - // Thread 1: Allocate a frame - std::thread allocator_thread([&]() { - void* ptr = frame_allocator::allocate(128); - allocated_ptr.store(ptr); - - // Signal that we have a frame ready - ready.store(true); - - // Wait for the other thread to deallocate - while (ready.load()) { - std::this_thread::yield(); - } - }); - - // Wait for allocation - while (!ready.load()) { - std::this_thread::yield(); - } - - // Thread 2: Deallocate from different thread (simulating work-stealing) - std::thread deallocator_thread([&]() { - // Wait for the frame to be ready - while (!ready.load()) { - std::this_thread::yield(); - } - - void* ptr = allocated_ptr.load(); - if (ptr) { - // This should trigger cross-thread deallocation - // The race condition fix holds the mutex during lookup-then-push - frame_allocator::deallocate(ptr, 128); - } - - // Signal completion - ready.store(false); - }); - - allocator_thread.join(); - deallocator_thread.join(); -} - -TEST_CASE("Frame allocator multiple frames", "[frame_allocator]") { - constexpr size_t num_frames = 100; - std::vector frames; - - // Allocate multiple frames - for (size_t i = 0; i < num_frames; ++i) { - void* ptr = frame_allocator::allocate(128); - REQUIRE(ptr != nullptr); - frames.push_back(ptr); - } - - // Deallocate all frames - for (void* ptr : frames) { - frame_allocator::deallocate(ptr, 128); - } -} - -TEST_CASE("Frame allocator size limits", "[frame_allocator]") { - // Test allocation within size limit - void* small = frame_allocator::allocate(256); - REQUIRE(small != nullptr); - frame_allocator::deallocate(small, 256); - - // Test allocation above size limit falls back to malloc - void* large = frame_allocator::allocate(512); - REQUIRE(large != nullptr); - frame_allocator::deallocate(large, 512); -} diff --git a/tests/unit/test_io.cpp b/tests/unit/test_io.cpp index 0bcd8d3..d2e30fc 100644 --- a/tests/unit/test_io.cpp +++ b/tests/unit/test_io.cpp @@ -106,8 +106,7 @@ 
TEST_CASE("Pipe read/write with epoll", "[io][epoll][pipe]") { completed = true; }; - auto t = read_coro(); - sched.spawn(t.release()); + sched.go(read_coro); // Wait for completion for (int i = 0; i < 100 && !completed; ++i) { @@ -181,8 +180,7 @@ TEST_CASE("Socket pair with epoll", "[io][epoll][socket]") { completed = true; }; - auto t = recv_coro(); - sched.spawn(t.release()); + sched.go(recv_coro); for (int i = 0; i < 100 && !completed; ++i) { std::this_thread::sleep_for(std::chrono::milliseconds(10)); @@ -239,8 +237,7 @@ TEST_CASE("Cancel operation", "[io][epoll][cancel]") { completed = true; }; - auto t = recv_coro(); - sched.spawn(t.release()); + sched.go(recv_coro); // Wait for coroutine to start for (int i = 0; i < 100 && !started; ++i) { @@ -288,11 +285,8 @@ TEST_CASE("Multiple concurrent operations", "[io][epoll][concurrent]") { completed++; }; - auto t1 = recv_coro1(); - auto t2 = recv_coro2(); - - sched.spawn(t1.release()); - sched.spawn(t2.release()); + sched.go(recv_coro1); + sched.go(recv_coro2); // Wait until both complete for (int i = 0; i < 100 && completed < 2; ++i) { @@ -341,8 +335,7 @@ TEST_CASE("epoll_backend registers fd before data available", "[io][epoll][regis completed = true; }; - auto t = recv_coro(); - sched.spawn(t.release()); + sched.go(recv_coro); // Wait for coroutine to start and register the operation for (int i = 0; i < 100 && !started; ++i) { @@ -399,11 +392,8 @@ TEST_CASE("epoll_backend handles multiple pending ops on same fd", "[io][epoll][ completed++; }; - auto t1 = recv_coro1(); - auto t2 = recv_coro2(); - - sched.spawn(t1.release()); - sched.spawn(t2.release()); + sched.go(recv_coro1); + sched.go(recv_coro2); // Give operations time to be registered std::this_thread::sleep_for(std::chrono::milliseconds(50)); @@ -452,8 +442,7 @@ TEST_CASE("epoll_backend write operation registration", "[io][epoll][write]") { completed = true; }; - auto t = send_coro(); - sched.spawn(t.release()); + sched.go(send_coro); // Wait for 
completion for (int i = 0; i < 100 && !completed; ++i) { @@ -577,8 +566,7 @@ TEST_CASE("UDS listener bind and accept", "[uds][listener]") { co_return; }; - auto t = accept_coro(); - sched.spawn(t.release()); + sched.go(accept_coro); // Wait for completion for (int i = 0; i < 200 && !accepted; ++i) { @@ -626,11 +614,8 @@ TEST_CASE("UDS connect", "[uds][connect]") { co_return; }; - auto accept_task = accept_coro(); - auto connect_task = connect_coro(); - - sched.spawn(accept_task.release()); - sched.spawn(connect_task.release()); + sched.go(accept_coro); + sched.go(connect_coro); // Wait until both complete for (int i = 0; i < 200 && (!server_accepted || !client_connected); ++i) { @@ -675,10 +660,8 @@ TEST_CASE("UDS stream read/write", "[uds][stream]") { co_return; }; - auto accept_task = accept_coro(); - auto connect_task = connect_coro(); - sched.spawn(accept_task.release()); - sched.spawn(connect_task.release()); + sched.go(accept_coro); + sched.go(connect_coro); for (int i = 0; i < 200 && setup_complete < 2; ++i) { std::this_thread::sleep_for(std::chrono::milliseconds(10)); @@ -706,10 +689,8 @@ TEST_CASE("UDS stream read/write", "[uds][stream]") { read_done = true; }; - auto write_task = write_coro(); - auto read_task = read_coro(); - sched.spawn(write_task.release()); - sched.spawn(read_task.release()); + sched.go(write_coro); + sched.go(read_coro); for (int i = 0; i < 200 && (!write_done || !read_done); ++i) { std::this_thread::sleep_for(std::chrono::milliseconds(10)); @@ -742,10 +723,8 @@ TEST_CASE("UDS stream read/write", "[uds][stream]") { read_done = true; }; - auto write_task = write_coro(); - auto read_task = read_coro(); - sched.spawn(write_task.release()); - sched.spawn(read_task.release()); + sched.go(write_coro); + sched.go(read_coro); for (int i = 0; i < 200 && (!write_done || !read_done); ++i) { std::this_thread::sleep_for(std::chrono::milliseconds(10)); @@ -818,16 +797,13 @@ TEST_CASE("UDS multiple concurrent connections", "[uds][concurrent]") { 
co_return; }; - auto a0 = accept0(); auto a1 = accept1(); auto a2 = accept2(); - auto c0 = connect0(); auto c1 = connect1(); auto c2 = connect2(); - // Start all coroutines - sched.spawn(a0.release()); - sched.spawn(a1.release()); - sched.spawn(a2.release()); - sched.spawn(c0.release()); - sched.spawn(c1.release()); - sched.spawn(c2.release()); + sched.go(accept0); + sched.go(accept1); + sched.go(accept2); + sched.go(connect0); + sched.go(connect1); + sched.go(connect2); // Wait until all connections are made for (int i = 0; i < 500 && (accepts_done < NUM_CLIENTS || connects_done < NUM_CLIENTS); ++i) { @@ -887,10 +863,8 @@ TEST_CASE("UDS filesystem socket", "[uds][filesystem]") { co_return; }; - auto accept_task = accept_coro(); - auto connect_task = connect_coro(); - sched.spawn(accept_task.release()); - sched.spawn(connect_task.release()); + sched.go(accept_coro); + sched.go(connect_coro); for (int i = 0; i < 200 && (!connected || !accepted); ++i) { std::this_thread::sleep_for(std::chrono::milliseconds(10)); @@ -941,8 +915,7 @@ TEST_CASE("UDS echo test", "[uds][echo]") { server_done = true; }; - auto server_task = server_coro(); - sched.spawn(server_task.release()); + sched.go(server_coro); // Client in a thread (to avoid coroutine complexity) std::thread client_thread([&]() { @@ -1268,10 +1241,8 @@ TEST_CASE("TCP IPv6 listener and connect", "[tcp][ipv6][integration]") { co_return; }; - auto accept_task = accept_coro(); - auto connect_task = connect_coro(); - sched.spawn(accept_task.release()); - sched.spawn(connect_task.release()); + sched.go(accept_coro); + sched.go(connect_coro); for (int i = 0; i < 200 && (!accepted || !connected); ++i) { std::this_thread::sleep_for(std::chrono::milliseconds(10)); @@ -1302,8 +1273,7 @@ TEST_CASE("TCP connect regression avoids double connect", "[tcp][connect][regres constexpr int kAttempts = 64; for (int i = 0; i < kAttempts; ++i) { - auto t = tcp_connect_regression_attempt(port, connected, failed, first_error); - 
sched.spawn(t.release()); + sched.go([&]() { return tcp_connect_regression_attempt(port, connected, failed, first_error); }); } for (int i = 0; i < 500 && (connected + failed) < kAttempts; ++i) { @@ -1325,8 +1295,7 @@ TEST_CASE("explicit hostname resolution", "[tcp][address][dns]") { std::optional resolved; std::atomic done{false}; - auto task = resolve_hostname_attempt("localhost", 80, resolved, done); - sched.spawn(task.release()); + sched.go([&]() { return resolve_hostname_attempt("localhost", 80, resolved, done); }); for (int i = 0; i < 200 && !done.load(std::memory_order_relaxed); ++i) { std::this_thread::sleep_for(std::chrono::milliseconds(10)); @@ -1360,11 +1329,9 @@ TEST_CASE("tcp_connect hostname resolution uses cache", "[tcp][connect][dns][cac auto stats_before = default_resolve_cache().stats(); - auto accept_task = accept_n_connections(*listener, 2, accepted); - sched.spawn(accept_task.release()); + sched.go([&]() { return accept_n_connections(*listener, 2, accepted); }); - auto first_task = tcp_connect_hostname_attempt("localhost", port, connected, failed, first_error); - sched.spawn(first_task.release()); + sched.go([&]() { return tcp_connect_hostname_attempt("localhost", port, connected, failed, first_error); }); for (int i = 0; i < 300 && connected.load(std::memory_order_relaxed) < 1; ++i) { std::this_thread::sleep_for(std::chrono::milliseconds(10)); @@ -1372,8 +1339,7 @@ TEST_CASE("tcp_connect hostname resolution uses cache", "[tcp][connect][dns][cac auto stats_after_first = default_resolve_cache().stats(); - auto second_task = tcp_connect_hostname_attempt("localhost", port, connected, failed, first_error); - sched.spawn(second_task.release()); + sched.go([&]() { return tcp_connect_hostname_attempt("localhost", port, connected, failed, first_error); }); for (int i = 0; i < 300 && (accepted.load(std::memory_order_relaxed) < 2 || connected.load(std::memory_order_relaxed) < 2 @@ -1424,17 +1390,15 @@ TEST_CASE("resolve_options can disable cache", 
"[tcp][dns][cache][config]") { std::atomic done_first{false}; std::atomic done_second{false}; - auto first = resolve_all_attempt_with_options( - "localhost", 80, options, resolved_first, done_first); - sched.spawn(first.release()); + sched.go([&]() { return resolve_all_attempt_with_options( + "localhost", 80, options, resolved_first, done_first); }); for (int i = 0; i < 200 && !done_first.load(std::memory_order_relaxed); ++i) { std::this_thread::sleep_for(std::chrono::milliseconds(10)); } - auto second = resolve_all_attempt_with_options( - "localhost", 80, options, resolved_second, done_second); - sched.spawn(second.release()); + sched.go([&]() { return resolve_all_attempt_with_options( + "localhost", 80, options, resolved_second, done_second); }); for (int i = 0; i < 200 && !done_second.load(std::memory_order_relaxed); ++i) { std::this_thread::sleep_for(std::chrono::milliseconds(10)); @@ -1469,17 +1433,15 @@ TEST_CASE("resolve_options can use custom cache instance", "[tcp][dns][cache][co std::atomic done_first{false}; std::atomic done_second{false}; - auto first = resolve_all_attempt_with_options( - "localhost", 80, options, resolved_first, done_first); - sched.spawn(first.release()); + sched.go([&]() { return resolve_all_attempt_with_options( + "localhost", 80, options, resolved_first, done_first); }); for (int i = 0; i < 200 && !done_first.load(std::memory_order_relaxed); ++i) { std::this_thread::sleep_for(std::chrono::milliseconds(10)); } - auto second = resolve_all_attempt_with_options( - "localhost", 80, options, resolved_second, done_second); - sched.spawn(second.release()); + sched.go([&]() { return resolve_all_attempt_with_options( + "localhost", 80, options, resolved_second, done_second); }); for (int i = 0; i < 200 && !done_second.load(std::memory_order_relaxed); ++i) { std::this_thread::sleep_for(std::chrono::milliseconds(10)); @@ -1517,17 +1479,15 @@ TEST_CASE("resolve_options ttl controls cache expiry", "[tcp][dns][cache][config std::atomic 
done_first{false}; std::atomic done_second{false}; - auto first = resolve_all_attempt_with_options( - "localhost", 80, options, resolved_first, done_first); - sched.spawn(first.release()); + sched.go([&]() { return resolve_all_attempt_with_options( + "localhost", 80, options, resolved_first, done_first); }); for (int i = 0; i < 200 && !done_first.load(std::memory_order_relaxed); ++i) { std::this_thread::sleep_for(std::chrono::milliseconds(10)); } - auto second = resolve_all_attempt_with_options( - "localhost", 80, options, resolved_second, done_second); - sched.spawn(second.release()); + sched.go([&]() { return resolve_all_attempt_with_options( + "localhost", 80, options, resolved_second, done_second); }); for (int i = 0; i < 200 && !done_second.load(std::memory_order_relaxed); ++i) { std::this_thread::sleep_for(std::chrono::milliseconds(10)); diff --git a/tests/unit/test_scheduler.cpp b/tests/unit/test_scheduler.cpp index 91bd846..08f77a5 100644 --- a/tests/unit/test_scheduler.cpp +++ b/tests/unit/test_scheduler.cpp @@ -9,6 +9,16 @@ using namespace elio::runtime; using namespace elio::coro; using namespace elio::test; +// Helper to spawn a task to scheduler +template +void spawn_task(scheduler& sched, task& t) { + elio::coro::detail::heap_alloc_guard guard; + auto handle = elio::coro::detail::task_access::release(t); + auto* vstack = new elio::coro::vthread_stack(); + handle.promise().set_vstack_owner(vstack); + sched.spawn(handle); +} + TEST_CASE("Scheduler construction", "[scheduler]") { scheduler sched(4); REQUIRE(sched.num_threads() == 4); @@ -38,7 +48,7 @@ TEST_CASE("Scheduler spawn and execute simple coroutine", "[scheduler]") { }; auto t = coro(); - sched.spawn(t.release()); // Transfer ownership to scheduler + spawn_task(sched, t); // Transfer ownership to scheduler // Wait for execution std::this_thread::sleep_for(scaled_ms(100)); @@ -78,7 +88,7 @@ TEST_CASE("Scheduler spawn multiple coroutines", "[scheduler]") { // Spawn many tasks - scheduler takes 
ownership via release() for (int i = 0; i < num_tasks; ++i) { auto t = coro(); - sched.spawn(t.release()); + spawn_task(sched, t); } // Active wait for completion with timeout @@ -117,7 +127,7 @@ TEST_CASE("Scheduler work stealing occurs", "[scheduler]") { // Spawn many tasks quickly - scheduler takes ownership for (int i = 0; i < num_tasks; ++i) { auto t = coro(); - sched.spawn(t.release()); + spawn_task(sched, t); } // Wait for all to complete @@ -151,7 +161,7 @@ TEST_CASE("Scheduler dynamic thread pool growth", "[scheduler]") { for (int i = 0; i < 50; ++i) { auto t = coro(); - sched.spawn(t.release()); + spawn_task(sched, t); } std::this_thread::sleep_for(scaled_ms(200)); @@ -179,7 +189,7 @@ TEST_CASE("Scheduler dynamic thread pool shrink", "[scheduler]") { for (int i = 0; i < 50; ++i) { auto t = coro(); - sched.spawn(t.release()); + spawn_task(sched, t); } std::this_thread::sleep_for(scaled_ms(200)); @@ -202,7 +212,7 @@ TEST_CASE("Scheduler statistics", "[scheduler]") { for (int i = 0; i < num_tasks; ++i) { auto t = coro(); - sched.spawn(t.release()); + spawn_task(sched, t); } std::this_thread::sleep_for(scaled_ms(200)); @@ -245,8 +255,8 @@ TEST_CASE("Scheduler handles spawn before start", "[scheduler]") { auto t = coro(); // Should not crash, but task won't execute (scheduler not running) - // We still need to release() since spawn stores the handle - sched.spawn(t.release()); + // We still need to spawn since it stores the handle + spawn_task(sched, t); // Now start - but the task was already queued sched.start(); diff --git a/tests/unit/test_signalfd.cpp b/tests/unit/test_signalfd.cpp index 070e857..68ca201 100644 --- a/tests/unit/test_signalfd.cpp +++ b/tests/unit/test_signalfd.cpp @@ -14,6 +14,16 @@ using namespace elio::runtime; using namespace elio::io; using namespace std::chrono_literals; +// Helper to spawn a task to scheduler +template +void spawn_task(scheduler& sched, task& t) { + elio::coro::detail::heap_alloc_guard guard; + auto handle = 
elio::coro::detail::task_access::release(t); + auto* vstack = new elio::coro::vthread_stack(); + handle.promise().set_vstack_owner(vstack); + sched.spawn(handle); +} + TEST_CASE("signal_set basic operations", "[signal][signal_set]") { SECTION("default constructor creates empty set") { signal_set sigs; @@ -213,7 +223,7 @@ TEST_CASE("signal_fd async wait", "[signal][signal_fd]") { { auto t = wait_task(); - sched.spawn(t.release()); + spawn_task(sched, t); } // Give the coroutine time to start and enter wait @@ -265,7 +275,7 @@ TEST_CASE("signal_fd multiple signals", "[signal][signal_fd]") { { auto t = wait_task(); - sched.spawn(t.release()); + spawn_task(sched, t); } std::this_thread::sleep_for(50ms); @@ -384,7 +394,7 @@ TEST_CASE("wait_signal convenience function", "[signal][wait_signal]") { { auto t = wait_task(); - sched.spawn(t.release()); + spawn_task(sched, t); } std::this_thread::sleep_for(50ms); diff --git a/tests/unit/test_sync.cpp b/tests/unit/test_sync.cpp index 58a5db6..5058caf 100644 --- a/tests/unit/test_sync.cpp +++ b/tests/unit/test_sync.cpp @@ -12,6 +12,16 @@ using namespace elio::sync; using namespace elio::coro; using namespace elio::runtime; +// Helper to spawn a task to scheduler +template +void spawn_task(scheduler& sched, task& t) { + elio::coro::detail::heap_alloc_guard guard; + auto handle = elio::coro::detail::task_access::release(t); + auto* vstack = new elio::coro::vthread_stack(); + handle.promise().set_vstack_owner(vstack); + sched.spawn(handle); +} + TEST_CASE("mutex basic operations", "[sync][mutex]") { mutex m; @@ -35,7 +45,7 @@ TEST_CASE("mutex basic operations", "[sync][mutex]") { TEST_CASE("mutex with coroutines", "[sync][mutex][coro]") { mutex m; - int counter = 0; + std::atomic counter{0}; std::atomic completed{0}; scheduler sched(2); @@ -43,19 +53,18 @@ TEST_CASE("mutex with coroutines", "[sync][mutex][coro]") { auto increment_task = [&]() -> task { co_await m.lock(); - int temp = counter; - std::this_thread::yield(); // Give 
other coroutines a chance - counter = temp + 1; + // Use fetch_add for atomic increment + counter.fetch_add(1, std::memory_order_relaxed); m.unlock(); - completed++; + completed.fetch_add(1, std::memory_order_relaxed); }; - // Create and spawn tasks - use release() to transfer ownership to scheduler + // Create and spawn tasks - use spawn_task helper to transfer ownership to scheduler // We track completion via the atomic counter constexpr int NUM_TASKS = 10; for (int i = 0; i < NUM_TASKS; ++i) { auto t = increment_task(); - sched.spawn(t.release()); // Transfer ownership - scheduler will manage lifetime + spawn_task(sched, t); // Transfer ownership - scheduler will manage lifetime } // Wait for completion @@ -200,12 +209,12 @@ TEST_CASE("channel with coroutines", "[sync][channel][coro]") { scheduler sched(2); sched.start(); - // Use release() to transfer ownership to scheduler + // Use spawn_task helper to transfer ownership to scheduler { auto p = producer(); auto c = consumer(); - sched.spawn(p.release()); - sched.spawn(c.release()); + spawn_task(sched, p); + spawn_task(sched, c); } // Wait for completion @@ -307,11 +316,11 @@ TEST_CASE("shared_mutex with coroutines", "[sync][shared_mutex][coro]") { // Spawn readers and writers for (int i = 0; i < NUM_READERS; ++i) { auto t = reader_task(); - sched.spawn(t.release()); + spawn_task(sched, t); } for (int i = 0; i < NUM_WRITERS; ++i) { auto t = writer_task(); - sched.spawn(t.release()); + spawn_task(sched, t); } // Wait for completion @@ -399,7 +408,7 @@ TEST_CASE("spinlock with coroutines", "[sync][spinlock][coro]") { constexpr int NUM_TASKS = 10; for (int i = 0; i < NUM_TASKS; ++i) { auto t = increment_task(); - sched.spawn(t.release()); + spawn_task(sched, t); } for (int i = 0; i < 200 && completed < NUM_TASKS; ++i) { @@ -456,7 +465,7 @@ TEST_CASE("condition_variable has_waiters", "[sync][condvar]") { TEST_CASE("condition_variable with mutex notify_one", "[sync][condvar][coro]") { mutex mtx; condition_variable 
cv; - bool ready = false; + std::atomic ready{false}; std::atomic completed{0}; scheduler sched(2); @@ -464,31 +473,31 @@ TEST_CASE("condition_variable with mutex notify_one", "[sync][condvar][coro]") { auto waiter = [&]() -> task { co_await mtx.lock(); - while (!ready) { + while (!ready.load(std::memory_order_acquire)) { co_await co_await cv.wait(mtx); } mtx.unlock(); - completed++; + completed.fetch_add(1, std::memory_order_relaxed); }; auto notifier = [&]() -> task { co_await mtx.lock(); - ready = true; + ready.store(true, std::memory_order_release); mtx.unlock(); cv.notify_one(); - completed++; + completed.fetch_add(1, std::memory_order_relaxed); }; { auto w = waiter(); - sched.spawn(w.release()); + spawn_task(sched, w); } std::this_thread::sleep_for(std::chrono::milliseconds(50)); { auto n = notifier(); - sched.spawn(n.release()); + spawn_task(sched, n); } for (int i = 0; i < 200 && completed < 2; ++i) { @@ -498,13 +507,13 @@ TEST_CASE("condition_variable with mutex notify_one", "[sync][condvar][coro]") { sched.shutdown(); REQUIRE(completed == 2); - REQUIRE(ready); + REQUIRE(ready.load()); } TEST_CASE("condition_variable with mutex notify_all", "[sync][condvar][coro]") { mutex mtx; condition_variable cv; - bool ready = false; + std::atomic ready{false}; std::atomic completed{0}; scheduler sched(4); @@ -514,30 +523,30 @@ TEST_CASE("condition_variable with mutex notify_all", "[sync][condvar][coro]") { auto waiter = [&]() -> task { co_await mtx.lock(); - while (!ready) { + while (!ready.load(std::memory_order_acquire)) { co_await co_await cv.wait(mtx); } mtx.unlock(); - completed++; + completed.fetch_add(1, std::memory_order_relaxed); }; for (int i = 0; i < NUM_WAITERS; ++i) { auto w = waiter(); - sched.spawn(w.release()); + spawn_task(sched, w); } std::this_thread::sleep_for(std::chrono::milliseconds(100)); auto notifier = [&]() -> task { co_await mtx.lock(); - ready = true; + ready.store(true, std::memory_order_release); mtx.unlock(); cv.notify_all(); 
co_return; }; { auto n = notifier(); - sched.spawn(n.release()); + spawn_task(sched, n); } for (int i = 0; i < 300 && completed < NUM_WAITERS; ++i) { @@ -552,7 +561,7 @@ TEST_CASE("condition_variable with mutex notify_all", "[sync][condvar][coro]") { TEST_CASE("condition_variable with spinlock", "[sync][condvar][coro]") { spinlock sl; condition_variable cv; - bool ready = false; + std::atomic ready{false}; std::atomic completed{0}; scheduler sched(2); @@ -560,32 +569,32 @@ TEST_CASE("condition_variable with spinlock", "[sync][condvar][coro]") { auto waiter = [&]() -> task { sl.lock(); - while (!ready) { + while (!ready.load(std::memory_order_acquire)) { co_await cv.wait(sl); } sl.unlock(); - completed++; + completed.fetch_add(1, std::memory_order_relaxed); }; auto notifier = [&]() -> task { sl.lock(); - ready = true; + ready.store(true, std::memory_order_release); sl.unlock(); cv.notify_one(); - completed++; + completed.fetch_add(1, std::memory_order_relaxed); co_return; }; { auto w = waiter(); - sched.spawn(w.release()); + spawn_task(sched, w); } std::this_thread::sleep_for(std::chrono::milliseconds(50)); { auto n = notifier(); - sched.spawn(n.release()); + spawn_task(sched, n); } for (int i = 0; i < 200 && completed < 2; ++i) { @@ -599,7 +608,7 @@ TEST_CASE("condition_variable with spinlock", "[sync][condvar][coro]") { TEST_CASE("condition_variable unlocked", "[sync][condvar][coro]") { condition_variable cv; - bool ready = false; + std::atomic ready{false}; std::atomic completed{0}; // Single worker: all coroutines run on the same thread @@ -607,29 +616,29 @@ TEST_CASE("condition_variable unlocked", "[sync][condvar][coro]") { sched.start(); auto waiter = [&]() -> task { - while (!ready) { + while (!ready.load(std::memory_order_acquire)) { co_await cv.wait_unlocked(); } - completed++; + completed.fetch_add(1, std::memory_order_relaxed); }; auto notifier = [&]() -> task { - ready = true; + ready.store(true, std::memory_order_release); cv.notify_one(); - 
completed++; + completed.fetch_add(1, std::memory_order_relaxed); co_return; }; { auto w = waiter(); - sched.spawn(w.release()); + spawn_task(sched, w); } std::this_thread::sleep_for(std::chrono::milliseconds(50)); { auto n = notifier(); - sched.spawn(n.release()); + spawn_task(sched, n); } for (int i = 0; i < 200 && completed < 2; ++i) { @@ -644,7 +653,7 @@ TEST_CASE("condition_variable unlocked", "[sync][condvar][coro]") { TEST_CASE("condition_variable notify_one wakes exactly one", "[sync][condvar][coro]") { mutex mtx; condition_variable cv; - int phase = 0; + std::atomic phase{0}; std::atomic woken{0}; std::atomic completed{0}; @@ -655,17 +664,17 @@ TEST_CASE("condition_variable notify_one wakes exactly one", "[sync][condvar][co auto waiter = [&]() -> task { co_await mtx.lock(); - while (phase == 0) { + while (phase.load(std::memory_order_acquire) == 0) { co_await co_await cv.wait(mtx); } - woken++; + woken.fetch_add(1, std::memory_order_relaxed); mtx.unlock(); - completed++; + completed.fetch_add(1, std::memory_order_relaxed); }; for (int i = 0; i < NUM_WAITERS; ++i) { auto w = waiter(); - sched.spawn(w.release()); + spawn_task(sched, w); } std::this_thread::sleep_for(std::chrono::milliseconds(100)); @@ -673,14 +682,14 @@ TEST_CASE("condition_variable notify_one wakes exactly one", "[sync][condvar][co // Set condition and notify exactly one auto notifier = [&]() -> task { co_await mtx.lock(); - phase = 1; + phase.store(1, std::memory_order_release); mtx.unlock(); cv.notify_one(); co_return; }; { auto n = notifier(); - sched.spawn(n.release()); + spawn_task(sched, n); } // Wait for exactly one to wake @@ -750,12 +759,12 @@ TEST_CASE("condition_variable producer-consumer", "[sync][condvar][coro]") { { auto c = consumer(); - sched.spawn(c.release()); + spawn_task(sched, c); } std::this_thread::sleep_for(std::chrono::milliseconds(20)); { auto p = producer(); - sched.spawn(p.release()); + spawn_task(sched, p); } for (int i = 0; i < 300 && completed < 2; ++i) { diff 
--git a/tests/unit/test_task.cpp b/tests/unit/test_task.cpp index 82587bd..bb2c908 100644 --- a/tests/unit/test_task.cpp +++ b/tests/unit/test_task.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include #include #include "../test_main.cpp" // For scaled timeouts @@ -10,6 +11,12 @@ using namespace elio::coro; using namespace elio::runtime; using namespace elio::test; +// Helper: access handle from immovable task (for testing only) +template +auto get_handle(task& t) { + return elio::coro::detail::task_access::handle(t); +} + // Helper: Simple coroutine that returns a value task simple_return_value() { co_return 42; @@ -39,50 +46,53 @@ task nested_outer() { TEST_CASE("task construction and destruction", "[task]") { { auto t = simple_return_value(); - REQUIRE(t.handle() != nullptr); + REQUIRE(get_handle(t) != nullptr); } // Task should destroy handle in destructor } -TEST_CASE("task move semantics", "[task]") { - auto t1 = simple_return_value(); - auto h1 = t1.handle(); - REQUIRE(h1 != nullptr); - - auto t2 = std::move(t1); - REQUIRE(t1.handle() == nullptr); // Moved-from - REQUIRE(t2.handle() == h1); // Moved-to +TEST_CASE("task is non-movable", "[task]") { + // Verify task is non-movable and non-copyable + STATIC_REQUIRE_FALSE(std::is_move_constructible_v>); + STATIC_REQUIRE_FALSE(std::is_move_assignable_v>); + STATIC_REQUIRE_FALSE(std::is_copy_constructible_v>); + STATIC_REQUIRE_FALSE(std::is_copy_assignable_v>); + + STATIC_REQUIRE_FALSE(std::is_move_constructible_v>); + STATIC_REQUIRE_FALSE(std::is_move_assignable_v>); + STATIC_REQUIRE_FALSE(std::is_copy_constructible_v>); + STATIC_REQUIRE_FALSE(std::is_copy_assignable_v>); } TEST_CASE("task co_return value", "[task]") { auto t = simple_return_value(); // Start the coroutine - t.handle().resume(); + get_handle(t).resume(); // The promise should have the value - REQUIRE(t.handle().promise().value_.has_value()); - REQUIRE(t.handle().promise().value_.value() == 42); + 
REQUIRE(get_handle(t).promise().value_.has_value()); + REQUIRE(get_handle(t).promise().value_.value() == 42); } TEST_CASE("task co_return void", "[task]") { auto t = simple_void(); // Start the coroutine - t.handle().resume(); + get_handle(t).resume(); // Should complete without error - REQUIRE(t.handle().done()); + REQUIRE(get_handle(t).done()); } TEST_CASE("task stores exception", "[task]") { auto t = throwing_coroutine(); // Start the coroutine - t.handle().resume(); + get_handle(t).resume(); // The promise should have an exception - REQUIRE(t.handle().promise().exception() != nullptr); + REQUIRE(get_handle(t).promise().exception() != nullptr); } TEST_CASE("task co_await basic", "[task]") { @@ -94,18 +104,18 @@ TEST_CASE("task co_await basic", "[task]") { }; auto t = outer(); - t.handle().resume(); + get_handle(t).resume(); // The outer task should have result 43 - REQUIRE(t.handle().promise().value_.value() == 43); + REQUIRE(get_handle(t).promise().value_.value() == 43); } TEST_CASE("task nested co_await", "[task]") { auto t = nested_outer(); - t.handle().resume(); + get_handle(t).resume(); // The outer coroutine should return 20 (10 * 2) - REQUIRE(t.handle().promise().value_.value() == 20); + REQUIRE(get_handle(t).promise().value_.value() == 20); } TEST_CASE("task exception propagation via co_await", "[task]") { @@ -120,32 +130,54 @@ TEST_CASE("task exception propagation via co_await", "[task]") { }; auto t = outer(); - t.handle().resume(); + get_handle(t).resume(); // Should complete without unhandled exception - REQUIRE(t.handle().done()); + REQUIRE(get_handle(t).done()); } -TEST_CASE("task virtual stack integration", "[task]") { - auto inner = []() -> task { - // Inside inner coroutine, virtual stack should be at least 1 deep - size_t depth = get_stack_depth(); - REQUIRE(depth >= 1); - co_return 100; - }; +TEST_CASE("task virtual stack integration", "[task][.integration]") { + // This test requires scheduler context, run with elio::run() + scheduler 
sched(2); + sched.start(); + + std::atomic passed{false}; - auto outer = [&]() -> task { - size_t outer_depth = get_stack_depth(); - int result = co_await inner(); - size_t inner_depth = get_stack_depth(); + auto test_coro = []() -> task { + auto inner = []() -> task { + // Inside inner coroutine, virtual stack should be at least 1 deep + size_t depth = get_stack_depth(); + REQUIRE(depth >= 1); + co_return 100; + }; + + auto outer = [&]() -> task { + size_t outer_depth = get_stack_depth(); + int result = co_await inner(); + size_t after_depth = get_stack_depth(); + + // After co_await, we should be back to outer depth + REQUIRE(after_depth == outer_depth); + co_return result; + }; - // After co_await, we should be back to outer depth - REQUIRE(inner_depth == outer_depth); - co_return result; + int result = co_await outer(); + REQUIRE(result == 100); + co_return; }; - auto t = outer(); - t.handle().resume(); + auto driver = [&]() -> task { + co_await test_coro(); + passed.store(true); + }; + + elio::go(driver); + + std::this_thread::sleep_for(scaled_ms(200)); + + sched.shutdown(); + + REQUIRE(passed.load()); } TEST_CASE("task multiple levels", "[task]") { @@ -160,9 +192,9 @@ TEST_CASE("task multiple levels", "[task]") { }; auto t = level1(); - t.handle().resume(); + get_handle(t).resume(); - REQUIRE(t.handle().promise().value_.value() == 3); + REQUIRE(get_handle(t).promise().value_.value() == 3); } TEST_CASE("task exception propagation", "[task]") { @@ -181,15 +213,15 @@ TEST_CASE("task exception propagation", "[task]") { }; auto t = catcher(); - t.handle().resume(); - REQUIRE(t.handle().done()); + get_handle(t).resume(); + REQUIRE(get_handle(t).done()); } // ============================================================================ -// Tests for new task spawning API: go(), spawn(), join_handle +// Tests for new task spawning API: elio::go(), elio::spawn(), join_handle // ============================================================================ 
-TEST_CASE("task::go() spawns fire-and-forget task", "[task][spawn]") { +TEST_CASE("elio::go() spawns fire-and-forget task", "[task][spawn]") { scheduler sched(2); sched.start(); @@ -200,8 +232,8 @@ TEST_CASE("task::go() spawns fire-and-forget task", "[task][spawn]") { co_return; }; - // Use go() to spawn fire-and-forget - coro().go(); + // Use elio::go() to spawn fire-and-forget + elio::go(coro); // Wait for execution std::this_thread::sleep_for(scaled_ms(100)); @@ -211,7 +243,7 @@ TEST_CASE("task::go() spawns fire-and-forget task", "[task][spawn]") { sched.shutdown(); } -TEST_CASE("task::go() spawns fire-and-forget task with value", "[task][spawn]") { +TEST_CASE("elio::go() spawns fire-and-forget task with value", "[task][spawn]") { scheduler sched(2); sched.start(); @@ -222,7 +254,7 @@ TEST_CASE("task::go() spawns fire-and-forget task with value", "[task][spaw co_return 42; // Value is discarded in fire-and-forget }; - coro().go(); + elio::go(coro); std::this_thread::sleep_for(scaled_ms(100)); @@ -231,7 +263,7 @@ TEST_CASE("task::go() spawns fire-and-forget task with value", "[task][spaw sched.shutdown(); } -TEST_CASE("task::spawn() returns joinable handle", "[task][spawn][join_handle]") { +TEST_CASE("elio::spawn() returns joinable handle", "[task][spawn][join_handle]") { scheduler sched(2); sched.start(); @@ -242,13 +274,13 @@ TEST_CASE("task::spawn() returns joinable handle", "[task][spawn][join_handle]") }; auto driver = [&]() -> task { - auto handle = compute().spawn(); + auto handle = elio::spawn(compute); int result = co_await handle; REQUIRE(result == 100); completed.store(true); }; - driver().go(); + elio::go(driver); std::this_thread::sleep_for(scaled_ms(200)); @@ -257,7 +289,7 @@ TEST_CASE("task::spawn() returns joinable handle", "[task][spawn][join_handle]") sched.shutdown(); } -TEST_CASE("task::spawn() returns joinable handle", "[task][spawn][join_handle]") { +TEST_CASE("elio::spawn() with void task returns joinable handle", 
"[task][spawn][join_handle]") { scheduler sched(2); sched.start(); @@ -270,13 +302,13 @@ TEST_CASE("task::spawn() returns joinable handle", "[task][spawn][join_han }; auto driver = [&]() -> task { - auto handle = work().spawn(); + auto handle = elio::spawn(work); co_await handle; REQUIRE(counter.load() == 1); completed.store(true); }; - driver().go(); + elio::go(driver); std::this_thread::sleep_for(scaled_ms(200)); @@ -298,7 +330,7 @@ TEST_CASE("join_handle propagates exceptions", "[task][spawn][join_handle]") { auto catcher = [&]() -> task { try { - auto handle = thrower().spawn(); + auto handle = elio::spawn(thrower); co_await handle; FAIL("Should have thrown"); } catch (const std::runtime_error& e) { @@ -307,7 +339,7 @@ TEST_CASE("join_handle propagates exceptions", "[task][spawn][join_handle]") { } }; - catcher().go(); + elio::go(catcher); std::this_thread::sleep_for(scaled_ms(200)); @@ -316,7 +348,7 @@ TEST_CASE("join_handle propagates exceptions", "[task][spawn][join_handle]") { sched.shutdown(); } -TEST_CASE("multiple spawn() tasks run concurrently", "[task][spawn][join_handle]") { +TEST_CASE("multiple elio::spawn() tasks run concurrently", "[task][spawn][join_handle]") { scheduler sched(4); sched.start(); @@ -336,9 +368,9 @@ TEST_CASE("multiple spawn() tasks run concurrently", "[task][spawn][join_handle] }; auto driver = [&]() -> task { - auto h1 = work().spawn(); - auto h2 = work().spawn(); - auto h3 = work().spawn(); + auto h1 = elio::spawn(work); + auto h2 = elio::spawn(work); + auto h3 = elio::spawn(work); co_await h1; co_await h2; @@ -347,7 +379,7 @@ TEST_CASE("multiple spawn() tasks run concurrently", "[task][spawn][join_handle] completed.store(true); }; - driver().go(); + elio::go(driver); std::this_thread::sleep_for(scaled_ms(500)); @@ -370,7 +402,7 @@ TEST_CASE("join_handle::is_ready() reflects completion state", "[task][spawn][jo }; auto driver = [&]() -> task { - auto handle = slow_task().spawn(); + auto handle = elio::spawn(slow_task); // 
Initially not ready bool was_not_ready = !handle.is_ready(); @@ -384,7 +416,7 @@ TEST_CASE("join_handle::is_ready() reflects completion state", "[task][spawn][jo test_passed.store(was_not_ready && is_now_ready && result == 42); }; - driver().go(); + elio::go(driver); std::this_thread::sleep_for(scaled_ms(300)); @@ -393,7 +425,7 @@ TEST_CASE("join_handle::is_ready() reflects completion state", "[task][spawn][jo sched.shutdown(); } -TEST_CASE("scheduler::spawn() accepts task directly", "[scheduler][spawn]") { +TEST_CASE("elio::go() works with scheduler context", "[scheduler][spawn]") { scheduler sched(2); sched.start(); @@ -404,8 +436,8 @@ TEST_CASE("scheduler::spawn() accepts task directly", "[scheduler][spawn]") { co_return; }; - // New API: spawn task directly without calling release() - sched.spawn(coro()); + // Use elio::go() to spawn fire-and-forget task + elio::go(coro); std::this_thread::sleep_for(scaled_ms(100)); @@ -414,7 +446,7 @@ TEST_CASE("scheduler::spawn() accepts task directly", "[scheduler][spawn]") { sched.shutdown(); } -TEST_CASE("scheduler::spawn() accepts task directly", "[scheduler][spawn]") { +TEST_CASE("elio::go() works with task", "[scheduler][spawn]") { scheduler sched(2); sched.start(); @@ -425,7 +457,7 @@ TEST_CASE("scheduler::spawn() accepts task directly", "[scheduler][spawn]") co_return 99; }; - sched.spawn(coro()); + elio::go(coro); std::this_thread::sleep_for(scaled_ms(100)); diff --git a/tests/unit/test_timer.cpp b/tests/unit/test_timer.cpp index dee502f..485a027 100644 --- a/tests/unit/test_timer.cpp +++ b/tests/unit/test_timer.cpp @@ -11,6 +11,22 @@ using namespace elio::coro; using namespace elio::runtime; using namespace std::chrono_literals; +// Helper to access handle from task +template +auto get_handle(task& t) { + return elio::coro::detail::task_access::handle(t); +} + +// Helper to spawn a task to scheduler +template +void spawn_task(scheduler& sched, task& t) { + elio::coro::detail::heap_alloc_guard guard; + auto handle = 
elio::coro::detail::task_access::release(t); + auto* vstack = new elio::coro::vthread_stack(); + handle.promise().set_vstack_owner(vstack); + sched.spawn(handle); +} + TEST_CASE("sleep_for basic", "[time][sleep]") { std::atomic completed{false}; @@ -31,7 +47,7 @@ TEST_CASE("sleep_for basic", "[time][sleep]") { { auto t = sleep_task(); - sched.spawn(t.release()); // Transfer ownership to scheduler + spawn_task(sched, t); // Transfer ownership to scheduler } // Wait for completion @@ -53,7 +69,7 @@ TEST_CASE("sleep_for zero duration", "[time][sleep]") { }; auto t = sleep_task(); - t.handle().resume(); + get_handle(t).resume(); REQUIRE(completed); } @@ -76,8 +92,8 @@ TEST_CASE("yield execution", "[time][yield]") { { auto t1 = yield_task(); auto t2 = yield_task(); - sched.spawn(t1.release()); - sched.spawn(t2.release()); + spawn_task(sched, t1); + spawn_task(sched, t2); } // Wait for completion @@ -114,7 +130,7 @@ TEST_CASE("multiple sleeps sequential", "[time][sleep]") { { auto t = multi_sleep(); - sched.spawn(t.release()); + spawn_task(sched, t); } for (int i = 0; i < 100 && !completed; ++i) { @@ -145,7 +161,7 @@ TEST_CASE("sleep_until", "[time][sleep]") { { auto t = sleep_until_task(); - sched.spawn(t.release()); + spawn_task(sched, t); } for (int i = 0; i < 100 && !completed; ++i) { @@ -167,7 +183,7 @@ TEST_CASE("sleep_until past time", "[time][sleep]") { }; auto t = past_sleep(); - t.handle().resume(); + get_handle(t).resume(); REQUIRE(completed); } @@ -189,7 +205,7 @@ TEST_CASE("cancellable sleep - normal completion", "[time][sleep][cancel]") { { auto t = sleep_task(); - sched.spawn(t.release()); + spawn_task(sched, t); } // Wait for completion without cancelling @@ -226,7 +242,7 @@ TEST_CASE("cancellable sleep - cancelled early", "[time][sleep][cancel]") { { auto t = sleep_task(); - sched.spawn(t.release()); + spawn_task(sched, t); } // Wait a bit then cancel @@ -268,7 +284,7 @@ TEST_CASE("cancellable sleep - already cancelled token", "[time][sleep][cancel]" { 
auto t = sleep_task(); - sched.spawn(t.release()); + spawn_task(sched, t); } // Wait for completion diff --git a/tests/unit/test_vthread_stack.cpp b/tests/unit/test_vthread_stack.cpp new file mode 100644 index 0000000..af9f255 --- /dev/null +++ b/tests/unit/test_vthread_stack.cpp @@ -0,0 +1,543 @@ +#include +#include +#include +#include +#include +#include +#include +#include "../test_main.cpp" // For scaled timeouts + +using namespace elio::coro; +using namespace elio::runtime; +using namespace elio::test; + +// ============================================================================ +// vthread_stack basic allocation/deallocation (LIFO correctness) +// ============================================================================ + +TEST_CASE("vthread_stack basic allocation and deallocation", "[vthread_stack]") { + vthread_stack stack; + + // Allocate several blocks + void* p1 = stack.push(64); + void* p2 = stack.push(128); + void* p3 = stack.push(256); + + REQUIRE(p1 != nullptr); + REQUIRE(p2 != nullptr); + REQUIRE(p3 != nullptr); + + // All pointers should be different + REQUIRE(p1 != p2); + REQUIRE(p2 != p3); + REQUIRE(p1 != p3); + + // Pop in reverse order (LIFO) + stack.pop(p3, 256); + stack.pop(p2, 128); + stack.pop(p1, 64); +} + +TEST_CASE("vthread_stack LIFO order verification", "[vthread_stack]") { + vthread_stack stack; + + // Push multiple allocations and store their addresses + std::vector> allocs; + for (size_t i = 1; i <= 10; ++i) { + size_t size = i * 16; + void* p = stack.push(size); + allocs.push_back({p, size}); + } + + // Pop in reverse order - should not fail assertions + for (auto it = allocs.rbegin(); it != allocs.rend(); ++it) { + stack.pop(it->first, it->second); + } +} + +// ============================================================================ +// Segment growth test +// ============================================================================ + +TEST_CASE("vthread_stack segment growth", "[vthread_stack]") { + vthread_stack 
stack; + + // Default segment size is 16KB, allocate more to trigger new segment + constexpr size_t large_size = 8192; // 8KB each + + void* p1 = stack.push(large_size); + void* p2 = stack.push(large_size); + void* p3 = stack.push(large_size); // This should trigger a new segment + + REQUIRE(p1 != nullptr); + REQUIRE(p2 != nullptr); + REQUIRE(p3 != nullptr); + + // All should be valid allocations + // Pop in reverse order + stack.pop(p3, large_size); + stack.pop(p2, large_size); + stack.pop(p1, large_size); +} + +TEST_CASE("vthread_stack oversized allocation", "[vthread_stack]") { + vthread_stack stack; + + // Allocate larger than default segment size + constexpr size_t huge_size = 32768; // 32KB > 16KB default + + void* p = stack.push(huge_size); + REQUIRE(p != nullptr); + + stack.pop(p, huge_size); +} + +// ============================================================================ +// vthread_stack static API (thread-local current) +// ============================================================================ + +TEST_CASE("vthread_stack thread-local current", "[vthread_stack]") { + // Initially no current stack + REQUIRE(vthread_stack::current() == nullptr); + + vthread_stack stack; + vthread_stack::set_current(&stack); + REQUIRE(vthread_stack::current() == &stack); + + vthread_stack::set_current(nullptr); + REQUIRE(vthread_stack::current() == nullptr); +} + +TEST_CASE("vthread_stack thread-local isolation", "[vthread_stack]") { + vthread_stack main_stack; + vthread_stack::set_current(&main_stack); + + std::atomic worker_isolated{false}; + + std::thread worker([&]() { + // Worker thread should have no current stack + worker_isolated = (vthread_stack::current() == nullptr); + + vthread_stack worker_stack; + vthread_stack::set_current(&worker_stack); + + // Worker's current should be different from main's + worker_isolated = worker_isolated && (vthread_stack::current() != &main_stack); + + vthread_stack::set_current(nullptr); + }); + + worker.join(); + + 
REQUIRE(worker_isolated.load()); + REQUIRE(vthread_stack::current() == &main_stack); + + vthread_stack::set_current(nullptr); +} + +// ============================================================================ +// task on-site co_await evaluation +// ============================================================================ + +namespace { +task compute(int x) { + co_return x * 2; +} + +task test_basic_await_impl() { + int val = co_await compute(21); + REQUIRE(val == 42); +} +} + +TEST_CASE("task co_await basic", "[vthread_stack]") { + elio::run(test_basic_await_impl); +} + +// ============================================================================ +// task symmetric test +// ============================================================================ + +namespace { +task void_work() { + co_return; +} + +task test_void_await_impl() { + co_await void_work(); + // If we reach here, void await worked +} +} + +TEST_CASE("task co_await", "[vthread_stack]") { + elio::run(test_void_await_impl); +} + +// ============================================================================ +// Nested call chain: LIFO correctness for multi-layer co_await +// ============================================================================ + +namespace { +task level3() { co_return 1; } +task level2() { co_return co_await level3() + 1; } +task level1() { co_return co_await level2() + 1; } + +task test_nested_impl() { + int result = co_await level1(); + REQUIRE(result == 3); // 1 + 1 + 1 +} +} + +TEST_CASE("nested co_await chain LIFO", "[vthread_stack]") { + elio::run(test_nested_impl); +} + +// ============================================================================ +// elio::go(func) — no-argument function +// ============================================================================ + +TEST_CASE("elio::go() with no-arg function", "[vthread_stack][spawn]") { + scheduler sched(2); + sched.start(); + + std::atomic done{false}; + + auto work = [&done]() -> task { + 
done.store(true); + co_return; + }; + + elio::go(work); + + // Wait for completion + std::this_thread::sleep_for(scaled_ms(100)); + + REQUIRE(done.load()); + + sched.shutdown(); +} + +// ============================================================================ +// elio::go(func, args...) — with arguments +// ============================================================================ + +namespace { +task work_with_args(std::atomic* counter, int increment) { + counter->fetch_add(increment); + co_return; +} +} + +TEST_CASE("elio::go() with arguments", "[vthread_stack][spawn]") { + scheduler sched(2); + sched.start(); + + std::atomic counter{0}; + + elio::go(work_with_args, &counter, 10); + + std::this_thread::sleep_for(scaled_ms(100)); + + REQUIRE(counter.load() == 10); + + sched.shutdown(); +} + +// ============================================================================ +// elio::spawn(func) — returns join_handle +// ============================================================================ + +TEST_CASE("elio::spawn() returns join_handle", "[vthread_stack][spawn]") { + scheduler sched(2); + sched.start(); + + std::atomic completed{false}; + + auto driver = [&]() -> task { + auto h = elio::spawn(compute, 21); + int result = co_await h; + REQUIRE(result == 42); + completed.store(true); + }; + + elio::go(driver); + + std::this_thread::sleep_for(scaled_ms(200)); + + REQUIRE(completed.load()); + + sched.shutdown(); +} + +// ============================================================================ +// elio::spawn(func, args...) 
— with arguments +// ============================================================================ + +namespace { +task add_values(int a, int b) { + co_return a + b; +} +} + +TEST_CASE("elio::spawn() with arguments", "[vthread_stack][spawn]") { + scheduler sched(2); + sched.start(); + + std::atomic completed{false}; + + auto driver = [&]() -> task { + auto h = elio::spawn(add_values, 10, 20); + int result = co_await h; + REQUIRE(result == 30); + completed.store(true); + }; + + elio::go(driver); + + std::this_thread::sleep_for(scaled_ms(200)); + + REQUIRE(completed.load()); + + sched.shutdown(); +} + +// ============================================================================ +// ELIO_GO(expr) / ELIO_SPAWN(expr) macro forms +// ============================================================================ + +TEST_CASE("ELIO_GO macro", "[vthread_stack][spawn]") { + scheduler sched(2); + sched.start(); + + std::atomic done{false}; + + auto work = [&done]() -> task { + done.store(true); + co_return; + }; + + ELIO_GO(work()); + + std::this_thread::sleep_for(scaled_ms(100)); + + REQUIRE(done.load()); + + sched.shutdown(); +} + +TEST_CASE("ELIO_SPAWN macro", "[vthread_stack][spawn]") { + scheduler sched(2); + sched.start(); + + std::atomic completed{false}; + + auto driver = [&]() -> task { + auto h = ELIO_SPAWN(compute(21)); + int result = co_await h; + REQUIRE(result == 42); + completed.store(true); + }; + + elio::go(driver); + + std::this_thread::sleep_for(scaled_ms(200)); + + REQUIRE(completed.load()); + + sched.shutdown(); +} + +// ============================================================================ +// elio::run(func) — entry execution +// ============================================================================ + +TEST_CASE("elio::run() executes task to completion", "[vthread_stack]") { + auto main_task = []() -> task { + co_return 42; + }; + + int result = elio::run(main_task); + REQUIRE(result == 42); +} + +TEST_CASE("elio::run() with void task", 
"[vthread_stack]") { + std::atomic executed{false}; + + auto main_task = [&executed]() -> task { + executed.store(true); + co_return; + }; + + elio::run(main_task); + REQUIRE(executed.load()); +} + +// ============================================================================ +// Mixed scenario: elio::go + internal co_await chain +// ============================================================================ + +TEST_CASE("elio::go with internal co_await chain", "[vthread_stack][spawn]") { + scheduler sched(2); + sched.start(); + + std::atomic final_result{0}; + + auto complex_task = [&]() -> task { + // Multi-level co_await chain inside a go'd task + int r1 = co_await level1(); // Returns 3 + int r2 = co_await compute(r1); // 3 * 2 = 6 + int r3 = co_await add_values(r2, 4); // 6 + 4 = 10 + final_result.store(r3); + }; + + elio::go(complex_task); + + std::this_thread::sleep_for(scaled_ms(200)); + + REQUIRE(final_result.load() == 10); + + sched.shutdown(); +} + +// ============================================================================ +// Independent vstack isolation verification +// ============================================================================ + +TEST_CASE("independent vstack isolation between spawned coroutines", "[vthread_stack][spawn]") { + scheduler sched(4); + sched.start(); + + // Each spawned coroutine should have its own vstack + std::atomic unique_vstacks{0}; + std::atomic completed{0}; + std::vector observed_vstacks; + std::mutex vstacks_mutex; + + auto worker = [&]([[maybe_unused]] int id) -> task { + auto* my_vstack = vthread_stack::current(); + + { + std::lock_guard lock(vstacks_mutex); + // Check if this vstack was seen before + bool is_unique = true; + for (auto* vs : observed_vstacks) { + if (vs == my_vstack) { + is_unique = false; + break; + } + } + if (is_unique && my_vstack != nullptr) { + observed_vstacks.push_back(my_vstack); + unique_vstacks.fetch_add(1); + } + } + + completed.fetch_add(1); + co_return; + }; + + // Spawn 
multiple tasks + constexpr int num_tasks = 10; + for (int i = 0; i < num_tasks; ++i) { + elio::go([&worker, i]() { return worker(i); }); + } + + // Wait for all to complete + while (completed.load() < num_tasks) { + std::this_thread::sleep_for(scaled_ms(10)); + } + + // Each task should have had a unique vstack + REQUIRE(unique_vstacks.load() == num_tasks); + + sched.shutdown(); +} + +// ============================================================================ +// Exception propagation +// ============================================================================ + +namespace { +task throwing_task() { + throw std::runtime_error("test exception"); + co_return 0; +} +} + +TEST_CASE("exception propagation through co_await", "[vthread_stack]") { + auto test_task = []() -> task { + bool caught = false; + try { + co_await throwing_task(); + } catch (const std::runtime_error& e) { + REQUIRE(std::string(e.what()) == "test exception"); + caught = true; + } + REQUIRE(caught); + }; + + elio::run(test_task); +} + +TEST_CASE("exception propagation through spawn", "[vthread_stack][spawn]") { + scheduler sched(2); + sched.start(); + + std::atomic caught_exception{false}; + + auto catcher = [&]() -> task { + try { + auto h = elio::spawn(throwing_task); + co_await h; + } catch (const std::runtime_error& e) { + REQUIRE(std::string(e.what()) == "test exception"); + caught_exception.store(true); + } + }; + + elio::go(catcher); + + std::this_thread::sleep_for(scaled_ms(200)); + + REQUIRE(caught_exception.load()); + + sched.shutdown(); +} + +// ============================================================================ +// task immovability compile-time verification +// ============================================================================ + +TEST_CASE("task is non-movable and non-copyable", "[vthread_stack]") { + STATIC_REQUIRE_FALSE(std::is_move_constructible_v>); + STATIC_REQUIRE_FALSE(std::is_move_assignable_v>); + STATIC_REQUIRE_FALSE(std::is_copy_constructible_v>); + 
STATIC_REQUIRE_FALSE(std::is_copy_assignable_v>); + + STATIC_REQUIRE_FALSE(std::is_move_constructible_v>); + STATIC_REQUIRE_FALSE(std::is_move_assignable_v>); + STATIC_REQUIRE_FALSE(std::is_copy_constructible_v>); + STATIC_REQUIRE_FALSE(std::is_copy_assignable_v>); +} + +// ============================================================================ +// Deep nested co_await test +// ============================================================================ + +namespace { +task deep_recursion(int depth) { + if (depth <= 0) { + co_return 1; + } + co_return co_await deep_recursion(depth - 1) + 1; +} +} + +TEST_CASE("deep nested co_await chain", "[vthread_stack]") { + auto test = []() -> task { + int result = co_await deep_recursion(10); + REQUIRE(result == 11); // 10 levels + 1 base + }; + + elio::run(test); +}