From 04f4cedddf94af3b9ef25dc5e5b5966884b0edfd Mon Sep 17 00:00:00 2001 From: MOZGIII Date: Wed, 25 Feb 2026 17:54:39 +0400 Subject: [PATCH 1/5] Split out ir parser into a separate crate --- Cargo.lock | 10 ++++++++++ Cargo.toml | 2 ++ crates/fuzzer/Cargo.toml | 1 + crates/fuzzer/src/harness.rs | 2 +- crates/ir-parser/Cargo.toml | 8 ++++++++ .../waymark_core/ir_parser.rs => ir-parser/src/lib.rs} | 2 +- crates/waymark/Cargo.toml | 1 + crates/waymark/src/bin/soak-harness.rs | 2 +- crates/waymark/src/waymark_core/cli/smoke.rs | 2 +- crates/waymark/src/waymark_core/mod.rs | 1 - 10 files changed, 26 insertions(+), 5 deletions(-) create mode 100644 crates/ir-parser/Cargo.toml rename crates/{waymark/src/waymark_core/ir_parser.rs => ir-parser/src/lib.rs} (99%) diff --git a/Cargo.lock b/Cargo.lock index e57393b9..ef968a32 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3367,6 +3367,7 @@ dependencies = [ "tracing-subscriber", "uuid", "waymark-dag", + "waymark-ir-parser", "waymark-observability-macros", "waymark-proto", ] @@ -3397,6 +3398,15 @@ dependencies = [ "uuid", "waymark", "waymark-dag", + "waymark-ir-parser", +] + +[[package]] +name = "waymark-ir-parser" +version = "0.1.0" +dependencies = [ + "regex", + "waymark-proto", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index ee36ff5d..e7fd75f8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -5,6 +5,7 @@ members = ["crates/*"] [workspace.dependencies] waymark = { path = "crates/waymark" } waymark-dag = { path = "crates/dag" } +waymark-ir-parser = { path = "crates/ir-parser" } waymark-proto = { path = "crates/proto" } anyhow = "1" @@ -12,6 +13,7 @@ clap = "4.5" proptest = "1.9" prost = "0.12" prost-types = "0.12" +regex = "1" rustc-hash = "2" serde = "1" serde_json = "1" diff --git a/crates/fuzzer/Cargo.toml b/crates/fuzzer/Cargo.toml index e31f9970..4e0fd1e0 100644 --- a/crates/fuzzer/Cargo.toml +++ b/crates/fuzzer/Cargo.toml @@ -14,3 +14,4 @@ uuid = { workspace = true, features = ["serde", "v4"] } tokio = { workspace = true 
} waymark = { workspace = true } waymark-dag = { workspace = true } +waymark-ir-parser = { workspace = true } diff --git a/crates/fuzzer/src/harness.rs b/crates/fuzzer/src/harness.rs index 242d2924..59dc38c2 100644 --- a/crates/fuzzer/src/harness.rs +++ b/crates/fuzzer/src/harness.rs @@ -15,11 +15,11 @@ use waymark::backends::{ MemoryBackend, QueuedInstance, WorkflowRegistration, WorkflowRegistryBackend, }; use waymark::messages::ast as ir; -use waymark::waymark_core::ir_parser::parse_program; use waymark::waymark_core::runloop::{RunLoop, RunLoopSupervisorConfig}; use waymark::waymark_core::runner::RunnerState; use waymark::workers::{ActionCallable, InlineWorkerPool, WorkerPoolError}; use waymark_dag::convert_to_dag; +use waymark_ir_parser::parse_program; pub async fn run_case(case_index: usize, case: &GeneratedCase) -> Result<()> { let program = parse_program(case.source.trim()).map_err(|err| { diff --git a/crates/ir-parser/Cargo.toml b/crates/ir-parser/Cargo.toml new file mode 100644 index 00000000..fee9b094 --- /dev/null +++ b/crates/ir-parser/Cargo.toml @@ -0,0 +1,8 @@ +[package] +name = "waymark-ir-parser" +version = "0.1.0" +edition = "2024" + +[dependencies] +waymark-proto = { workspace = true } +regex = { workspace = true } diff --git a/crates/waymark/src/waymark_core/ir_parser.rs b/crates/ir-parser/src/lib.rs similarity index 99% rename from crates/waymark/src/waymark_core/ir_parser.rs rename to crates/ir-parser/src/lib.rs index a3f2bbf7..1af43324 100644 --- a/crates/waymark/src/waymark_core/ir_parser.rs +++ b/crates/ir-parser/src/lib.rs @@ -2,7 +2,7 @@ use std::fmt; -use crate::messages::ast as ir; +use waymark_proto::ast as ir; /// Raised when parsing the IR source representation fails. 
#[derive(Debug, Clone)] diff --git a/crates/waymark/Cargo.toml b/crates/waymark/Cargo.toml index d04b394a..83c06cd3 100644 --- a/crates/waymark/Cargo.toml +++ b/crates/waymark/Cargo.toml @@ -19,6 +19,7 @@ path = "src/bin/smoke.rs" [dependencies] waymark-proto = { workspace = true, features = ["serde", "client", "server"] } waymark-dag = { workspace = true } +waymark-ir-parser = { workspace = true } anyhow = "1" axum = "0.8" diff --git a/crates/waymark/src/bin/soak-harness.rs b/crates/waymark/src/bin/soak-harness.rs index 3503fe94..d4e77388 100644 --- a/crates/waymark/src/bin/soak-harness.rs +++ b/crates/waymark/src/bin/soak-harness.rs @@ -34,9 +34,9 @@ use waymark::backends::{ }; use waymark::db; use waymark::messages::ast as ir; -use waymark::waymark_core::ir_parser::parse_program; use waymark::waymark_core::runner::RunnerState; use waymark_dag::{DAG, convert_to_dag}; +use waymark_ir_parser::parse_program; const DEFAULT_DSN: &str = "postgresql://waymark:waymark@127.0.0.1:5433/waymark"; const DEFAULT_WORKFLOW_NAME: &str = "waymark_soak_timeout_mix_v1"; diff --git a/crates/waymark/src/waymark_core/cli/smoke.rs b/crates/waymark/src/waymark_core/cli/smoke.rs index 3625e952..abd34109 100644 --- a/crates/waymark/src/waymark_core/cli/smoke.rs +++ b/crates/waymark/src/waymark_core/cli/smoke.rs @@ -18,11 +18,11 @@ use crate::backends::{ use crate::messages::ast as ir; use crate::waymark_core::dag_viz::render_dag_image; use crate::waymark_core::ir_format::format_program; -use crate::waymark_core::ir_parser::parse_program; use crate::waymark_core::runloop::{RunLoop, RunLoopSupervisorConfig}; use crate::waymark_core::runner::RunnerState; use crate::workers::{PythonWorkerConfig, RemoteWorkerPool}; use waymark_dag::convert_to_dag; +use waymark_ir_parser::parse_program; #[derive(Parser, Debug)] #[command(name = "waymark-smoke", about = "Smoke check core-python components.")] diff --git a/crates/waymark/src/waymark_core/mod.rs b/crates/waymark/src/waymark_core/mod.rs index 
85f0c008..5e3b9090 100644 --- a/crates/waymark/src/waymark_core/mod.rs +++ b/crates/waymark/src/waymark_core/mod.rs @@ -4,7 +4,6 @@ pub mod cli; pub mod commit_barrier; pub mod dag_viz; pub mod ir_format; -pub mod ir_parser; pub mod lock; pub mod runloop; pub mod runner; From 3bc4b79bc9f0b3dad6528612127c5cadd5d076ba Mon Sep 17 00:00:00 2001 From: MOZGIII Date: Wed, 25 Feb 2026 21:37:23 +0400 Subject: [PATCH 2/5] Annotate prost for cargo-shear exclusion --- crates/proto/Cargo.toml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/crates/proto/Cargo.toml b/crates/proto/Cargo.toml index 277e7df3..79ea21bf 100644 --- a/crates/proto/Cargo.toml +++ b/crates/proto/Cargo.toml @@ -3,6 +3,9 @@ name = "waymark-proto" version = "0.1.0" edition = "2024" +[package.metadata.cargo-shear] +ignored = ["prost"] + [dependencies] prost = "0.12" prost-types = "0.12" From e0bb14fb2e20e30ea4c839a634cb13f9a1eed246 Mon Sep 17 00:00:00 2001 From: MOZGIII Date: Wed, 25 Feb 2026 17:55:14 +0400 Subject: [PATCH 3/5] Split out runner, runner state and backends into separate crates --- Cargo.lock | 158 +- Cargo.toml | 21 + crates/backend-memory/Cargo.toml | 36 + crates/backend-memory/src/core_backend.rs | 158 + .../src/garbage_collector_backend.rs | 15 + crates/backend-memory/src/lib.rs | 111 + .../backend-memory/src/scheduler_backend.rs | 209 ++ crates/backend-memory/src/webapp_backend.rs | 294 ++ .../src/worker_status_backend.rs | 13 + .../src/workflow_registry_backend.rs | 58 + crates/backends-core/Cargo.toml | 15 + crates/backends-core/src/lib.rs | 29 + crates/core-backend/Cargo.toml | 14 + crates/core-backend/src/data.rs | 150 + crates/core-backend/src/lib.rs | 58 + crates/garbage-collector-backend/Cargo.toml | 9 + crates/garbage-collector-backend/src/lib.rs | 20 + crates/observability-macros/src/lib.rs | 4 +- crates/observability/Cargo.toml | 8 + crates/observability/src/lib.rs | 8 + crates/runner-state/Cargo.toml | 16 + crates/runner-state/src/lib.rs | 5 + 
crates/runner-state/src/state.rs | 2206 ++++++++++++ crates/runner-state/src/util.rs | 12 + crates/runner-state/src/value_visitor.rs | 533 +++ crates/runner/Cargo.toml | 24 + crates/runner/src/executor.rs | 3015 +++++++++++++++++ crates/runner/src/expression_evaluator.rs | 1056 ++++++ crates/runner/src/lib.rs | 12 + crates/runner/src/replay.rs | 659 ++++ crates/runner/src/retry.rs | 137 + crates/runner/src/synthetic_exceptions.rs | 90 + crates/scheduler-backend/Cargo.toml | 10 + crates/scheduler-backend/src/lib.rs | 29 + crates/scheduler-core/Cargo.toml | 14 + crates/scheduler-core/src/lib.rs | 6 + crates/scheduler-core/src/types.rs | 139 + crates/scheduler-core/src/utils.rs | 181 + crates/webapp-backend/Cargo.toml | 10 + crates/webapp-backend/src/lib.rs | 54 + crates/webapp-core/Cargo.toml | 9 + crates/webapp-core/src/lib.rs | 299 ++ crates/worker-status-backend/Cargo.toml | 10 + crates/worker-status-backend/src/lib.rs | 32 + crates/workflow-registry-backend/Cargo.toml | 9 + crates/workflow-registry-backend/src/lib.rs | 35 + 46 files changed, 9984 insertions(+), 6 deletions(-) create mode 100644 crates/backend-memory/Cargo.toml create mode 100644 crates/backend-memory/src/core_backend.rs create mode 100644 crates/backend-memory/src/garbage_collector_backend.rs create mode 100644 crates/backend-memory/src/lib.rs create mode 100644 crates/backend-memory/src/scheduler_backend.rs create mode 100644 crates/backend-memory/src/webapp_backend.rs create mode 100644 crates/backend-memory/src/worker_status_backend.rs create mode 100644 crates/backend-memory/src/workflow_registry_backend.rs create mode 100644 crates/backends-core/Cargo.toml create mode 100644 crates/backends-core/src/lib.rs create mode 100644 crates/core-backend/Cargo.toml create mode 100644 crates/core-backend/src/data.rs create mode 100644 crates/core-backend/src/lib.rs create mode 100644 crates/garbage-collector-backend/Cargo.toml create mode 100644 crates/garbage-collector-backend/src/lib.rs create mode 
100644 crates/observability/Cargo.toml create mode 100644 crates/observability/src/lib.rs create mode 100644 crates/runner-state/Cargo.toml create mode 100644 crates/runner-state/src/lib.rs create mode 100644 crates/runner-state/src/state.rs create mode 100644 crates/runner-state/src/util.rs create mode 100644 crates/runner-state/src/value_visitor.rs create mode 100644 crates/runner/Cargo.toml create mode 100644 crates/runner/src/executor.rs create mode 100644 crates/runner/src/expression_evaluator.rs create mode 100644 crates/runner/src/lib.rs create mode 100644 crates/runner/src/replay.rs create mode 100644 crates/runner/src/retry.rs create mode 100644 crates/runner/src/synthetic_exceptions.rs create mode 100644 crates/scheduler-backend/Cargo.toml create mode 100644 crates/scheduler-backend/src/lib.rs create mode 100644 crates/scheduler-core/Cargo.toml create mode 100644 crates/scheduler-core/src/lib.rs create mode 100644 crates/scheduler-core/src/types.rs create mode 100644 crates/scheduler-core/src/utils.rs create mode 100644 crates/webapp-backend/Cargo.toml create mode 100644 crates/webapp-backend/src/lib.rs create mode 100644 crates/webapp-core/Cargo.toml create mode 100644 crates/webapp-core/src/lib.rs create mode 100644 crates/worker-status-backend/Cargo.toml create mode 100644 crates/worker-status-backend/src/lib.rs create mode 100644 crates/workflow-registry-backend/Cargo.toml create mode 100644 crates/workflow-registry-backend/src/lib.rs diff --git a/Cargo.lock b/Cargo.lock index ef968a32..4b31811b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1517,9 +1517,9 @@ checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273" [[package]] name = "metrics" -version = "0.24.2" +version = "0.24.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "25dea7ac8057892855ec285c440160265225438c3c45072613c25a4b26e98ef5" +checksum = "5d5312e9ba3771cfa961b585728215e3d972c950a3eed9252aa093d6301277e8" dependencies = [ "ahash", 
"portable-atomic", @@ -1847,9 +1847,9 @@ checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" [[package]] name = "portable-atomic" -version = "1.11.1" +version = "1.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f84267b20a16ea918e43c6a88433c2d54fa145c92a811b5b047ccbe153674483" +checksum = "c33a9471896f1c69cecef8d20cbe2f7accd12527ce60845ff44c153bb2a21b49" [[package]] name = "potential_utf" @@ -3372,6 +3372,48 @@ dependencies = [ "waymark-proto", ] +[[package]] +name = "waymark-backend-memory" +version = "0.1.0" +dependencies = [ + "async-trait", + "chrono", + "rmp-serde", + "serde_json", + "uuid", + "waymark-core-backend", + "waymark-garbage-collector-backend", + "waymark-scheduler-backend", + "waymark-scheduler-core", + "waymark-webapp-backend", + "waymark-webapp-core", + "waymark-worker-status-backend", + "waymark-workflow-registry-backend", +] + +[[package]] +name = "waymark-backends-core" +version = "0.1.0" +dependencies = [ + "serde_json", + "sqlx", + "thiserror", +] + +[[package]] +name = "waymark-core-backend" +version = "0.1.0" +dependencies = [ + "async-trait", + "chrono", + "serde", + "serde_json", + "uuid", + "waymark-backends-core", + "waymark-dag", + "waymark-runner-state", +] + [[package]] name = "waymark-dag" version = "0.1.0" @@ -3401,6 +3443,15 @@ dependencies = [ "waymark-ir-parser", ] +[[package]] +name = "waymark-garbage-collector-backend" +version = "0.1.0" +dependencies = [ + "async-trait", + "chrono", + "waymark-backends-core", +] + [[package]] name = "waymark-ir-parser" version = "0.1.0" @@ -3409,6 +3460,14 @@ dependencies = [ "waymark-proto", ] +[[package]] +name = "waymark-observability" +version = "0.1.0" +dependencies = [ + "tracing", + "waymark-observability-macros", +] + [[package]] name = "waymark-observability-macros" version = "0.1.0" @@ -3429,6 +3488,97 @@ dependencies = [ "tonic-build", ] +[[package]] +name = "waymark-runner" +version = "0.1.0" +dependencies = [ + 
"chrono", + "rustc-hash", + "serde_json", + "thiserror", + "tracing", + "uuid", + "waymark-backend-memory", + "waymark-core-backend", + "waymark-dag", + "waymark-ir-parser", + "waymark-observability", + "waymark-proto", + "waymark-runner-state", +] + +[[package]] +name = "waymark-runner-state" +version = "0.1.0" +dependencies = [ + "chrono", + "serde", + "serde_json", + "thiserror", + "uuid", + "waymark-dag", + "waymark-proto", +] + +[[package]] +name = "waymark-scheduler-backend" +version = "0.1.0" +dependencies = [ + "async-trait", + "uuid", + "waymark-backends-core", + "waymark-scheduler-core", +] + +[[package]] +name = "waymark-scheduler-core" +version = "0.1.0" +dependencies = [ + "chrono", + "cron", + "rand 0.8.5", + "serde", + "uuid", +] + +[[package]] +name = "waymark-webapp-backend" +version = "0.1.0" +dependencies = [ + "async-trait", + "uuid", + "waymark-backends-core", + "waymark-webapp-core", +] + +[[package]] +name = "waymark-webapp-core" +version = "0.1.0" +dependencies = [ + "chrono", + "serde", + "uuid", +] + +[[package]] +name = "waymark-worker-status-backend" +version = "0.1.0" +dependencies = [ + "async-trait", + "chrono", + "uuid", + "waymark-backends-core", +] + +[[package]] +name = "waymark-workflow-registry-backend" +version = "0.1.0" +dependencies = [ + "async-trait", + "uuid", + "waymark-backends-core", +] + [[package]] name = "webpki-roots" version = "0.26.11" diff --git a/Cargo.toml b/Cargo.toml index e7fd75f8..aa2f4ab0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -4,22 +4,43 @@ members = ["crates/*"] [workspace.dependencies] waymark = { path = "crates/waymark" } +waymark-backend-memory = { path = "crates/backend-memory" } +waymark-backends-core = { path = "crates/backends-core" } +waymark-core-backend = { path = "crates/core-backend" } waymark-dag = { path = "crates/dag" } +waymark-garbage-collector-backend = { path = "crates/garbage-collector-backend" } waymark-ir-parser = { path = "crates/ir-parser" } +waymark-observability = { path = 
"crates/observability" } +waymark-observability-macros = { path = "crates/observability-macros" } waymark-proto = { path = "crates/proto" } +waymark-runner = { path = "crates/runner" } +waymark-runner-state = { path = "crates/runner-state" } +waymark-scheduler-backend = { path = "crates/scheduler-backend" } +waymark-scheduler-core = { path = "crates/scheduler-core" } +waymark-webapp-backend = { path = "crates/webapp-backend" } +waymark-webapp-core = { path = "crates/webapp-core" } +waymark-worker-status-backend = { path = "crates/worker-status-backend" } +waymark-workflow-registry-backend = { path = "crates/workflow-registry-backend" } anyhow = "1" +async-trait = "0.1" +chrono = { version = "0.4", default-features = false } clap = "4.5" +cron = "0.12" proptest = "1.9" prost = "0.12" prost-types = "0.12" +rand = "0.8" regex = "1" +rmp-serde = "1" rustc-hash = "2" serde = "1" serde_json = "1" sha2 = "0.10" +sqlx = { version = "0.8", default-features = false } thiserror = "2" tokio = "1" tonic = "0.11" tonic-build = "0.11" +tracing = "0.1" uuid = "1" diff --git a/crates/backend-memory/Cargo.toml b/crates/backend-memory/Cargo.toml new file mode 100644 index 00000000..4346bbda --- /dev/null +++ b/crates/backend-memory/Cargo.toml @@ -0,0 +1,36 @@ +[package] +name = "waymark-backend-memory" +version = "0.1.0" +edition = "2024" + +[dependencies] +async-trait = { workspace = true } +chrono = { workspace = true } +rmp-serde = { workspace = true } +serde_json = { workspace = true } +uuid = { workspace = true } +waymark-core-backend = { workspace = true } +waymark-garbage-collector-backend = { workspace = true, optional = true } +waymark-scheduler-backend = { workspace = true, optional = true } +waymark-scheduler-core = { workspace = true } +waymark-worker-status-backend = { workspace = true } +waymark-workflow-registry-backend = { workspace = true } +waymark-webapp-backend = { workspace = true, optional = true } +waymark-webapp-core = { workspace = true, optional = true } + 
+[features] +default = [ + "core-backend", + "worker-status-backend", + "workflow-registry-backend", + "scheduler-backend", + "garbage-collector-backend", + "webapp-backend", +] + +core-backend = [] +garbage-collector-backend = ["dep:waymark-garbage-collector-backend"] +scheduler-backend = ["dep:waymark-scheduler-backend"] +worker-status-backend = [] +workflow-registry-backend = [] +webapp-backend = ["dep:waymark-webapp-backend", "dep:waymark-webapp-core"] diff --git a/crates/backend-memory/src/core_backend.rs b/crates/backend-memory/src/core_backend.rs new file mode 100644 index 00000000..d6e2da1e --- /dev/null +++ b/crates/backend-memory/src/core_backend.rs @@ -0,0 +1,158 @@ +use chrono::Utc; +use uuid::Uuid; +use waymark_core_backend::{ + ActionDone, BackendError, BackendResult, GraphUpdate, InstanceDone, InstanceLockStatus, + LockClaim, QueuedInstance, QueuedInstanceBatch, +}; + +#[async_trait::async_trait] +impl waymark_core_backend::CoreBackend for crate::MemoryBackend { + fn clone_box(&self) -> Box { + Box::new(self.clone()) + } + + async fn save_graphs( + &self, + claim: LockClaim, + graphs: &[GraphUpdate], + ) -> BackendResult> { + let mut stored = self.graph_updates.lock().expect("graph updates poisoned"); + stored.extend(graphs.iter().cloned()); + let mut guard = self.instance_locks.lock().expect("instance locks poisoned"); + let mut locks = Vec::with_capacity(graphs.len()); + for graph in graphs { + if let Some((Some(lock_uuid), lock_expires_at)) = guard.get_mut(&graph.instance_id) + && *lock_uuid == claim.lock_uuid + && lock_expires_at.is_none_or(|expires_at| expires_at < claim.lock_expires_at) + { + *lock_expires_at = Some(claim.lock_expires_at); + } + let (lock_uuid, lock_expires_at) = guard + .get(&graph.instance_id) + .cloned() + .unwrap_or((None, None)); + locks.push(InstanceLockStatus { + instance_id: graph.instance_id, + lock_uuid, + lock_expires_at, + }); + } + Ok(locks) + } + + async fn save_actions_done(&self, actions: &[ActionDone]) -> 
BackendResult<()> { + let mut stored = self.actions_done.lock().expect("actions done poisoned"); + stored.extend(actions.iter().cloned()); + Ok(()) + } + + async fn save_instances_done(&self, instances: &[InstanceDone]) -> BackendResult<()> { + let mut stored = self.instances_done.lock().expect("instances done poisoned"); + stored.extend(instances.iter().cloned()); + if !instances.is_empty() { + let mut locks = self.instance_locks.lock().expect("instance locks poisoned"); + for instance in instances { + locks.remove(&instance.executor_id); + } + } + Ok(()) + } + + async fn get_queued_instances( + &self, + size: usize, + claim: LockClaim, + ) -> BackendResult { + if size == 0 { + return Ok(QueuedInstanceBatch { + instances: Vec::new(), + }); + } + let queue = match &self.instance_queue { + Some(queue) => queue, + None => { + return Ok(QueuedInstanceBatch { + instances: Vec::new(), + }); + } + }; + let mut guard = queue.lock().expect("instance queue poisoned"); + let now = Utc::now(); + let mut instances = Vec::new(); + while instances.len() < size { + let Some(instance) = guard.front() else { + break; + }; + if let Some(scheduled_at) = instance.scheduled_at + && scheduled_at > now + { + break; + } + let instance = guard.pop_front().expect("instance queue empty"); + instances.push(instance); + } + if !instances.is_empty() { + let mut locks = self.instance_locks.lock().expect("instance locks poisoned"); + for instance in &instances { + locks.insert( + instance.instance_id, + (Some(claim.lock_uuid), Some(claim.lock_expires_at)), + ); + } + } + Ok(QueuedInstanceBatch { instances }) + } + + async fn queue_instances(&self, instances: &[QueuedInstance]) -> BackendResult<()> { + if instances.is_empty() { + return Ok(()); + } + let queue = self.instance_queue.as_ref().ok_or_else(|| { + BackendError::Message("memory backend missing instance queue".to_string()) + })?; + let mut guard = queue.lock().expect("instance queue poisoned"); + for instance in instances { + 
guard.push_back(instance.clone()); + } + Ok(()) + } + + async fn refresh_instance_locks( + &self, + claim: LockClaim, + instance_ids: &[Uuid], + ) -> BackendResult> { + let mut guard = self.instance_locks.lock().expect("instance locks poisoned"); + let mut locks = Vec::new(); + for instance_id in instance_ids { + let entry = guard + .entry(*instance_id) + .or_insert((Some(claim.lock_uuid), Some(claim.lock_expires_at))); + if entry.0 == Some(claim.lock_uuid) { + entry.1 = Some(claim.lock_expires_at); + } + locks.push(InstanceLockStatus { + instance_id: *instance_id, + lock_uuid: entry.0, + lock_expires_at: entry.1, + }); + } + Ok(locks) + } + + async fn release_instance_locks( + &self, + lock_uuid: Uuid, + instance_ids: &[Uuid], + ) -> BackendResult<()> { + let mut guard = self.instance_locks.lock().expect("instance locks poisoned"); + for instance_id in instance_ids { + if let Some((current_lock, _)) = guard.get(instance_id) + && *current_lock == Some(lock_uuid) + { + guard.remove(instance_id); + } + } + Ok(()) + } +} diff --git a/crates/backend-memory/src/garbage_collector_backend.rs b/crates/backend-memory/src/garbage_collector_backend.rs new file mode 100644 index 00000000..a1274935 --- /dev/null +++ b/crates/backend-memory/src/garbage_collector_backend.rs @@ -0,0 +1,15 @@ +use chrono::{DateTime, Utc}; +use waymark_garbage_collector_backend::{ + BackendResult, GarbageCollectionResult, GarbageCollectorBackend, +}; + +#[async_trait::async_trait] +impl GarbageCollectorBackend for crate::MemoryBackend { + async fn collect_done_instances( + &self, + _older_than: DateTime, + _limit: usize, + ) -> BackendResult { + Ok(GarbageCollectionResult::default()) + } +} diff --git a/crates/backend-memory/src/lib.rs b/crates/backend-memory/src/lib.rs new file mode 100644 index 00000000..e2ef56e4 --- /dev/null +++ b/crates/backend-memory/src/lib.rs @@ -0,0 +1,111 @@ +//! In-memory backend that prints persistence operations. 
+ +#[cfg(feature = "core-backend")] +mod core_backend; + +#[cfg(feature = "garbage-collector-backend")] +mod garbage_collector_backend; + +#[cfg(feature = "scheduler-backend")] +mod scheduler_backend; + +#[cfg(feature = "webapp-backend")] +mod webapp_backend; + +#[cfg(feature = "worker-status-backend")] +mod worker_status_backend; + +#[cfg(feature = "workflow-registry-backend")] +mod workflow_registry_backend; + +use std::collections::{HashMap, VecDeque}; +use std::sync::{Arc, Mutex}; + +use chrono::{DateTime, Utc}; +use uuid::Uuid; + +use waymark_core_backend::{ActionDone, GraphUpdate, InstanceDone, QueuedInstance}; +use waymark_scheduler_core::{ScheduleId, WorkflowSchedule}; +use waymark_worker_status_backend::WorkerStatusUpdate; +use waymark_workflow_registry_backend::WorkflowRegistration; + +type WorkflowVersionKey = (String, String); +type WorkflowVersionValue = (Uuid, WorkflowRegistration); +type WorkflowVersionStore = HashMap; +type InstanceLockStore = HashMap, Option>)>; + +/// Backend that stores updates in memory for tests or local runs. 
+#[derive(Clone)] +pub struct MemoryBackend { + instance_queue: Option>>>, + graph_updates: Arc>>, + actions_done: Arc>>, + instances_done: Arc>>, + worker_status_updates: Arc>>, + #[cfg_attr(not(feature = "workflow-registry-backend"), allow(dead_code))] + workflow_versions: Arc>, + #[cfg_attr(not(feature = "scheduler-backend"), allow(dead_code))] + schedules: Arc>>, + #[cfg_attr(not(feature = "core-backend"), allow(dead_code))] + instance_locks: Arc>, +} + +impl Default for MemoryBackend { + fn default() -> Self { + Self { + instance_queue: None, + graph_updates: Arc::new(Mutex::new(Vec::new())), + actions_done: Arc::new(Mutex::new(Vec::new())), + instances_done: Arc::new(Mutex::new(Vec::new())), + worker_status_updates: Arc::new(Mutex::new(Vec::new())), + workflow_versions: Arc::new(Mutex::new(HashMap::new())), + schedules: Arc::new(Mutex::new(HashMap::new())), + instance_locks: Arc::new(Mutex::new(HashMap::new())), + } + } +} + +impl MemoryBackend { + pub fn new() -> Self { + Self::default() + } + + pub fn with_queue(queue: Arc>>) -> Self { + Self { + instance_queue: Some(queue), + ..Self::default() + } + } + + pub fn instance_queue(&self) -> Option>>> { + self.instance_queue.clone() + } + + pub fn graph_updates(&self) -> Vec { + self.graph_updates + .lock() + .expect("graph updates poisoned") + .clone() + } + + pub fn actions_done(&self) -> Vec { + self.actions_done + .lock() + .expect("actions done poisoned") + .clone() + } + + pub fn instances_done(&self) -> Vec { + self.instances_done + .lock() + .expect("instances done poisoned") + .clone() + } + + pub fn worker_status_updates(&self) -> Vec { + self.worker_status_updates + .lock() + .expect("worker status updates poisoned") + .clone() + } +} diff --git a/crates/backend-memory/src/scheduler_backend.rs b/crates/backend-memory/src/scheduler_backend.rs new file mode 100644 index 00000000..a69aa94e --- /dev/null +++ b/crates/backend-memory/src/scheduler_backend.rs @@ -0,0 +1,209 @@ +use chrono::Utc; +use 
uuid::Uuid; +use waymark_core_backend::{BackendError, BackendResult}; +use waymark_scheduler_backend::SchedulerBackend; +use waymark_scheduler_core::{ + CreateScheduleParams, ScheduleId, ScheduleType, WorkflowSchedule, compute_next_run, +}; + +#[async_trait::async_trait] +impl SchedulerBackend for crate::MemoryBackend { + async fn upsert_schedule(&self, params: &CreateScheduleParams) -> BackendResult { + let mut guard = self.schedules.lock().expect("schedules poisoned"); + let existing_schedule = guard.iter().find_map(|(id, schedule)| { + if schedule.workflow_name == params.workflow_name + && schedule.schedule_name == params.schedule_name + { + Some((*id, schedule.clone())) + } else { + None + } + }); + let schedule_id = existing_schedule + .as_ref() + .map(|(id, _)| *id) + .unwrap_or_else(ScheduleId::new); + let now = Utc::now(); + let next_run_at = match existing_schedule + .as_ref() + .and_then(|(_, schedule)| schedule.next_run_at) + { + Some(next_run_at) => Some(next_run_at), + None => Some( + compute_next_run( + params.schedule_type, + params.cron_expression.as_deref(), + params.interval_seconds, + params.jitter_seconds, + None, + ) + .map_err(BackendError::Message)?, + ), + }; + let schedule = WorkflowSchedule { + id: schedule_id.0, + workflow_name: params.workflow_name.clone(), + schedule_name: params.schedule_name.clone(), + schedule_type: params.schedule_type.as_str().to_string(), + cron_expression: params.cron_expression.clone(), + interval_seconds: params.interval_seconds, + jitter_seconds: params.jitter_seconds, + input_payload: params.input_payload.clone(), + status: "active".to_string(), + next_run_at, + last_run_at: existing_schedule + .as_ref() + .and_then(|(_, schedule)| schedule.last_run_at), + last_instance_id: existing_schedule + .as_ref() + .and_then(|(_, schedule)| schedule.last_instance_id), + created_at: existing_schedule + .as_ref() + .map(|(_, schedule)| schedule.created_at) + .unwrap_or(now), + updated_at: now, + priority: 
params.priority, + allow_duplicate: params.allow_duplicate, + }; + guard.insert(schedule_id, schedule); + Ok(schedule_id) + } + + async fn get_schedule(&self, id: ScheduleId) -> BackendResult { + let guard = self.schedules.lock().expect("schedules poisoned"); + guard + .get(&id) + .cloned() + .ok_or_else(|| BackendError::Message(format!("schedule not found: {id}"))) + } + + async fn get_schedule_by_name( + &self, + workflow_name: &str, + schedule_name: &str, + ) -> BackendResult> { + let guard = self.schedules.lock().expect("schedules poisoned"); + Ok(guard + .values() + .find(|schedule| { + schedule.workflow_name == workflow_name + && schedule.schedule_name == schedule_name + && schedule.status != "deleted" + }) + .cloned()) + } + + async fn list_schedules( + &self, + limit: i64, + offset: i64, + ) -> BackendResult> { + let guard = self.schedules.lock().expect("schedules poisoned"); + let mut schedules: Vec<_> = guard + .values() + .filter(|schedule| schedule.status != "deleted") + .cloned() + .collect(); + schedules.sort_by(|a, b| { + (&a.workflow_name, &a.schedule_name).cmp(&(&b.workflow_name, &b.schedule_name)) + }); + let start = offset.max(0) as usize; + let end = start.saturating_add(limit.max(0) as usize); + Ok(schedules + .into_iter() + .skip(start) + .take(end - start) + .collect()) + } + + async fn count_schedules(&self) -> BackendResult { + let guard = self.schedules.lock().expect("schedules poisoned"); + Ok(guard + .values() + .filter(|schedule| schedule.status != "deleted") + .count() as i64) + } + + async fn update_schedule_status(&self, id: ScheduleId, status: &str) -> BackendResult { + let mut guard = self.schedules.lock().expect("schedules poisoned"); + if let Some(schedule) = guard.get_mut(&id) { + schedule.status = status.to_string(); + schedule.updated_at = Utc::now(); + Ok(true) + } else { + Ok(false) + } + } + + async fn delete_schedule(&self, id: ScheduleId) -> BackendResult { + SchedulerBackend::update_schedule_status(self, id, 
"deleted").await + } + + async fn find_due_schedules(&self, limit: i32) -> BackendResult> { + let guard = self.schedules.lock().expect("schedules poisoned"); + let now = Utc::now(); + let mut schedules: Vec<_> = guard + .values() + .filter(|schedule| { + schedule.status == "active" + && schedule + .next_run_at + .map(|next| next <= now) + .unwrap_or(false) + }) + .cloned() + .collect(); + schedules.sort_by_key(|schedule| schedule.next_run_at); + Ok(schedules.into_iter().take(limit as usize).collect()) + } + + async fn has_running_instance(&self, _schedule_id: ScheduleId) -> BackendResult { + Ok(false) + } + + async fn mark_schedule_executed( + &self, + schedule_id: ScheduleId, + instance_id: Uuid, + ) -> BackendResult<()> { + let mut guard = self.schedules.lock().expect("schedules poisoned"); + let schedule = guard + .get_mut(&schedule_id) + .ok_or_else(|| BackendError::Message(format!("schedule not found: {schedule_id}")))?; + let schedule_type = ScheduleType::parse(&schedule.schedule_type) + .ok_or_else(|| BackendError::Message("invalid schedule type".to_string()))?; + let next_run_at = compute_next_run( + schedule_type, + schedule.cron_expression.as_deref(), + schedule.interval_seconds, + schedule.jitter_seconds, + Some(Utc::now()), + ) + .map_err(BackendError::Message)?; + schedule.last_run_at = Some(Utc::now()); + schedule.last_instance_id = Some(instance_id); + schedule.next_run_at = Some(next_run_at); + schedule.updated_at = Utc::now(); + Ok(()) + } + + async fn skip_schedule_run(&self, schedule_id: ScheduleId) -> BackendResult<()> { + let mut guard = self.schedules.lock().expect("schedules poisoned"); + let schedule = guard + .get_mut(&schedule_id) + .ok_or_else(|| BackendError::Message(format!("schedule not found: {schedule_id}")))?; + let schedule_type = ScheduleType::parse(&schedule.schedule_type) + .ok_or_else(|| BackendError::Message("invalid schedule type".to_string()))?; + let next_run_at = compute_next_run( + schedule_type, + 
schedule.cron_expression.as_deref(), + schedule.interval_seconds, + schedule.jitter_seconds, + Some(Utc::now()), + ) + .map_err(BackendError::Message)?; + schedule.next_run_at = Some(next_run_at); + schedule.updated_at = Utc::now(); + Ok(()) + } +} diff --git a/crates/backend-memory/src/webapp_backend.rs b/crates/backend-memory/src/webapp_backend.rs new file mode 100644 index 00000000..883f4076 --- /dev/null +++ b/crates/backend-memory/src/webapp_backend.rs @@ -0,0 +1,294 @@ +use std::collections::HashMap; + +use chrono::Utc; +use uuid::Uuid; +use waymark_webapp_backend::{BackendError, BackendResult, WebappBackend}; +use waymark_webapp_core::{ + ExecutionGraphView, InstanceDetail, InstanceStatus, InstanceSummary, ScheduleDetail, + ScheduleInvocationSummary, ScheduleSummary, TimelineEntry, WorkerActionRow, + WorkerAggregateStats, WorkerStatus, +}; +use waymark_worker_status_backend::WorkerStatusUpdate; + +#[async_trait::async_trait] +impl WebappBackend for crate::MemoryBackend { + async fn count_instances(&self, _search: Option<&str>) -> BackendResult { + Ok(0) + } + + async fn list_instances( + &self, + _search: Option<&str>, + _limit: i64, + _offset: i64, + ) -> BackendResult> { + Ok(Vec::new()) + } + + async fn get_instance(&self, instance_id: Uuid) -> BackendResult { + Err(BackendError::Message(format!( + "instance not found: {instance_id}" + ))) + } + + async fn get_execution_graph( + &self, + _instance_id: Uuid, + ) -> BackendResult> { + Ok(None) + } + + async fn get_workflow_graph( + &self, + _instance_id: Uuid, + ) -> BackendResult> { + Ok(None) + } + + async fn get_action_results(&self, _instance_id: Uuid) -> BackendResult> { + Ok(Vec::new()) + } + + async fn get_distinct_workflows(&self) -> BackendResult> { + Ok(Vec::new()) + } + + async fn get_distinct_statuses(&self) -> BackendResult> { + Ok(vec![ + InstanceStatus::Queued.to_string(), + InstanceStatus::Running.to_string(), + InstanceStatus::Completed.to_string(), + InstanceStatus::Failed.to_string(), + 
]) + } + + async fn count_schedules(&self) -> BackendResult { + let guard = self.schedules.lock().expect("schedules poisoned"); + Ok(guard + .values() + .filter(|schedule| schedule.status != "deleted") + .count() as i64) + } + + async fn list_schedules(&self, limit: i64, offset: i64) -> BackendResult> { + let guard = self.schedules.lock().expect("schedules poisoned"); + let mut schedules: Vec<_> = guard + .values() + .filter(|schedule| schedule.status != "deleted") + .cloned() + .collect(); + schedules.sort_by(|a, b| { + (&a.workflow_name, &a.schedule_name).cmp(&(&b.workflow_name, &b.schedule_name)) + }); + + let start = offset.max(0) as usize; + let page_limit = limit.max(0) as usize; + Ok(schedules + .into_iter() + .skip(start) + .take(page_limit) + .map(|schedule| ScheduleSummary { + id: schedule.id.to_string(), + workflow_name: schedule.workflow_name, + schedule_name: schedule.schedule_name, + schedule_type: schedule.schedule_type, + cron_expression: schedule.cron_expression, + interval_seconds: schedule.interval_seconds, + status: schedule.status, + next_run_at: schedule.next_run_at.map(|dt| dt.to_rfc3339()), + last_run_at: schedule.last_run_at.map(|dt| dt.to_rfc3339()), + created_at: schedule.created_at.to_rfc3339(), + }) + .collect()) + } + + async fn get_schedule(&self, schedule_id: Uuid) -> BackendResult { + let guard = self.schedules.lock().expect("schedules poisoned"); + let schedule = guard + .values() + .find(|schedule| schedule.id == schedule_id) + .cloned() + .ok_or_else(|| BackendError::Message(format!("schedule not found: {schedule_id}")))?; + + let input_payload = schedule.input_payload.as_ref().and_then(|bytes| { + rmp_serde::from_slice::(bytes) + .ok() + .and_then(|value| serde_json::to_string_pretty(&value).ok()) + }); + + Ok(ScheduleDetail { + id: schedule.id.to_string(), + workflow_name: schedule.workflow_name, + schedule_name: schedule.schedule_name, + schedule_type: schedule.schedule_type, + cron_expression: schedule.cron_expression, + 
interval_seconds: schedule.interval_seconds, + jitter_seconds: schedule.jitter_seconds, + status: schedule.status, + next_run_at: schedule.next_run_at.map(|dt| dt.to_rfc3339()), + last_run_at: schedule.last_run_at.map(|dt| dt.to_rfc3339()), + last_instance_id: schedule.last_instance_id.map(|id| id.to_string()), + created_at: schedule.created_at.to_rfc3339(), + updated_at: schedule.updated_at.to_rfc3339(), + priority: schedule.priority, + allow_duplicate: schedule.allow_duplicate, + input_payload, + }) + } + + async fn count_schedule_invocations(&self, _schedule_id: Uuid) -> BackendResult { + Ok(0) + } + + async fn list_schedule_invocations( + &self, + _schedule_id: Uuid, + _limit: i64, + _offset: i64, + ) -> BackendResult> { + Ok(Vec::new()) + } + + async fn update_schedule_status(&self, schedule_id: Uuid, status: &str) -> BackendResult { + let mut guard = self.schedules.lock().expect("schedules poisoned"); + let Some(schedule) = guard + .values_mut() + .find(|schedule| schedule.id == schedule_id) + else { + return Ok(false); + }; + schedule.status = status.to_string(); + schedule.updated_at = Utc::now(); + Ok(true) + } + + async fn get_distinct_schedule_statuses(&self) -> BackendResult> { + Ok(vec!["active".to_string(), "paused".to_string()]) + } + + async fn get_distinct_schedule_types(&self) -> BackendResult> { + Ok(vec!["cron".to_string(), "interval".to_string()]) + } + + async fn get_worker_action_stats( + &self, + _window_minutes: i64, + ) -> BackendResult> { + let statuses = latest_worker_statuses( + &self + .worker_status_updates + .lock() + .expect("worker status updates poisoned"), + ); + + Ok(statuses + .into_iter() + .map(|status| WorkerActionRow { + pool_id: status.pool_id.to_string(), + active_workers: status.active_workers as i64, + actions_per_sec: format!("{:.1}", status.actions_per_sec), + throughput_per_min: status.throughput_per_min as i64, + total_completed: status.total_completed, + median_dequeue_ms: status.median_dequeue_ms, + 
median_handling_ms: status.median_handling_ms, + last_action_at: status.last_action_at.map(|dt| dt.to_rfc3339()), + updated_at: status.updated_at.to_rfc3339(), + }) + .collect()) + } + + async fn get_worker_aggregate_stats( + &self, + _window_minutes: i64, + ) -> BackendResult { + let statuses = latest_worker_statuses( + &self + .worker_status_updates + .lock() + .expect("worker status updates poisoned"), + ); + + let active_worker_count = statuses + .iter() + .map(|status| status.active_workers as i64) + .sum(); + let total_in_flight = statuses + .iter() + .filter_map(|status| status.total_in_flight) + .sum(); + let total_queue_depth = statuses + .iter() + .filter_map(|status| status.dispatch_queue_size) + .sum(); + let actions_per_sec = statuses + .iter() + .map(|status| status.actions_per_sec) + .sum::(); + + Ok(WorkerAggregateStats { + active_worker_count, + actions_per_sec: format!("{:.1}", actions_per_sec), + total_in_flight, + total_queue_depth, + }) + } + + async fn worker_status_table_exists(&self) -> bool { + !self + .worker_status_updates + .lock() + .expect("worker status updates poisoned") + .is_empty() + } + + async fn schedules_table_exists(&self) -> bool { + !self + .schedules + .lock() + .expect("schedules poisoned") + .is_empty() + } + + async fn get_worker_statuses(&self, _window_minutes: i64) -> BackendResult> { + Ok(latest_worker_statuses( + &self + .worker_status_updates + .lock() + .expect("worker status updates poisoned"), + )) + } +} + +fn latest_worker_statuses(updates: &[WorkerStatusUpdate]) -> Vec { + let mut by_pool: HashMap = HashMap::new(); + for update in updates { + by_pool.insert(update.pool_id, update.clone()); + } + + let now = Utc::now(); + let mut statuses: Vec<_> = by_pool + .into_values() + .map(|status| WorkerStatus { + pool_id: status.pool_id, + active_workers: status.active_workers, + throughput_per_min: status.throughput_per_min, + actions_per_sec: status.actions_per_sec, + total_completed: status.total_completed, + 
last_action_at: status.last_action_at, + updated_at: now, + median_dequeue_ms: status.median_dequeue_ms, + median_handling_ms: status.median_handling_ms, + dispatch_queue_size: Some(status.dispatch_queue_size), + total_in_flight: Some(status.total_in_flight), + median_instance_duration_secs: status.median_instance_duration_secs, + active_instance_count: status.active_instance_count, + total_instances_completed: status.total_instances_completed, + instances_per_sec: status.instances_per_sec, + instances_per_min: status.instances_per_min, + time_series: status.time_series, + }) + .collect(); + + statuses.sort_by(|left, right| right.actions_per_sec.total_cmp(&left.actions_per_sec)); + statuses +} diff --git a/crates/backend-memory/src/worker_status_backend.rs b/crates/backend-memory/src/worker_status_backend.rs new file mode 100644 index 00000000..dbca9794 --- /dev/null +++ b/crates/backend-memory/src/worker_status_backend.rs @@ -0,0 +1,13 @@ +use waymark_worker_status_backend::{BackendResult, WorkerStatusBackend, WorkerStatusUpdate}; + +#[async_trait::async_trait] +impl WorkerStatusBackend for crate::MemoryBackend { + async fn upsert_worker_status(&self, status: &WorkerStatusUpdate) -> BackendResult<()> { + let mut stored = self + .worker_status_updates + .lock() + .expect("worker status updates poisoned"); + stored.push(status.clone()); + Ok(()) + } +} diff --git a/crates/backend-memory/src/workflow_registry_backend.rs b/crates/backend-memory/src/workflow_registry_backend.rs new file mode 100644 index 00000000..e820b5a9 --- /dev/null +++ b/crates/backend-memory/src/workflow_registry_backend.rs @@ -0,0 +1,58 @@ +use uuid::Uuid; +use waymark_workflow_registry_backend::{ + BackendError, BackendResult, WorkflowRegistration, WorkflowRegistryBackend, WorkflowVersion, +}; + +#[async_trait::async_trait] +impl WorkflowRegistryBackend for crate::MemoryBackend { + async fn upsert_workflow_version( + &self, + registration: &WorkflowRegistration, + ) -> BackendResult { + let mut 
guard = self + .workflow_versions + .lock() + .expect("workflow versions poisoned"); + let key = ( + registration.workflow_name.clone(), + registration.workflow_version.clone(), + ); + if let Some((id, existing)) = guard.get(&key) { + if existing.ir_hash != registration.ir_hash { + return Err(BackendError::Message(format!( + "workflow version already exists with different IR hash: {}@{}", + registration.workflow_name, registration.workflow_version + ))); + } + return Ok(*id); + } + + let id = Uuid::new_v4(); + guard.insert(key, (id, registration.clone())); + Ok(id) + } + + async fn get_workflow_versions(&self, ids: &[Uuid]) -> BackendResult> { + if ids.is_empty() { + return Ok(Vec::new()); + } + let guard = self + .workflow_versions + .lock() + .expect("workflow versions poisoned"); + let mut versions = Vec::new(); + for (id, registration) in guard.values() { + if ids.contains(id) { + versions.push(WorkflowVersion { + id: *id, + workflow_name: registration.workflow_name.clone(), + workflow_version: registration.workflow_version.clone(), + ir_hash: registration.ir_hash.clone(), + program_proto: registration.program_proto.clone(), + concurrent: registration.concurrent, + }); + } + } + Ok(versions) + } +} diff --git a/crates/backends-core/Cargo.toml b/crates/backends-core/Cargo.toml new file mode 100644 index 00000000..194062c0 --- /dev/null +++ b/crates/backends-core/Cargo.toml @@ -0,0 +1,15 @@ +[package] +name = "waymark-backends-core" +version = "0.1.0" +edition = "2024" + +[dependencies] +serde_json = { workspace = true } +thiserror = { workspace = true } +sqlx = { workspace = true, optional = true } + +[features] +default = ["sqlx-error"] + +# TODO: this has to abstracted away since not all backends will use sqlx. +sqlx-error = ["dep:sqlx"] diff --git a/crates/backends-core/src/lib.rs b/crates/backends-core/src/lib.rs new file mode 100644 index 00000000..50f807b4 --- /dev/null +++ b/crates/backends-core/src/lib.rs @@ -0,0 +1,29 @@ +//! 
Core primitives for various waymark subsystem backends. + +/// The common backend error. +/// +/// TODO: move away from a shared notion of backend error to use concrete error +/// type per-operation (rather than per-subsystem or per-crate). +#[derive(Debug, thiserror::Error)] +pub enum BackendError { + #[error("{0}")] + Message(String), + + #[error(transparent)] + Inner(Inner), + + #[error(transparent)] + Serialization(serde_json::Error), +} + +#[cfg(feature = "sqlx-error")] +pub type InnerError = sqlx::Error; + +#[cfg(not(feature = "sqlx-error"))] +pub type InnerError = (); + +/// Utility type alias for backend results. +/// +/// TODO: move away from the single-`Result` type aliases as we want to vary +/// rrors per-call. +pub type BackendResult = Result>; diff --git a/crates/core-backend/Cargo.toml b/crates/core-backend/Cargo.toml new file mode 100644 index 00000000..da2aa394 --- /dev/null +++ b/crates/core-backend/Cargo.toml @@ -0,0 +1,14 @@ +[package] +name = "waymark-core-backend" +version = "0.1.0" +edition = "2024" + +[dependencies] +async-trait = { workspace = true } +uuid = { workspace = true } +serde = { workspace = true, features = ["derive"] } +serde_json = { workspace = true } +chrono = { workspace = true } +waymark-runner-state = { workspace = true } +waymark-dag = { workspace = true } +waymark-backends-core = { workspace = true } diff --git a/crates/core-backend/src/data.rs b/crates/core-backend/src/data.rs new file mode 100644 index 00000000..d9320e71 --- /dev/null +++ b/crates/core-backend/src/data.rs @@ -0,0 +1,150 @@ +// The models that we use for our backends are similar to the ones that we +// have specified in our database/Postgres backend, but not 1:1. 
It's better for +// us to internally convert within the given backend + +use std::{ + collections::{HashMap, HashSet}, + sync::Arc, +}; + +use chrono::{DateTime, Utc}; +use serde::{Deserialize, Serialize}; +use uuid::Uuid; +use waymark_dag::DAG; +use waymark_runner_state::{ExecutionEdge, ExecutionNode, NodeStatus, RunnerState}; + +#[derive(Clone, Debug, Serialize, Deserialize)] +/// Queued instance payload for the run loop. +pub struct QueuedInstance { + pub workflow_version_id: Uuid, + #[serde(default)] + pub schedule_id: Option, + #[serde(skip, default)] + pub dag: Option>, + pub entry_node: Uuid, + pub state: Option, + #[serde( + default = "default_action_results", + deserialize_with = "deserialize_action_results" + )] + pub action_results: HashMap, + #[serde(default = "default_instance_id")] + pub instance_id: Uuid, + #[serde(default)] + pub scheduled_at: Option>, +} + +#[derive(Clone, Debug)] +/// Result payload for queued instance polling. +pub struct QueuedInstanceBatch { + pub instances: Vec, +} + +#[derive(Clone, Debug)] +/// Lock claim settings for owned instances. +pub struct LockClaim { + pub lock_uuid: Uuid, + pub lock_expires_at: DateTime, +} + +#[derive(Clone, Debug)] +/// Current lock status for an instance. +pub struct InstanceLockStatus { + pub instance_id: Uuid, + pub lock_uuid: Option, + pub lock_expires_at: Option>, +} + +#[derive(Clone, Debug, Serialize, Deserialize)] +/// Completed instance payload with result or exception. +pub struct InstanceDone { + pub executor_id: Uuid, + pub entry_node: Uuid, + pub result: Option, + pub error: Option, +} + +#[derive(Clone, Debug, Serialize, Deserialize)] +/// Batch payload representing an updated execution graph snapshot. +/// +/// This intentionally stores only runtime nodes and edges (no DAG template or +/// derived caches) so persistence stays lightweight. 
+pub struct GraphUpdate { + pub instance_id: Uuid, + pub nodes: HashMap, + pub edges: HashSet, +} + +impl GraphUpdate { + pub fn from_state(instance_id: Uuid, state: &RunnerState) -> Self { + Self { + instance_id, + nodes: state.nodes.clone(), + edges: state.edges.clone(), + } + } + + pub fn next_scheduled_at(&self) -> DateTime { + let mut next: Option> = None; + for node in self.nodes.values() { + if matches!(node.status, NodeStatus::Completed | NodeStatus::Failed) { + continue; + } + if let Some(scheduled_at) = node.scheduled_at { + next = Some(match next { + Some(existing) => existing.min(scheduled_at), + None => scheduled_at, + }); + } + } + next.unwrap_or_else(Utc::now) + } +} + +#[derive(Clone, Debug, Serialize, Deserialize)] +/// Batch payload representing a finished action attempt (success or failure). +pub struct ActionDone { + pub execution_id: Uuid, + pub attempt: i32, + pub status: ActionAttemptStatus, + pub started_at: Option>, + pub completed_at: Option>, + pub duration_ms: Option, + pub result: serde_json::Value, +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum ActionAttemptStatus { + Completed, + Failed, + TimedOut, +} + +impl std::fmt::Display for ActionAttemptStatus { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::Completed => write!(f, "completed"), + Self::Failed => write!(f, "failed"), + Self::TimedOut => write!(f, "timed_out"), + } + } +} + +fn default_instance_id() -> Uuid { + Uuid::new_v4() +} + +fn default_action_results() -> HashMap { + HashMap::new() +} + +fn deserialize_action_results<'de, D>( + deserializer: D, +) -> Result, D::Error> +where + D: serde::Deserializer<'de>, +{ + let value = Option::>::deserialize(deserializer)?; + Ok(value.unwrap_or_default()) +} diff --git a/crates/core-backend/src/lib.rs b/crates/core-backend/src/lib.rs new file mode 100644 index 00000000..e38f5cdd --- /dev/null +++ 
b/crates/core-backend/src/lib.rs @@ -0,0 +1,58 @@ +//! Core backend traits for waymark. + +mod data; + +use uuid::Uuid; + +pub use waymark_backends_core::{BackendError, BackendResult}; + +pub use self::data::*; + +/// Abstract persistence backend for runner state. +#[async_trait::async_trait] +pub trait CoreBackend: Send + Sync { + fn clone_box(&self) -> Box; + + /// Persist updated execution graphs. + async fn save_graphs( + &self, + claim: LockClaim, + graphs: &[GraphUpdate], + ) -> BackendResult>; + + /// Persist finished action attempts (success or failure). + async fn save_actions_done(&self, actions: &[ActionDone]) -> BackendResult<()>; + + /// Return up to size queued instances without blocking. + async fn get_queued_instances( + &self, + size: usize, + claim: LockClaim, + ) -> BackendResult; + + /// Refresh lock expiry for owned instances. + async fn refresh_instance_locks( + &self, + claim: LockClaim, + instance_ids: &[Uuid], + ) -> BackendResult>; + + /// Release instance locks when evicting from memory. + async fn release_instance_locks( + &self, + lock_uuid: Uuid, + instance_ids: &[Uuid], + ) -> BackendResult<()>; + + /// Persist completed workflow instances. + async fn save_instances_done(&self, instances: &[InstanceDone]) -> BackendResult<()>; + + /// Insert queued instances for run-loop consumption. 
+ async fn queue_instances(&self, instances: &[QueuedInstance]) -> BackendResult<()>; +} + +impl Clone for Box { + fn clone(&self) -> Self { + self.clone_box() + } +} diff --git a/crates/garbage-collector-backend/Cargo.toml b/crates/garbage-collector-backend/Cargo.toml new file mode 100644 index 00000000..e1e4f300 --- /dev/null +++ b/crates/garbage-collector-backend/Cargo.toml @@ -0,0 +1,9 @@ +[package] +name = "waymark-garbage-collector-backend" +version = "0.1.0" +edition = "2024" + +[dependencies] +async-trait = { workspace = true } +chrono = { workspace = true } +waymark-backends-core = { workspace = true } diff --git a/crates/garbage-collector-backend/src/lib.rs b/crates/garbage-collector-backend/src/lib.rs new file mode 100644 index 00000000..d3f2e234 --- /dev/null +++ b/crates/garbage-collector-backend/src/lib.rs @@ -0,0 +1,20 @@ +use chrono::{DateTime, Utc}; + +pub use waymark_backends_core::{BackendError, BackendResult}; + +#[derive(Clone, Copy, Debug, Default)] +/// Summary of a garbage collection sweep. +pub struct GarbageCollectionResult { + pub deleted_instances: usize, + pub deleted_actions: usize, +} + +/// Backend capability for deleting old finished workflow data. 
+#[async_trait::async_trait] +pub trait GarbageCollectorBackend: Send + Sync { + async fn collect_done_instances( + &self, + older_than: DateTime, + limit: usize, + ) -> BackendResult; +} diff --git a/crates/observability-macros/src/lib.rs b/crates/observability-macros/src/lib.rs index 9fc1df7b..e10c19ec 100644 --- a/crates/observability-macros/src/lib.rs +++ b/crates/observability-macros/src/lib.rs @@ -6,10 +6,10 @@ use syn::{ItemFn, parse_macro_input}; pub fn obs(args: TokenStream, input: TokenStream) -> TokenStream { let mut item = parse_macro_input!(input as ItemFn); let attr = if args.is_empty() { - syn::parse_quote!(#[cfg_attr(feature = "trace", tracing::instrument(skip_all))]) + syn::parse_quote!(#[cfg_attr(feature = "trace", ::waymark_observability::__inner::tracing::instrument(skip_all))]) } else { let args = proc_macro2::TokenStream::from(args); - syn::parse_quote!(#[cfg_attr(feature = "trace", tracing::instrument(#args))]) + syn::parse_quote!(#[cfg_attr(feature = "trace", ::waymark_observability::__inner::tracing::instrument(#args))]) }; item.attrs.push(attr); TokenStream::from(quote!(#item)) diff --git a/crates/observability/Cargo.toml b/crates/observability/Cargo.toml new file mode 100644 index 00000000..bc27b66c --- /dev/null +++ b/crates/observability/Cargo.toml @@ -0,0 +1,8 @@ +[package] +name = "waymark-observability" +version = "0.1.0" +edition = "2024" + +[dependencies] +waymark-observability-macros = { workspace = true } +tracing = { workspace = true } diff --git a/crates/observability/src/lib.rs b/crates/observability/src/lib.rs new file mode 100644 index 00000000..d2fa50f6 --- /dev/null +++ b/crates/observability/src/lib.rs @@ -0,0 +1,8 @@ +pub use waymark_observability_macros::obs; + +#[doc(hidden)] +pub mod __inner { + pub mod tracing { + pub use tracing::instrument; + } +} diff --git a/crates/runner-state/Cargo.toml b/crates/runner-state/Cargo.toml new file mode 100644 index 00000000..6a64d994 --- /dev/null +++ 
b/crates/runner-state/Cargo.toml @@ -0,0 +1,16 @@ +[package] +name = "waymark-runner-state" +version = "0.1.0" +edition = "2024" + +[dependencies] +chrono = { workspace = true, features = ["serde", "clock"] } +serde = { workspace = true, features = ["derive"] } +serde_json = { workspace = true } +thiserror = { workspace = true } +uuid = { workspace = true } +waymark-dag = { workspace = true } +waymark-proto = { workspace = true } + +[features] +trace = [] diff --git a/crates/runner-state/src/lib.rs b/crates/runner-state/src/lib.rs new file mode 100644 index 00000000..5c7ae36b --- /dev/null +++ b/crates/runner-state/src/lib.rs @@ -0,0 +1,5 @@ +mod state; +mod util; +pub mod value_visitor; + +pub use self::state::*; diff --git a/crates/runner-state/src/state.rs b/crates/runner-state/src/state.rs new file mode 100644 index 00000000..da418624 --- /dev/null +++ b/crates/runner-state/src/state.rs @@ -0,0 +1,2206 @@ +//! Execution-time DAG state with unrolled nodes and symbolic values. + +use std::collections::{HashMap, HashSet}; +use std::fmt; +use std::sync::Arc; + +use chrono::{DateTime, Utc}; +use serde::{Deserialize, Serialize}; +use uuid::Uuid; + +use crate::util::is_truthy; +use crate::value_visitor::{ValueExpr, collect_value_sources, resolve_value_tree}; +use waymark_dag::{ + ActionCallNode, AggregatorNode, AssignmentNode, DAG, DAGNode, EdgeType, FnCallNode, JoinNode, + ReturnNode, SleepNode, +}; +use waymark_proto::ast as ir; + +/// Raised when the runner state cannot be updated safely. 
+#[derive(Debug, thiserror::Error)] +#[error("{0}")] +pub struct RunnerStateError(pub String); + +#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] +pub struct ActionCallSpec { + pub action_name: String, + pub module_name: Option, + pub kwargs: HashMap, +} + +#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] +pub struct LiteralValue { + pub value: serde_json::Value, +} + +#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] +pub struct VariableValue { + pub name: String, +} + +#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] +pub struct ActionResultValue { + pub node_id: Uuid, + pub action_name: String, + pub iteration_index: Option, + pub result_index: Option, +} + +impl ActionResultValue { + pub fn label(&self) -> String { + let mut label = self.action_name.clone(); + if let Some(idx) = self.iteration_index { + label = format!("{label}[{idx}]"); + } + if let Some(idx) = self.result_index { + label = format!("{label}[{idx}]"); + } + label + } +} + +#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] +pub struct BinaryOpValue { + pub left: Box, + pub op: i32, + pub right: Box, +} + +#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] +pub struct UnaryOpValue { + pub op: i32, + pub operand: Box, +} + +#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] +pub struct ListValue { + pub elements: Vec, +} + +#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] +pub struct DictEntryValue { + pub key: ValueExpr, + pub value: ValueExpr, +} + +#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] +pub struct DictValue { + pub entries: Vec, +} + +#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] +pub struct IndexValue { + pub object: Box, + pub index: Box, +} + +#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] +pub struct DotValue { + pub object: Box, + pub attribute: String, +} + +#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] +pub struct FunctionCallValue { + pub name: String, + 
pub args: Vec, + pub kwargs: HashMap, + pub global_function: Option, +} + +#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] +pub struct SpreadValue { + pub collection: Box, + pub loop_var: String, + pub action: ActionCallSpec, +} + +#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] +#[serde(tag = "type", content = "data")] +pub enum NodeStatus { + Queued, + Running, + Completed, + Failed, +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum ExecutionNodeType { + Input, + Output, + Assignment, + ActionCall, + FnCall, + Parallel, + Aggregator, + Branch, + Join, + Return, + Break, + Continue, + Sleep, + Expression, +} + +impl ExecutionNodeType { + pub fn as_str(&self) -> &'static str { + match self { + ExecutionNodeType::Input => "input", + ExecutionNodeType::Output => "output", + ExecutionNodeType::Assignment => "assignment", + ExecutionNodeType::ActionCall => "action_call", + ExecutionNodeType::FnCall => "fn_call", + ExecutionNodeType::Parallel => "parallel", + ExecutionNodeType::Aggregator => "aggregator", + ExecutionNodeType::Branch => "branch", + ExecutionNodeType::Join => "join", + ExecutionNodeType::Return => "return", + ExecutionNodeType::Break => "break", + ExecutionNodeType::Continue => "continue", + ExecutionNodeType::Sleep => "sleep", + ExecutionNodeType::Expression => "expression", + } + } +} + +impl TryFrom<&str> for ExecutionNodeType { + type Error = RunnerStateError; + + fn try_from(value: &str) -> Result { + match value { + "input" => Ok(ExecutionNodeType::Input), + "output" => Ok(ExecutionNodeType::Output), + "assignment" => Ok(ExecutionNodeType::Assignment), + "action_call" => Ok(ExecutionNodeType::ActionCall), + "fn_call" => Ok(ExecutionNodeType::FnCall), + "parallel" => Ok(ExecutionNodeType::Parallel), + "aggregator" => Ok(ExecutionNodeType::Aggregator), + "branch" => Ok(ExecutionNodeType::Branch), + "join" => Ok(ExecutionNodeType::Join), + "return" => Ok(ExecutionNodeType::Return), + "break" => 
Ok(ExecutionNodeType::Break), + "continue" => Ok(ExecutionNodeType::Continue), + "sleep" => Ok(ExecutionNodeType::Sleep), + "expression" => Ok(ExecutionNodeType::Expression), + _ => Err(RunnerStateError(format!( + "unknown execution node type: {value}" + ))), + } + } +} + +#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] +pub struct ExecutionNode { + pub node_id: Uuid, + pub node_type: String, + pub label: String, + pub status: NodeStatus, + pub template_id: Option, + pub targets: Vec, + pub action: Option, + pub value_expr: Option, + pub assignments: HashMap, + pub action_attempt: i32, + #[serde(default)] + pub started_at: Option>, + #[serde(default)] + pub completed_at: Option>, + #[serde(default)] + pub scheduled_at: Option>, +} + +impl ExecutionNode { + pub fn node_type_enum(&self) -> Result { + ExecutionNodeType::try_from(self.node_type.as_str()) + } + + pub fn is_action_call(&self) -> bool { + matches!( + ExecutionNodeType::try_from(self.node_type.as_str()), + Ok(ExecutionNodeType::ActionCall) + ) + } + + pub fn is_sleep(&self) -> bool { + matches!( + ExecutionNodeType::try_from(self.node_type.as_str()), + Ok(ExecutionNodeType::Sleep) + ) + } +} + +#[derive(Clone, Debug, Default)] +pub struct QueueNodeParams { + pub node_id: Option, + pub template_id: Option, + pub targets: Option>, + pub action: Option, + pub value_expr: Option, + pub scheduled_at: Option>, +} + +#[derive(Clone, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)] +pub struct ExecutionEdge { + pub source: Uuid, + pub target: Uuid, + pub edge_type: EdgeType, +} + +/// Track queued/executed DAG nodes with an unrolled, symbolic state. +/// +/// Design overview: +/// - The runner state is not a variable heap; it is the runtime graph itself, +/// unrolled to the exact nodes that have been queued or executed. +/// - Each execution node stores assignments as symbolic expressions so action +/// results can be replayed later without having the concrete payloads. 
+/// - Data-flow edges encode which execution node supplies a value to another, +/// while state-machine edges encode execution ordering and control flow. This +/// mirrors how the ground truth IR->DAG functions. +/// +/// Expected usage: +/// - Callers queue nodes as the program executes (ie. the DAG template is +/// walked) so loops and spreads expand into explicit iterations. +/// - Callers never mutate variables directly; they record assignments on nodes +/// and let replay walk the graph to reconstruct values. +/// - Persisted state can be rehydrated only with nodes/edges. The constructor will +/// rebuild in-memory cache (like timeline ordering and latest assignment tracking). +/// +/// In short, RunnerState is the ground-truth runtime DAG: symbolic assignments +/// plus control/data edges, suitable for replay and visualization. +/// +/// Action nodes represent our "frontier" nodes. Because of how we construct the graph and always +/// greedily walk the state until we hit the next actions that are possible to run, we guarantee that +/// leaf nodes are only ever actions. +/// +/// Cycle walkthrough (mid-loop example): +/// Suppose we are partway through: +/// - results = [] +/// - for item in items: +/// - action_result = @action(item) +/// - results = results + [action_result + 1] +/// +/// On a single iteration update: +/// 1) The runner queues an action node for @action(item). +/// - A new execution node is created with a UUID id. +/// - Its assignments map action_result -> ActionResultValue(node_id). +/// - Data-flow edges are added from the node that last defined `item`. +/// 2) The runner queues the assignment node for results update. +/// - The RHS expression is materialized: +/// results + [action_result + 1] becomes a BinaryOpValue whose tree +/// contains the ActionResultValue from step (1), plus a LiteralValue(1). +/// - Data-flow edges are added from the prior results definition node and +/// from the action node created in step (1). 
+/// - Latest assignment tracking is updated so `results` now points to this +/// new execution node. +/// +/// After this iteration, the state graph has explicit nodes for the current +/// action and the results update. Subsequent iterations repeat the same +/// sequence, producing a chain of assignments where replay can reconstruct the +/// incremental `results` value by following data-flow edges. +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct RunnerState { + #[serde(skip, default)] + pub dag: Option>, + pub nodes: HashMap, + pub edges: HashSet, + pub ready_queue: Vec, + pub timeline: Vec, + link_queued_nodes: bool, + latest_assignments: HashMap, + graph_dirty: bool, +} + +impl RunnerState { + pub fn new( + dag: Option>, + nodes: Option>, + edges: Option>, + link_queued_nodes: bool, + ) -> Self { + let mut state = Self { + dag, + nodes: nodes.unwrap_or_default(), + edges: edges.unwrap_or_default(), + ready_queue: Vec::new(), + timeline: Vec::new(), + link_queued_nodes, + latest_assignments: HashMap::new(), + graph_dirty: false, + }; + if !state.nodes.is_empty() || !state.edges.is_empty() { + state.rehydrate_state(); + } + state + } + + /// TODO: make this `pub(crate)` again + pub fn set_link_queued_nodes(&mut self, value: bool) { + self.link_queued_nodes = value; + } + + /// TODO: make this `pub(crate)` again + pub fn latest_assignment(&self, name: &str) -> Option { + self.latest_assignments.get(name).copied() + } + + /// Queue a runtime node based on the DAG template and apply its effects. + /// + /// Use this when stepping through a compiled DAG so the runtime state mirrors + /// the template node (assignments, action results, and data-flow edges). + /// + /// Example IR: + /// - total = a + b + /// When the AssignmentNode template is queued, the execution node records + /// the symbolic BinaryOpValue and updates data-flow edges from a/b. 
+ pub fn queue_template_node( + &mut self, + template_id: &str, + iteration_index: Option, + ) -> Result { + let dag = self + .dag + .as_ref() + .ok_or_else(|| RunnerStateError("runner state has no DAG template".to_string()))?; + let template = dag + .nodes + .get(template_id) + .ok_or_else(|| RunnerStateError(format!("template node not found: {template_id}")))? + .clone(); + + let node_id = Uuid::new_v4(); + let node = ExecutionNode { + node_id, + node_type: template.node_type().to_string(), + label: template.label(), + status: NodeStatus::Queued, + template_id: Some(template_id.to_string()), + targets: self.node_targets(&template), + action: if let DAGNode::ActionCall(action_node) = &template { + Some(self.action_spec_from_node(action_node)) + } else { + None + }, + value_expr: None, + assignments: HashMap::new(), + action_attempt: if matches!(template, DAGNode::ActionCall(_)) { + 1 + } else { + 0 + }, + started_at: None, + completed_at: None, + scheduled_at: None, + }; + + self.register_node(node.clone())?; + self.apply_template_node(&node, &template, iteration_index)?; + Ok(node) + } + + /// Create a runtime node directly without a DAG template. + /// + /// Use this for ad-hoc nodes (tests, synthetic steps) and as a common + /// builder for higher-level queue helpers like queue_action. 
+ /// + /// Example: + /// - queue_node(node_type="assignment", label="results = []") + pub fn queue_node( + &mut self, + node_type: &str, + label: &str, + params: QueueNodeParams, + ) -> Result { + let node_type_enum = ExecutionNodeType::try_from(node_type)?; + let QueueNodeParams { + node_id, + template_id, + targets, + action, + value_expr, + scheduled_at, + } = params; + let node_id = node_id.unwrap_or_else(Uuid::new_v4); + let action_attempt = if matches!(node_type_enum, ExecutionNodeType::ActionCall) { + 1 + } else { + 0 + }; + let node = ExecutionNode { + node_id, + node_type: node_type.to_string(), + label: label.to_string(), + status: NodeStatus::Queued, + template_id, + targets: targets.unwrap_or_default(), + action, + value_expr, + assignments: HashMap::new(), + action_attempt, + started_at: None, + completed_at: None, + scheduled_at, + }; + self.register_node(node.clone())?; + Ok(node) + } + + /// Queue an action call from IR, respecting a local scope for loop vars. + /// + /// Use this during IR -> runner-state conversion (including spreads) so + /// action arguments are converted to symbolic expressions. + /// + /// Example IR: + /// - @double(value=item) + /// With local_scope={"item": LiteralValue(2)}, the queued action uses a + /// literal argument and links data-flow to the literal's source nodes. 
+ pub fn queue_action_call( + &mut self, + action: &ir::ActionCall, + targets: Option>, + iteration_index: Option, + local_scope: Option<&HashMap>, + ) -> Result { + let spec = self.action_spec_from_ir(action, local_scope); + let node = self.queue_node( + ExecutionNodeType::ActionCall.as_str(), + &format!("@{}()", spec.action_name), + QueueNodeParams { + targets: targets.clone(), + action: Some(spec.clone()), + ..QueueNodeParams::default() + }, + )?; + for value in spec.kwargs.values() { + self.record_data_flow_from_value(node.node_id, value); + } + let result = self.assign_action_results( + &node, + &spec.action_name, + targets.as_deref(), + iteration_index, + true, + )?; + if let Some(node_mut) = self.nodes.get_mut(&node.node_id) { + node_mut.value_expr = Some(ValueExpr::ActionResult(result.clone())); + } + Ok(result) + } + + pub fn mark_running(&mut self, node_id: Uuid) -> Result<(), RunnerStateError> { + let is_action = { + let node = self.get_node_mut(node_id)?; + node.status = NodeStatus::Running; + let is_action = node.is_action_call(); + if is_action { + node.started_at = Some(Utc::now()); + node.completed_at = None; + } + is_action + }; + self.ready_queue.retain(|id| id != &node_id); + if is_action { + self.mark_graph_dirty(); + } + Ok(()) + } + + pub fn mark_completed(&mut self, node_id: Uuid) -> Result<(), RunnerStateError> { + let is_action = { + let node = self.get_node_mut(node_id)?; + node.status = NodeStatus::Completed; + let is_action = node.is_action_call(); + if is_action { + node.completed_at = Some(Utc::now()); + } + node.scheduled_at = None; + is_action + }; + self.ready_queue.retain(|id| id != &node_id); + if is_action { + self.mark_graph_dirty(); + } + Ok(()) + } + + pub fn mark_failed(&mut self, node_id: Uuid) -> Result<(), RunnerStateError> { + let is_action = { + let node = self.get_node_mut(node_id)?; + node.status = NodeStatus::Failed; + let is_action = node.is_action_call(); + if is_action { + node.completed_at = Some(Utc::now()); + } 
+ node.scheduled_at = None; + is_action + }; + self.ready_queue.retain(|id| id != &node_id); + if is_action { + self.mark_graph_dirty(); + } + Ok(()) + } + + pub fn set_node_scheduled_at( + &mut self, + node_id: Uuid, + scheduled_at: Option>, + ) -> Result<(), RunnerStateError> { + let node = self.get_node_mut(node_id)?; + node.scheduled_at = scheduled_at; + self.mark_graph_dirty(); + Ok(()) + } + + pub fn increment_action_attempt(&mut self, node_id: Uuid) -> Result<(), RunnerStateError> { + let node = self.get_node_mut(node_id)?; + if !node.is_action_call() { + return Err(RunnerStateError( + "action attempt increment requires an action_call node".to_string(), + )); + } + node.action_attempt += 1; + self.mark_graph_dirty(); + Ok(()) + } + + /// Return and clear the graph dirty bit for durable execution. + /// + /// Only action nodes and their retry parameters must be persisted; other + /// nodes are deterministic from the ground-truth DAG definition. + pub fn consume_graph_dirty_for_durable_execution(&mut self) -> bool { + let dirty = self.graph_dirty; + self.graph_dirty = false; + dirty + } + + pub fn add_edge(&mut self, source: Uuid, target: Uuid, edge_type: EdgeType) { + self.register_edge(ExecutionEdge { + source, + target, + edge_type, + }); + } + + /// Insert a node into the runtime bookkeeping and optional control flow. + /// + /// Use this for all queued nodes so the ready queue, timeline, and implicit + /// state-machine edge ordering remain consistent. + /// + /// Example: + /// - queue node A then node B with link_queued_nodes=True + /// This creates a state-machine edge A -> B automatically. 
+ fn register_node(&mut self, node: ExecutionNode) -> Result<(), RunnerStateError> { + if self.nodes.contains_key(&node.node_id) { + return Err(RunnerStateError(format!( + "execution node already queued: {}", + node.node_id + ))); + } + self.nodes.insert(node.node_id, node.clone()); + self.ready_queue.push(node.node_id); + if node.is_action_call() { + self.mark_graph_dirty(); + } + if self.link_queued_nodes + && let Some(last) = self.timeline.last() + { + self.register_edge(ExecutionEdge { + source: *last, + target: node.node_id, + edge_type: EdgeType::StateMachine, + }); + } + self.timeline.push(node.node_id); + Ok(()) + } + + fn register_edge(&mut self, edge: ExecutionEdge) { + self.edges.insert(edge); + } + + fn mark_graph_dirty(&mut self) { + self.graph_dirty = true; + } + + /// Rebuild derived structures from persisted nodes and edges. + /// + /// Use this when loading a snapshot so timeline ordering, latest assignment + /// tracking, and ready queue reflect the current node set. + /// + /// Example: + /// - Given nodes {A, B} and edge A -> B, rehydration restores timeline + /// [A, B] and marks the latest assignment targets from node B. 
+ fn rehydrate_state(&mut self) { + self.timeline = self.build_timeline(); + self.latest_assignments.clear(); + for node_id in &self.timeline { + if let Some(node) = self.nodes.get(node_id) { + for target in node.assignments.keys() { + self.latest_assignments.insert(target.clone(), *node_id); + } + } + } + if self.ready_queue.is_empty() { + self.ready_queue = self + .timeline + .iter() + .filter(|node_id| { + self.nodes + .get(node_id) + .map(|node| node.status == NodeStatus::Queued) + .unwrap_or(false) + }) + .cloned() + .collect(); + } + } + + fn build_timeline(&self) -> Vec { + if self.edges.is_empty() { + return self.nodes.keys().cloned().collect(); + } + let mut adjacency: HashMap> = self + .nodes + .keys() + .map(|node_id| (*node_id, Vec::new())) + .collect(); + let mut in_degree: HashMap = + self.nodes.keys().map(|node_id| (*node_id, 0)).collect(); + let mut edges: Vec<&ExecutionEdge> = self.edges.iter().collect(); + edges.sort_by_key(|edge| (edge.source, edge.target)); + for edge in edges { + if edge.edge_type != EdgeType::StateMachine { + continue; + } + if adjacency.contains_key(&edge.source) && adjacency.contains_key(&edge.target) { + adjacency.entry(edge.source).or_default().push(edge.target); + *in_degree.entry(edge.target).or_insert(0) += 1; + } + } + let mut queue: Vec = in_degree + .iter() + .filter(|(_, degree)| **degree == 0) + .map(|(node_id, _)| *node_id) + .collect(); + queue.sort_by_key(|id| id.to_string()); + let mut order: Vec = Vec::new(); + while !queue.is_empty() { + let node_id = queue.remove(0); + order.push(node_id); + if let Some(neighbors) = adjacency.get(&node_id) { + let mut sorted = neighbors.clone(); + sorted.sort_by_key(|id| id.to_string()); + for neighbor in sorted { + if let Some(degree) = in_degree.get_mut(&neighbor) { + *degree -= 1; + if *degree == 0 { + queue.push(neighbor); + } + } + } + queue.sort_by_key(|id| id.to_string()); + } + } + let mut remaining: Vec = self + .nodes + .keys() + .filter(|node_id| 
!order.contains(node_id)) + .cloned() + .collect(); + remaining.sort_by_key(|id| id.to_string()); + order.extend(remaining); + order + } + + fn get_node_mut(&mut self, node_id: Uuid) -> Result<&mut ExecutionNode, RunnerStateError> { + self.nodes + .get_mut(&node_id) + .ok_or_else(|| RunnerStateError(format!("execution node not found: {node_id}"))) + } + + fn node_targets(&self, node: &DAGNode) -> Vec { + match node { + DAGNode::Assignment(AssignmentNode { + targets, target, .. + }) => { + if !targets.is_empty() { + return targets.clone(); + } + target.clone().map(|item| vec![item]).unwrap_or_default() + } + DAGNode::ActionCall(ActionCallNode { + targets, target, .. + }) => { + if let Some(list) = targets + && !list.is_empty() + { + return list.clone(); + } + target.clone().map(|item| vec![item]).unwrap_or_default() + } + DAGNode::FnCall(FnCallNode { + targets, target, .. + }) => { + if let Some(list) = targets + && !list.is_empty() + { + return list.clone(); + } + target.clone().map(|item| vec![item]).unwrap_or_default() + } + DAGNode::Join(JoinNode { + targets, target, .. + }) => { + if let Some(list) = targets + && !list.is_empty() + { + return list.clone(); + } + target.clone().map(|item| vec![item]).unwrap_or_default() + } + DAGNode::Aggregator(AggregatorNode { + targets, target, .. + }) => { + if let Some(list) = targets + && !list.is_empty() + { + return list.clone(); + } + target.clone().map(|item| vec![item]).unwrap_or_default() + } + DAGNode::Return(ReturnNode { + targets, target, .. + }) => { + if let Some(list) = targets + && !list.is_empty() + { + return list.clone(); + } + target.clone().map(|item| vec![item]).unwrap_or_default() + } + _ => Vec::new(), + } + } + + /// Apply DAG template semantics to a queued execution node. + /// + /// Use this right after queue_template_node so assignments, action result + /// references, and data-flow edges are populated from the template. 
+ /// + /// Example IR: + /// - total = @sum(values=items) + /// The ActionCallNode template produces an ActionResultValue and defines + /// total via assignments on the execution node. + fn apply_template_node( + &mut self, + exec_node: &ExecutionNode, + template: &DAGNode, + iteration_index: Option, + ) -> Result<(), RunnerStateError> { + match template { + DAGNode::Assignment(AssignmentNode { + assign_expr: Some(expr), + .. + }) => { + let value_expr = self.expr_to_value(expr, None)?; + if let Some(node_mut) = self.nodes.get_mut(&exec_node.node_id) { + node_mut.value_expr = Some(value_expr.clone()); + } + self.record_data_flow_from_value(exec_node.node_id, &value_expr); + let assignments = + self.build_assignments(&self.node_targets(template), &value_expr)?; + if let Some(node) = self.nodes.get_mut(&exec_node.node_id) { + node.assignments.extend(assignments.clone()); + } + self.mark_latest_assignments(exec_node.node_id, &assignments); + return Ok(()); + } + DAGNode::ActionCall(ActionCallNode { + action_name, + targets, + target, + .. + }) => { + let kwarg_values = self + .nodes + .get(&exec_node.node_id) + .and_then(|node| node.action.as_ref()) + .map(|action| action.kwargs.values().cloned().collect::>()) + .unwrap_or_default(); + for expr in &kwarg_values { + self.record_data_flow_from_value(exec_node.node_id, expr); + } + let targets = targets + .clone() + .or_else(|| target.clone().map(|item| vec![item])); + let result = self.assign_action_results( + exec_node, + action_name, + targets.as_deref(), + iteration_index, + true, + )?; + if let Some(node_mut) = self.nodes.get_mut(&exec_node.node_id) { + node_mut.value_expr = Some(ValueExpr::ActionResult(result)); + } + return Ok(()); + } + DAGNode::Sleep(SleepNode { + duration_expr: Some(expr), + .. 
+ }) => { + let value_expr = self.expr_to_value(expr, None)?; + if let Some(node_mut) = self.nodes.get_mut(&exec_node.node_id) { + node_mut.value_expr = Some(value_expr.clone()); + } + self.record_data_flow_from_value(exec_node.node_id, &value_expr); + return Ok(()); + } + DAGNode::FnCall(FnCallNode { + assign_expr: Some(expr), + .. + }) => { + let value_expr = self.expr_to_value(expr, None)?; + if let Some(node_mut) = self.nodes.get_mut(&exec_node.node_id) { + node_mut.value_expr = Some(value_expr.clone()); + } + self.record_data_flow_from_value(exec_node.node_id, &value_expr); + let assignments = + self.build_assignments(&self.node_targets(template), &value_expr)?; + if let Some(node) = self.nodes.get_mut(&exec_node.node_id) { + node.assignments.extend(assignments.clone()); + } + self.mark_latest_assignments(exec_node.node_id, &assignments); + return Ok(()); + } + DAGNode::Return(ReturnNode { + assign_expr: Some(expr), + target, + .. + }) => { + let value_expr = self.expr_to_value(expr, None)?; + if let Some(node_mut) = self.nodes.get_mut(&exec_node.node_id) { + node_mut.value_expr = Some(value_expr.clone()); + } + self.record_data_flow_from_value(exec_node.node_id, &value_expr); + let target = target.clone().unwrap_or_else(|| "result".to_string()); + let assignments = self.build_assignments(&[target], &value_expr)?; + if let Some(node) = self.nodes.get_mut(&exec_node.node_id) { + node.assignments.extend(assignments.clone()); + } + self.mark_latest_assignments(exec_node.node_id, &assignments); + return Ok(()); + } + _ => {} + } + Ok(()) + } + + /// Create symbolic action results and map them to targets. + /// + /// Use this when an action produces one or more results that are assigned + /// to variables (including tuple unpacking). + /// + /// `update_latest` controls whether assigned targets are published into + /// `latest_assignments` for downstream variable/data-flow resolution. 
+ /// + /// Use `update_latest = true` for user-visible assignments so later nodes + /// can resolve those target names through `latest_assignments`. + /// + /// Use `update_latest = false` for internal/synthetic bindings that should + /// not become globally visible variable definitions. Example: spread action + /// unroll nodes can bind an internal `_spread_result`, and the aggregator + /// later publishes the final user target. + /// + /// Example IR: + /// - a, b = @pair() + /// This yields ActionResultValue(node_id, result_index=0/1) for a and b. + /// + /// TODO: make this `pub(crate)` again + pub fn assign_action_results( + &mut self, + node: &ExecutionNode, + action_name: &str, + targets: Option<&[String]>, + iteration_index: Option, + update_latest: bool, + ) -> Result { + let result_ref = ActionResultValue { + node_id: node.node_id, + action_name: action_name.to_string(), + iteration_index, + result_index: None, + }; + let targets = targets.unwrap_or(&[]); + let assignments = + self.build_assignments(targets, &ValueExpr::ActionResult(result_ref.clone()))?; + if !assignments.is_empty() { + if let Some(node) = self.nodes.get_mut(&node.node_id) { + node.assignments.extend(assignments.clone()); + } + if update_latest { + self.mark_latest_assignments(node.node_id, &assignments); + } + } + Ok(result_ref) + } + + /// Expand an assignment into per-target symbolic values. + /// + /// Use this for single-target assignments, tuple unpacking, and action + /// multi-result binding to keep definitions explicit. + /// + /// Example IR: + /// - a, b = [1, 2] + /// Produces {"a": LiteralValue(1), "b": LiteralValue(2)}. 
+ fn build_assignments( + &self, + targets: &[String], + value: &ValueExpr, + ) -> Result, RunnerStateError> { + if targets.is_empty() { + return Ok(HashMap::new()); + } + if targets.len() == 1 { + let mut map = HashMap::new(); + // Keep single-target assignments symbolic to avoid recursively + // embedding prior values into each update (which can explode + // persisted runner_instances.state size/depth in loops). + map.insert(targets[0].clone(), value.clone()); + return Ok(map); + } + let value = self.materialize_value(value.clone()); + + match value { + ValueExpr::List(ListValue { elements }) => { + if elements.len() != targets.len() { + return Err(RunnerStateError("tuple unpacking mismatch".to_string())); + } + let mut map = HashMap::new(); + for (target, item) in targets.iter().zip(elements.into_iter()) { + map.insert(target.clone(), item); + } + Ok(map) + } + ValueExpr::ActionResult(action_value) => { + let mut map = HashMap::new(); + for (idx, target) in targets.iter().enumerate() { + map.insert( + target.clone(), + ValueExpr::ActionResult(ActionResultValue { + node_id: action_value.node_id, + action_name: action_value.action_name.clone(), + iteration_index: action_value.iteration_index, + result_index: Some(idx as i32), + }), + ); + } + Ok(map) + } + ValueExpr::FunctionCall(func_value) => { + let mut map = HashMap::new(); + for (idx, target) in targets.iter().enumerate() { + map.insert( + target.clone(), + ValueExpr::Index(IndexValue { + object: Box::new(ValueExpr::FunctionCall(func_value.clone())), + index: Box::new(ValueExpr::Literal(LiteralValue { + value: serde_json::Value::Number((idx as i64).into()), + })), + }), + ); + } + Ok(map) + } + ValueExpr::Index(index_value) => { + let mut map = HashMap::new(); + for (idx, target) in targets.iter().enumerate() { + map.insert( + target.clone(), + ValueExpr::Index(IndexValue { + object: Box::new(ValueExpr::Index(index_value.clone())), + index: Box::new(ValueExpr::Literal(LiteralValue { + value: 
serde_json::Value::Number((idx as i64).into()), + })), + }), + ); + } + Ok(map) + } + _ => Err(RunnerStateError("tuple unpacking mismatch".to_string())), + } + } + + /// Inline variable references and apply light constant folding. + /// + /// Use this before storing assignments so values are self-contained and + /// list concatenations are simplified. + /// + /// Example IR: + /// - xs = [1] + /// - ys = xs + [2] + /// Materialization turns ys into ListValue([1, 2]) rather than keeping xs. + pub fn materialize_value(&self, value: ValueExpr) -> ValueExpr { + let resolved = resolve_value_tree(&value, &|name, seen| { + self.resolve_variable_value(name, seen) + }); + if let ValueExpr::BinaryOp(BinaryOpValue { left, op, right }) = &resolved + && ir::BinaryOperator::try_from(*op).ok() == Some(ir::BinaryOperator::BinaryOpAdd) + && let (ValueExpr::List(left_list), ValueExpr::List(right_list)) = (&**left, &**right) + { + let mut elements = left_list.elements.clone(); + elements.extend(right_list.elements.clone()); + return ValueExpr::List(ListValue { elements }); + } + resolved + } + + /// Resolve a variable name to its latest symbolic definition. + /// + /// Use this when materializing expressions so variables become their + /// defining expression while guarding against cycles. + /// + /// Example IR: + /// - x = 1 + /// - y = x + 2 + /// When materializing y, the VariableValue("x") is replaced with the + /// LiteralValue(1), yielding a BinaryOpValue(1 + 2) instead of a reference + /// to x. This makes downstream replay use the symbolic expression rather + /// than requiring a separate variable lookup. 
+ fn resolve_variable_value(&self, name: &str, seen: &mut HashSet) -> ValueExpr { + if seen.contains(name) { + return ValueExpr::Variable(VariableValue { + name: name.to_string(), + }); + } + let node_id = match self.latest_assignments.get(name) { + Some(node_id) => *node_id, + None => { + return ValueExpr::Variable(VariableValue { + name: name.to_string(), + }); + } + }; + let node = match self.nodes.get(&node_id) { + Some(node) => node, + None => { + return ValueExpr::Variable(VariableValue { + name: name.to_string(), + }); + } + }; + let assigned = match node.assignments.get(name) { + Some(value) => value.clone(), + None => { + return ValueExpr::Variable(VariableValue { + name: name.to_string(), + }); + } + }; + // Avoid inlining self-referential updates such as `i = i + 1`. + // Returning the raw assignment here would inject one "extra step" + // into materialized consumers (e.g. loop guards), causing off-by-one + // behavior and deep recursive expression trees. + if value_expr_contains_variable(&assigned, name) { + return ValueExpr::Variable(VariableValue { + name: name.to_string(), + }); + } + if let ValueExpr::Variable(var) = &assigned { + seen.insert(name.to_string()); + return self.resolve_variable_value(&var.name, seen); + } + assigned + } + + /// TODO: make this `pub(crate)` again + pub fn mark_latest_assignments( + &mut self, + node_id: Uuid, + assignments: &HashMap, + ) { + for target in assignments.keys() { + self.latest_assignments.insert(target.clone(), node_id); + } + } + + /// Add data-flow edges implied by a value expression. + /// + /// Use this when a node consumes an expression so upstream dependencies are + /// encoded in the runtime graph. + /// + /// Example IR: + /// - total = @sum(values) + /// A data-flow edge is added from the values assignment node to the action. 
+ /// + /// TODO: make this `pub(crate)` again + pub fn record_data_flow_from_value(&mut self, node_id: Uuid, value: &ValueExpr) { + let source_ids = + collect_value_sources(value, &|name| self.latest_assignments.get(name).copied()); + self.record_data_flow_edges(node_id, &source_ids); + } + + /// Register data-flow edges from sources to the given node. + /// + /// Example: + /// - sources {A, B} and node C produce edges A -> C and B -> C. + fn record_data_flow_edges(&mut self, node_id: Uuid, source_ids: &HashSet) { + for source_id in source_ids { + if *source_id == node_id { + continue; + } + self.register_edge(ExecutionEdge { + source: *source_id, + target: node_id, + edge_type: EdgeType::DataFlow, + }); + } + } + + /// Convert an IR expression into a symbolic ValueExpr tree. + /// + /// Use this when interpreting IR statements or DAG templates into the + /// runtime state; it queues actions and spreads as needed. + /// + /// Example IR: + /// - total = base + 1 + /// Produces BinaryOpValue(VariableValue("base"), LiteralValue(1)). 
+ pub fn expr_to_value( + &mut self, + expr: &ir::Expr, + local_scope: Option<&HashMap>, + ) -> Result { + match expr.kind.as_ref() { + Some(ir::expr::Kind::Literal(lit)) => Ok(ValueExpr::Literal(LiteralValue { + value: literal_value(lit), + })), + Some(ir::expr::Kind::Variable(var)) => { + if let Some(scope) = local_scope + && let Some(value) = scope.get(&var.name) + { + return Ok(value.clone()); + } + Ok(ValueExpr::Variable(VariableValue { + name: var.name.clone(), + })) + } + Some(ir::expr::Kind::BinaryOp(op)) => { + let left = op + .left + .as_ref() + .ok_or_else(|| RunnerStateError("binary op missing left".to_string()))?; + let right = op + .right + .as_ref() + .ok_or_else(|| RunnerStateError("binary op missing right".to_string()))?; + let left_value = self.expr_to_value(left, local_scope)?; + let right_value = self.expr_to_value(right, local_scope)?; + Ok(self.binary_op_value(op.op, left_value, right_value)) + } + Some(ir::expr::Kind::UnaryOp(op)) => { + let operand = op + .operand + .as_ref() + .ok_or_else(|| RunnerStateError("unary op missing operand".to_string()))?; + let operand_value = self.expr_to_value(operand, local_scope)?; + Ok(self.unary_op_value(op.op, operand_value)) + } + Some(ir::expr::Kind::List(list)) => { + let elements = list + .elements + .iter() + .map(|item| self.expr_to_value(item, local_scope)) + .collect::, RunnerStateError>>()?; + Ok(ValueExpr::List(ListValue { elements })) + } + Some(ir::expr::Kind::Dict(dict_expr)) => { + let mut entries = Vec::new(); + for entry in &dict_expr.entries { + let key_expr = entry + .key + .as_ref() + .ok_or_else(|| RunnerStateError("dict entry missing key".to_string()))?; + let value_expr = entry + .value + .as_ref() + .ok_or_else(|| RunnerStateError("dict entry missing value".to_string()))?; + entries.push(DictEntryValue { + key: self.expr_to_value(key_expr, local_scope)?, + value: self.expr_to_value(value_expr, local_scope)?, + }); + } + Ok(ValueExpr::Dict(DictValue { entries })) + } + 
Some(ir::expr::Kind::Index(index)) => { + let object = index + .object + .as_ref() + .ok_or_else(|| RunnerStateError("index access missing object".to_string()))?; + let index_expr = index + .index + .as_ref() + .ok_or_else(|| RunnerStateError("index access missing index".to_string()))?; + let object_value = self.expr_to_value(object, local_scope)?; + let index_value = self.expr_to_value(index_expr, local_scope)?; + Ok(self.index_value(object_value, index_value)) + } + Some(ir::expr::Kind::Dot(dot)) => { + let object = dot + .object + .as_ref() + .ok_or_else(|| RunnerStateError("dot access missing object".to_string()))?; + Ok(ValueExpr::Dot(DotValue { + object: Box::new(self.expr_to_value(object, local_scope)?), + attribute: dot.attribute.clone(), + })) + } + Some(ir::expr::Kind::FunctionCall(call)) => { + let args = call + .args + .iter() + .map(|arg| self.expr_to_value(arg, local_scope)) + .collect::, RunnerStateError>>()?; + let mut kwargs = HashMap::new(); + for kw in &call.kwargs { + if let Some(value) = &kw.value { + kwargs.insert(kw.name.clone(), self.expr_to_value(value, local_scope)?); + } + } + let global_fn = if call.global_function != 0 { + Some(call.global_function) + } else { + None + }; + Ok(ValueExpr::FunctionCall(FunctionCallValue { + name: call.name.clone(), + args, + kwargs, + global_function: global_fn, + })) + } + Some(ir::expr::Kind::ActionCall(action)) => { + let result = self.queue_action_call(action, None, None, local_scope)?; + Ok(ValueExpr::ActionResult(result)) + } + Some(ir::expr::Kind::ParallelExpr(parallel)) => { + let mut calls = Vec::new(); + for call in ¶llel.calls { + calls.push(self.call_to_value(call, local_scope)?); + } + Ok(ValueExpr::List(ListValue { elements: calls })) + } + Some(ir::expr::Kind::SpreadExpr(spread)) => self.spread_expr_value(spread, local_scope), + None => Ok(ValueExpr::Literal(LiteralValue { + value: serde_json::Value::Null, + })), + } + } + + /// Convert an IR call (action/function) into a ValueExpr. 
+ /// + /// Use this for parallel expressions that contain mixed call types. + /// + /// Example IR: + /// - parallel { @double(x), helper(x) } + /// Action calls become ActionResultValue nodes; function calls become + /// FunctionCallValue expressions. + fn call_to_value( + &mut self, + call: &ir::Call, + local_scope: Option<&HashMap>, + ) -> Result { + match call.kind.as_ref() { + Some(ir::call::Kind::Action(action)) => Ok(ValueExpr::ActionResult( + self.queue_action_call(action, None, None, local_scope)?, + )), + Some(ir::call::Kind::Function(function)) => self.expr_to_value( + &ir::Expr { + kind: Some(ir::expr::Kind::FunctionCall(function.clone())), + span: None, + }, + local_scope, + ), + None => Ok(ValueExpr::Literal(LiteralValue { + value: serde_json::Value::Null, + })), + } + } + + /// Materialize a spread expression into concrete calls or a symbolic spread. + /// + /// Use this when converting IR spreads so known list collections unroll to + /// explicit action calls, while unknown collections stay symbolic. + /// + /// Example IR: + /// - spread [1, 2]:item -> @double(value=item) + /// Produces a ListValue of ActionResultValue entries for each item. 
+ fn spread_expr_value( + &mut self, + spread: &ir::SpreadExpr, + local_scope: Option<&HashMap>, + ) -> Result { + let collection = self.expr_to_value( + spread + .collection + .as_ref() + .ok_or_else(|| RunnerStateError("spread collection missing".to_string()))?, + local_scope, + )?; + if let ValueExpr::List(list) = &collection { + let mut results = Vec::new(); + for (idx, item) in list.elements.iter().enumerate() { + let mut scope = HashMap::new(); + scope.insert(spread.loop_var.clone(), item.clone()); + let result = self.queue_action_call( + spread + .action + .as_ref() + .ok_or_else(|| RunnerStateError("spread action missing".to_string()))?, + None, + Some(idx as i32), + Some(&scope), + )?; + results.push(ValueExpr::ActionResult(result)); + } + return Ok(ValueExpr::List(ListValue { elements: results })); + } + + let action_spec = self.action_spec_from_ir( + spread + .action + .as_ref() + .ok_or_else(|| RunnerStateError("spread action missing".to_string()))?, + None, + ); + Ok(ValueExpr::Spread(SpreadValue { + collection: Box::new(collection), + loop_var: spread.loop_var.clone(), + action: action_spec, + })) + } + + /// Build a binary-op value with simple constant folding. + /// + /// Use this when converting IR so literals and list concatenations are + /// simplified early. + /// + /// Example IR: + /// - total = 1 + 2 + /// Produces LiteralValue(3) instead of a BinaryOpValue. 
+    fn binary_op_value(&self, op: i32, left: ValueExpr, right: ValueExpr) -> ValueExpr {
+        // List + list is folded structurally (concatenation) even when elements
+        // are still symbolic; literal folding below requires both sides literal.
+        if ir::BinaryOperator::try_from(op).ok() == Some(ir::BinaryOperator::BinaryOpAdd)
+            && let (ValueExpr::List(left_list), ValueExpr::List(right_list)) = (&left, &right)
+        {
+            let mut elements = left_list.elements.clone();
+            elements.extend(right_list.elements.clone());
+            return ValueExpr::List(ListValue { elements });
+        }
+        if let (ValueExpr::Literal(left_val), ValueExpr::Literal(right_val)) = (&left, &right)
+            && let Some(folded) = fold_literal_binary(op, &left_val.value, &right_val.value)
+        {
+            return ValueExpr::Literal(LiteralValue { value: folded });
+        }
+        ValueExpr::BinaryOp(BinaryOpValue {
+            left: Box::new(left),
+            op,
+            right: Box::new(right),
+        })
+    }
+
+    /// Build a unary-op value with constant folding for literals.
+    ///
+    /// Example IR:
+    /// - neg = -1
+    /// Produces LiteralValue(-1) instead of UnaryOpValue.
+    fn unary_op_value(&self, op: i32, operand: ValueExpr) -> ValueExpr {
+        if let ValueExpr::Literal(lit) = &operand
+            && let Some(folded) = fold_literal_unary(op, &lit.value)
+        {
+            return ValueExpr::Literal(LiteralValue { value: folded });
+        }
+        ValueExpr::UnaryOp(UnaryOpValue {
+            op,
+            operand: Box::new(operand),
+        })
+    }
+
+    /// Build an index value, folding list literals when possible.
+    ///
+    /// Example IR:
+    /// - first = [10, 20][0]
+    /// Produces LiteralValue(10) when the list is fully literal.
+    fn index_value(&self, object: ValueExpr, index: ValueExpr) -> ValueExpr {
+        // Only non-negative, in-bounds integer indexes are folded; negative
+        // (Python-style from-the-end) indexes stay symbolic.
+        if let (ValueExpr::List(list), ValueExpr::Literal(idx)) = (&object, &index)
+            && let Some(idx) = idx.value.as_i64()
+            && idx >= 0
+            && (idx as usize) < list.elements.len()
+        {
+            return list.elements[idx as usize].clone();
+        }
+        ValueExpr::Index(IndexValue {
+            object: Box::new(object),
+            index: Box::new(index),
+        })
+    }
+
+    /// Extract an action call spec from a DAG node.
+    ///
+    /// Use this when queueing nodes from the DAG template.
+    ///
+    /// Example:
+    /// - ActionCallNode(action_name="double", kwargs={"value": "$x"})
+    /// Produces ActionCallSpec(action_name="double", kwargs={"value": VariableValue("x")}).
+    fn action_spec_from_node(&mut self, node: &ActionCallNode) -> ActionCallSpec {
+        let kwargs = node
+            .kwarg_exprs
+            .iter()
+            // NOTE(review): `.unwrap()` panics if `expr_to_value` fails;
+            // consider propagating the error instead.
+            .map(|(name, expr)| (name.clone(), self.expr_to_value(expr, None).unwrap()))
+            .collect();
+        ActionCallSpec {
+            action_name: node.action_name.clone(),
+            module_name: node.module_name.clone(),
+            kwargs,
+        }
+    }
+
+    /// Extract an action call spec from IR, applying local scope bindings.
+    ///
+    /// Example IR:
+    /// - @double(value=item) with local_scope["item"]=LiteralValue(2)
+    /// Produces kwargs {"value": LiteralValue(2)}.
+    fn action_spec_from_ir(
+        &mut self,
+        action: &ir::ActionCall,
+        // NOTE(review): the generic arguments of this HashMap appear stripped
+        // by patch extraction -- confirm the map's key/value types upstream.
+        local_scope: Option<&HashMap>,
+    ) -> ActionCallSpec {
+        let kwargs = action
+            .kwargs
+            .iter()
+            // Kwargs without a value expression are silently skipped.
+            .filter_map(|kw| kw.value.as_ref().map(|value| (kw.name.clone(), value)))
+            .map(|(name, value)| (name, self.expr_to_value(value, local_scope).unwrap()))
+            .collect();
+        ActionCallSpec {
+            action_name: action.action_name.clone(),
+            module_name: action.module_name.clone(),
+            kwargs,
+        }
+    }
+
+    /// Queue an action call from raw parameters and return a symbolic result.
+    ///
+    /// Use this when constructing runner state programmatically without IR
+    /// objects, while still wiring data-flow edges and assignments.
+    ///
+    /// Example:
+    /// - queue_action("double", targets=["out"], kwargs={"value": LiteralValue(2)})
+    /// Defines out via an ActionResultValue and records data-flow from the literal.
+    // NOTE(review): generic arguments in this signature appear stripped by
+    // patch extraction (presumably Option<Vec<String>>, Option<HashMap<String,
+    // ValueExpr>>, Option<String>, an Option<_> iteration index, and a Result
+    // alias) -- restore from the original file before applying.
+    pub fn queue_action(
+        &mut self,
+        action_name: &str,
+        targets: Option>,
+        kwargs: Option>,
+        module_name: Option,
+        iteration_index: Option,
+    ) -> Result {
+        let spec = ActionCallSpec {
+            action_name: action_name.to_string(),
+            module_name,
+            kwargs: kwargs.unwrap_or_default(),
+        };
+        let node = self.queue_node(
+            ExecutionNodeType::ActionCall.as_str(),
+            &format!("@{}()", spec.action_name),
+            QueueNodeParams {
+                targets: targets.clone(),
+                action: Some(spec.clone()),
+                ..QueueNodeParams::default()
+            },
+        )?;
+        // Wire data-flow edges from every kwarg value into the new node.
+        for value in spec.kwargs.values() {
+            self.record_data_flow_from_value(node.node_id, value);
+        }
+        let result = self.assign_action_results(
+            &node,
+            &spec.action_name,
+            targets.as_deref(),
+            iteration_index,
+            true,
+        )?;
+        if let Some(node) = self.nodes.get_mut(&node.node_id) {
+            node.value_expr = Some(ValueExpr::ActionResult(result.clone()));
+        }
+        Ok(result)
+    }
+
+    /// Record an IR assignment as a runtime node with symbolic values.
+    ///
+    /// Use this when interpreting IR statements into the unrolled runtime graph.
+    ///
+    /// Example IR:
+    /// - results = []
+    /// Produces an assignment node with targets ["results"] and a ListValue([]).
+    pub fn record_assignment(
+        &mut self,
+        targets: Vec,
+        expr: &ir::Expr,
+        node_id: Option,
+        label: Option,
+    ) -> Result {
+        // Convert IR to a symbolic ValueExpr first, then delegate.
+        let value_expr = self.expr_to_value(expr, None)?;
+        self.record_assignment_value(targets, value_expr, node_id, label)
+    }
+
+    /// Record a symbolic assignment node and update data-flow/definitions.
+    ///
+    /// Use this for assignments created programmatically after ValueExpr
+    /// construction (tests or state rewrites).
+    ///
+    /// Example:
+    /// - record_assignment_value(targets=["x"], value_expr=LiteralValue(1))
+    /// Creates an assignment node with x bound to LiteralValue(1).
+    // NOTE(review): generic arguments in this signature appear stripped by
+    // patch extraction (presumably Vec<String>, Option<Uuid>, Option<String>,
+    // and a Result alias) -- restore from the original file before applying.
+    pub fn record_assignment_value(
+        &mut self,
+        targets: Vec,
+        value_expr: ValueExpr,
+        node_id: Option,
+        label: Option,
+    ) -> Result {
+        // A caller-provided node id wins; otherwise mint a fresh UUID.
+        let exec_node_id = node_id.unwrap_or_else(Uuid::new_v4);
+        let node = self.queue_node(
+            "assignment",
+            label.as_deref().unwrap_or("assignment"),
+            QueueNodeParams {
+                node_id: Some(exec_node_id),
+                targets: Some(targets.clone()),
+                value_expr: Some(value_expr.clone()),
+                ..QueueNodeParams::default()
+            },
+        )?;
+        self.record_data_flow_from_value(exec_node_id, &value_expr);
+        let assignments = self.build_assignments(&targets, &value_expr)?;
+        if let Some(node_mut) = self.nodes.get_mut(&node.node_id) {
+            node_mut.assignments.extend(assignments.clone());
+        }
+        // Point the per-variable "latest definition" index at this node.
+        self.mark_latest_assignments(node.node_id, &assignments);
+        Ok(node)
+    }
+}
+
+/// Render a ValueExpr to a python-like string for debugging/visualization.
+///
+/// Example:
+/// - BinaryOpValue(VariableValue("a"), +, LiteralValue(1)) -> "a + 1"
+pub fn format_value(expr: &ValueExpr) -> String {
+    format_value_inner(expr, 0)
+}
+
+/// Recursive ValueExpr formatter with operator precedence handling.
+///
+/// Example:
+/// - (a + b) * c renders with parentheses when needed.
+// NOTE(review): several element-type annotations below appear stripped by
+// patch extraction (`let items: Vec = ...` is presumably `Vec<String>`) --
+// restore from the original file before applying.
+fn format_value_inner(expr: &ValueExpr, parent_prec: i32) -> String {
+    match expr {
+        ValueExpr::Literal(lit) => format_literal(&lit.value),
+        ValueExpr::Variable(var) => var.name.clone(),
+        ValueExpr::ActionResult(value) => value.label(),
+        ValueExpr::BinaryOp(value) => {
+            let (op_str, prec) = binary_operator(value.op);
+            let left = format_value_inner(&value.left, prec);
+            // Right side gets prec + 1 so equal-precedence chains render
+            // left-associatively (the right child is parenthesized).
+            let right = format_value_inner(&value.right, prec + 1);
+            let rendered = format!("{left} {op_str} {right}");
+            if prec < parent_prec {
+                format!("({rendered})")
+            } else {
+                rendered
+            }
+        }
+        ValueExpr::UnaryOp(value) => {
+            let (op_str, prec) = unary_operator(value.op);
+            let operand = format_value_inner(&value.operand, prec);
+            let rendered = format!("{op_str}{operand}");
+            if prec < parent_prec {
+                format!("({rendered})")
+            } else {
+                rendered
+            }
+        }
+        ValueExpr::List(value) => {
+            let items: Vec = value
+                .elements
+                .iter()
+                .map(|item| format_value_inner(item, 0))
+                .collect();
+            format!("[{}]", items.join(", "))
+        }
+        ValueExpr::Dict(value) => {
+            let entries: Vec = value
+                .entries
+                .iter()
+                .map(|entry| {
+                    format!(
+                        "{}: {}",
+                        format_value_inner(&entry.key, 0),
+                        format_value_inner(&entry.value, 0)
+                    )
+                })
+                .collect();
+            format!("{{{}}}", entries.join(", "))
+        }
+        ValueExpr::Index(value) => {
+            let prec = precedence("index");
+            let obj = format_value_inner(&value.object, prec);
+            // The index expression resets precedence; it is bracketed anyway.
+            let idx = format_value_inner(&value.index, 0);
+            let rendered = format!("{obj}[{idx}]");
+            if prec < parent_prec {
+                format!("({rendered})")
+            } else {
+                rendered
+            }
+        }
+        ValueExpr::Dot(value) => {
+            let prec = precedence("dot");
+            let obj = format_value_inner(&value.object, prec);
+            let rendered = format!("{obj}.{}", value.attribute);
+            if prec < parent_prec {
+                format!("({rendered})")
+            } else {
+                rendered
+            }
+        }
+        ValueExpr::FunctionCall(value) => {
+            let mut args: Vec = value
+                .args
+                .iter()
+                .map(|arg| format_value_inner(arg, 0))
+                .collect();
+            for (name, val) in &value.kwargs {
+                args.push(format!("{name}={}", format_value_inner(val, 0)));
+            }
+            format!("{}({})", value.name, args.join(", "))
+        }
+        ValueExpr::Spread(value) => {
+            let collection = format_value_inner(&value.collection, 0);
+            let mut args: Vec = Vec::new();
+            for (name, val) in &value.action.kwargs {
+                args.push(format!("{name}={}", format_value_inner(val, 0)));
+            }
+            let call = format!("@{}({})", value.action.action_name, args.join(", "));
+            format!("spread {collection}:{} -> {call}", value.loop_var)
+        }
+    }
+}
+
+// Structural check for whether `name` occurs as a VariableValue anywhere in
+// the tree; Literal and ActionResult leaves can never contain a variable.
+fn value_expr_contains_variable(expr: &ValueExpr, name: &str) -> bool {
+    match expr {
+        ValueExpr::Variable(var) => var.name == name,
+        ValueExpr::BinaryOp(value) => {
+            value_expr_contains_variable(&value.left, name)
+                || value_expr_contains_variable(&value.right, name)
+        }
+        ValueExpr::UnaryOp(value) => value_expr_contains_variable(&value.operand, name),
+        ValueExpr::List(value) => value
+            .elements
+            .iter()
+            .any(|item| value_expr_contains_variable(item, name)),
+        ValueExpr::Dict(value) => value.entries.iter().any(|entry| {
+            value_expr_contains_variable(&entry.key, name)
+                || value_expr_contains_variable(&entry.value, name)
+        }),
+        ValueExpr::Index(value) => {
+            value_expr_contains_variable(&value.object, name)
+                || value_expr_contains_variable(&value.index, name)
+        }
+        ValueExpr::Dot(value) => value_expr_contains_variable(&value.object, name),
+        ValueExpr::FunctionCall(value) => {
+            value
+                .args
+                .iter()
+                .any(|arg| value_expr_contains_variable(arg, name))
+                || value
+                    .kwargs
+                    .values()
+                    .any(|kwarg| value_expr_contains_variable(kwarg, name))
+        }
+        ValueExpr::Spread(value) => {
+            value_expr_contains_variable(&value.collection, name)
+                || value
+                    .action
+                    .kwargs
+                    .values()
+                    .any(|kwarg| value_expr_contains_variable(kwarg, name))
+        }
+        ValueExpr::Literal(_) | ValueExpr::ActionResult(_) => false,
+    }
+}
+
+/// Map binary operator enums to (symbol, precedence) for formatting.
+fn binary_operator(op: i32) -> (&'static str, i32) {
+    use ir::BinaryOperator as Op;
+    // Decode the prost i32 once; anything unknown renders as "?" with the
+    // lowest precedence so it is always parenthesized by the formatter.
+    match Op::try_from(op) {
+        Ok(Op::BinaryOpOr) => ("or", 10),
+        Ok(Op::BinaryOpAnd) => ("and", 20),
+        Ok(Op::BinaryOpEq) => ("==", 30),
+        Ok(Op::BinaryOpNe) => ("!=", 30),
+        Ok(Op::BinaryOpLt) => ("<", 30),
+        Ok(Op::BinaryOpLe) => ("<=", 30),
+        Ok(Op::BinaryOpGt) => (">", 30),
+        Ok(Op::BinaryOpGe) => (">=", 30),
+        Ok(Op::BinaryOpIn) => ("in", 30),
+        Ok(Op::BinaryOpNotIn) => ("not in", 30),
+        Ok(Op::BinaryOpAdd) => ("+", 40),
+        Ok(Op::BinaryOpSub) => ("-", 40),
+        Ok(Op::BinaryOpMul) => ("*", 50),
+        Ok(Op::BinaryOpDiv) => ("/", 50),
+        Ok(Op::BinaryOpFloorDiv) => ("//", 50),
+        Ok(Op::BinaryOpMod) => ("%", 50),
+        _ => ("?", 0),
+    }
+}
+
+/// Map unary operator enums to (symbol, precedence) for formatting.
+fn unary_operator(op: i32) -> (&'static str, i32) {
+    use ir::UnaryOperator as Op;
+    match Op::try_from(op) {
+        Ok(Op::UnaryOpNeg) => ("-", 60),
+        // Trailing space keeps "not x" from rendering as "notx".
+        Ok(Op::UnaryOpNot) => ("not ", 60),
+        _ => ("?", 0),
+    }
+}
+
+/// Return precedence for non-operator constructs like index/dot.
+fn precedence(kind: &str) -> i32 {
+    if matches!(kind, "index" | "dot") { 80 } else { 0 }
+}
+
+/// Format Python literals as source-like text.
+fn format_literal(value: &serde_json::Value) -> String {
+    use serde_json::Value;
+    match value {
+        Value::Null => "None".to_string(),
+        Value::Bool(true) => "True".to_string(),
+        Value::Bool(false) => "False".to_string(),
+        // JSON string encoding doubles as a python-ish quoted/escaped repr.
+        Value::String(text) => {
+            serde_json::to_string(text).unwrap_or_else(|_| format!("\"{text}\""))
+        }
+        other => other.to_string(),
+    }
+}
+
+/// Convert an IR literal into a Python value.
+///
+/// Example IR:
+/// - Literal(int_value=3) -> 3
+// NOTE(review): `-> serde_json::Value` generic context looks intact here, but
+// the `Option`/`Result` returns below this point appear stripped of their
+// type arguments by patch extraction (likely Option<serde_json::Value>).
+pub fn literal_value(lit: &ir::Literal) -> serde_json::Value {
+    match lit.value.as_ref() {
+        Some(ir::literal::Value::IntValue(value)) => serde_json::Value::Number((*value).into()),
+        // Non-finite floats cannot be represented; they collapse to Null.
+        Some(ir::literal::Value::FloatValue(value)) => serde_json::Number::from_f64(*value)
+            .map(serde_json::Value::Number)
+            .unwrap_or(serde_json::Value::Null),
+        Some(ir::literal::Value::StringValue(value)) => serde_json::Value::String(value.clone()),
+        Some(ir::literal::Value::BoolValue(value)) => serde_json::Value::Bool(*value),
+        Some(ir::literal::Value::IsNone(_)) => serde_json::Value::Null,
+        None => serde_json::Value::Null,
+    }
+}
+
+/// Try to fold a literal binary operation to a concrete value.
+///
+/// Example:
+/// - (1, 2, BINARY_OP_ADD) -> 3
+fn fold_literal_binary(
+    op: i32,
+    left: &serde_json::Value,
+    right: &serde_json::Value,
+) -> Option {
+    match ir::BinaryOperator::try_from(op).ok() {
+        Some(ir::BinaryOperator::BinaryOpAdd) => {
+            // Integer add is tried before float add to preserve int-ness.
+            if let (Some(left), Some(right)) = (left.as_i64(), right.as_i64()) {
+                return Some(serde_json::Value::Number((left + right).into()));
+            }
+            if let (Some(left), Some(right)) = (left.as_f64(), right.as_f64()) {
+                return serde_json::Number::from_f64(left + right).map(serde_json::Value::Number);
+            }
+            // String + string folds to concatenation, mirroring Python.
+            if let (Some(left), Some(right)) = (left.as_str(), right.as_str()) {
+                return Some(serde_json::Value::String(format!("{left}{right}")));
+            }
+            None
+        }
+        Some(ir::BinaryOperator::BinaryOpSub) => {
+            if let (Some(left), Some(right)) = (left.as_f64(), right.as_f64()) {
+                return serde_json::Number::from_f64(left - right).map(serde_json::Value::Number);
+            }
+            None
+        }
+        Some(ir::BinaryOperator::BinaryOpMul) => {
+            if let (Some(left), Some(right)) = (left.as_f64(), right.as_f64()) {
+                return serde_json::Number::from_f64(left * right).map(serde_json::Value::Number);
+            }
+            None
+        }
+        Some(ir::BinaryOperator::BinaryOpDiv) => {
+            if let (Some(left), Some(right)) = (left.as_f64(), right.as_f64()) {
+                // Division by zero yields inf/NaN; from_f64 rejects those,
+                // so the fold falls through to None.
+                return serde_json::Number::from_f64(left / right).map(serde_json::Value::Number);
+            }
+            None
+        }
+        Some(ir::BinaryOperator::BinaryOpFloorDiv) => {
+            if let (Some(left), Some(right)) = (left.as_f64(), right.as_f64()) {
+                // Guard against division by zero before flooring.
+                if right == 0.0 {
+                    return None;
+                }
+                let value = (left / right).floor();
+                return serde_json::Number::from_f64(value).map(serde_json::Value::Number);
+            }
+            None
+        }
+        Some(ir::BinaryOperator::BinaryOpMod) => {
+            if let (Some(left), Some(right)) = (left.as_f64(), right.as_f64()) {
+                return serde_json::Number::from_f64(left % right).map(serde_json::Value::Number);
+            }
+            None
+        }
+        _ => None,
+    }
+}
+
+/// Try to fold a literal unary operation to a concrete value.
+///
+/// Example:
+/// - (UNARY_OP_NEG, 4) -> -4
+fn fold_literal_unary(op: i32, operand: &serde_json::Value) -> Option {
+    match ir::UnaryOperator::try_from(op).ok() {
+        Some(ir::UnaryOperator::UnaryOpNeg) => operand
+            .as_f64()
+            .and_then(|value| serde_json::Number::from_f64(-value).map(serde_json::Value::Number)),
+        // `not` uses Python truthiness, shared via util::is_truthy.
+        Some(ir::UnaryOperator::UnaryOpNot) => Some(serde_json::Value::Bool(!is_truthy(operand))),
+        _ => None,
+    }
+}
+
+impl fmt::Display for NodeStatus {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        let value = match self {
+            NodeStatus::Queued => "queued",
+            NodeStatus::Running => "running",
+            NodeStatus::Completed => "completed",
+            NodeStatus::Failed => "failed",
+        };
+        write!(f, "{value}")
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use serde_json::Value;
+    use waymark_proto::ast as ir;
+
+    // Builds the IR for `action_result + 2`, reused by the loop tests below.
+    fn action_plus_two_expr() -> ir::Expr {
+        ir::Expr {
+            kind: Some(ir::expr::Kind::BinaryOp(Box::new(ir::BinaryOp {
+                left: Some(Box::new(ir::Expr {
+                    kind: Some(ir::expr::Kind::Variable(ir::Variable {
+                        name: "action_result".to_string(),
+                    })),
+                    span: None,
+                })),
+                op: ir::BinaryOperator::BinaryOpAdd as i32,
+                right: Some(Box::new(ir::Expr {
+                    kind: Some(ir::expr::Kind::Literal(ir::Literal {
+                        value: Some(ir::literal::Value::IntValue(2)),
+                    })),
+                    span: None,
+                })),
+            }))),
+            span: None,
+        }
+    }
+
+    #[test]
+    fn test_runner_state_unrolls_loop_assignments() {
+        let mut state = RunnerState::new(None, None, None, true);
+
+        state
+            .queue_action(
+                "action",
+                Some(vec!["action_result".to_string()]),
+                None,
+                None,
+                Some(0),
+            )
+            .expect("queue action");
+        let first_list = ir::Expr {
+            kind: Some(ir::expr::Kind::List(ir::ListExpr {
+                elements: vec![action_plus_two_expr()],
+            })),
+            span: None,
+        };
+        state
+            .record_assignment(vec!["results".to_string()], &first_list, None, None)
+            .expect("record assignment");
+
+        state
+            .queue_action(
+                "action",
+                Some(vec!["action_result".to_string()]),
+                None,
+                None,
+                Some(1),
+            )
+            .expect("queue action");
+        let second_list = ir::Expr {
+            kind: Some(ir::expr::Kind::List(ir::ListExpr {
+                elements: vec![action_plus_two_expr()],
+            })),
+            span: None,
+        };
+        let concat_expr = ir::Expr {
+            kind: Some(ir::expr::Kind::BinaryOp(Box::new(ir::BinaryOp {
+                left: Some(Box::new(ir::Expr {
+                    kind: Some(ir::expr::Kind::Variable(ir::Variable {
+                        name: "results".to_string(),
+                    })),
+                    span: None,
+                })),
+                op: ir::BinaryOperator::BinaryOpAdd as i32,
+                right: Some(Box::new(second_list)),
+            }))),
+            span: None,
+        };
+        state
+            .record_assignment(vec!["results".to_string()], &concat_expr, None, None)
+            .expect("record assignment");
+
+        // Walk the timeline backwards to find the latest "results" binding.
+        let mut results: Option = None;
+        for node_id in state.timeline.iter().rev() {
+            let node = state.nodes.get(node_id).unwrap();
+            if let Some(value) = node.assignments.get("results") {
+                results = Some(value.clone());
+                break;
+            }
+        }
+
+        let results = results.expect("results assignment");
+        let binary = match results {
+            ValueExpr::BinaryOp(value) => value,
+            other => panic!("expected BinaryOpValue, got {other:?}"),
+        };
+
+        match binary.left.as_ref() {
+            ValueExpr::Variable(value) => assert_eq!(value.name, "results"),
+            other => panic!("expected VariableValue, got {other:?}"),
+        }
+
+        let right_list = match binary.right.as_ref() {
+            ValueExpr::List(value) => value,
+            other => panic!("expected ListValue, got {other:?}"),
+        };
+        assert_eq!(right_list.elements.len(), 1);
+
+        let item_bin = match &right_list.elements[0] {
+            ValueExpr::BinaryOp(value) => value,
+            other => panic!("expected BinaryOpValue, got {other:?}"),
+        };
+
+        match item_bin.left.as_ref() {
+            ValueExpr::Variable(value) => assert_eq!(value.name, "action_result"),
+            other => panic!("expected VariableValue, got {other:?}"),
+        }
+
+        match item_bin.right.as_ref() {
+            ValueExpr::Literal(value) => assert_eq!(value.value, Value::Number(2.into())),
+            other => panic!("expected LiteralValue, got {other:?}"),
+        }
+    }
+
+    #[test]
+    fn test_runner_state_single_target_assignments_stay_symbolic() {
+        let mut state = RunnerState::new(None, None, None, true);
+
+        let initial = ValueExpr::Dict(DictValue {
+            entries: vec![DictEntryValue {
+                key: ValueExpr::Literal(LiteralValue {
+                    value: Value::String("result".to_string()),
+                }),
+                value: ValueExpr::Literal(LiteralValue {
+                    value: Value::Number(1.into()),
+                }),
+            }],
+        });
+        state
+            .record_assignment_value(vec!["result".to_string()], initial, None, None)
+            .expect("record initial assignment");
+
+        let wrapped = ValueExpr::Dict(DictValue {
+            entries: vec![DictEntryValue {
+                key: ValueExpr::Literal(LiteralValue {
+                    value: Value::String("result".to_string()),
+                }),
+                value: ValueExpr::Variable(VariableValue {
+                    name: "result".to_string(),
+                }),
+            }],
+        });
+        state
+            .record_assignment_value(vec!["result".to_string()], wrapped, None, None)
+            .expect("record wrapped assignment");
+
+        let mut latest: Option = None;
+        for node_id in state.timeline.iter().rev() {
+            let node = state.nodes.get(node_id).expect("node");
+            if let Some(value) = node.assignments.get("result") {
+                latest = Some(value.clone());
+                break;
+            }
+        }
+        let latest = latest.expect("latest assignment");
+        let dict = match latest {
+            ValueExpr::Dict(value) => value,
+            other => panic!("expected DictValue, got {other:?}"),
+        };
+        assert_eq!(dict.entries.len(), 1);
+
+        match &dict.entries[0].value {
+            ValueExpr::Variable(value) => assert_eq!(value.name, "result"),
+            other => panic!("expected VariableValue, got {other:?}"),
+        }
+    }
+
+    #[test]
+    fn test_materialize_value_keeps_self_referential_variable_symbolic() {
+        let mut state = RunnerState::new(None, None, None, true);
+        state
+            .record_assignment_value(
+                vec!["count".to_string()],
+                ValueExpr::Literal(LiteralValue {
+                    value: Value::Number(0.into()),
+                }),
+                None,
+                None,
+            )
+            .expect("record initial count");
+        state
+            .record_assignment_value(
+                vec!["count".to_string()],
+                ValueExpr::BinaryOp(BinaryOpValue {
+                    left: Box::new(ValueExpr::Variable(VariableValue {
+                        name: "count".to_string(),
+                    })),
+                    op: ir::BinaryOperator::BinaryOpAdd as i32,
+                    right: Box::new(ValueExpr::Literal(LiteralValue {
+                        value: Value::Number(1.into()),
+                    })),
+                }),
+                None,
+                None,
+            )
+            .expect("record count update");
+
+        let materialized = state.materialize_value(ValueExpr::Variable(VariableValue {
+            name: "count".to_string(),
+        }));
+        match materialized {
+            ValueExpr::Variable(value) => assert_eq!(value.name, "count"),
+            other => panic!("expected VariableValue, got {other:?}"),
+        }
+    }
+
+    #[test]
+    fn test_runner_state_graph_dirty_for_action_updates() {
+        let mut state = RunnerState::new(None, None, None, true);
+        assert!(!state.consume_graph_dirty_for_durable_execution());
+
+        let action_result = state
+            .queue_action(
+                "action",
+                Some(vec!["action_result".to_string()]),
+                None,
+                None,
+                None,
+            )
+            .expect("queue action");
+        assert!(state.consume_graph_dirty_for_durable_execution());
+        assert!(!state.consume_graph_dirty_for_durable_execution());
+
+        state
+            .increment_action_attempt(action_result.node_id)
+            .expect("increment action attempt");
+        assert!(state.consume_graph_dirty_for_durable_execution());
+    }
+
+    #[test]
+    fn test_runner_state_graph_dirty_not_set_for_assignments() {
+        let mut state = RunnerState::new(None, None, None, true);
+        let value_expr = ValueExpr::Literal(LiteralValue {
+            value: Value::Number(1.into()),
+        });
+        state
+            .record_assignment_value(vec!["value".to_string()], value_expr, None, None)
+            .expect("record assignment");
+
+        assert!(!state.consume_graph_dirty_for_durable_execution());
+    }
+
+    #[test]
+    fn test_runner_state_records_action_start_stop_timestamps() {
+        let mut state = RunnerState::new(None, None, None, true);
+        let action_result = state
+            .queue_action(
+                "action",
+                Some(vec!["action_result".to_string()]),
+                None,
+                None,
+                None,
+            )
+            .expect("queue action");
+
+        // Clear queue-time dirty bit so lifecycle transitions are isolated.
+        assert!(state.consume_graph_dirty_for_durable_execution());
+
+        state
+            .mark_running(action_result.node_id)
+            .expect("mark running");
+        let started_at = state
+            .nodes
+            .get(&action_result.node_id)
+            .and_then(|node| node.started_at);
+        assert!(
+            started_at.is_some(),
+            "running action should record started_at"
+        );
+        assert!(
+            state
+                .nodes
+                .get(&action_result.node_id)
+                .and_then(|node| node.completed_at)
+                .is_none(),
+            "running action should clear completed_at"
+        );
+        assert!(
+            !state.ready_queue.contains(&action_result.node_id),
+            "running action should be removed from ready_queue"
+        );
+        assert!(state.consume_graph_dirty_for_durable_execution());
+
+        state
+            .mark_completed(action_result.node_id)
+            .expect("mark completed");
+        let completed_at = state
+            .nodes
+            .get(&action_result.node_id)
+            .and_then(|node| node.completed_at);
+        assert!(
+            completed_at.is_some(),
+            "completed action should record completed_at"
+        );
+        assert!(
+            completed_at >= started_at,
+            "completed_at should be at or after started_at"
+        );
+        assert!(state.consume_graph_dirty_for_durable_execution());
+    }
+}
diff --git a/crates/runner-state/src/util.rs b/crates/runner-state/src/util.rs
new file mode 100644
index 00000000..20768070
--- /dev/null
+++ b/crates/runner-state/src/util.rs
@@ -0,0 +1,12 @@
+// Python-style truthiness for JSON values: empty/zero/null are falsy.
+pub(crate) fn is_truthy(value: &serde_json::Value) -> bool {
+    match value {
+        serde_json::Value::Null => false,
+        serde_json::Value::Bool(value) => *value,
+        // Non-finite numbers fall back to false via unwrap_or.
+        serde_json::Value::Number(number) => {
+            number.as_f64().map(|value| value != 0.0).unwrap_or(false)
+        }
+        serde_json::Value::String(value) => !value.is_empty(),
+        serde_json::Value::Array(values) => !values.is_empty(),
+        serde_json::Value::Object(map) => !map.is_empty(),
+    }
+}
diff --git a/crates/runner-state/src/value_visitor.rs b/crates/runner-state/src/value_visitor.rs
new file mode 100644
index 00000000..fbc7736a
--- /dev/null
+++ b/crates/runner-state/src/value_visitor.rs
@@ -0,0 +1,533 @@
+//! Shared ValueExpr visitors for traversal, resolution, and evaluation.
+
+use std::collections::{HashMap, HashSet};
+
+use serde::{Deserialize, Serialize};
+use uuid::Uuid;
+
+use super::state::{
+    ActionCallSpec, ActionResultValue, BinaryOpValue, DictEntryValue, DictValue, DotValue,
+    FunctionCallValue, IndexValue, ListValue, LiteralValue, SpreadValue, UnaryOpValue,
+    VariableValue,
+};
+
+#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
+#[serde(tag = "type", content = "data")]
+pub enum ValueExpr {
+    Literal(LiteralValue),
+    Variable(VariableValue),
+    ActionResult(ActionResultValue),
+    BinaryOp(BinaryOpValue),
+    UnaryOp(UnaryOpValue),
+    List(ListValue),
+    Dict(DictValue),
+    Index(IndexValue),
+    Dot(DotValue),
+    FunctionCall(FunctionCallValue),
+    Spread(SpreadValue),
+}
+
+/// Resolve variables inside a ValueExpr tree without executing actions.
+///
+/// Example IR:
+/// - y = x + 1 (where x -> LiteralValue(2))
+/// Produces BinaryOpValue(LiteralValue(2), +, LiteralValue(1)).
+// NOTE(review): generic arguments in several signatures below appear stripped
+// by patch extraction (e.g. `&mut HashSet` is presumably `HashSet<String>`,
+// `Option` presumably `Option<Uuid>`, and the evaluator's `Result` returns
+// presumably `Result<serde_json::Value, E>`) -- restore from the original.
+pub struct ValueExprResolver<'a> {
+    resolve_variable: &'a dyn Fn(&str, &mut HashSet) -> ValueExpr,
+    // Tracks variable names already being resolved, shared with the callback.
+    seen: &'a mut HashSet,
+}
+
+impl<'a> ValueExprResolver<'a> {
+    pub fn new(
+        resolve_variable: &'a dyn Fn(&str, &mut HashSet) -> ValueExpr,
+        seen: &'a mut HashSet,
+    ) -> Self {
+        Self {
+            resolve_variable,
+            seen,
+        }
+    }
+
+    // Rebuilds the tree bottom-up; only Variable leaves are substituted.
+    pub fn visit(&mut self, expr: &ValueExpr) -> ValueExpr {
+        match expr {
+            ValueExpr::Literal(value) => ValueExpr::Literal(value.clone()),
+            ValueExpr::Variable(value) => (self.resolve_variable)(&value.name, self.seen),
+            ValueExpr::ActionResult(value) => ValueExpr::ActionResult(value.clone()),
+            ValueExpr::BinaryOp(value) => ValueExpr::BinaryOp(BinaryOpValue {
+                left: Box::new(self.visit(&value.left)),
+                op: value.op,
+                right: Box::new(self.visit(&value.right)),
+            }),
+            ValueExpr::UnaryOp(value) => ValueExpr::UnaryOp(UnaryOpValue {
+                op: value.op,
+                operand: Box::new(self.visit(&value.operand)),
+            }),
+            ValueExpr::List(value) => ValueExpr::List(ListValue {
+                elements: value.elements.iter().map(|item| self.visit(item)).collect(),
+            }),
+            ValueExpr::Dict(value) => ValueExpr::Dict(DictValue {
+                entries: value
+                    .entries
+                    .iter()
+                    .map(|entry| DictEntryValue {
+                        key: self.visit(&entry.key),
+                        value: self.visit(&entry.value),
+                    })
+                    .collect(),
+            }),
+            ValueExpr::Index(value) => ValueExpr::Index(IndexValue {
+                object: Box::new(self.visit(&value.object)),
+                index: Box::new(self.visit(&value.index)),
+            }),
+            ValueExpr::Dot(value) => ValueExpr::Dot(DotValue {
+                object: Box::new(self.visit(&value.object)),
+                attribute: value.attribute.clone(),
+            }),
+            ValueExpr::FunctionCall(value) => ValueExpr::FunctionCall(FunctionCallValue {
+                name: value.name.clone(),
+                args: value.args.iter().map(|arg| self.visit(arg)).collect(),
+                kwargs: value
+                    .kwargs
+                    .iter()
+                    .map(|(name, arg)| (name.clone(), self.visit(arg)))
+                    .collect(),
+                global_function: value.global_function,
+            }),
+            ValueExpr::Spread(value) => {
+                let kwargs = value
+                    .action
+                    .kwargs
+                    .iter()
+                    .map(|(name, arg)| (name.clone(), self.visit(arg)))
+                    .collect::>();
+                let action = ActionCallSpec {
+                    action_name: value.action.action_name.clone(),
+                    module_name: value.action.module_name.clone(),
+                    kwargs,
+                };
+                ValueExpr::Spread(SpreadValue {
+                    collection: Box::new(self.visit(&value.collection)),
+                    loop_var: value.loop_var.clone(),
+                    action,
+                })
+            }
+        }
+    }
+}
+
+/// Collect execution node ids that supply data to a ValueExpr tree.
+///
+/// Example IR:
+/// - total = a + @sum(values)
+/// Returns the node ids that last defined `a` and the action node for sum().
+pub struct ValueExprSourceCollector<'a> {
+    resolve_variable: &'a dyn Fn(&str) -> Option,
+}
+
+impl<'a> ValueExprSourceCollector<'a> {
+    pub fn new(resolve_variable: &'a dyn Fn(&str) -> Option) -> Self {
+        Self { resolve_variable }
+    }
+
+    // Unions node ids from every subtree; literals contribute nothing.
+    pub fn visit(&self, expr: &ValueExpr) -> HashSet {
+        match expr {
+            ValueExpr::Literal(_) => HashSet::new(),
+            ValueExpr::Variable(value) => {
+                (self.resolve_variable)(&value.name).into_iter().collect()
+            }
+            ValueExpr::ActionResult(value) => [value.node_id].into_iter().collect(),
+            ValueExpr::BinaryOp(value) => {
+                let mut sources = self.visit(&value.left);
+                sources.extend(self.visit(&value.right));
+                sources
+            }
+            ValueExpr::UnaryOp(value) => self.visit(&value.operand),
+            ValueExpr::List(value) => {
+                let mut sources = HashSet::new();
+                for item in &value.elements {
+                    sources.extend(self.visit(item));
+                }
+                sources
+            }
+            ValueExpr::Dict(value) => {
+                let mut sources = HashSet::new();
+                for entry in &value.entries {
+                    sources.extend(self.visit(&entry.key));
+                    sources.extend(self.visit(&entry.value));
+                }
+                sources
+            }
+            ValueExpr::Index(value) => {
+                let mut sources = self.visit(&value.object);
+                sources.extend(self.visit(&value.index));
+                sources
+            }
+            ValueExpr::Dot(value) => self.visit(&value.object),
+            ValueExpr::FunctionCall(value) => {
+                let mut sources = HashSet::new();
+                for arg in &value.args {
+                    sources.extend(self.visit(arg));
+                }
+                for arg in value.kwargs.values() {
+                    sources.extend(self.visit(arg));
+                }
+                sources
+            }
+            ValueExpr::Spread(value) => {
+                let mut sources = self.visit(&value.collection);
+                for arg in value.action.kwargs.values() {
+                    sources.extend(self.visit(arg));
+                }
+                sources
+            }
+        }
+    }
+}
+
+/// Evaluate ValueExpr nodes into concrete Python values.
+///
+/// Example:
+/// - BinaryOpValue(VariableValue("a"), +, LiteralValue(1)) becomes the
+/// current value of a plus 1.
+// All behavior is injected via callbacks so the evaluator stays decoupled
+// from runner state and its error type E.
+pub struct ValueExprEvaluator<'a, E> {
+    resolve_variable: &'a dyn Fn(&str) -> Result,
+    resolve_action_result: &'a dyn Fn(&ActionResultValue) -> Result,
+    resolve_function_call: &'a ResolveFunctionCall<'a, E>,
+    apply_binary:
+        &'a dyn Fn(i32, serde_json::Value, serde_json::Value) -> Result,
+    apply_unary: &'a dyn Fn(i32, serde_json::Value) -> Result,
+    error_factory: &'a dyn Fn(&str) -> E,
+}
+
+type ResolveFunctionCall<'a, E> = dyn Fn(
+        &FunctionCallValue,
+        Vec,
+        HashMap,
+    ) -> Result
+    + 'a;
+
+impl<'a, E> ValueExprEvaluator<'a, E> {
+    pub fn new(
+        resolve_variable: &'a dyn Fn(&str) -> Result,
+        resolve_action_result: &'a dyn Fn(&ActionResultValue) -> Result,
+        resolve_function_call: &'a ResolveFunctionCall<'a, E>,
+        apply_binary: &'a dyn Fn(
+            i32,
+            serde_json::Value,
+            serde_json::Value,
+        ) -> Result,
+        apply_unary: &'a dyn Fn(i32, serde_json::Value) -> Result,
+        error_factory: &'a dyn Fn(&str) -> E,
+    ) -> Self {
+        Self {
+            resolve_variable,
+            resolve_action_result,
+            resolve_function_call,
+            apply_binary,
+            apply_unary,
+            error_factory,
+        }
+    }
+
+    pub fn visit(&self, expr: &ValueExpr) -> Result {
+        match expr {
+            ValueExpr::Literal(value) => Ok(value.value.clone()),
+            ValueExpr::Variable(value) => (self.resolve_variable)(&value.name),
+            ValueExpr::ActionResult(value) => (self.resolve_action_result)(value),
+            ValueExpr::BinaryOp(value) => {
+                let left = self.visit(&value.left)?;
+                let right = self.visit(&value.right)?;
+                (self.apply_binary)(value.op, left, right)
+            }
+            ValueExpr::UnaryOp(value) => {
+                let operand = self.visit(&value.operand)?;
+                (self.apply_unary)(value.op, operand)
+            }
+            ValueExpr::List(value) => {
+                let mut items = Vec::with_capacity(value.elements.len());
+                for item in &value.elements {
+                    items.push(self.visit(item)?);
+                }
+                Ok(serde_json::Value::Array(items))
+            }
+            ValueExpr::Dict(value) => {
+                let mut map = serde_json::Map::with_capacity(value.entries.len());
+                for entry in &value.entries {
+                    let key_value = self.visit(&entry.key)?;
+                    // Non-string keys fall back to their JSON rendering.
+                    let key = key_value
+                        .as_str()
+                        .map(|value| value.to_string())
+                        .unwrap_or_else(|| key_value.to_string());
+                    let entry_value = self.visit(&entry.value)?;
+                    map.insert(key, entry_value);
+                }
+                Ok(serde_json::Value::Object(map))
+            }
+            ValueExpr::Index(value) => {
+                let object = self.visit(&value.object)?;
+                let index = self.visit(&value.index)?;
+                match (object, index) {
+                    (serde_json::Value::Array(items), serde_json::Value::Number(idx)) => {
+                        // Non-integer numbers map to -1 and fail the range check.
+                        let idx = idx.as_i64().unwrap_or(-1);
+                        if idx < 0 || idx as usize >= items.len() {
+                            return Err((self.error_factory)("index out of range"));
+                        }
+                        Ok(items[idx as usize].clone())
+                    }
+                    (serde_json::Value::Object(map), serde_json::Value::String(key)) => map
+                        .get(&key)
+                        .cloned()
+                        .or_else(|| lookup_exception_value(&map, &key))
+                        .ok_or_else(|| (self.error_factory)("dict has no key")),
+                    _ => Err((self.error_factory)("unsupported index operation")),
+                }
+            }
+            ValueExpr::Dot(value) => {
+                let object = self.visit(&value.object)?;
+                if let serde_json::Value::Object(map) = object {
+                    return map
+                        .get(&value.attribute)
+                        .cloned()
+                        .or_else(|| lookup_exception_value(&map, &value.attribute))
+                        .ok_or_else(|| (self.error_factory)("dict has no key"));
+                }
+                Err((self.error_factory)("attribute not found"))
+            }
+            ValueExpr::FunctionCall(value) => {
+                let mut args = Vec::with_capacity(value.args.len());
+                for arg in &value.args {
+                    args.push(self.visit(arg)?);
+                }
+                let mut kwargs = HashMap::new();
+                for (name, arg) in &value.kwargs {
+                    kwargs.insert(name.clone(), self.visit(arg)?);
+                }
+                (self.resolve_function_call)(value, args, kwargs)
+            }
+            ValueExpr::Spread(_) => Err((self.error_factory)(
+                "cannot replay unresolved spread expression",
+            )),
+        }
+    }
+}
+
+// Only dicts shaped like serialized exceptions (with "type" and "message"
+// keys) expose their nested "values" object for attribute/key lookup.
+fn lookup_exception_value(
+    map: &serde_json::Map,
+    key: &str,
+) -> Option {
+    if !(map.contains_key("type") && map.contains_key("message")) {
+        return None;
+    }
+    map.get("values")
+        .and_then(|value| value.as_object())
+        .and_then(|values| values.get(key))
+        .cloned()
+}
+
+/// Recursively resolve variable references throughout a value tree.
+///
+/// Use this as the core materialization step before assignment storage.
+///
+/// Example IR:
+/// - z = (x + y) * 2
+/// The tree walk replaces VariableValue("x")/("y") with their latest
+/// symbolic definitions before storing z.
+pub fn resolve_value_tree(
+    value: &ValueExpr,
+    resolve_variable: &dyn Fn(&str, &mut HashSet) -> ValueExpr,
+) -> ValueExpr {
+    let mut seen = HashSet::new();
+    let mut resolver = ValueExprResolver::new(resolve_variable, &mut seen);
+    resolver.visit(value)
+}
+
+/// Find execution node ids that supply data to the given value.
+///
+/// Example IR:
+/// - total = a + @sum(values)
+/// Returns the latest assignment node for a and the action node for sum().
+pub fn collect_value_sources( + value: &ValueExpr, + resolve_variable: &dyn Fn(&str) -> Option, +) -> HashSet { + let collector = ValueExprSourceCollector::new(resolve_variable); + collector.visit(value) +} + +#[cfg(test)] +mod tests { + use std::collections::{HashMap, HashSet}; + + use serde_json::Value; + use uuid::Uuid; + + use super::*; + use waymark_proto::ast as ir; + + fn literal_int(value: i64) -> ValueExpr { + ValueExpr::Literal(LiteralValue { + value: Value::Number(value.into()), + }) + } + + #[test] + fn test_value_expr_resolver_visit_happy_path() { + let mut seen = HashSet::new(); + let resolve = |name: &str, _: &mut HashSet| { + if name == "x" { + literal_int(3) + } else { + literal_int(0) + } + }; + let mut resolver = ValueExprResolver::new(&resolve, &mut seen); + let expr = ValueExpr::BinaryOp(BinaryOpValue { + left: Box::new(ValueExpr::Variable(VariableValue { + name: "x".to_string(), + })), + op: ir::BinaryOperator::BinaryOpAdd as i32, + right: Box::new(literal_int(1)), + }); + + let resolved = resolver.visit(&expr); + match resolved { + ValueExpr::BinaryOp(value) => { + assert!(matches!(*value.left, ValueExpr::Literal(_))); + assert!(matches!(*value.right, ValueExpr::Literal(_))); + } + other => panic!("expected binary value, got {other:?}"), + } + } + + #[test] + fn test_value_expr_source_collector_visit_happy_path() { + let variable_source = Uuid::new_v4(); + let action_source = Uuid::new_v4(); + let resolve = |name: &str| { + if name == "x" { + Some(variable_source) + } else { + None + } + }; + let collector = ValueExprSourceCollector::new(&resolve); + let expr = ValueExpr::BinaryOp(BinaryOpValue { + left: Box::new(ValueExpr::Variable(VariableValue { + name: "x".to_string(), + })), + op: ir::BinaryOperator::BinaryOpAdd as i32, + right: Box::new(ValueExpr::ActionResult(ActionResultValue { + node_id: action_source, + action_name: "fetch".to_string(), + iteration_index: None, + result_index: None, + })), + }); + + let sources = 
collector.visit(&expr); + assert!(sources.contains(&variable_source)); + assert!(sources.contains(&action_source)); + } + + #[test] + fn test_value_expr_evaluator_visit_happy_path() { + let resolve_variable = |name: &str| -> Result { + if name == "x" { + Ok(Value::Number(2.into())) + } else { + Err(format!("unknown variable: {name}")) + } + }; + let resolve_action_result = + |_value: &ActionResultValue| -> Result { Ok(Value::Number(0.into())) }; + let resolve_function_call = + |_call: &FunctionCallValue, + args: Vec, + _kwargs: HashMap| + -> Result { Ok(Value::Number((args.len() as i64).into())) }; + let apply_binary = |_op: i32, left: Value, right: Value| -> Result { + match (left.as_i64(), right.as_i64()) { + (Some(left), Some(right)) => Ok(Value::Number((left + right).into())), + _ => Err("bad operands".to_string()), + } + }; + let apply_unary = |_op: i32, value: Value| -> Result { + Ok(Value::Bool(!value.as_bool().unwrap_or(false))) + }; + let error_factory = |message: &str| message.to_string(); + + let evaluator = ValueExprEvaluator::new( + &resolve_variable, + &resolve_action_result, + &resolve_function_call, + &apply_binary, + &apply_unary, + &error_factory, + ); + let expr = ValueExpr::BinaryOp(BinaryOpValue { + left: Box::new(ValueExpr::Variable(VariableValue { + name: "x".to_string(), + })), + op: ir::BinaryOperator::BinaryOpAdd as i32, + right: Box::new(literal_int(5)), + }); + + let value = evaluator.visit(&expr).expect("evaluate expression"); + assert_eq!(value, Value::Number(7.into())); + } + + #[test] + fn test_resolve_value_tree_happy_path() { + let expr = ValueExpr::List(ListValue { + elements: vec![ValueExpr::Variable(VariableValue { + name: "user_id".to_string(), + })], + }); + let resolve = |name: &str, _seen: &mut HashSet| { + if name == "user_id" { + ValueExpr::Literal(LiteralValue { + value: Value::String("abc".to_string()), + }) + } else { + ValueExpr::Literal(LiteralValue { value: Value::Null }) + } + }; + + let resolved = 
resolve_value_tree(&expr, &resolve); + match resolved { + ValueExpr::List(list) => { + assert_eq!(list.elements.len(), 1); + assert!(matches!(list.elements[0], ValueExpr::Literal(_))); + } + other => panic!("expected list value, got {other:?}"), + } + } + + #[test] + fn test_collect_value_sources_happy_path() { + let source_a = Uuid::new_v4(); + let source_b = Uuid::new_v4(); + let expr = ValueExpr::FunctionCall(FunctionCallValue { + name: "sum".to_string(), + args: vec![ValueExpr::Variable(VariableValue { + name: "a".to_string(), + })], + kwargs: HashMap::from([( + "other".to_string(), + ValueExpr::ActionResult(ActionResultValue { + node_id: source_b, + action_name: "compute".to_string(), + iteration_index: None, + result_index: None, + }), + )]), + global_function: None, + }); + let resolve = |name: &str| if name == "a" { Some(source_a) } else { None }; + + let sources = collect_value_sources(&expr, &resolve); + assert_eq!(sources.len(), 2); + assert!(sources.contains(&source_a)); + assert!(sources.contains(&source_b)); + } +} diff --git a/crates/runner/Cargo.toml b/crates/runner/Cargo.toml new file mode 100644 index 00000000..115de256 --- /dev/null +++ b/crates/runner/Cargo.toml @@ -0,0 +1,24 @@ +[package] +name = "waymark-runner" +version = "0.1.0" +edition = "2024" + +[dependencies] +chrono = { workspace = true, features = ["serde"] } +rustc-hash = { workspace = true } +serde_json = { workspace = true } +thiserror = { workspace = true } +uuid = { workspace = true } +waymark-dag = { workspace = true } +waymark-proto = { workspace = true } +waymark-observability = { workspace = true } +waymark-runner-state = { workspace = true } +waymark-core-backend = { workspace = true } +tracing = { workspace = true } + +[dev-dependencies] +waymark-ir-parser = { workspace = true } +waymark-backend-memory = { workspace = true } + +[features] +trace = [] diff --git a/crates/runner/src/executor.rs b/crates/runner/src/executor.rs new file mode 100644 index 00000000..bae9a9c2 --- 
/dev/null
+++ b/crates/runner/src/executor.rs
@@ -0,0 +1,3015 @@
//! Incremental DAG executor for runner state graphs.

use std::cell::RefCell;
use std::collections::{HashMap, HashSet};
use std::sync::Arc;
use std::time::Duration;

use chrono::{DateTime, Utc};
use rustc_hash::FxHashMap;
use serde_json::Value;
use uuid::Uuid;

use crate::expression_evaluator::is_exception_value;
use crate::retry::{RetryDecision, RetryPolicyEvaluator, timeout_seconds_from_policies};
use crate::synthetic_exceptions::{SyntheticExceptionType, build_synthetic_exception_value};
use waymark_core_backend::{ActionAttemptStatus, ActionDone, CoreBackend, GraphUpdate};
use waymark_dag::{
    ActionCallNode, AggregatorNode, DAG, DAGEdge, DagEdgeIndex, EXCEPTION_SCOPE_VAR, EdgeType,
};
use waymark_observability::obs;
use waymark_proto::ast as ir;
use waymark_runner_state::value_visitor::ValueExpr;
use waymark_runner_state::{
    ActionCallSpec, ExecutionEdge, ExecutionNode, ExecutionNodeType, IndexValue, ListValue,
    LiteralValue, NodeStatus, QueueNodeParams, RunnerState, RunnerStateError,
};

/// Raised when the runner executor cannot advance safely.
#[derive(Debug, thiserror::Error)]
#[error("{0}")]
pub struct RunnerExecutorError(pub String);

/// Persistence payloads required before dispatching new actions.
///
/// These must reach the backends first so that any inflight actions can be
/// marked as failed before they are queued up again.
#[derive(Clone, Debug)]
pub struct DurableUpdates {
    pub actions_done: Vec<ActionDone>,
    pub graph_updates: Vec<GraphUpdate>,
}

/// Return value for executor steps with newly queued action nodes.
#[derive(Clone, Debug)]
pub struct ExecutorStep {
    pub actions: Vec<ExecutionNode>,
    pub sleep_requests: Vec<SleepRequest>,
    pub updates: Option<DurableUpdates>,
}

#[derive(Clone, Debug)]
/// Sleep requests emitted by the executor with wake-up times.
+pub struct SleepRequest { + pub node_id: Uuid, + pub wake_at: DateTime, +} + +/// Action result payloads keyed by execution node id. +type ExecutionResultMap = HashMap; + +struct FinishedNodeOutcome { + /// Node to continue graph traversal from. + start: Option, + /// Exception payload forwarded to exception edges. + exception_value: Option, + /// Durable attempt metadata for this finished action (if applicable). + action_done: Option, + /// Retry action to dispatch immediately after state transition. + retry_action: Option, +} + +#[derive(Default)] +struct IncrementAccumulator { + actions_done: Vec, + pending_starts: Vec<(ExecutionNode, Option)>, + actions: Vec, + sleep_requests: Vec, + seen_actions: HashSet, + seen_sleep_nodes: HashSet, +} + +impl IncrementAccumulator { + fn absorb_finished_outcome(&mut self, outcome: FinishedNodeOutcome) { + if let Some(start) = outcome.start { + self.pending_starts.push((start, outcome.exception_value)); + } + if let Some(done) = outcome.action_done { + self.actions_done.push(done); + } + if let Some(retry_action) = outcome.retry_action { + self.record_action(retry_action); + } + } + + fn record_action(&mut self, action: ExecutionNode) { + // Multiple finished nodes can converge on the same queued action. + if self.seen_actions.insert(action.node_id) { + self.actions.push(action); + } + } + + fn record_sleep_request(&mut self, sleep_request: SleepRequest) { + if self.seen_sleep_nodes.insert(sleep_request.node_id) { + self.sleep_requests.push(sleep_request); + } + } +} + +struct WalkOutcome { + actions: Vec, + sleep_requests: Vec, +} + +struct FinishedActionMetadata { + attempt: i32, + started_at: Option>, + result: Value, +} + +enum ActionFailureTransition { + RetryQueued(Box), + Failed, +} + +enum TemplateKind { + SpreadAction(Box), + Aggregator(String), + Regular(String), +} + +enum SleepDecision { + Completed, + Blocked(DateTime), +} + +/// Advance a DAG template using the current runner state and action results. 
+/// +/// The executor treats the DAG as a control-flow template. It queues runtime +/// execution nodes into RunnerState, unrolling loops/spreads into explicit +/// iterations, and stops when it encounters action calls that must be executed +/// by an external worker. +/// +/// This serves as a runner supervisor for a single instance that's owned +/// in memory by our logic. +/// +/// Each call to increment() starts from finished execution nodes, walks +/// downstream through inline nodes (assignments, branches, joins, etc.), and +/// returns any newly queued action nodes that are now unblocked. +pub struct RunnerExecutor { + dag: Arc, + state: RunnerState, + action_results: ExecutionResultMap, + backend: Option>, + template_index: DagEdgeIndex, + incoming_exec_edges: FxHashMap>, + /// Index: template_id -> list of execution node IDs with that template + template_to_exec_nodes: FxHashMap>, + /// Cached assignment evaluations for the current increment pass. + /// Cleared at the start of each increment call. + eval_cache: RefCell>, + instance_id: Option, + terminal_error: Option, +} + +impl RunnerExecutor { + pub fn new( + dag: Arc, + state: RunnerState, + // Action results keyed by execution node id. 
+ action_results: ExecutionResultMap, + backend: Option>, + ) -> Self { + let mut state = state; + state.dag = Some(dag.clone()); + state.set_link_queued_nodes(false); + + let template_index = dag.edge_index(); + let incoming_exec_edges = Self::build_incoming_exec_edges(&state); + let template_to_exec_nodes = Self::build_template_to_exec_nodes(&state); + + Self { + dag, + state, + action_results, + backend, + template_index, + incoming_exec_edges, + template_to_exec_nodes, + eval_cache: RefCell::new(FxHashMap::default()), + instance_id: None, + terminal_error: None, + } + } + + pub fn state(&self) -> &RunnerState { + &self.state + } + + pub fn state_mut(&mut self) -> &mut RunnerState { + &mut self.state + } + + pub fn dag(&self) -> &DAG { + &self.dag + } + + pub fn action_results(&self) -> &ExecutionResultMap { + &self.action_results + } + + pub fn instance_id(&self) -> Option { + self.instance_id + } + + pub fn set_instance_id(&mut self, instance_id: Uuid) { + self.instance_id = Some(instance_id); + } + + pub fn terminal_error(&self) -> Option<&Value> { + self.terminal_error.as_ref() + } + + pub(super) fn eval_cache_get(&self, key: &(Uuid, String)) -> Option { + self.eval_cache.borrow().get(key).cloned() + } + + pub(super) fn eval_cache_insert(&self, key: (Uuid, String), value: Value) { + self.eval_cache.borrow_mut().insert(key, value); + } + + /// Store an action result value for a specific execution node id. + pub fn set_action_result(&mut self, node_id: Uuid, result: Value) { + self.action_results.insert(node_id, result); + } + + /// Remove any cached action result for a specific execution node. + /// Used when re-queuing an action so we don't replay stale results. + pub fn clear_action_result(&mut self, node_id: Uuid) { + self.action_results.remove(&node_id); + } + + /// Resolve timeout policy seconds for an action node. 
+ pub fn action_timeout_seconds(&self, node_id: Uuid) -> Result { + let node = self.execution_node(node_id)?; + if !node.is_action_call() { + return Ok(0); + } + let Some(action_template) = self.template_action_for_execution_node(node)? else { + return Ok(0); + }; + Ok(timeout_seconds_from_policies(&action_template.policies).unwrap_or(0)) + } + + /// Fail inflight actions and return any that should be retried. + /// + /// Use this after recovering from a crash: running actions are treated as + /// failed, their attempt counter is incremented if retry policies allow, + /// and retryable nodes are re-queued for execution. + pub fn resume(&mut self) -> Result { + let mut finished_nodes = Vec::new(); + for (node_id, node) in &self.state.nodes { + if node.is_action_call() && node.status == NodeStatus::Running { + finished_nodes.push(*node_id); + self.action_results.insert( + *node_id, + build_synthetic_exception_value( + SyntheticExceptionType::ExecutorResume, + format!( + "action {node_id} was running during resume and is treated as failed" + ), + Vec::new(), + ), + ); + } + } + if finished_nodes.is_empty() { + let updates = self.collect_updates(Vec::new())?; + return Ok(ExecutorStep { + actions: Vec::new(), + sleep_requests: Vec::new(), + updates, + }); + } + self.increment(&finished_nodes) + } + + /// Advance execution for finished nodes in a single batch. + /// + /// Use this when multiple actions complete in the same tick so the graph + /// update and action inserts are persisted together. + #[obs] + pub fn increment( + &mut self, + finished_nodes: &[Uuid], + ) -> Result { + self.eval_cache.borrow_mut().clear(); + let mut accum = IncrementAccumulator::default(); + self.collect_increment_results(finished_nodes, &mut accum)?; + self.walk_pending_starts(&mut accum)?; + + let IncrementAccumulator { + actions_done, + actions, + sleep_requests, + .. 
+ } = accum; + let running_actions = self.mark_actions_running(&actions)?; + let updates = self.collect_updates(actions_done)?; + + // Note: Action timeouts and delayed retries require wall-clock tracking in the run loop. + // The executor only handles timeout failures once they surface as action results. + + Ok(ExecutorStep { + actions: running_actions, + sleep_requests, + updates, + }) + } + + fn collect_increment_results( + &mut self, + finished_nodes: &[Uuid], + accum: &mut IncrementAccumulator, + ) -> Result<(), RunnerExecutorError> { + for &node_id in finished_nodes { + accum.absorb_finished_outcome(self.apply_finished_node(node_id)?); + } + Ok(()) + } + + fn walk_pending_starts( + &mut self, + accum: &mut IncrementAccumulator, + ) -> Result<(), RunnerExecutorError> { + while let Some((start, exception_value)) = accum.pending_starts.pop() { + let outcome = self.walk_from(start, exception_value)?; + for action in outcome.actions { + accum.record_action(action); + } + for sleep_request in outcome.sleep_requests { + accum.record_sleep_request(sleep_request); + } + } + Ok(()) + } + + fn mark_actions_running( + &mut self, + actions: &[ExecutionNode], + ) -> Result, RunnerExecutorError> { + let mut running_actions = Vec::with_capacity(actions.len()); + for action in actions { + self.clear_action_result(action.node_id); + self.state + .mark_running(action.node_id) + .map_err(Self::state_error)?; + running_actions.push(self.execution_node_clone(action.node_id)?); + } + Ok(running_actions) + } + + /// Walk downstream from a node, executing inline nodes until blocked by an action node. 
+ #[obs] + fn walk_from( + &mut self, + node: ExecutionNode, + exception_value: Option, + ) -> Result { + let mut pending = vec![(node, exception_value)]; + let mut actions = Vec::new(); + let mut sleep_requests = Vec::new(); + let mut forwarded_completed: HashSet = HashSet::new(); + + while let Some((current, current_exception)) = pending.pop() { + // template_id is the DAG node id, not the execution id. + let template_node_id = match ¤t.template_id { + Some(id) => id, + None => continue, + }; + let edges = if let Some(template_edges) = self.template_index.outgoing(template_node_id) + { + self.select_edges(template_edges, ¤t, current_exception)? + } else { + continue; + }; + for edge in edges { + let successors = self.queue_successor(¤t, &edge)?; + for successor in successors { + self.handle_walk_successor( + successor, + &mut pending, + &mut actions, + &mut sleep_requests, + &mut forwarded_completed, + )?; + } + } + } + Ok(WalkOutcome { + actions, + sleep_requests, + }) + } + + fn handle_walk_successor( + &mut self, + successor: ExecutionNode, + pending: &mut Vec<(ExecutionNode, Option)>, + actions: &mut Vec, + sleep_requests: &mut Vec, + forwarded_completed: &mut HashSet, + ) -> Result<(), RunnerExecutorError> { + if self.forward_completed_successor(&successor, pending, forwarded_completed) { + return Ok(()); + } + if successor.is_action_call() { + actions.push(successor); + return Ok(()); + } + if successor.is_sleep() { + self.handle_sleep_successor(successor, pending, sleep_requests)?; + return Ok(()); + } + self.handle_inline_successor(successor, pending) + } + + fn forward_completed_successor( + &self, + successor: &ExecutionNode, + pending: &mut Vec<(ExecutionNode, Option)>, + forwarded_completed: &mut HashSet, + ) -> bool { + if successor.status != NodeStatus::Completed { + return false; + } + if forwarded_completed.insert(successor.node_id) { + // Rehydrated runs can revisit completed paths to recover downstream + // sleep/action work without mutating 
already completed nodes. + pending.push((successor.clone(), None)); + } + true + } + + fn handle_sleep_successor( + &mut self, + successor: ExecutionNode, + pending: &mut Vec<(ExecutionNode, Option)>, + sleep_requests: &mut Vec, + ) -> Result<(), RunnerExecutorError> { + if !self.inline_ready(&successor) { + return Ok(()); + } + match self.handle_sleep_node(&successor)? { + SleepDecision::Completed => pending.push((successor, None)), + SleepDecision::Blocked(wake_at) => sleep_requests.push(SleepRequest { + node_id: successor.node_id, + wake_at, + }), + } + Ok(()) + } + + fn handle_inline_successor( + &mut self, + successor: ExecutionNode, + pending: &mut Vec<(ExecutionNode, Option)>, + ) -> Result<(), RunnerExecutorError> { + if !self.inline_ready(&successor) { + return Ok(()); + } + self.execute_inline_node(&successor)?; + pending.push((successor, None)); + Ok(()) + } + + /// Update state for a finished node and return replay metadata. + #[obs] + fn apply_finished_node( + &mut self, + node_id: Uuid, + ) -> Result { + if self.execution_node(node_id)?.is_action_call() { + return self.apply_finished_action_node(node_id); + } + // Non-action nodes are inline runtime steps; completion is a status flip. 
+ self.state + .mark_completed(node_id) + .map_err(Self::state_error)?; + Ok(FinishedNodeOutcome { + start: Some(self.execution_node_clone(node_id)?), + exception_value: None, + action_done: None, + retry_action: None, + }) + } + + fn apply_finished_action_node( + &mut self, + node_id: Uuid, + ) -> Result { + let metadata = self.finished_action_metadata(node_id)?; + if is_exception_value(&metadata.result) { + return self.apply_exception_action_completion(node_id, metadata); + } + self.apply_successful_action_completion(node_id, metadata) + } + + fn finished_action_metadata( + &self, + node_id: Uuid, + ) -> Result { + let node = self.execution_node(node_id)?; + let result = + self.action_results.get(&node_id).cloned().ok_or_else(|| { + RunnerExecutorError(format!("missing action result for {node_id}")) + })?; + Ok(FinishedActionMetadata { + attempt: node.action_attempt, + started_at: node.started_at, + result, + }) + } + + fn apply_successful_action_completion( + &mut self, + node_id: Uuid, + metadata: FinishedActionMetadata, + ) -> Result { + self.state + .mark_completed(node_id) + .map_err(Self::state_error)?; + let assignments = self.execution_node(node_id)?.assignments.clone(); + if !assignments.is_empty() { + self.state.mark_latest_assignments(node_id, &assignments); + } + let completed_at = self + .execution_node(node_id)? 
+ .completed_at + .unwrap_or_else(Utc::now); + let action_done = build_action_done( + node_id, + metadata.attempt, + ActionAttemptStatus::Completed, + metadata.started_at, + completed_at, + metadata.result, + ); + Ok(FinishedNodeOutcome { + start: Some(self.execution_node_clone(node_id)?), + exception_value: None, + action_done: Some(action_done), + retry_action: None, + }) + } + + fn apply_exception_action_completion( + &mut self, + node_id: Uuid, + metadata: FinishedActionMetadata, + ) -> Result { + let exception_value = metadata.result; + let status = action_done_status_for_exception(&exception_value); + let finished_at = Utc::now(); + + match self.apply_action_failure_transition(node_id, Some(&exception_value), finished_at)? { + ActionFailureTransition::RetryQueued(retry_action) => { + // Retries are re-queued and dispatched in this same increment pass. + let action_done = build_action_done( + node_id, + metadata.attempt, + status, + metadata.started_at, + finished_at, + exception_value, + ); + Ok(FinishedNodeOutcome { + start: None, + exception_value: None, + action_done: Some(action_done), + retry_action: Some(*retry_action), + }) + } + ActionFailureTransition::Failed => { + // Terminal failures keep exception payloads on the node so exception + // handler edges can bind $__exception in downstream inline nodes. + if !self.failure_has_exception_handler(node_id, &exception_value)? + && self.terminal_error.is_none() + { + self.terminal_error = Some(exception_value.clone()); + } + let completed_at = self + .execution_node(node_id)? 
+ .completed_at + .unwrap_or(finished_at); + let action_done = build_action_done( + node_id, + metadata.attempt, + status, + metadata.started_at, + completed_at, + exception_value.clone(), + ); + Ok(FinishedNodeOutcome { + start: Some(self.execution_node_clone(node_id)?), + exception_value: Some(exception_value), + action_done: Some(action_done), + retry_action: None, + }) + } + } + } + + fn apply_action_failure_transition( + &mut self, + node_id: Uuid, + exception_value: Option<&Value>, + finished_at: DateTime, + ) -> Result { + let should_retry = { + let node = self.execution_node(node_id)?; + self.retry_decision(node, exception_value)?.should_retry + }; + if should_retry { + let retry_node = self.transition_action_to_retry(node_id, finished_at)?; + return Ok(ActionFailureTransition::RetryQueued(Box::new(retry_node))); + } + self.transition_action_to_failed(node_id, exception_value, finished_at)?; + Ok(ActionFailureTransition::Failed) + } + + fn transition_action_to_retry( + &mut self, + node_id: Uuid, + finished_at: DateTime, + ) -> Result { + // Retry transition invariants: + // 1) bump attempt counter before re-dispatch + // 2) return to queued status + // 3) keep completion timestamp for the failed attempt + self.state + .increment_action_attempt(node_id) + .map_err(Self::state_error)?; + let should_queue = !self.state.ready_queue.contains(&node_id); + { + let node = self.execution_node_mut(node_id)?; + node.status = NodeStatus::Queued; + node.started_at = None; + node.completed_at = Some(finished_at); + } + if should_queue { + self.state.ready_queue.push(node_id); + } + self.execution_node_clone(node_id) + } + + fn transition_action_to_failed( + &mut self, + node_id: Uuid, + exception_value: Option<&Value>, + finished_at: DateTime, + ) -> Result<(), RunnerExecutorError> { + self.state.mark_failed(node_id).map_err(Self::state_error)?; + self.execution_node_mut(node_id)?.completed_at = Some(finished_at); + if let Some(exception_value) = exception_value { + 
self.assign_exception_scope(node_id, exception_value.clone())?;
        }
        Ok(())
    }

    /// Bind the exception payload to the reserved exception-scope variable on
    /// the failed node so downstream handler edges can reference it.
    fn assign_exception_scope(
        &mut self,
        node_id: Uuid,
        exception_value: Value,
    ) -> Result<(), RunnerExecutorError> {
        let literal = ValueExpr::Literal(LiteralValue {
            value: exception_value,
        });
        let scoped = HashMap::from([(EXCEPTION_SCOPE_VAR.to_string(), literal.clone())]);
        self.execution_node_mut(node_id)?
            .assignments
            .insert(EXCEPTION_SCOPE_VAR.to_string(), literal);
        self.state.mark_latest_assignments(node_id, &scoped);
        Ok(())
    }

    /// Whether a terminal failure on `node_id` has a matching exception
    /// handler edge in the template graph.
    fn failure_has_exception_handler(
        &self,
        node_id: Uuid,
        exception_value: &Value,
    ) -> Result<bool, RunnerExecutorError> {
        let node = self.execution_node(node_id)?;
        let Some(template_id) = &node.template_id else {
            return Ok(false);
        };
        let Some(template_edges) = self.template_index.outgoing(template_id) else {
            return Ok(false);
        };
        let selected = self.select_edges(template_edges, node, Some(exception_value.clone()))?;
        Ok(selected
            .iter()
            .any(|edge| edge.edge_type == EdgeType::StateMachine))
    }

    /// Evaluate the node's retry policies against the current attempt count.
    fn retry_decision(
        &self,
        node: &ExecutionNode,
        exception_value: Option<&Value>,
    ) -> Result<RetryDecision, RunnerExecutorError> {
        let Some(action) = self.template_action_for_execution_node(node)? else {
            // No template action means no policies: never retry.
            return Ok(RetryDecision {
                should_retry: false,
            });
        };
        let exception_name = exception_value.and_then(exception_type);
        let evaluator = RetryPolicyEvaluator::new(&action.policies, exception_name);
        Ok(evaluator.decision(node.action_attempt))
    }

    /// Select outgoing edges based on guards and exception state.
+ fn select_edges( + &self, + edges: &[DAGEdge], + _node: &ExecutionNode, + exception_value: Option, + ) -> Result, RunnerExecutorError> { + // Fast path: exception handling + if let Some(exception_value) = exception_value { + let mut result = Vec::new(); + for edge in edges { + if edge.exception_types.is_some() && self.exception_matches(edge, &exception_value) + { + result.push(edge.clone()); + } + } + return Ok(result); + } + + // Check if we have any conditional edges (guards or else) + let has_guards = edges.iter().any(|e| e.guard_expr.is_some()); + let has_else = edges.iter().any(|e| e.is_else); + + if has_guards || has_else { + // Evaluate guards first + let mut passed = Vec::new(); + for edge in edges { + if edge.guard_expr.is_some() && self.evaluate_guard(edge.guard_expr.as_ref())? { + passed.push(edge.clone()); + } + } + if !passed.is_empty() { + return Ok(passed); + } + // Fall through to else edges + let mut else_edges = Vec::new(); + for edge in edges { + if edge.is_else { + else_edges.push(edge.clone()); + } + } + return Ok(else_edges); + } + + // Fast path: regular edges (no exceptions, guards, or else) + let mut result = Vec::with_capacity(edges.len()); + for edge in edges { + if edge.exception_types.is_none() { + result.push(edge.clone()); + } + } + Ok(result) + } + + /// Queue successor nodes for a template edge, handling spreads/aggregators. 
+ fn queue_successor( + &mut self, + source: &ExecutionNode, + edge: &DAGEdge, + ) -> Result, RunnerExecutorError> { + if edge.edge_type != EdgeType::StateMachine { + return Ok(Vec::new()); + } + + // Extract info from template without holding borrow across mutable calls + let kind = { + let template = self.dag.nodes.get(&edge.target).ok_or_else(|| { + RunnerExecutorError(format!("template node not found: {}", edge.target)) + })?; + + match template { + waymark_dag::DAGNode::ActionCall(action) if action.spread_loop_var.is_some() => { + TemplateKind::SpreadAction(Box::new(action.clone())) + } + waymark_dag::DAGNode::Aggregator(_) => { + TemplateKind::Aggregator(template.id().to_string()) + } + _ => TemplateKind::Regular(template.id().to_string()), + } + }; + + match kind { + TemplateKind::SpreadAction(action) => { + self.expand_spread_action(source, action.as_ref()) + } + TemplateKind::Aggregator(template_id) => { + if let Some(existing) = self.find_connected_successor(source.node_id, &template_id) + { + return Ok(vec![existing]); + } + let agg_node = self.get_or_create_aggregator(&template_id)?; + self.add_exec_edge(source.node_id, agg_node.node_id); + Ok(vec![agg_node]) + } + TemplateKind::Regular(template_id) => { + if let Some(existing) = self.find_connected_successor(source.node_id, &template_id) + { + return Ok(vec![existing]); + } + let exec_node = self.get_or_create_exec_node(&template_id)?; + self.add_exec_edge(source.node_id, exec_node.node_id); + Ok(vec![exec_node]) + } + } + } + + /// Unroll a spread action into per-item action nodes and a shared aggregator. + /// + /// Example IR: + /// - results = spread items:item -> @work(item=item) + /// Produces one action execution node per element in items and connects + /// them to a single aggregator node for results. 
+ fn expand_spread_action( + &mut self, + source: &ExecutionNode, + template: &ActionCallNode, + ) -> Result, RunnerExecutorError> { + let collection_expr = template.spread_collection_expr.as_ref().ok_or_else(|| { + RunnerExecutorError("spread action missing collection expression".to_string()) + })?; + let loop_var = template.spread_loop_var.as_ref().ok_or_else(|| { + RunnerExecutorError("spread action missing loop variable".to_string()) + })?; + let elements = self.expand_collection(collection_expr)?; + let agg_id = template.aggregates_to.as_ref().ok_or_else(|| { + RunnerExecutorError("spread action missing aggregator link".to_string()) + })?; + + let agg_node = self + .state + .queue_template_node(agg_id, None) + .map_err(|err| RunnerExecutorError(err.0))?; + if elements.is_empty() { + return Ok(vec![agg_node]); + } + + let mut created = Vec::new(); + for (idx, element) in elements.into_iter().enumerate() { + let exec_node = self.queue_action_from_template( + template, + Some(HashMap::from([(loop_var.clone(), element)])), + Some(idx as i32), + )?; + self.add_exec_edge(source.node_id, exec_node.node_id); + self.add_exec_edge(exec_node.node_id, agg_node.node_id); + created.push(exec_node); + } + Ok(created) + } + + /// Create an action execution node from a template with optional bindings. + /// + /// Example IR: + /// - @work(value=item) with local_scope{"item": LiteralValue(3)} + /// Produces an action node whose kwargs include the literal 3. 
+ fn queue_action_from_template( + &mut self, + template: &ActionCallNode, + local_scope: Option>, + iteration_index: Option, + ) -> Result { + let kwargs = template + .kwarg_exprs + .iter() + .map(|(name, expr)| { + let value = self + .state + .expr_to_value(expr, local_scope.as_ref()) + .map_err(|err| RunnerExecutorError(err.0))?; + Ok((name.clone(), value)) + }) + .collect::, RunnerExecutorError>>()?; + + let spec = ActionCallSpec { + action_name: template.action_name.clone(), + module_name: template.module_name.clone(), + kwargs, + }; + let targets = template + .targets + .clone() + .or_else(|| template.target.clone().map(|target| vec![target])) + .unwrap_or_default(); + let node = self + .state + .queue_node( + ExecutionNodeType::ActionCall.as_str(), + &template.label(), + QueueNodeParams { + template_id: Some(template.id.clone()), + targets: Some(targets.clone()), + action: Some(spec.clone()), + ..QueueNodeParams::default() + }, + ) + .map_err(|err| RunnerExecutorError(err.0))?; + for value in spec.kwargs.values() { + self.state.record_data_flow_from_value(node.node_id, value); + } + let result = self + .state + .assign_action_results( + &node, + &template.action_name, + Some(&targets), + iteration_index, + false, + ) + .map_err(|err| RunnerExecutorError(err.0))?; + if let Some(node_mut) = self.state.nodes.get_mut(&node.node_id) { + node_mut.value_expr = Some(ValueExpr::ActionResult(result)); + } + Ok(node) + } + + /// Execute a non-action node inline and update assignments/edges. 
+ fn execute_inline_node(&mut self, node: &ExecutionNode) -> Result<(), RunnerExecutorError> { + let template_id = node + .template_id + .as_ref() + .ok_or_else(|| RunnerExecutorError("inline node missing template id".to_string()))?; + let template = self.dag.nodes.get(template_id).ok_or_else(|| { + RunnerExecutorError(format!("template node not found: {template_id}")) + })?; + + let aggregator = match template { + waymark_dag::DAGNode::Aggregator(aggregator) => Some(aggregator.clone()), + _ => None, + }; + if let Some(aggregator) = aggregator { + self.apply_aggregator_assignments(node, &aggregator)?; + } + + self.state + .mark_completed(node.node_id) + .map_err(|err| RunnerExecutorError(err.0)) + } + + fn handle_sleep_node( + &mut self, + node: &ExecutionNode, + ) -> Result { + let now = Utc::now(); + let scheduled_at = self + .state + .nodes + .get(&node.node_id) + .and_then(|node| node.scheduled_at); + if let Some(wake_at) = scheduled_at { + if wake_at <= now { + self.state + .mark_completed(node.node_id) + .map_err(|err| RunnerExecutorError(err.0))?; + return Ok(SleepDecision::Completed); + } + return Ok(SleepDecision::Blocked(wake_at)); + } + + let value_expr = self + .state + .nodes + .get(&node.node_id) + .and_then(|node| node.value_expr.clone()) + .unwrap_or(ValueExpr::Literal(LiteralValue { + value: Value::Number(0.into()), + })); + let materialized = self.state.materialize_value(value_expr); + let duration_value = self.evaluate_value_expr(&materialized)?; + + let duration_secs = match duration_value { + Value::Number(value) => value.as_f64().ok_or_else(|| { + RunnerExecutorError("sleep duration must be a number".to_string()) + })?, + Value::Null => 0.0, + _ => { + return Err(RunnerExecutorError( + "sleep duration must be a number".to_string(), + )); + } + }; + + if !duration_secs.is_finite() { + return Err(RunnerExecutorError( + "sleep duration must be finite".to_string(), + )); + } + + if duration_secs <= 0.0 { + self.state + 
.mark_completed(node.node_id) + .map_err(|err| RunnerExecutorError(err.0))?; + return Ok(SleepDecision::Completed); + } + + let duration = Duration::from_secs_f64(duration_secs); + let chrono_duration = chrono::Duration::from_std(duration) + .map_err(|_| RunnerExecutorError("sleep duration is out of range".to_string()))?; + let wake_at = now + chrono_duration; + self.state + .set_node_scheduled_at(node.node_id, Some(wake_at)) + .map_err(|err| RunnerExecutorError(err.0))?; + Ok(SleepDecision::Blocked(wake_at)) + } + + /// Check if an inline node is ready to run based on incoming edges. + fn inline_ready(&self, node: &ExecutionNode) -> bool { + if node.status == NodeStatus::Completed { + return false; + } + let incoming = match self.incoming_exec_edges.get(&node.node_id) { + Some(edges) if !edges.is_empty() => edges, + _ => return true, // No incoming edges means ready + }; + + let template = match node + .template_id + .as_ref() + .and_then(|id| self.dag.nodes.get(id)) + { + Some(template) => template, + None => return false, + }; + + if let waymark_dag::DAGNode::Aggregator(_) = template { + if let Some(required) = self.template_index.incoming(template.id()) { + let connected = self.connected_template_sources(node.node_id); + if !required.is_subset(&connected) { + return false; + } + } + for edge in incoming { + if let Some(source) = self.state.nodes.get(&edge.source) { + if !matches!(source.status, NodeStatus::Completed | NodeStatus::Failed) { + return false; + } + } else { + return false; + } + } + return true; + } + + for edge in incoming { + if let Some(source) = self.state.nodes.get(&edge.source) { + if !matches!(source.status, NodeStatus::Completed | NodeStatus::Failed) { + return false; + } + } else { + return false; + } + } + true + } + + /// Populate aggregated list assignments for a ready aggregator node. 
+ /// + /// Example: + /// - results = spread items: @work(item) + /// When all action nodes complete, the aggregator assigns + /// results = [ActionResultValue(...), ...]. + fn apply_aggregator_assignments( + &mut self, + node: &ExecutionNode, + template: &AggregatorNode, + ) -> Result<(), RunnerExecutorError> { + let targets = template + .targets + .clone() + .or_else(|| template.target.clone().map(|target| vec![target])) + .unwrap_or_default(); + if targets.len() != 1 { + return Ok(()); + } + + let incoming_nodes: Vec = self + .incoming_exec_edges + .get(&node.node_id) + .cloned() + .unwrap_or_default() + .into_iter() + .filter(|edge| edge.edge_type == EdgeType::StateMachine) + .filter_map(|edge| self.state.nodes.get(&edge.source).cloned()) + .collect(); + + let mut values = Vec::new(); + for source in &incoming_nodes { + let value_expr = source.value_expr.clone().ok_or_else(|| { + RunnerExecutorError("aggregator missing source value".to_string()) + })?; + values.push(value_expr); + } + + let ordered = self.order_aggregated_values(&incoming_nodes, &values)?; + let list_value = ValueExpr::List(ListValue { elements: ordered }); + let assignment = HashMap::from([(targets[0].clone(), list_value.clone())]); + if let Some(node_mut) = self.state.nodes.get_mut(&node.node_id) { + node_mut.assignments.extend(assignment.clone()); + } + self.state + .mark_latest_assignments(node.node_id, &assignment); + self.state + .record_data_flow_from_value(node.node_id, &list_value); + Ok(()) + } + + /// Order aggregator values by spread iteration or parallel index. + fn order_aggregated_values( + &self, + sources: &[ExecutionNode], + values: &[ValueExpr], + ) -> Result, RunnerExecutorError> { + // Order by explicit iteration/parallel indices when available, then fall back to timeline. 
+ if sources.len() != values.len() { + return Err(RunnerExecutorError( + "aggregator sources/value mismatch".to_string(), + )); + } + let timeline_index: HashMap = self + .state + .timeline + .iter() + .enumerate() + .map(|(idx, node_id)| (*node_id, idx)) + .collect(); + let mut pairs: Vec<((i32, i32), ValueExpr)> = Vec::with_capacity(values.len()); + for (source, value) in sources.iter().zip(values.iter()) { + let key = self.aggregated_sort_key(source, value, &timeline_index); + pairs.push((key, value.clone())); + } + pairs.sort_by_key(|item| item.0); + Ok(pairs.into_iter().map(|(_, value)| value).collect()) + } + + fn aggregated_sort_key( + &self, + source: &ExecutionNode, + value: &ValueExpr, + timeline_index: &HashMap, + ) -> (i32, i32) { + let mut primary = 2; + let mut secondary = *timeline_index.get(&source.node_id).unwrap_or(&0) as i32; + if let ValueExpr::ActionResult(action) = value { + if let Some(iter_idx) = action.iteration_index { + primary = 0; + secondary = iter_idx; + } + } else if let Some(template_id) = &source.template_id + && let Some(waymark_dag::DAGNode::ActionCall(action)) = self.dag.nodes.get(template_id) + && let Some(idx) = action.parallel_index + { + primary = 1; + secondary = idx; + } + (primary, secondary) + } + + /// Expand a collection expression into element ValueExprs. + /// + /// Example IR: + /// - spread range(3):i -> @work(i) + /// Produces [LiteralValue(0), LiteralValue(1), LiteralValue(2)]. 
+ fn expand_collection( + &mut self, + expr: &ir::Expr, + ) -> Result, RunnerExecutorError> { + let value = Self::expr_to_value(expr)?; + let value = self.state.materialize_value(value); + if let ValueExpr::List(list) = value { + return Ok(list.elements); + } + + if let ValueExpr::ActionResult(action_value) = value.clone() { + let action_result = self.resolve_action_result(&action_value)?; + if let Value::Array(items) = action_result { + return Ok(items + .iter() + .enumerate() + .map(|(idx, _)| { + ValueExpr::Index(IndexValue { + object: Box::new(ValueExpr::ActionResult(action_value.clone())), + index: Box::new(ValueExpr::Literal(LiteralValue { + value: Value::Number((idx as i64).into()), + })), + }) + }) + .collect()); + } + return Err(RunnerExecutorError( + "spread collection is not iterable".to_string(), + )); + } + + let evaluated = self.evaluate_value_expr(&value)?; + if let Value::Array(items) = evaluated { + return Ok(items + .into_iter() + .map(|item| ValueExpr::Literal(LiteralValue { value: item })) + .collect()); + } + + Err(RunnerExecutorError( + "spread collection is not iterable".to_string(), + )) + } + + fn build_incoming_exec_edges(state: &RunnerState) -> FxHashMap> { + let mut incoming: FxHashMap> = FxHashMap::default(); + for edge in &state.edges { + if edge.edge_type != EdgeType::StateMachine { + continue; + } + incoming.entry(edge.target).or_default().push(edge.clone()); + } + incoming + } + + fn build_template_to_exec_nodes(state: &RunnerState) -> FxHashMap> { + let mut index: FxHashMap> = FxHashMap::default(); + for (node_id, node) in &state.nodes { + if let Some(template_id) = &node.template_id { + index.entry(template_id.clone()).or_default().push(*node_id); + } + } + index + } + + /// Register a new execution node in the template index + fn register_exec_node(&mut self, template_id: &str, node_id: Uuid) { + self.template_to_exec_nodes + .entry(template_id.to_string()) + .or_default() + .push(node_id); + } + + fn add_exec_edge(&mut self, 
source: Uuid, target: Uuid) { + let edge = ExecutionEdge { + source, + target, + edge_type: EdgeType::StateMachine, + }; + if self.state.edges.contains(&edge) { + return; + } + self.state.edges.insert(edge.clone()); + self.incoming_exec_edges + .entry(target) + .or_default() + .push(edge); + } + + fn connected_template_sources(&self, exec_node_id: Uuid) -> HashSet { + let mut connected = HashSet::new(); + for edge in self + .incoming_exec_edges + .get(&exec_node_id) + .cloned() + .unwrap_or_default() + { + if let Some(source) = self.state.nodes.get(&edge.source) + && let Some(template_id) = &source.template_id + { + connected.insert(template_id.clone()); + } + } + connected + } + + fn find_connected_successor( + &self, + source_id: Uuid, + template_id: &str, + ) -> Option { + for edge in &self.state.edges { + if edge.edge_type != EdgeType::StateMachine || edge.source != source_id { + continue; + } + let target = self.state.nodes.get(&edge.target)?; + if target.template_id.as_deref() == Some(template_id) { + return Some(target.clone()); + } + } + None + } + + fn get_or_create_aggregator( + &mut self, + template_id: &str, + ) -> Result { + let mut candidates: Vec = self + .state + .nodes + .values() + .filter(|node| { + node.template_id.as_deref() == Some(template_id) + && node.status != NodeStatus::Completed + }) + .cloned() + .collect(); + if !candidates.is_empty() { + let timeline_index: HashMap = self + .state + .timeline + .iter() + .enumerate() + .map(|(idx, node_id)| (*node_id, idx)) + .collect(); + candidates.sort_by_key(|node| { + std::cmp::Reverse(timeline_index.get(&node.node_id).copied().unwrap_or(0)) + }); + return Ok(candidates[0].clone()); + } + self.state + .queue_template_node(template_id, None) + .map_err(|err| RunnerExecutorError(err.0)) + } + + fn get_or_create_exec_node( + &mut self, + template_id: &str, + ) -> Result { + // Use the index to find candidate nodes - O(k) where k is nodes for this template + if let Some(node_ids) = 
self.template_to_exec_nodes.get(template_id) { + // Find the most recent non-completed node + let mut best_node_id: Option = None; + let mut best_timeline_pos: Option = None; + + for &node_id in node_ids { + if let Some(node) = self.state.nodes.get(&node_id) + && !matches!(node.status, NodeStatus::Completed | NodeStatus::Failed) + { + let timeline_pos = self.state.timeline.iter().position(|&id| id == node_id); + if let Some(pos) = timeline_pos { + if best_timeline_pos.is_none() || pos > best_timeline_pos.unwrap() { + best_timeline_pos = Some(pos); + best_node_id = Some(node_id); + } + } else if best_node_id.is_none() { + best_node_id = Some(node_id); + } + } + } + + if let Some(node_id) = best_node_id { + return self + .state + .nodes + .get(&node_id) + .cloned() + .ok_or_else(|| RunnerExecutorError(format!("node disappeared: {node_id}"))); + } + } + + // Create new node and register it in the index + let node = self + .state + .queue_template_node(template_id, None) + .map_err(|err| RunnerExecutorError(err.0))?; + self.register_exec_node(template_id, node.node_id); + Ok(node) + } + + fn execution_node(&self, node_id: Uuid) -> Result<&ExecutionNode, RunnerExecutorError> { + self.state + .nodes + .get(&node_id) + .ok_or_else(|| RunnerExecutorError(format!("execution node not found: {node_id}"))) + } + + fn execution_node_mut( + &mut self, + node_id: Uuid, + ) -> Result<&mut ExecutionNode, RunnerExecutorError> { + self.state + .nodes + .get_mut(&node_id) + .ok_or_else(|| RunnerExecutorError(format!("execution node not found: {node_id}"))) + } + + fn execution_node_clone(&self, node_id: Uuid) -> Result { + self.execution_node(node_id).cloned() + } + + fn template_action_for_execution_node( + &self, + node: &ExecutionNode, + ) -> Result, RunnerExecutorError> { + let Some(template_id) = node.template_id.as_ref() else { + return Ok(None); + }; + let template = self.dag.nodes.get(template_id).ok_or_else(|| { + RunnerExecutorError(format!("template node not found: 
{template_id}")) + })?; + match template { + waymark_dag::DAGNode::ActionCall(action) => Ok(Some(action)), + _ => Ok(None), + } + } + + fn state_error(err: RunnerStateError) -> RunnerExecutorError { + RunnerExecutorError(err.0) + } + + fn collect_updates( + &mut self, + actions_done: Vec, + ) -> Result, RunnerExecutorError> { + if self.backend.is_none() { + return Ok(None); + } + let graph_dirty = self.state.consume_graph_dirty_for_durable_execution(); + let mut graph_updates = Vec::new(); + if graph_dirty { + let instance_id = self.instance_id.ok_or_else(|| { + RunnerExecutorError("instance_id is required for graph persistence".to_string()) + })?; + graph_updates.push(GraphUpdate::from_state(instance_id, &self.state)); + } + let updates = DurableUpdates { + actions_done, + graph_updates, + }; + if updates.actions_done.is_empty() && updates.graph_updates.is_empty() { + Ok(None) + } else { + Ok(Some(updates)) + } + } +} + +fn exception_type(value: &Value) -> Option<&str> { + match value { + Value::Object(map) => map.get("type").and_then(|value| value.as_str()), + _ => None, + } +} + +fn action_done_status_for_exception(value: &Value) -> ActionAttemptStatus { + match SyntheticExceptionType::from_value(value) { + Some(SyntheticExceptionType::ExecutorResume) + | Some(SyntheticExceptionType::ActionTimeout) => ActionAttemptStatus::TimedOut, + None => ActionAttemptStatus::Failed, + } +} + +fn compute_action_duration_ms( + started_at: Option>, + completed_at: DateTime, +) -> Option { + started_at + .map(|started_at| { + completed_at + .signed_duration_since(started_at) + .num_milliseconds() + }) + .filter(|duration| *duration >= 0) +} + +fn build_action_done( + execution_id: Uuid, + attempt: i32, + status: ActionAttemptStatus, + started_at: Option>, + completed_at: DateTime, + result: Value, +) -> ActionDone { + ActionDone { + execution_id, + attempt, + status, + started_at, + completed_at: Some(completed_at), + duration_ms: compute_action_duration_ms(started_at, 
completed_at), + result, + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::collections::{HashMap, HashSet}; + use std::sync::Arc; + + use waymark_backend_memory::MemoryBackend; + use waymark_dag::{ + ActionCallNode, ActionCallParams, AggregatorNode, AssignmentNode, DAG, DAGEdge, + convert_to_dag, + }; + use waymark_ir_parser::parse_program; + use waymark_proto::ast as ir; + use waymark_runner_state::{ExecutionEdge, ExecutionNode, NodeStatus, RunnerState}; + + fn variable(name: &str) -> ir::Expr { + ir::Expr { + kind: Some(ir::expr::Kind::Variable(ir::Variable { + name: name.to_string(), + })), + span: None, + } + } + + fn literal_int(value: i64) -> ir::Expr { + ir::Expr { + kind: Some(ir::expr::Kind::Literal(ir::Literal { + value: Some(ir::literal::Value::IntValue(value)), + })), + span: None, + } + } + + fn binary(left: ir::Expr, op: ir::BinaryOperator, right: ir::Expr) -> ir::Expr { + ir::Expr { + kind: Some(ir::expr::Kind::BinaryOp(Box::new(ir::BinaryOp { + left: Some(Box::new(left)), + op: op as i32, + right: Some(Box::new(right)), + }))), + span: None, + } + } + + #[test] + fn test_action_done_status_for_resume_exception_is_timed_out() { + let value = serde_json::json!({ + "type": "ExecutorResume", + "message": "resumed action timed out", + }); + assert_eq!( + action_done_status_for_exception(&value), + ActionAttemptStatus::TimedOut + ); + } + + #[test] + fn test_action_done_status_for_action_timeout_exception_is_timed_out() { + let value = serde_json::json!({ + "type": "ActionTimeout", + "message": "action timed out", + "timeout_seconds": 1, + "attempt": 1, + }); + assert_eq!( + action_done_status_for_exception(&value), + ActionAttemptStatus::TimedOut + ); + } + + #[test] + fn test_action_done_status_for_generic_exception_is_failed() { + let value = serde_json::json!({ + "type": "ValueError", + "message": "boom", + }); + assert_eq!( + action_done_status_for_exception(&value), + ActionAttemptStatus::Failed + ); + } + + #[test] + fn 
test_action_done_status_for_non_synthetic_timeout_error_is_failed() { + let value = serde_json::json!({ + "type": "TimeoutError", + "message": "user action raised timeout", + }); + assert_eq!( + action_done_status_for_exception(&value), + ActionAttemptStatus::Failed + ); + } + + #[test] + fn test_build_action_done_sets_duration_from_started_and_completed() { + let execution_id = Uuid::new_v4(); + let started_at = Utc::now(); + let completed_at = started_at + chrono::Duration::milliseconds(275); + let done = build_action_done( + execution_id, + 2, + ActionAttemptStatus::Completed, + Some(started_at), + completed_at, + serde_json::json!({"ok": true}), + ); + + assert_eq!(done.execution_id, execution_id); + assert_eq!(done.attempt, 2); + assert_eq!(done.status, ActionAttemptStatus::Completed); + assert_eq!(done.started_at, Some(started_at)); + assert_eq!(done.completed_at, Some(completed_at)); + assert_eq!(done.duration_ms, Some(275)); + } + + #[derive(Default)] + struct ActionNodeOptions { + policies: Vec, + spread_loop_var: Option, + spread_collection_expr: Option, + aggregates_to: Option, + } + + fn action_node( + node_id: &str, + action_name: &str, + kwarg_exprs: HashMap, + targets: Vec, + options: ActionNodeOptions, + ) -> ActionCallNode { + let ActionNodeOptions { + policies, + spread_loop_var, + spread_collection_expr, + aggregates_to, + } = options; + ActionCallNode::new( + node_id, + action_name, + ActionCallParams { + module_name: None, + kwargs: HashMap::new(), + kwarg_exprs, + policies, + targets: Some(targets), + target: None, + parallel_index: None, + aggregates_to, + spread_loop_var, + spread_collection_expr, + function_name: Some("main".to_string()), + }, + ) + } + + fn assignment_node( + node_id: &str, + targets: Vec, + assign_expr: ir::Expr, + ) -> AssignmentNode { + AssignmentNode::new( + node_id, + targets, + None, + Some(assign_expr), + None, + Some("main".to_string()), + ) + } + + fn aggregator_node( + node_id: &str, + aggregates_from: &str, + 
targets: Vec, + ) -> AggregatorNode { + AggregatorNode::new( + node_id, + aggregates_from, + Some(targets), + None, + "aggregate", + Some("main".to_string()), + ) + } + + fn snapshot_state( + state: &RunnerState, + action_results: &HashMap, + ) -> ( + HashMap, + HashSet, + HashMap, + ) { + ( + state.nodes.clone(), + state.edges.clone(), + action_results.clone(), + ) + } + + fn create_rehydrated_executor( + dag: &Arc, + nodes: HashMap, + edges: HashSet, + action_results: HashMap, + ) -> RunnerExecutor { + let state = RunnerState::new(Some(Arc::clone(dag)), Some(nodes), Some(edges), false); + RunnerExecutor::new(Arc::clone(dag), state, action_results, None) + } + + fn compare_executor_states(original: &RunnerExecutor, rehydrated: &RunnerExecutor) { + let orig_state = original.state(); + let rehy_state = rehydrated.state(); + assert_eq!( + orig_state.nodes.keys().collect::>(), + rehy_state.nodes.keys().collect::>(), + ); + for node_id in orig_state.nodes.keys() { + let orig_node = orig_state.nodes.get(node_id).unwrap(); + let rehy_node = rehy_state.nodes.get(node_id).unwrap(); + assert_eq!(orig_node.node_type, rehy_node.node_type); + assert_eq!(orig_node.status, rehy_node.status); + assert_eq!(orig_node.template_id, rehy_node.template_id); + assert_eq!(orig_node.targets, rehy_node.targets); + assert_eq!(orig_node.action_attempt, rehy_node.action_attempt); + } + assert_eq!(orig_state.edges, rehy_state.edges); + } + + fn completion_action_result(action: &ExecutionNode) -> Value { + Value::String(format!( + "{}:attempt{}", + action.template_id.as_deref().unwrap_or("unknown_action"), + action.action_attempt + )) + } + + fn dag_from_ir_source(source: &str) -> Arc { + let program = parse_program(source.trim()).expect("parse program"); + Arc::new(convert_to_dag(&program).expect("convert program to DAG")) + } + + fn build_executor_at_entry(dag: &Arc) -> (RunnerExecutor, Uuid) { + let mut state = RunnerState::new(Some(Arc::clone(dag)), None, None, false); + let entry_template 
= dag.entry_node.as_ref().expect("dag entry node"); + let entry_exec = state + .queue_template_node(entry_template, None) + .expect("queue entry node"); + ( + RunnerExecutor::new(Arc::clone(dag), state, HashMap::new(), None), + entry_exec.node_id, + ) + } + + type ActionResultFor = fn(&ExecutionNode) -> Value; + + struct RehydrateBranchHarness { + dag: Arc, + canonical: RunnerExecutor, + branches: Vec, + action_result_for: ActionResultFor, + } + + impl RehydrateBranchHarness { + const MAX_TICKS: usize = 256; + + fn new( + dag: Arc, + canonical: RunnerExecutor, + action_result_for: ActionResultFor, + ) -> Self { + let mut harness = Self { + dag, + canonical, + branches: Vec::new(), + action_result_for, + }; + harness.fork_from_canonical(); + harness + } + + fn run_and_assert(mut self) { + self.advance_canonical_with_forks(); + for (index, branch) in self.branches.iter_mut().enumerate() { + Self::advance_executor_to_completion(branch, self.action_result_for) + .unwrap_or_else(|err| panic!("branch {index} failed to complete: {err}")); + Self::assert_completed_executor_equivalent(&self.canonical, branch); + } + } + + fn fork_from_canonical(&mut self) { + let (nodes_snap, edges_snap, results_snap) = + snapshot_state(self.canonical.state(), self.canonical.action_results()); + self.branches.push(create_rehydrated_executor( + &self.dag, + nodes_snap, + edges_snap, + results_snap, + )); + } + + fn advance_canonical_with_forks(&mut self) { + let mut converged = false; + for _ in 0..Self::MAX_TICKS { + let progressed = Self::advance_executor_one_increment( + &mut self.canonical, + self.action_result_for, + ) + .expect("advance canonical executor"); + if !progressed { + converged = true; + break; + } + self.fork_from_canonical(); + } + assert!(converged, "canonical executor did not converge"); + assert!( + !self.branches.is_empty(), + "expected at least one rehydrated branch" + ); + } + + fn advance_executor_one_increment( + executor: &mut RunnerExecutor, + action_result_for: 
ActionResultFor, + ) -> Result { + let active_actions: Vec = executor + .state() + .nodes + .values() + .filter(|node| { + node.is_action_call() + && matches!(node.status, NodeStatus::Queued | NodeStatus::Running) + }) + .cloned() + .collect(); + for action in &active_actions { + if !executor.action_results().contains_key(&action.node_id) { + executor.set_action_result(action.node_id, action_result_for(action)); + } + } + + let mut finished_nodes: Vec = + active_actions.iter().map(|node| node.node_id).collect(); + finished_nodes.extend( + executor + .state() + .nodes + .values() + .filter(|node| { + node.status == NodeStatus::Queued + && node.is_sleep() + && node.scheduled_at.is_some() + }) + .map(|node| node.node_id), + ); + + if finished_nodes.is_empty() { + return Ok(false); + } + + let step = executor.increment(&finished_nodes)?; + for action in &step.actions { + if !executor.action_results().contains_key(&action.node_id) { + executor.set_action_result(action.node_id, action_result_for(action)); + } + } + for sleep_request in &step.sleep_requests { + executor + .state_mut() + .set_node_scheduled_at( + sleep_request.node_id, + Some(Utc::now() - chrono::Duration::seconds(1)), + ) + .map_err(|err| RunnerExecutorError(err.0))?; + } + Ok(true) + } + + fn advance_executor_to_completion( + executor: &mut RunnerExecutor, + action_result_for: ActionResultFor, + ) -> Result<(), RunnerExecutorError> { + for _ in 0..Self::MAX_TICKS { + if !Self::advance_executor_one_increment(executor, action_result_for)? 
{ + return Ok(()); + } + } + + Err(RunnerExecutorError( + "executor did not converge to completion".to_string(), + )) + } + + fn count_keyed(items: impl IntoIterator) -> HashMap { + let mut counts: HashMap = HashMap::new(); + for item in items { + *counts.entry(item).or_insert(0) += 1; + } + counts + } + + fn node_shape_counts(executor: &RunnerExecutor) -> HashMap { + Self::count_keyed(executor.state().nodes.values().map(|node| { + let mut targets = node.targets.clone(); + targets.sort(); + let mut assignment_keys: Vec = node.assignments.keys().cloned().collect(); + assignment_keys.sort(); + let mut action_kwarg_keys = node + .action + .as_ref() + .map(|action| action.kwargs.keys().cloned().collect::>()) + .unwrap_or_default(); + action_kwarg_keys.sort(); + format!( + "type={}|template={}|status={:?}|attempt={}|targets={targets:?}|assignments={assignment_keys:?}|action={}({action_kwarg_keys:?})|scheduled={}", + node.node_type, + node.template_id.clone().unwrap_or_default(), + node.status, + node.action_attempt, + node.action + .as_ref() + .map(|action| action.action_name.clone()) + .unwrap_or_default(), + node.scheduled_at.is_some(), + ) + })) + } + + fn edge_shape_counts(executor: &RunnerExecutor) -> HashMap { + Self::count_keyed(executor.state().edges.iter().map(|edge| { + let source = executor + .state() + .nodes + .get(&edge.source) + .expect("source node") + .template_id + .clone() + .unwrap_or_else(|| "__unknown_source".to_string()); + let target = executor + .state() + .nodes + .get(&edge.target) + .expect("target node") + .template_id + .clone() + .unwrap_or_else(|| "__unknown_target".to_string()); + format!("{source}-{:?}->{target}", edge.edge_type) + })) + } + + fn action_result_counts(executor: &RunnerExecutor) -> HashMap { + Self::count_keyed(executor.action_results().iter().map(|(node_id, value)| { + let template_id = executor + .state() + .nodes + .get(node_id) + .and_then(|node| node.template_id.clone()) + .unwrap_or_else(|| 
"__unknown_action".to_string()); + let rendered = + serde_json::to_string(value).expect("action result should serialize to JSON"); + format!("{template_id}:{rendered}") + })) + } + + fn assert_completed_executor_equivalent( + canonical: &RunnerExecutor, + rehydrated: &RunnerExecutor, + ) { + assert_eq!( + Self::node_shape_counts(canonical), + Self::node_shape_counts(rehydrated) + ); + assert_eq!( + Self::edge_shape_counts(canonical), + Self::edge_shape_counts(rehydrated) + ); + assert_eq!( + canonical.state().timeline.len(), + rehydrated.state().timeline.len() + ); + assert_eq!( + Self::action_result_counts(canonical), + Self::action_result_counts(rehydrated) + ); + assert_eq!( + canonical.state().ready_queue.is_empty(), + rehydrated.state().ready_queue.is_empty() + ); + + let replay_canonical = + crate::replay_variables(canonical.state(), canonical.action_results()) + .expect("replay canonical"); + let replay_rehydrated = + crate::replay_variables(rehydrated.state(), rehydrated.action_results()) + .expect("replay rehydrated"); + + let mut assignment_counts: HashMap = HashMap::new(); + for node in canonical.state().nodes.values() { + for target in node.assignments.keys() { + *assignment_counts.entry(target.clone()).or_insert(0) += 1; + } + } + let stable_canonical: HashMap = replay_canonical + .variables + .into_iter() + .filter(|(name, _)| assignment_counts.get(name).copied().unwrap_or(0) <= 1) + .collect(); + let stable_rehydrated: HashMap = replay_rehydrated + .variables + .into_iter() + .filter(|(name, _)| assignment_counts.get(name).copied().unwrap_or(0) <= 1) + .collect(); + assert_eq!(stable_canonical, stable_rehydrated); + } + } + + fn setup_linear_assignment_checkpoint() -> (Arc, RunnerExecutor) { + let dag = dag_from_ir_source( + r#" +fn main(input: [], output: [z]): + x = @fetch() + y = x + 1 + z = @process(value=y) + return z +"#, + ); + let (mut executor, entry_exec_id) = build_executor_at_entry(&dag); + + let first_step = executor + 
.increment(&[entry_exec_id]) + .expect("advance from entry"); + assert_eq!(first_step.actions.len(), 1); + let first_exec = first_step.actions[0].clone(); + executor.set_action_result(first_exec.node_id, Value::Number(10.into())); + + let step = executor.increment(&[first_exec.node_id]).expect("advance"); + assert_eq!(step.actions.len(), 1); + (dag, executor) + } + + fn setup_sleep_resume_checkpoint() -> (Arc, RunnerExecutor) { + let dag = dag_from_ir_source( + r#" +fn main(input: [], output: [resumed]): + seed = 1 + started = @get_timestamp() + sleep 60 + resumed = @get_timestamp() + return resumed +"#, + ); + let (mut executor, entry_exec_id) = build_executor_at_entry(&dag); + + let start_step = executor.increment(&[entry_exec_id]).expect("start"); + assert_eq!(start_step.actions.len(), 1); + let start_exec = start_step.actions[0].clone(); + executor.set_action_result(start_exec.node_id, Value::String("t0".to_string())); + + let sleep_step = executor + .increment(&[start_exec.node_id]) + .expect("advance to sleep"); + assert!(sleep_step.actions.is_empty()); + assert_eq!(sleep_step.sleep_requests.len(), 1); + (dag, executor) + } + + fn setup_spread_checkpoint() -> (Arc, RunnerExecutor) { + let dag = dag_from_ir_source( + r#" +fn main(input: [], output: [done]): + items = @get_items() + results = spread items:item -> @double(value=item) + done = @finalize(values=results) + return done +"#, + ); + let (mut executor, entry_exec_id) = build_executor_at_entry(&dag); + + let first_step = executor.increment(&[entry_exec_id]).expect("start"); + assert_eq!(first_step.actions.len(), 1); + let initial_exec = first_step.actions[0].clone(); + executor.set_action_result( + initial_exec.node_id, + Value::Array(vec![1.into(), 2.into(), 3.into()]), + ); + + let step1 = executor + .increment(&[initial_exec.node_id]) + .expect("expand spread"); + assert_eq!(step1.actions.len(), 3); + for (idx, node) in step1.actions.iter().enumerate() { + executor.set_action_result(node.node_id, 
Value::Number(((idx + 1) as i64).into())); + } + + let step2 = executor + .increment( + &step1 + .actions + .iter() + .map(|node| node.node_id) + .collect::>(), + ) + .expect("complete spread"); + assert_eq!(step2.actions.len(), 1); + (dag, executor) + } + + #[test] + fn test_executor_unblocks_downstream_action() { + let mut dag = DAG::default(); + + let action_start = action_node( + "action_start", + "fetch", + HashMap::new(), + vec!["x".to_string()], + ActionNodeOptions::default(), + ); + let assign_node = assignment_node( + "assign", + vec!["y".to_string()], + binary( + variable("x"), + ir::BinaryOperator::BinaryOpAdd, + literal_int(1), + ), + ); + let action_next = action_node( + "action_next", + "work", + HashMap::from([("value".to_string(), variable("y"))]), + vec!["z".to_string()], + ActionNodeOptions::default(), + ); + + dag.add_node(waymark_dag::DAGNode::ActionCall(action_start.clone())); + dag.add_node(waymark_dag::DAGNode::Assignment(assign_node.clone())); + dag.add_node(waymark_dag::DAGNode::ActionCall(action_next.clone())); + dag.add_edge(DAGEdge::state_machine( + action_start.id.clone(), + assign_node.id.clone(), + )); + dag.add_edge(DAGEdge::state_machine( + assign_node.id.clone(), + action_next.id.clone(), + )); + + let dag = Arc::new(dag); + let mut state = RunnerState::new(Some(dag.clone()), None, None, false); + let start_exec = state + .queue_template_node(&action_start.id, None) + .expect("queue"); + + let mut action_results = HashMap::new(); + action_results.insert(start_exec.node_id, Value::Number(10.into())); + let mut executor = RunnerExecutor::new(dag.clone(), state, action_results, None); + + let step = executor + .increment(&[start_exec.node_id]) + .expect("increment"); + assert_eq!(step.actions.len(), 1); + assert_eq!( + step.actions[0].template_id.as_deref(), + Some(action_next.id.as_str()) + ); + } + + #[test] + fn test_rehydrate_after_first_action_queued() { + let mut dag = DAG::default(); + let action1 = action_node( + "action1", + 
"fetch", + HashMap::new(), + vec!["x".to_string()], + ActionNodeOptions::default(), + ); + let action2 = action_node( + "action2", + "process", + HashMap::from([("value".to_string(), variable("x"))]), + vec!["y".to_string()], + ActionNodeOptions::default(), + ); + + dag.add_node(waymark_dag::DAGNode::ActionCall(action1.clone())); + dag.add_node(waymark_dag::DAGNode::ActionCall(action2.clone())); + dag.add_edge(DAGEdge::state_machine( + action1.id.clone(), + action2.id.clone(), + )); + + let dag = Arc::new(dag); + let mut state = RunnerState::new(Some(dag.clone()), None, None, false); + let exec1 = state.queue_template_node(&action1.id, None).expect("queue"); + let executor = RunnerExecutor::new(dag.clone(), state, HashMap::new(), None); + + let (nodes_snap, edges_snap, results_snap) = + snapshot_state(executor.state(), executor.action_results()); + let rehydrated = create_rehydrated_executor(&dag, nodes_snap, edges_snap, results_snap); + + compare_executor_states(&executor, &rehydrated); + let node = rehydrated.state().nodes.get(&exec1.node_id).expect("node"); + assert_eq!(node.status, NodeStatus::Queued); + } + + #[test] + fn test_rehydrate_after_action_completed_and_increment() { + let mut dag = DAG::default(); + let action1 = action_node( + "action1", + "fetch", + HashMap::new(), + vec!["x".to_string()], + ActionNodeOptions::default(), + ); + let action2 = action_node( + "action2", + "process", + HashMap::from([("value".to_string(), variable("x"))]), + vec!["y".to_string()], + ActionNodeOptions::default(), + ); + + dag.add_node(waymark_dag::DAGNode::ActionCall(action1.clone())); + dag.add_node(waymark_dag::DAGNode::ActionCall(action2.clone())); + dag.add_edge(DAGEdge::state_machine( + action1.id.clone(), + action2.id.clone(), + )); + + let dag = Arc::new(dag); + let mut state = RunnerState::new(Some(dag.clone()), None, None, false); + let exec1 = state.queue_template_node(&action1.id, None).expect("queue"); + + let mut action_results = HashMap::new(); + 
action_results.insert(exec1.node_id, Value::Number(42.into())); + let mut executor = RunnerExecutor::new(dag.clone(), state, action_results, None); + + let step = executor.increment(&[exec1.node_id]).expect("increment"); + assert_eq!(step.actions.len(), 1); + let exec2 = &step.actions[0]; + assert_eq!(exec2.template_id.as_deref(), Some(action2.id.as_str())); + + let (nodes_snap, edges_snap, results_snap) = + snapshot_state(executor.state(), executor.action_results()); + let rehydrated = create_rehydrated_executor(&dag, nodes_snap, edges_snap, results_snap); + compare_executor_states(&executor, &rehydrated); + + let node1 = rehydrated.state().nodes.get(&exec1.node_id).unwrap(); + assert_eq!(node1.status, NodeStatus::Completed); + let node2 = rehydrated.state().nodes.get(&exec2.node_id).unwrap(); + assert_eq!(node2.status, NodeStatus::Running); + } + + #[test] + fn test_rehydrate_multi_step_chain() { + let mut dag = DAG::default(); + let action1 = action_node( + "action1", + "step1", + HashMap::new(), + vec!["a".to_string()], + ActionNodeOptions::default(), + ); + let action2 = action_node( + "action2", + "step2", + HashMap::from([("input".to_string(), variable("a"))]), + vec!["b".to_string()], + ActionNodeOptions::default(), + ); + let action3 = action_node( + "action3", + "step3", + HashMap::from([("input".to_string(), variable("b"))]), + vec!["c".to_string()], + ActionNodeOptions::default(), + ); + + dag.add_node(waymark_dag::DAGNode::ActionCall(action1.clone())); + dag.add_node(waymark_dag::DAGNode::ActionCall(action2.clone())); + dag.add_node(waymark_dag::DAGNode::ActionCall(action3.clone())); + dag.add_edge(DAGEdge::state_machine( + action1.id.clone(), + action2.id.clone(), + )); + dag.add_edge(DAGEdge::state_machine( + action2.id.clone(), + action3.id.clone(), + )); + + let dag = Arc::new(dag); + let mut state = RunnerState::new(Some(dag.clone()), None, None, false); + let exec1 = state.queue_template_node(&action1.id, None).expect("queue"); + let mut executor 
= RunnerExecutor::new(dag.clone(), state, HashMap::new(), None); + + let (nodes_snap, edges_snap, results_snap) = + snapshot_state(executor.state(), executor.action_results()); + let rehydrated = create_rehydrated_executor(&dag, nodes_snap, edges_snap, results_snap); + compare_executor_states(&executor, &rehydrated); + + executor.set_action_result(exec1.node_id, Value::Number(10.into())); + let step1 = executor.increment(&[exec1.node_id]).expect("increment"); + let exec2 = step1.actions[0].clone(); + + let (nodes_snap, edges_snap, results_snap) = + snapshot_state(executor.state(), executor.action_results()); + let rehydrated = create_rehydrated_executor(&dag, nodes_snap, edges_snap, results_snap); + compare_executor_states(&executor, &rehydrated); + + executor.set_action_result(exec2.node_id, Value::Number(20.into())); + let step2 = executor.increment(&[exec2.node_id]).expect("increment"); + let exec3 = step2.actions[0].clone(); + + let (nodes_snap, edges_snap, results_snap) = + snapshot_state(executor.state(), executor.action_results()); + let rehydrated = create_rehydrated_executor(&dag, nodes_snap, edges_snap, results_snap); + compare_executor_states(&executor, &rehydrated); + + executor.set_action_result(exec3.node_id, Value::Number(30.into())); + let step3 = executor.increment(&[exec3.node_id]).expect("increment"); + assert!(step3.actions.is_empty()); + + let (nodes_snap, edges_snap, results_snap) = + snapshot_state(executor.state(), executor.action_results()); + let rehydrated = create_rehydrated_executor(&dag, nodes_snap, edges_snap, results_snap); + compare_executor_states(&executor, &rehydrated); + + for node in rehydrated.state().nodes.values() { + if node.is_action_call() { + assert_eq!(node.status, NodeStatus::Completed); + } + } + } + + #[test] + fn test_rehydrate_with_assignment_node() { + let mut dag = DAG::default(); + let action1 = action_node( + "action1", + "fetch", + HashMap::new(), + vec!["x".to_string()], + ActionNodeOptions::default(), + ); + 
let assign = assignment_node( + "assign", + vec!["y".to_string()], + binary( + variable("x"), + ir::BinaryOperator::BinaryOpAdd, + literal_int(1), + ), + ); + let action2 = action_node( + "action2", + "process", + HashMap::from([("value".to_string(), variable("y"))]), + vec!["z".to_string()], + ActionNodeOptions::default(), + ); + + dag.add_node(waymark_dag::DAGNode::ActionCall(action1.clone())); + dag.add_node(waymark_dag::DAGNode::Assignment(assign.clone())); + dag.add_node(waymark_dag::DAGNode::ActionCall(action2.clone())); + dag.add_edge(DAGEdge::state_machine( + action1.id.clone(), + assign.id.clone(), + )); + dag.add_edge(DAGEdge::state_machine( + assign.id.clone(), + action2.id.clone(), + )); + + let dag = Arc::new(dag); + let mut state = RunnerState::new(Some(dag.clone()), None, None, false); + let exec1 = state.queue_template_node(&action1.id, None).expect("queue"); + + let mut action_results = HashMap::new(); + action_results.insert(exec1.node_id, Value::Number(10.into())); + let mut executor = RunnerExecutor::new(dag.clone(), state, action_results, None); + + let step = executor.increment(&[exec1.node_id]).expect("increment"); + assert_eq!(step.actions.len(), 1); + assert_eq!( + step.actions[0].template_id.as_deref(), + Some(action2.id.as_str()) + ); + + let (nodes_snap, edges_snap, results_snap) = + snapshot_state(executor.state(), executor.action_results()); + let rehydrated = create_rehydrated_executor(&dag, nodes_snap, edges_snap, results_snap); + compare_executor_states(&executor, &rehydrated); + + let assign_nodes: Vec<_> = rehydrated + .state() + .nodes + .values() + .filter(|node| node.template_id.as_deref() == Some(&assign.id)) + .collect(); + assert_eq!(assign_nodes.len(), 1); + assert_eq!(assign_nodes[0].status, NodeStatus::Completed); + assert!(assign_nodes[0].assignments.contains_key("y")); + } + + #[test] + fn test_rehydrate_preserves_action_kwargs() { + let mut dag = DAG::default(); + let action1 = action_node( + "action1", + "compute", + 
HashMap::from([ + ("a".to_string(), literal_int(5)), + ( + "b".to_string(), + ir::Expr { + kind: Some(ir::expr::Kind::Literal(ir::Literal { + value: Some(ir::literal::Value::StringValue("test".to_string())), + })), + span: None, + }, + ), + ]), + vec!["result".to_string()], + ActionNodeOptions::default(), + ); + + dag.add_node(waymark_dag::DAGNode::ActionCall(action1.clone())); + let dag = Arc::new(dag); + let mut state = RunnerState::new(Some(dag.clone()), None, None, false); + let exec1 = state.queue_template_node(&action1.id, None).expect("queue"); + let executor = RunnerExecutor::new(dag.clone(), state, HashMap::new(), None); + + let (nodes_snap, edges_snap, results_snap) = + snapshot_state(executor.state(), executor.action_results()); + let rehydrated = create_rehydrated_executor(&dag, nodes_snap, edges_snap, results_snap); + + let orig_node = executor.state().nodes.get(&exec1.node_id).unwrap(); + let rehy_node = rehydrated.state().nodes.get(&exec1.node_id).unwrap(); + assert!(orig_node.action.is_some()); + assert!(rehy_node.action.is_some()); + let orig_action = orig_node.action.as_ref().unwrap(); + let rehy_action = rehy_node.action.as_ref().unwrap(); + assert_eq!(orig_action.action_name, rehy_action.action_name); + let orig_keys: HashSet<_> = orig_action.kwargs.keys().cloned().collect(); + let rehy_keys: HashSet<_> = rehy_action.kwargs.keys().cloned().collect(); + assert_eq!(orig_keys, rehy_keys); + } + + #[test] + fn test_rehydrate_increments_from_same_position() { + let mut dag = DAG::default(); + let action1 = action_node( + "action1", + "first", + HashMap::new(), + vec!["x".to_string()], + ActionNodeOptions::default(), + ); + let action2 = action_node( + "action2", + "second", + HashMap::new(), + vec!["y".to_string()], + ActionNodeOptions::default(), + ); + dag.add_node(waymark_dag::DAGNode::ActionCall(action1.clone())); + dag.add_node(waymark_dag::DAGNode::ActionCall(action2.clone())); + dag.add_edge(DAGEdge::state_machine( + action1.id.clone(), + 
action2.id.clone(), + )); + + let dag = Arc::new(dag); + let mut state = RunnerState::new(Some(dag.clone()), None, None, false); + let exec1 = state.queue_template_node(&action1.id, None).expect("queue"); + + let mut action_results = HashMap::new(); + action_results.insert(exec1.node_id, Value::Number(100.into())); + let mut executor = RunnerExecutor::new(dag.clone(), state, action_results, None); + + let (nodes_snap, edges_snap, results_snap) = + snapshot_state(executor.state(), executor.action_results()); + let mut rehydrated = create_rehydrated_executor(&dag, nodes_snap, edges_snap, results_snap); + + let orig_step = executor.increment(&[exec1.node_id]).expect("increment"); + let rehy_step = rehydrated.increment(&[exec1.node_id]).expect("increment"); + assert_eq!(orig_step.actions.len(), rehy_step.actions.len()); + assert_eq!( + orig_step.actions[0].template_id, + rehy_step.actions[0].template_id + ); + } + + #[test] + fn test_rehydrate_resume_marks_running_as_retryable() { + let mut dag = DAG::default(); + let action1 = action_node( + "action1", + "work", + HashMap::new(), + vec!["x".to_string()], + ActionNodeOptions { + policies: vec![ir::PolicyBracket { + kind: Some(ir::policy_bracket::Kind::Retry(ir::RetryPolicy { + max_retries: 3, + backoff: None, + exception_types: vec!["ExecutorResume".to_string()], + })), + }], + ..ActionNodeOptions::default() + }, + ); + dag.add_node(waymark_dag::DAGNode::ActionCall(action1.clone())); + + let dag = Arc::new(dag); + let mut state = RunnerState::new(Some(dag.clone()), None, None, false); + let exec1 = state.queue_template_node(&action1.id, None).expect("queue"); + state.mark_running(exec1.node_id).expect("mark running"); + + let executor = RunnerExecutor::new(dag.clone(), state, HashMap::new(), None); + let (nodes_snap, edges_snap, results_snap) = + snapshot_state(executor.state(), executor.action_results()); + let mut rehydrated = create_rehydrated_executor(&dag, nodes_snap, edges_snap, results_snap); + + assert_eq!( + 
rehydrated.state().nodes.get(&exec1.node_id).unwrap().status, + NodeStatus::Running + ); + + let step = rehydrated.resume().expect("resume"); + assert_eq!(step.actions.len(), 1); + assert_eq!(step.actions[0].node_id, exec1.node_id); + let node = rehydrated.state().nodes.get(&exec1.node_id).unwrap(); + assert_eq!(node.status, NodeStatus::Running); + assert_eq!(node.action_attempt, 2); + assert!(node.started_at.is_some()); + } + + #[test] + fn test_increment_records_failed_action_attempt() { + let mut dag = DAG::default(); + let action = action_node( + "action1", + "work", + HashMap::new(), + vec!["x".to_string()], + ActionNodeOptions::default(), + ); + dag.add_node(waymark_dag::DAGNode::ActionCall(action.clone())); + + let dag = Arc::new(dag); + let mut state = RunnerState::new(Some(dag.clone()), None, None, false); + let exec = state.queue_template_node(&action.id, None).expect("queue"); + + let mut executor = RunnerExecutor::new( + dag, + state, + HashMap::new(), + Some(Arc::new(MemoryBackend::new())), + ); + executor.set_instance_id(Uuid::new_v4()); + executor.set_action_result( + exec.node_id, + serde_json::json!({"type": "ValueError", "message": "boom"}), + ); + + let step = executor.increment(&[exec.node_id]).expect("increment"); + let updates = step.updates.expect("durable updates"); + assert_eq!(updates.actions_done.len(), 1); + assert_eq!(updates.actions_done[0].execution_id, exec.node_id); + assert_eq!(updates.actions_done[0].attempt, 1); + assert_eq!( + updates.actions_done[0] + .result + .get("type") + .and_then(Value::as_str), + Some("ValueError") + ); + assert_eq!( + executor + .state() + .nodes + .get(&exec.node_id) + .map(|n| n.status.clone()), + Some(NodeStatus::Failed) + ); + } + + #[test] + fn test_increment_records_failed_attempt_before_retry() { + let mut dag = DAG::default(); + let action = action_node( + "action1", + "work", + HashMap::new(), + vec!["x".to_string()], + ActionNodeOptions { + policies: vec![ir::PolicyBracket { + kind: 
Some(ir::policy_bracket::Kind::Retry(ir::RetryPolicy { + max_retries: 2, + backoff: None, + exception_types: Vec::new(), + })), + }], + ..ActionNodeOptions::default() + }, + ); + dag.add_node(waymark_dag::DAGNode::ActionCall(action.clone())); + + let dag = Arc::new(dag); + let mut state = RunnerState::new(Some(dag.clone()), None, None, false); + let exec = state.queue_template_node(&action.id, None).expect("queue"); + + let mut executor = RunnerExecutor::new( + dag, + state, + HashMap::new(), + Some(Arc::new(MemoryBackend::new())), + ); + executor.set_instance_id(Uuid::new_v4()); + executor.set_action_result( + exec.node_id, + serde_json::json!({"type": "ValueError", "message": "retry me"}), + ); + + let first_step = executor + .increment(&[exec.node_id]) + .expect("first increment"); + assert_eq!(first_step.actions.len(), 1); + assert_eq!(first_step.actions[0].node_id, exec.node_id); + let first_updates = first_step.updates.expect("first durable updates"); + assert_eq!(first_updates.actions_done.len(), 1); + assert_eq!(first_updates.actions_done[0].attempt, 1); + assert_eq!( + executor + .state() + .nodes + .get(&exec.node_id) + .map(|n| n.status.clone()), + Some(NodeStatus::Running) + ); + assert_eq!( + executor + .state() + .nodes + .get(&exec.node_id) + .map(|n| n.action_attempt), + Some(2) + ); + + executor.set_action_result(exec.node_id, Value::String("ok".to_string())); + let second_step = executor + .increment(&[exec.node_id]) + .expect("second increment"); + let second_updates = second_step.updates.expect("second durable updates"); + assert_eq!(second_updates.actions_done.len(), 1); + assert_eq!(second_updates.actions_done[0].attempt, 2); + assert_eq!( + executor + .state() + .nodes + .get(&exec.node_id) + .map(|n| n.status.clone()), + Some(NodeStatus::Completed) + ); + } + + #[test] + fn test_rehydrate_replay_variables_consistent() { + let mut dag = DAG::default(); + let action1 = action_node( + "action1", + "fetch", + HashMap::new(), + 
vec!["x".to_string()], + ActionNodeOptions::default(), + ); + let assign = assignment_node( + "assign", + vec!["doubled".to_string()], + binary( + variable("x"), + ir::BinaryOperator::BinaryOpMul, + literal_int(2), + ), + ); + + dag.add_node(waymark_dag::DAGNode::ActionCall(action1.clone())); + dag.add_node(waymark_dag::DAGNode::Assignment(assign.clone())); + dag.add_edge(DAGEdge::state_machine( + action1.id.clone(), + assign.id.clone(), + )); + + let dag = Arc::new(dag); + let mut state = RunnerState::new(Some(dag.clone()), None, None, false); + let exec1 = state.queue_template_node(&action1.id, None).expect("queue"); + + let mut action_results = HashMap::new(); + action_results.insert(exec1.node_id, Value::Number(21.into())); + let mut executor = RunnerExecutor::new(dag.clone(), state, action_results, None); + executor.increment(&[exec1.node_id]).expect("increment"); + + let orig_replay = + crate::replay_variables(executor.state(), executor.action_results()).expect("replay"); + + let (nodes_snap, edges_snap, results_snap) = + snapshot_state(executor.state(), executor.action_results()); + let rehydrated = create_rehydrated_executor(&dag, nodes_snap, edges_snap, results_snap); + + let rehy_replay = crate::replay_variables(rehydrated.state(), rehydrated.action_results()) + .expect("replay"); + assert_eq!(orig_replay.variables, rehy_replay.variables); + assert_eq!( + rehy_replay.variables.get("doubled"), + Some(&Value::Number(42.into())) + ); + } + + #[test] + fn test_rehydrate_completion_equivalent_across_ir_scenarios() { + let (linear_dag, linear_executor) = setup_linear_assignment_checkpoint(); + RehydrateBranchHarness::new(linear_dag, linear_executor, completion_action_result) + .run_and_assert(); + + let (sleep_dag, sleep_executor) = setup_sleep_resume_checkpoint(); + RehydrateBranchHarness::new(sleep_dag, sleep_executor, completion_action_result) + .run_and_assert(); + + let (spread_dag, spread_executor) = setup_spread_checkpoint(); + 
RehydrateBranchHarness::new(spread_dag, spread_executor, completion_action_result) + .run_and_assert(); + } + + #[test] + fn test_rehydrate_spread_action_with_aggregator() { + let mut dag = DAG::default(); + let initial_action = action_node( + "initial", + "get_items", + HashMap::new(), + vec!["items".to_string()], + ActionNodeOptions::default(), + ); + let spread_action = action_node( + "spread_action", + "process_item", + HashMap::from([("item".to_string(), variable("item"))]), + vec!["item_result".to_string()], + ActionNodeOptions { + spread_loop_var: Some("item".to_string()), + spread_collection_expr: Some(variable("items")), + aggregates_to: Some("aggregator".to_string()), + ..ActionNodeOptions::default() + }, + ); + let aggregator = + aggregator_node("aggregator", "spread_action", vec!["results".to_string()]); + + dag.add_node(waymark_dag::DAGNode::ActionCall(initial_action.clone())); + dag.add_node(waymark_dag::DAGNode::ActionCall(spread_action.clone())); + dag.add_node(waymark_dag::DAGNode::Aggregator(aggregator.clone())); + dag.add_edge(DAGEdge::state_machine( + initial_action.id.clone(), + spread_action.id.clone(), + )); + dag.add_edge(DAGEdge::state_machine( + spread_action.id.clone(), + aggregator.id.clone(), + )); + + let dag = Arc::new(dag); + let mut state = RunnerState::new(Some(dag.clone()), None, None, false); + let initial_exec = state + .queue_template_node(&initial_action.id, None) + .expect("queue"); + + let mut action_results = HashMap::new(); + action_results.insert( + initial_exec.node_id, + Value::Array(vec![1.into(), 2.into(), 3.into()]), + ); + let mut executor = RunnerExecutor::new(dag.clone(), state, action_results, None); + + let step1 = executor + .increment(&[initial_exec.node_id]) + .expect("increment"); + assert_eq!(step1.actions.len(), 3); + + let (nodes_snap, edges_snap, results_snap) = + snapshot_state(executor.state(), executor.action_results()); + let rehydrated = create_rehydrated_executor(&dag, nodes_snap, edges_snap, 
results_snap); + + compare_executor_states(&executor, &rehydrated); + let action_nodes: Vec<_> = executor + .state() + .nodes + .values() + .filter(|node| { + node.is_action_call() && node.template_id.as_deref() == Some(&spread_action.id) + }) + .collect(); + assert_eq!(action_nodes.len(), 3); + for action_node in action_nodes { + let rehy_node = rehydrated.state().nodes.get(&action_node.node_id).unwrap(); + assert_eq!(rehy_node.node_type, action_node.node_type); + assert_eq!(rehy_node.status, action_node.status); + } + } + + #[test] + fn test_rehydrate_full_spread_execution() { + let mut dag = DAG::default(); + let initial_action = action_node( + "initial", + "get_items", + HashMap::new(), + vec!["items".to_string()], + ActionNodeOptions::default(), + ); + let spread_action = action_node( + "spread_action", + "double", + HashMap::from([("value".to_string(), variable("item"))]), + vec!["item_result".to_string()], + ActionNodeOptions { + spread_loop_var: Some("item".to_string()), + spread_collection_expr: Some(variable("items")), + aggregates_to: Some("aggregator".to_string()), + ..ActionNodeOptions::default() + }, + ); + let aggregator = + aggregator_node("aggregator", "spread_action", vec!["results".to_string()]); + + dag.add_node(waymark_dag::DAGNode::ActionCall(initial_action.clone())); + dag.add_node(waymark_dag::DAGNode::ActionCall(spread_action.clone())); + dag.add_node(waymark_dag::DAGNode::Aggregator(aggregator.clone())); + dag.add_edge(DAGEdge::state_machine( + initial_action.id.clone(), + spread_action.id.clone(), + )); + dag.add_edge(DAGEdge::state_machine( + spread_action.id.clone(), + aggregator.id.clone(), + )); + + let dag = Arc::new(dag); + let mut state = RunnerState::new(Some(dag.clone()), None, None, false); + let initial_exec = state + .queue_template_node(&initial_action.id, None) + .expect("queue"); + + let mut action_results = HashMap::new(); + action_results.insert( + initial_exec.node_id, + Value::Array(vec![10.into(), 20.into()]), + ); + 
let mut executor = RunnerExecutor::new(dag.clone(), state, action_results.clone(), None); + + let step1 = executor + .increment(&[initial_exec.node_id]) + .expect("increment"); + let spread_nodes = step1.actions; + assert_eq!(spread_nodes.len(), 2); + + let (nodes_snap, edges_snap, results_snap) = + snapshot_state(executor.state(), executor.action_results()); + let rehydrated = create_rehydrated_executor(&dag, nodes_snap, edges_snap, results_snap); + compare_executor_states(&executor, &rehydrated); + + for (idx, node) in spread_nodes.iter().enumerate() { + executor.set_action_result(node.node_id, Value::Number(((idx + 1) * 100).into())); + } + + let _step2 = executor + .increment(&spread_nodes.iter().map(|n| n.node_id).collect::>()) + .expect("increment"); + + let (nodes_snap, edges_snap, results_snap) = + snapshot_state(executor.state(), executor.action_results()); + let rehydrated = create_rehydrated_executor(&dag, nodes_snap, edges_snap, results_snap); + compare_executor_states(&executor, &rehydrated); + + let agg_nodes: Vec<_> = rehydrated + .state() + .nodes + .values() + .filter(|node| node.template_id.as_deref() == Some(&aggregator.id)) + .collect(); + assert_eq!(agg_nodes.len(), 1); + assert_eq!(agg_nodes[0].status, NodeStatus::Completed); + assert!(agg_nodes[0].assignments.contains_key("results")); + } + + #[test] + fn test_rehydrate_timeline_ordering_preserved() { + let mut dag = DAG::default(); + let mut actions = Vec::new(); + for i in 0..4 { + actions.push(action_node( + &format!("action{i}"), + &format!("step{i}"), + HashMap::new(), + vec![format!("x{i}")], + ActionNodeOptions::default(), + )); + } + for action in &actions { + dag.add_node(waymark_dag::DAGNode::ActionCall(action.clone())); + } + for i in 0..actions.len() - 1 { + dag.add_edge(DAGEdge::state_machine( + actions[i].id.clone(), + actions[i + 1].id.clone(), + )); + } + + let dag = Arc::new(dag); + let mut state = RunnerState::new(Some(dag.clone()), None, None, false); + let mut exec_nodes: 
Vec = Vec::new(); + exec_nodes.push( + state + .queue_template_node(&actions[0].id, None) + .expect("queue"), + ); + let mut executor = RunnerExecutor::new(dag.clone(), state, HashMap::new(), None); + + for i in 0..3 { + executor.set_action_result( + exec_nodes.last().unwrap().node_id, + Value::Number((i * 10).into()), + ); + let step = executor + .increment(&[exec_nodes.last().unwrap().node_id]) + .expect("increment"); + if !step.actions.is_empty() { + exec_nodes.push(step.actions[0].clone()); + } + } + + let (nodes_snap, edges_snap, results_snap) = + snapshot_state(executor.state(), executor.action_results()); + let rehydrated = create_rehydrated_executor(&dag, nodes_snap, edges_snap, results_snap); + + let orig_timeline = executor.state().timeline.clone(); + let rehy_timeline = rehydrated.state().timeline.clone(); + assert_eq!(orig_timeline.len(), rehy_timeline.len()); + assert_eq!( + orig_timeline.iter().collect::>(), + rehy_timeline.iter().collect::>() + ); + } + + #[test] + fn test_rehydrate_ready_queue_rebuilt_for_running_actions() { + let mut dag = DAG::default(); + let action1 = action_node( + "action1", + "first", + HashMap::new(), + vec!["x".to_string()], + ActionNodeOptions::default(), + ); + let action2 = action_node( + "action2", + "second", + HashMap::new(), + vec!["y".to_string()], + ActionNodeOptions::default(), + ); + + dag.add_node(waymark_dag::DAGNode::ActionCall(action1.clone())); + dag.add_node(waymark_dag::DAGNode::ActionCall(action2.clone())); + dag.add_edge(DAGEdge::state_machine( + action1.id.clone(), + action2.id.clone(), + )); + + let dag = Arc::new(dag); + let mut state = RunnerState::new(Some(dag.clone()), None, None, false); + let exec1 = state.queue_template_node(&action1.id, None).expect("queue"); + + let mut action_results = HashMap::new(); + action_results.insert(exec1.node_id, Value::Number(50.into())); + let mut executor = RunnerExecutor::new(dag.clone(), state, action_results, None); + let step = 
executor.increment(&[exec1.node_id]).expect("increment"); + let exec2 = step.actions[0].clone(); + + let (nodes_snap, edges_snap, results_snap) = + snapshot_state(executor.state(), executor.action_results()); + let rehydrated = create_rehydrated_executor(&dag, nodes_snap, edges_snap, results_snap); + + let queued_nodes: Vec<_> = rehydrated + .state() + .nodes + .values() + .filter(|node| node.status == NodeStatus::Queued) + .collect(); + assert!(queued_nodes.is_empty()); + let running_nodes: Vec<_> = rehydrated + .state() + .nodes + .values() + .filter(|node| node.status == NodeStatus::Running) + .collect(); + assert_eq!(running_nodes.len(), 1); + assert_eq!(running_nodes[0].node_id, exec2.node_id); + assert!( + rehydrated.state().ready_queue.is_empty(), + "rehydration should not requeue running action nodes" + ); + } +} diff --git a/crates/runner/src/expression_evaluator.rs b/crates/runner/src/expression_evaluator.rs new file mode 100644 index 00000000..dac989a9 --- /dev/null +++ b/crates/runner/src/expression_evaluator.rs @@ -0,0 +1,1056 @@ +use std::cell::RefCell; +use std::collections::{HashMap, HashSet}; +use std::rc::Rc; + +use serde_json::Value; +use uuid::Uuid; + +use waymark_dag::{DAGEdge, EdgeType}; +use waymark_observability::obs; +use waymark_proto::ast as ir; +use waymark_runner_state::{ + ActionCallSpec, ActionResultValue, BinaryOpValue, DictEntryValue, DictValue, DotValue, + FunctionCallValue, IndexValue, ListValue, LiteralValue, UnaryOpValue, VariableValue, + literal_value, + value_visitor::{ValueExpr, ValueExprEvaluator}, +}; + +use super::{RunnerExecutor, RunnerExecutorError}; + +impl RunnerExecutor { + /// Convert a pure IR expression into a ValueExpr without side effects. 
+ pub(super) fn expr_to_value(expr: &ir::Expr) -> Result { + match expr.kind.as_ref() { + Some(ir::expr::Kind::Literal(lit)) => Ok(ValueExpr::Literal(LiteralValue { + value: literal_value(lit), + })), + Some(ir::expr::Kind::Variable(var)) => Ok(ValueExpr::Variable(VariableValue { + name: var.name.clone(), + })), + Some(ir::expr::Kind::BinaryOp(op)) => { + let left = op + .left + .as_ref() + .ok_or_else(|| RunnerExecutorError("binary op missing left".to_string()))?; + let right = op + .right + .as_ref() + .ok_or_else(|| RunnerExecutorError("binary op missing right".to_string()))?; + Ok(ValueExpr::BinaryOp(BinaryOpValue { + left: Box::new(Self::expr_to_value(left)?), + op: op.op, + right: Box::new(Self::expr_to_value(right)?), + })) + } + Some(ir::expr::Kind::UnaryOp(op)) => { + let operand = op + .operand + .as_ref() + .ok_or_else(|| RunnerExecutorError("unary op missing operand".to_string()))?; + Ok(ValueExpr::UnaryOp(UnaryOpValue { + op: op.op, + operand: Box::new(Self::expr_to_value(operand)?), + })) + } + Some(ir::expr::Kind::List(list)) => { + let mut elements = Vec::new(); + for item in &list.elements { + elements.push(Self::expr_to_value(item)?); + } + Ok(ValueExpr::List(ListValue { elements })) + } + Some(ir::expr::Kind::Dict(dict_expr)) => { + let mut entries = Vec::new(); + for entry in &dict_expr.entries { + let key = entry + .key + .as_ref() + .ok_or_else(|| RunnerExecutorError("dict entry missing key".to_string()))?; + let value = entry.value.as_ref().ok_or_else(|| { + RunnerExecutorError("dict entry missing value".to_string()) + })?; + entries.push(DictEntryValue { + key: Self::expr_to_value(key)?, + value: Self::expr_to_value(value)?, + }); + } + Ok(ValueExpr::Dict(DictValue { entries })) + } + Some(ir::expr::Kind::Index(index)) => { + let object = index.object.as_ref().ok_or_else(|| { + RunnerExecutorError("index access missing object".to_string()) + })?; + let index_expr = index + .index + .as_ref() + .ok_or_else(|| RunnerExecutorError("index access 
missing index".to_string()))?; + Ok(ValueExpr::Index(IndexValue { + object: Box::new(Self::expr_to_value(object)?), + index: Box::new(Self::expr_to_value(index_expr)?), + })) + } + Some(ir::expr::Kind::Dot(dot)) => { + let object = dot + .object + .as_ref() + .ok_or_else(|| RunnerExecutorError("dot access missing object".to_string()))?; + Ok(ValueExpr::Dot(DotValue { + object: Box::new(Self::expr_to_value(object)?), + attribute: dot.attribute.clone(), + })) + } + Some(ir::expr::Kind::FunctionCall(call)) => { + let mut args = Vec::new(); + for arg in &call.args { + args.push(Self::expr_to_value(arg)?); + } + let mut kwargs = HashMap::new(); + for kw in &call.kwargs { + if let Some(value) = &kw.value { + kwargs.insert(kw.name.clone(), Self::expr_to_value(value)?); + } + } + let global_fn = if call.global_function != 0 { + Some(call.global_function) + } else { + None + }; + Ok(ValueExpr::FunctionCall(FunctionCallValue { + name: call.name.clone(), + args, + kwargs, + global_function: global_fn, + })) + } + Some( + ir::expr::Kind::ActionCall(_) + | ir::expr::Kind::ParallelExpr(_) + | ir::expr::Kind::SpreadExpr(_), + ) => Err(RunnerExecutorError( + "action/spread calls not allowed in guard expressions".to_string(), + )), + None => Ok(ValueExpr::Literal(LiteralValue { value: Value::Null })), + } + } + + /// Evaluate a guard expression using current symbolic assignments. + pub(super) fn evaluate_guard( + &self, + expr: Option<&ir::Expr>, + ) -> Result { + let expr = match expr { + Some(expr) => expr, + None => return Ok(false), + }; + let value_expr = self.state().materialize_value(Self::expr_to_value(expr)?); + let result = self.evaluate_value_expr(&value_expr)?; + Ok(is_truthy(&result)) + } + + /// Resolve an action's symbolic kwargs to concrete Python values. + /// + /// Example: + /// - spec.kwargs={"value": VariableValue("x")} + /// - with x assigned to LiteralValue(10), returns {"value": 10}. 
+ #[obs] + pub fn resolve_action_kwargs( + &self, + node_id: Uuid, + action: &ActionCallSpec, + ) -> Result, RunnerExecutorError> { + let mut resolved = HashMap::new(); + for (name, expr) in &action.kwargs { + resolved.insert( + name.clone(), + self.evaluate_value_expr_for_node(expr, Some(node_id))?, + ); + } + Ok(resolved) + } + + /// Evaluate a ValueExpr into a concrete Python value. + #[obs] + pub(super) fn evaluate_value_expr( + &self, + expr: &ValueExpr, + ) -> Result { + self.evaluate_value_expr_for_node(expr, None) + } + + fn evaluate_value_expr_for_node( + &self, + expr: &ValueExpr, + current_node_id: Option, + ) -> Result { + let stack = Rc::new(RefCell::new(HashSet::new())); + let resolve_variable = { + let stack = stack.clone(); + let this = self; + move |name: &str| { + this.evaluate_variable_with_context(current_node_id, name, stack.clone()) + } + }; + let resolve_action_result = { + let this = self; + move |value: &ActionResultValue| this.resolve_action_result(value) + }; + let resolve_function_call = { + let this = self; + move |value: &FunctionCallValue, args, kwargs| { + this.evaluate_function_call(value, args, kwargs) + } + }; + let apply_binary = |op, left, right| Self::apply_binary(op, left, right); + let apply_unary = |op, operand| Self::apply_unary(op, operand); + let error_factory = |message: &str| RunnerExecutorError(message.to_string()); + let evaluator = ValueExprEvaluator::new( + &resolve_variable, + &resolve_action_result, + &resolve_function_call, + &apply_binary, + &apply_unary, + &error_factory, + ); + evaluator.visit(expr) + } + + fn find_variable_source_node(&self, current_node_id: Uuid, name: &str) -> Option { + let timeline_index: HashMap = self + .state() + .timeline + .iter() + .enumerate() + .map(|(idx, node_id)| (*node_id, idx)) + .collect(); + + self.state() + .edges + .iter() + .filter(|edge| edge.edge_type == EdgeType::DataFlow && edge.target == current_node_id) + .map(|edge| edge.source) + .filter(|source| { + self.state() 
+ .nodes + .get(source) + .map(|node| node.assignments.contains_key(name)) + .unwrap_or(false) + }) + .max_by_key(|source| timeline_index.get(source).copied().unwrap_or(0)) + } + + fn evaluate_variable_with_context( + &self, + current_node_id: Option, + name: &str, + stack: Rc>>, + ) -> Result { + let node_id = current_node_id + .and_then(|node_id| self.find_variable_source_node(node_id, name)) + .or_else(|| self.state().latest_assignment(name)) + .ok_or_else(|| RunnerExecutorError(format!("variable not found: {name}")))?; + self.evaluate_assignment(node_id, name, stack) + } + + pub(super) fn evaluate_assignment( + &self, + node_id: Uuid, + target: &str, + stack: Rc>>, + ) -> Result { + let key = (node_id, target.to_string()); + if let Some(value) = self.eval_cache_get(&key) { + return Ok(value); + } + if stack.borrow().contains(&key) { + return Err(RunnerExecutorError(format!( + "recursive assignment detected for {target}" + ))); + } + + let node = self + .state() + .nodes + .get(&node_id) + .ok_or_else(|| RunnerExecutorError(format!("missing assignment for {target}")))?; + let expr = node + .assignments + .get(target) + .ok_or_else(|| RunnerExecutorError(format!("missing assignment for {target}")))?; + + stack.borrow_mut().insert(key.clone()); + let resolve_variable = { + let stack = stack.clone(); + let this = self; + move |name: &str| { + this.evaluate_variable_with_context(Some(node_id), name, stack.clone()) + } + }; + let resolve_action_result = { + let this = self; + move |value: &ActionResultValue| this.resolve_action_result(value) + }; + let resolve_function_call = { + let this = self; + move |value: &FunctionCallValue, args, kwargs| { + this.evaluate_function_call(value, args, kwargs) + } + }; + let apply_binary = |op, left, right| Self::apply_binary(op, left, right); + let apply_unary = |op, operand| Self::apply_unary(op, operand); + let error_factory = |message: &str| RunnerExecutorError(message.to_string()); + let evaluator = ValueExprEvaluator::new( + 
&resolve_variable, + &resolve_action_result, + &resolve_function_call, + &apply_binary, + &apply_unary, + &error_factory, + ); + let value = evaluator.visit(expr)?; + stack.borrow_mut().remove(&key); + self.eval_cache_insert(key, value.clone()); + Ok(value) + } + + pub(super) fn resolve_action_result( + &self, + expr: &ActionResultValue, + ) -> Result { + let value = self + .action_results() + .get(&expr.node_id) + .cloned() + .ok_or_else(|| { + RunnerExecutorError(format!("missing action result for {}", expr.node_id)) + })?; + if let Some(idx) = expr.result_index { + if let Value::Array(items) = value { + let idx = idx as usize; + return items.get(idx).cloned().ok_or_else(|| { + RunnerExecutorError(format!( + "action result for {} has no index {}", + expr.node_id, idx + )) + }); + } + return Err(RunnerExecutorError(format!( + "action result for {} has no index {}", + expr.node_id, idx + ))); + } + Ok(value) + } + + pub(super) fn evaluate_function_call( + &self, + expr: &FunctionCallValue, + args: Vec, + kwargs: HashMap, + ) -> Result { + if let Some(global_fn) = expr.global_function + && global_fn != ir::GlobalFunction::Unspecified as i32 + { + return self.evaluate_global_function(global_fn, args, kwargs); + } + Err(RunnerExecutorError(format!( + "cannot evaluate non-global function call: {}", + expr.name + ))) + } + + pub(super) fn evaluate_global_function( + &self, + global_function: i32, + args: Vec, + kwargs: HashMap, + ) -> Result { + let error = executor_error; + match ir::GlobalFunction::try_from(global_function).ok() { + Some(ir::GlobalFunction::Range) => Ok(range_from_args(&args).into()), + Some(ir::GlobalFunction::Len) => { + if let Some(first) = args.first() { + return Ok(Value::Number(len_of_value(first, error)?)); + } + if let Some(items) = kwargs.get("items") { + return Ok(Value::Number(len_of_value(items, error)?)); + } + Err(RunnerExecutorError("len() missing argument".to_string())) + } + Some(ir::GlobalFunction::Enumerate) => { + let items = if 
let Some(first) = args.first() { + first.clone() + } else if let Some(items) = kwargs.get("items") { + items.clone() + } else { + return Err(RunnerExecutorError( + "enumerate() missing argument".to_string(), + )); + }; + let list = match items { + Value::Array(items) => items, + _ => return Err(RunnerExecutorError("enumerate() expects list".to_string())), + }; + let pairs: Vec = list + .into_iter() + .enumerate() + .map(|(idx, item)| Value::Array(vec![Value::Number((idx as i64).into()), item])) + .collect(); + Ok(Value::Array(pairs)) + } + Some(ir::GlobalFunction::Isexception) => { + if let Some(first) = args.first() { + return Ok(Value::Bool(is_exception_value(first))); + } + if let Some(value) = kwargs.get("value") { + return Ok(Value::Bool(is_exception_value(value))); + } + Err(RunnerExecutorError( + "isexception() missing argument".to_string(), + )) + } + Some(ir::GlobalFunction::Unspecified) | None => Err(RunnerExecutorError( + "global function unspecified".to_string(), + )), + } + } + + pub(super) fn apply_binary( + op: i32, + left: Value, + right: Value, + ) -> Result { + let error = executor_error; + match ir::BinaryOperator::try_from(op).ok() { + Some(ir::BinaryOperator::BinaryOpOr) => { + if is_truthy(&left) { + Ok(left) + } else { + Ok(right) + } + } + Some(ir::BinaryOperator::BinaryOpAnd) => { + if is_truthy(&left) { + Ok(right) + } else { + Ok(left) + } + } + Some(ir::BinaryOperator::BinaryOpEq) => Ok(Value::Bool(left == right)), + Some(ir::BinaryOperator::BinaryOpNe) => Ok(Value::Bool(left != right)), + Some(ir::BinaryOperator::BinaryOpLt) => { + compare_values(left, right, |a, b| a < b, error) + } + Some(ir::BinaryOperator::BinaryOpLe) => { + compare_values(left, right, |a, b| a <= b, error) + } + Some(ir::BinaryOperator::BinaryOpGt) => { + compare_values(left, right, |a, b| a > b, error) + } + Some(ir::BinaryOperator::BinaryOpGe) => { + compare_values(left, right, |a, b| a >= b, error) + } + Some(ir::BinaryOperator::BinaryOpIn) => 
Ok(Value::Bool(value_in(&left, &right))), + Some(ir::BinaryOperator::BinaryOpNotIn) => Ok(Value::Bool(!value_in(&left, &right))), + Some(ir::BinaryOperator::BinaryOpAdd) => add_values(left, right, error), + Some(ir::BinaryOperator::BinaryOpSub) => { + numeric_op(left, right, |a, b| a - b, true, error) + } + Some(ir::BinaryOperator::BinaryOpMul) => { + numeric_op(left, right, |a, b| a * b, true, error) + } + Some(ir::BinaryOperator::BinaryOpDiv) => { + numeric_op(left, right, |a, b| a / b, false, error) + } + Some(ir::BinaryOperator::BinaryOpFloorDiv) => { + numeric_op(left, right, |a, b| (a / b).floor(), true, error) + } + Some(ir::BinaryOperator::BinaryOpMod) => { + numeric_op(left, right, |a, b| a % b, true, error) + } + Some(ir::BinaryOperator::BinaryOpUnspecified) | None => Err(RunnerExecutorError( + "binary operator unspecified".to_string(), + )), + } + } + + pub(super) fn apply_unary(op: i32, operand: Value) -> Result { + match ir::UnaryOperator::try_from(op).ok() { + Some(ir::UnaryOperator::UnaryOpNeg) => { + if let Some(value) = int_value(&operand) { + return Ok(Value::Number((-value).into())); + } + match operand.as_f64() { + Some(value) => Ok(Value::Number( + serde_json::Number::from_f64(-value) + .unwrap_or_else(|| serde_json::Number::from(0)), + )), + None => Err(RunnerExecutorError("unary neg expects number".to_string())), + } + } + Some(ir::UnaryOperator::UnaryOpNot) => Ok(Value::Bool(!is_truthy(&operand))), + Some(ir::UnaryOperator::UnaryOpUnspecified) | None => Err(RunnerExecutorError( + "unary operator unspecified".to_string(), + )), + } + } + + pub(super) fn exception_matches(&self, edge: &DAGEdge, exception_value: &Value) -> bool { + let exception_types = match &edge.exception_types { + Some(types) => types, + None => return false, + }; + if exception_types.is_empty() { + return true; + } + let exc_name = match exception_value { + Value::Object(map) => map + .get("type") + .and_then(|value| value.as_str()) + .map(|value| value.to_string()), + _ 
=> None, + }; + if let Some(name) = exc_name { + return exception_types.iter().any(|value| value == &name); + } + false + } +} + +fn executor_error(message: &'static str) -> RunnerExecutorError { + RunnerExecutorError(message.to_string()) +} + +pub(crate) fn int_value(value: &Value) -> Option { + value + .as_i64() + .or_else(|| value.as_u64().and_then(|value| i64::try_from(value).ok())) +} + +pub(crate) fn numeric_op( + left: Value, + right: Value, + op: impl Fn(f64, f64) -> f64, + prefer_int: bool, + error: fn(&'static str) -> E, +) -> Result { + let left_num = left + .as_f64() + .ok_or_else(|| error("numeric operation expects number"))?; + let right_num = right + .as_f64() + .ok_or_else(|| error("numeric operation expects number"))?; + let result = op(left_num, right_num); + if prefer_int && int_value(&left).is_some() && int_value(&right).is_some() && result.is_finite() + { + let rounded = result.round(); + if (result - rounded).abs() < 1e-9 + && rounded >= (i64::MIN as f64) + && rounded <= (i64::MAX as f64) + { + return Ok(Value::Number((rounded as i64).into())); + } + } + Ok(Value::Number( + serde_json::Number::from_f64(result).unwrap_or_else(|| serde_json::Number::from(0)), + )) +} + +pub(crate) fn add_values( + left: Value, + right: Value, + error: fn(&'static str) -> E, +) -> Result { + if let (Value::Array(mut left), Value::Array(right)) = (left.clone(), right.clone()) { + left.extend(right); + return Ok(Value::Array(left)); + } + if let (Some(left), Some(right)) = (left.as_str(), right.as_str()) { + return Ok(Value::String(format!("{left}{right}"))); + } + numeric_op(left, right, |a, b| a + b, true, error) +} + +pub(crate) fn compare_values( + left: Value, + right: Value, + op: impl Fn(f64, f64) -> bool, + error: fn(&'static str) -> E, +) -> Result { + let left = left + .as_f64() + .ok_or_else(|| error("comparison expects number"))?; + let right = right + .as_f64() + .ok_or_else(|| error("comparison expects number"))?; + Ok(Value::Bool(op(left, right))) +} 
+ +pub(crate) fn value_in(value: &Value, container: &Value) -> bool { + match container { + Value::Array(items) => items.iter().any(|item| item == value), + Value::Object(map) => value + .as_str() + .map(|key| map.contains_key(key)) + .unwrap_or(false), + Value::String(text) => value + .as_str() + .map(|needle| text.contains(needle)) + .unwrap_or(false), + _ => false, + } +} + +pub(crate) fn is_truthy(value: &Value) -> bool { + match value { + Value::Null => false, + Value::Bool(value) => *value, + Value::Number(number) => number.as_f64().map(|value| value != 0.0).unwrap_or(false), + Value::String(value) => !value.is_empty(), + Value::Array(values) => !values.is_empty(), + Value::Object(map) => !map.is_empty(), + } +} + +pub(crate) fn is_exception_value(value: &Value) -> bool { + if let Value::Object(map) = value { + return map.contains_key("type") && map.contains_key("message"); + } + false +} + +pub(crate) fn len_of_value( + value: &Value, + error: fn(&'static str) -> E, +) -> Result { + let len = match value { + Value::Array(items) => items.len() as i64, + Value::String(text) => text.len() as i64, + Value::Object(map) => map.len() as i64, + _ => return Err(error("len() expects list, string, or dict")), + }; + Ok(len.into()) +} + +pub(crate) fn range_from_args(args: &[Value]) -> Vec { + let mut start = 0i64; + let mut end = 0i64; + let mut step = 1i64; + if args.len() == 1 { + end = args[0].as_i64().unwrap_or(0); + } else if args.len() >= 2 { + start = args[0].as_i64().unwrap_or(0); + end = args[1].as_i64().unwrap_or(0); + if args.len() >= 3 { + step = args[2].as_i64().unwrap_or(1); + } + } + if step == 0 { + return Vec::new(); + } + let mut values = Vec::new(); + if step > 0 { + let mut current = start; + while current < end { + values.push(Value::Number(current.into())); + current += step; + } + } else { + let mut current = start; + while current > end { + values.push(Value::Number(current.into())); + current += step; + } + } + values +} + +#[cfg(test)] +mod 
tests { + use std::cell::RefCell; + use std::collections::{HashMap, HashSet}; + use std::rc::Rc; + use std::sync::Arc; + + use uuid::Uuid; + + use super::*; + use waymark_dag::{DAG, DAGEdge}; + use waymark_ir_parser::IRParser; + use waymark_proto::ast as ir; + use waymark_runner_state::{ + ActionCallSpec, ActionResultValue, BinaryOpValue, FunctionCallValue, LiteralValue, + RunnerState, VariableValue, value_visitor::ValueExpr, + }; + + fn parse_expr(source: &str) -> ir::Expr { + IRParser::new(" ") + .parse_expr(source) + .expect("parse expression") + } + + fn literal_int(value: i64) -> ValueExpr { + ValueExpr::Literal(LiteralValue { + value: Value::Number(value.into()), + }) + } + + fn empty_executor() -> RunnerExecutor { + let dag = Arc::new(DAG::default()); + let state = RunnerState::new(Some(Arc::clone(&dag)), None, None, false); + RunnerExecutor::new(dag, state, HashMap::new(), None) + } + + fn executor_with_assignment(name: &str, value: ValueExpr) -> RunnerExecutor { + let dag = Arc::new(DAG::default()); + let mut state = RunnerState::new(Some(Arc::clone(&dag)), None, None, false); + state + .record_assignment_value( + vec![name.to_string()], + value, + None, + Some("test assignment".to_string()), + ) + .expect("record assignment"); + RunnerExecutor::new(dag, state, HashMap::new(), None) + } + + #[test] + fn test_expr_to_value_happy_path() { + let expr = parse_expr("x + 2"); + let value = RunnerExecutor::expr_to_value(&expr).expect("convert expression"); + match value { + ValueExpr::BinaryOp(binary) => { + assert!(matches!(*binary.left, ValueExpr::Variable(_))); + assert!(matches!(*binary.right, ValueExpr::Literal(_))); + } + other => panic!("expected binary op, got {other:?}"), + } + } + + #[test] + fn test_evaluate_guard_happy_path() { + let executor = executor_with_assignment("x", literal_int(2)); + let guard = parse_expr("x > 1"); + let result = executor + .evaluate_guard(Some(&guard)) + .expect("evaluate guard"); + assert!(result); + } + + #[test] + fn 
test_resolve_action_kwargs_happy_path() { + let executor = executor_with_assignment("x", literal_int(10)); + let action = ActionCallSpec { + action_name: "double".to_string(), + module_name: Some("tests".to_string()), + kwargs: HashMap::from([( + "value".to_string(), + ValueExpr::Variable(VariableValue { + name: "x".to_string(), + }), + )]), + }; + let resolved = executor + .resolve_action_kwargs(Uuid::new_v4(), &action) + .expect("resolve kwargs"); + assert_eq!(resolved.get("value"), Some(&Value::Number(10.into()))); + } + + #[test] + fn test_resolve_action_kwargs_uses_data_flow_for_self_referential_targets() { + let dag = Arc::new(DAG::default()); + let mut state = RunnerState::new(Some(Arc::clone(&dag)), None, None, false); + state + .record_assignment_value( + vec!["current".to_string()], + literal_int(0), + None, + Some("current = 0".to_string()), + ) + .expect("record current"); + let action_result = state + .queue_action( + "increment", + Some(vec!["current".to_string()]), + Some(HashMap::from([( + "value".to_string(), + ValueExpr::Variable(VariableValue { + name: "current".to_string(), + }), + )])), + None, + None, + ) + .expect("queue increment"); + let action_node = state + .nodes + .get(&action_result.node_id) + .expect("action node") + .clone(); + let action_spec = action_node.action.expect("action spec"); + + let executor = RunnerExecutor::new(dag, state, HashMap::new(), None); + let resolved = executor + .resolve_action_kwargs(action_result.node_id, &action_spec) + .expect("resolve kwargs"); + assert_eq!(resolved.get("value"), Some(&Value::Number(0.into()))); + } + + #[test] + fn test_evaluate_value_expr_happy_path() { + let executor = executor_with_assignment("x", literal_int(3)); + let expr = ValueExpr::BinaryOp(waymark_runner_state::BinaryOpValue { + left: Box::new(ValueExpr::Variable(VariableValue { + name: "x".to_string(), + })), + op: ir::BinaryOperator::BinaryOpAdd as i32, + right: Box::new(literal_int(1)), + }); + let value = executor + 
.evaluate_value_expr(&expr) + .expect("evaluate value expression"); + assert_eq!(value, Value::Number(4.into())); + } + + #[test] + fn test_evaluate_variable_happy_path() { + let executor = executor_with_assignment("value", literal_int(5)); + let stack = Rc::new(RefCell::new(HashSet::new())); + let value = executor + .evaluate_variable_with_context(None, "value", stack) + .expect("evaluate variable"); + assert_eq!(value, Value::Number(5.into())); + } + + #[test] + fn test_evaluate_assignment_happy_path() { + let executor = executor_with_assignment("value", literal_int(9)); + let node_id = executor + .state() + .latest_assignment("value") + .expect("latest assignment"); + let stack = Rc::new(RefCell::new(HashSet::new())); + let value = executor + .evaluate_assignment(node_id, "value", stack) + .expect("evaluate assignment"); + assert_eq!(value, Value::Number(9.into())); + } + + #[test] + fn test_evaluate_assignment_uses_data_flow_for_self_referential_updates() { + let dag = Arc::new(DAG::default()); + let mut state = RunnerState::new(Some(Arc::clone(&dag)), None, None, false); + state + .record_assignment_value( + vec!["count".to_string()], + literal_int(0), + None, + Some("count = 0".to_string()), + ) + .expect("record initial count"); + state + .record_assignment_value( + vec!["count".to_string()], + ValueExpr::BinaryOp(BinaryOpValue { + left: Box::new(ValueExpr::Variable(VariableValue { + name: "count".to_string(), + })), + op: ir::BinaryOperator::BinaryOpAdd as i32, + right: Box::new(literal_int(1)), + }), + None, + Some("count = count + 1".to_string()), + ) + .expect("record updated count"); + + let executor = RunnerExecutor::new(dag, state, HashMap::new(), None); + let node_id = executor + .state() + .latest_assignment("count") + .expect("latest assignment"); + let stack = Rc::new(RefCell::new(HashSet::new())); + let value = executor + .evaluate_assignment(node_id, "count", stack) + .expect("evaluate self-referential assignment"); + assert_eq!(value, 
Value::Number(1.into())); + } + + #[test] + fn test_resolve_action_result_happy_path() { + let mut executor = empty_executor(); + let action_id = Uuid::new_v4(); + executor.set_action_result( + action_id, + Value::Array(vec![Value::Number(7.into()), Value::Number(8.into())]), + ); + let result = executor + .resolve_action_result(&ActionResultValue { + node_id: action_id, + action_name: "fetch".to_string(), + iteration_index: None, + result_index: Some(1), + }) + .expect("resolve action result"); + assert_eq!(result, Value::Number(8.into())); + } + + #[test] + fn test_evaluate_function_call_happy_path() { + let executor = empty_executor(); + let value = executor + .evaluate_function_call( + &FunctionCallValue { + name: "len".to_string(), + args: Vec::new(), + kwargs: HashMap::new(), + global_function: Some(ir::GlobalFunction::Len as i32), + }, + vec![Value::Array(vec![Value::Null, Value::Null])], + HashMap::new(), + ) + .expect("evaluate function call"); + assert_eq!(value, Value::Number(2.into())); + } + + #[test] + fn test_evaluate_global_function_happy_path() { + let executor = empty_executor(); + let value = executor + .evaluate_global_function( + ir::GlobalFunction::Range as i32, + vec![Value::Number(1.into()), Value::Number(4.into())], + HashMap::new(), + ) + .expect("evaluate global function"); + assert_eq!( + value, + Value::Array(vec![ + Value::Number(1.into()), + Value::Number(2.into()), + Value::Number(3.into()) + ]) + ); + } + + #[test] + fn test_apply_binary_happy_path() { + let value = RunnerExecutor::apply_binary( + ir::BinaryOperator::BinaryOpAdd as i32, + Value::Number(2.into()), + Value::Number(3.into()), + ) + .expect("apply binary"); + assert_eq!(value, Value::Number(5.into())); + } + + #[test] + fn test_apply_unary_happy_path() { + let value = + RunnerExecutor::apply_unary(ir::UnaryOperator::UnaryOpNot as i32, Value::Bool(true)) + .expect("apply unary"); + assert_eq!(value, Value::Bool(false)); + } + + #[test] + fn 
test_exception_matches_happy_path() { + let executor = empty_executor(); + let edge = DAGEdge::state_machine_with_exception("a", "b", vec!["ValueError".to_string()]); + let exception = serde_json::json!({ + "type": "ValueError", + "message": "boom", + }); + assert!(executor.exception_matches(&edge, &exception)); + } + + #[test] + fn test_executor_error_happy_path() { + let error = executor_error("hello"); + assert_eq!(error.0, "hello"); + } + + #[test] + fn test_int_value_happy_path() { + let value = Value::Number(7_u64.into()); + assert_eq!(int_value(&value), Some(7)); + } + + #[test] + fn test_numeric_op_happy_path() { + let value = numeric_op( + Value::Number(10.into()), + Value::Number(3.into()), + |a, b| a + b, + true, + executor_error, + ) + .expect("numeric op"); + assert_eq!(value, Value::Number(13.into())); + } + + #[test] + fn test_add_values_happy_path() { + let value = add_values( + Value::String("hello ".to_string()), + Value::String("world".to_string()), + executor_error, + ) + .expect("add values"); + assert_eq!(value, Value::String("hello world".to_string())); + } + + #[test] + fn test_compare_values_happy_path() { + let value = compare_values( + Value::Number(3.into()), + Value::Number(5.into()), + |a, b| a < b, + executor_error, + ) + .expect("compare values"); + assert_eq!(value, Value::Bool(true)); + } + + #[test] + fn test_value_in_happy_path() { + let container = Value::Array(vec![Value::Number(1.into()), Value::Number(2.into())]); + assert!(value_in(&Value::Number(2.into()), &container)); + } + + #[test] + fn test_is_truthy_happy_path() { + assert!(is_truthy(&Value::String("non-empty".to_string()))); + } + + #[test] + fn test_is_exception_value_happy_path() { + let value = serde_json::json!({ + "type": "RuntimeError", + "message": "bad", + }); + assert!(is_exception_value(&value)); + } + + #[test] + fn test_len_of_value_happy_path() { + let value = Value::Array(vec![Value::Null, Value::Null, Value::Null]); + let len = len_of_value(&value, 
executor_error).expect("length"); + assert_eq!(len.as_i64(), Some(3)); + } + + #[test] + fn test_range_from_args_happy_path() { + let values = range_from_args(&[ + Value::Number(0.into()), + Value::Number(5.into()), + Value::Number(2.into()), + ]); + assert_eq!( + values, + vec![ + Value::Number(0.into()), + Value::Number(2.into()), + Value::Number(4.into()) + ] + ); + } +} diff --git a/crates/runner/src/lib.rs b/crates/runner/src/lib.rs new file mode 100644 index 00000000..ed59081f --- /dev/null +++ b/crates/runner/src/lib.rs @@ -0,0 +1,12 @@ +//! Runner utilities. + +pub mod executor; +pub mod expression_evaluator; +pub mod replay; +pub(crate) mod retry; +pub(crate) mod synthetic_exceptions; + +pub use executor::{ + DurableUpdates, ExecutorStep, RunnerExecutor, RunnerExecutorError, SleepRequest, +}; +pub use replay::{ReplayError, ReplayResult, replay_action_kwargs, replay_variables}; diff --git a/crates/runner/src/replay.rs b/crates/runner/src/replay.rs new file mode 100644 index 00000000..ffb413a1 --- /dev/null +++ b/crates/runner/src/replay.rs @@ -0,0 +1,659 @@ +//! Replay variable values from a runner state snapshot. + +use std::cell::RefCell; +use std::collections::{HashMap, HashSet}; +use std::rc::Rc; + +use serde_json::Value; +use uuid::Uuid; + +use crate::expression_evaluator::{ + add_values, compare_values, int_value, is_exception_value, is_truthy, len_of_value, numeric_op, + range_from_args, value_in, +}; +use waymark_dag::{EXCEPTION_SCOPE_VAR, EdgeType}; +use waymark_proto::ast as ir; +use waymark_runner_state::{ + ActionResultValue, FunctionCallValue, RunnerState, + value_visitor::{ValueExpr, ValueExprEvaluator}, +}; + +/// Raised when replay cannot reconstruct variable values. +#[derive(Debug, thiserror::Error)] +#[error("{0}")] +pub struct ReplayError(pub String); + +#[derive(Clone, Debug)] +pub struct ReplayResult { + pub variables: HashMap, +} + +/// Replay variable values from a runner state snapshot. 
+pub struct ReplayEngine<'a> { + state: &'a RunnerState, + action_results: &'a HashMap, + cache: RefCell>, + timeline: Vec, + index: HashMap, + incoming_data: HashMap>, +} + +impl<'a> ReplayEngine<'a> { + /// Prepare replay state derived from a runner snapshot. + /// + /// We precompute a timeline index and incoming data-flow map so lookups are + /// O(1) during evaluation. + /// + /// Example: + /// - timeline = [node_a, node_b] + /// - index[node_b] == 1 and incoming data edges are pre-sorted. + pub fn new(state: &'a RunnerState, action_results: &'a HashMap) -> Self { + let timeline = if state.timeline.is_empty() { + state.nodes.keys().cloned().collect() + } else { + state.timeline.clone() + }; + let index = timeline + .iter() + .enumerate() + .map(|(idx, node_id)| (*node_id, idx)) + .collect(); + let incoming_data = build_incoming_data_map(state, &index); + Self { + state, + action_results, + cache: RefCell::new(HashMap::new()), + timeline, + index, + incoming_data, + } + } + + /// Replay variable values by scanning assignments from newest to oldest. + /// + /// We walk the timeline in reverse to capture the latest assignment for each + /// variable and skip older definitions once a value is known. This mirrors + /// "last write wins" semantics while avoiding redundant evaluation work. + /// + /// Example: + /// - x = 1 + /// - x = 2 + /// Reverse traversal yields x=2 without evaluating the older assignment. 
+ pub fn replay_variables(&self) -> Result { + let mut variables: HashMap = HashMap::new(); + for node_id in self.timeline.iter().rev() { + let node = match self.state.nodes.get(node_id) { + Some(node) => node, + None => continue, + }; + if node.assignments.is_empty() { + continue; + } + for target in node.assignments.keys() { + if variables.contains_key(target) { + continue; + } + let value = self.evaluate_assignment( + *node_id, + target, + Rc::new(RefCell::new(HashSet::new())), + )?; + variables.insert(target.clone(), value); + } + } + Ok(ReplayResult { variables }) + } + + /// Replay concrete kwargs for an action execution node. + /// + /// This resolves symbolic kwargs from the action node in the context of + /// the node's incoming data-flow edges. + pub fn replay_action_kwargs( + &self, + node_id: Uuid, + ) -> Result, ReplayError> { + let node = self + .state + .nodes + .get(&node_id) + .ok_or_else(|| ReplayError(format!("action node not found: {node_id}")))?; + let action = node + .action + .as_ref() + .ok_or_else(|| ReplayError(format!("node is not an action call: {node_id}")))?; + let mut resolved = HashMap::new(); + for (name, expr) in &action.kwargs { + let value = self.evaluate_value_expr_at_node(node_id, expr)?; + resolved.insert(name.clone(), value); + } + Ok(resolved) + } + + /// Evaluate a single assignment expression with cycle detection. + /// + /// We memoize evaluated (node, target) pairs and guard against recursive + /// references by tracking a stack of active evaluations. + /// + /// Example: + /// - x = y + 1 + /// - y = 2 + /// Evaluating x resolves y first, then computes x. 
+ fn evaluate_assignment( + &self, + node_id: Uuid, + target: &str, + stack: Rc>>, + ) -> Result { + let key = (node_id, target.to_string()); + if let Some(value) = self.cache.borrow().get(&key) { + return Ok(value.clone()); + } + if stack.borrow().contains(&key) { + return Err(ReplayError(format!( + "recursive assignment detected for {target} in {node_id}" + ))); + } + + let node = + self.state.nodes.get(&node_id).ok_or_else(|| { + ReplayError(format!("missing assignment for {target} in {node_id}")) + })?; + let expr = node + .assignments + .get(target) + .ok_or_else(|| ReplayError(format!("missing assignment for {target} in {node_id}")))?; + + stack.borrow_mut().insert(key.clone()); + let resolve_variable = { + let stack = stack.clone(); + let this = self; + move |name: &str| this.resolve_variable(node_id, name, stack.clone()) + }; + let resolve_action_result = { + let this = self; + move |value: &ActionResultValue| this.resolve_action_result(value) + }; + let resolve_function_call = { + let this = self; + move |value: &FunctionCallValue, args, kwargs| { + this.evaluate_function_call(value, args, kwargs) + } + }; + let apply_binary = |op, left, right| apply_binary(op, left, right); + let apply_unary = |op, operand| apply_unary(op, operand); + let error_factory = |message: &str| ReplayError(message.to_string()); + let evaluator = ValueExprEvaluator::new( + &resolve_variable, + &resolve_action_result, + &resolve_function_call, + &apply_binary, + &apply_unary, + &error_factory, + ); + let value = evaluator.visit(expr)?; + stack.borrow_mut().remove(&key); + self.cache.borrow_mut().insert(key, value.clone()); + Ok(value) + } + + fn evaluate_value_expr_at_node( + &self, + node_id: Uuid, + expr: &ValueExpr, + ) -> Result { + let stack = Rc::new(RefCell::new(HashSet::new())); + let resolve_variable = { + let stack = stack.clone(); + let this = self; + move |name: &str| this.resolve_variable(node_id, name, stack.clone()) + }; + let resolve_action_result = { + let this = 
self; + move |value: &ActionResultValue| this.resolve_action_result(value) + }; + let resolve_function_call = { + let this = self; + move |value: &FunctionCallValue, args, kwargs| { + this.evaluate_function_call(value, args, kwargs) + } + }; + let apply_binary = |op, left, right| apply_binary(op, left, right); + let apply_unary = |op, operand| apply_unary(op, operand); + let error_factory = |message: &str| ReplayError(message.to_string()); + let evaluator = ValueExprEvaluator::new( + &resolve_variable, + &resolve_action_result, + &resolve_function_call, + &apply_binary, + &apply_unary, + &error_factory, + ); + evaluator.visit(expr) + } + + /// Resolve a variable reference via data-flow edges. + /// + /// This walks to the closest upstream definition and replays that + /// assignment for the requested variable. + /// + /// Example: + /// - action_1 defines x + /// - assign_2 uses x + /// Resolving x from assign_2 evaluates action_1's assignment. + fn resolve_variable( + &self, + current_node_id: Uuid, + name: &str, + stack: Rc>>, + ) -> Result { + let mut source_node_id = self.find_variable_source_node(current_node_id, name); + if source_node_id.is_none() && name == EXCEPTION_SCOPE_VAR { + source_node_id = self.state.latest_assignment(name); + } + let source_node_id = source_node_id.ok_or_else(|| { + ReplayError(format!("variable not found via data-flow edges: {name}")) + })?; + self.evaluate_assignment(source_node_id, name, stack) + } + + /// Find the nearest upstream node that defines the variable. + /// + /// We consult pre-sorted incoming data edges and ignore sources that are + /// later in the timeline than the current node. + /// + /// Example: + /// - if node_b comes after node_a, node_b cannot be a source for node_a. 
+ fn find_variable_source_node(&self, current_node_id: Uuid, name: &str) -> Option<Uuid> { + let sources = self.incoming_data.get(&current_node_id)?; + let current_idx = self + .index + .get(&current_node_id) + .copied() + .unwrap_or(self.index.len()); + for source_id in sources { + if self.index.get(source_id).copied().unwrap_or(0) > current_idx { + continue; + } + if let Some(node) = self.state.nodes.get(source_id) + && node.assignments.contains_key(name) + { + return Some(*source_id); + } + } + None + } + + /// Fetch an action result by node id, handling indexed results. + /// + /// Example: + /// - result = @fetch() + /// - result[0] + /// The evaluator looks up the action result and returns index 0. + fn resolve_action_result(&self, expr: &ActionResultValue) -> Result<Value, ReplayError> { + let value = self + .action_results + .get(&expr.node_id) + .cloned() + .ok_or_else(|| ReplayError(format!("missing action result for {}", expr.node_id)))?; + if let Some(idx) = expr.result_index { + if let Value::Array(items) = value { + let idx = idx as usize; + return items.get(idx).cloned().ok_or_else(|| { + ReplayError(format!( + "action result for {} has no index {}", + expr.node_id, idx + )) + }); + } + return Err(ReplayError(format!( + "action result for {} has no index {}", + expr.node_id, idx + ))); + } + Ok(value) + } + + /// Evaluate a function call during replay. + /// + /// Only global functions are supported because user-defined functions are + /// not available in this replay context. 
+ /// + /// Example: + /// - len(items=[1, 2]) -> 2 + fn evaluate_function_call( + &self, + expr: &FunctionCallValue, + args: Vec<Value>, + kwargs: HashMap<String, Value>, + ) -> Result<Value, ReplayError> { + if let Some(global_fn) = expr.global_function + && global_fn != ir::GlobalFunction::Unspecified as i32 + { + return evaluate_global_function(global_fn, args, kwargs); + } + Err(ReplayError(format!( + "cannot replay non-global function call: {}", + expr.name + ))) + } +} + +fn replay_error(message: &'static str) -> ReplayError { + ReplayError(message.to_string()) +} + +/// Apply a binary operator to replayed operands. +/// +/// Example: +/// - left=1, right=2, op=ADD -> 3 +fn apply_binary(op: i32, left: Value, right: Value) -> Result<Value, ReplayError> { + let error = replay_error; + match ir::BinaryOperator::try_from(op).ok() { + Some(ir::BinaryOperator::BinaryOpOr) => { + if is_truthy(&left) { + Ok(left) + } else { + Ok(right) + } + } + Some(ir::BinaryOperator::BinaryOpAnd) => { + if is_truthy(&left) { + Ok(right) + } else { + Ok(left) + } + } + Some(ir::BinaryOperator::BinaryOpEq) => Ok(Value::Bool(left == right)), + Some(ir::BinaryOperator::BinaryOpNe) => Ok(Value::Bool(left != right)), + Some(ir::BinaryOperator::BinaryOpLt) => compare_values(left, right, |a, b| a < b, error), + Some(ir::BinaryOperator::BinaryOpLe) => compare_values(left, right, |a, b| a <= b, error), + Some(ir::BinaryOperator::BinaryOpGt) => compare_values(left, right, |a, b| a > b, error), + Some(ir::BinaryOperator::BinaryOpGe) => compare_values(left, right, |a, b| a >= b, error), + Some(ir::BinaryOperator::BinaryOpIn) => Ok(Value::Bool(value_in(&left, &right))), + Some(ir::BinaryOperator::BinaryOpNotIn) => Ok(Value::Bool(!value_in(&left, &right))), + Some(ir::BinaryOperator::BinaryOpAdd) => add_values(left, right, error), + Some(ir::BinaryOperator::BinaryOpSub) => numeric_op(left, right, |a, b| a - b, true, error), + Some(ir::BinaryOperator::BinaryOpMul) => numeric_op(left, right, |a, b| a * b, true, error), + 
Some(ir::BinaryOperator::BinaryOpDiv) => { + numeric_op(left, right, |a, b| a / b, false, error) + } + Some(ir::BinaryOperator::BinaryOpFloorDiv) => { + numeric_op(left, right, |a, b| (a / b).floor(), true, error) + } + Some(ir::BinaryOperator::BinaryOpMod) => numeric_op(left, right, |a, b| a % b, true, error), + Some(ir::BinaryOperator::BinaryOpUnspecified) | None => { + Err(ReplayError("binary operator unspecified".to_string())) + } + } +} + +/// Apply a unary operator to a replayed operand. +/// +/// Example: +/// - op=NOT, operand=True -> False +fn apply_unary(op: i32, operand: Value) -> Result<Value, ReplayError> { + match ir::UnaryOperator::try_from(op).ok() { + Some(ir::UnaryOperator::UnaryOpNeg) => { + if let Some(value) = int_value(&operand) { + return Ok(Value::Number((-value).into())); + } + match operand.as_f64() { + Some(value) => Ok(Value::Number( + serde_json::Number::from_f64(-value) + .unwrap_or_else(|| serde_json::Number::from(0)), + )), + None => Err(ReplayError("unary neg expects number".to_string())), + } + } + Some(ir::UnaryOperator::UnaryOpNot) => Ok(Value::Bool(!is_truthy(&operand))), + Some(ir::UnaryOperator::UnaryOpUnspecified) | None => { + Err(ReplayError("unary operator unspecified".to_string())) + } + } +} + +/// Evaluate supported global helper functions. 
+/// +/// Example: +/// - range(0, 3) -> [0, 1, 2] +/// - isexception(value={"type": "...", "message": "..."}) -> True +fn evaluate_global_function( + global_function: i32, + args: Vec<Value>, + kwargs: HashMap<String, Value>, +) -> Result<Value, ReplayError> { + match ir::GlobalFunction::try_from(global_function).ok() { + Some(ir::GlobalFunction::Range) => Ok(range_from_args(&args).into()), + Some(ir::GlobalFunction::Len) => { + if let Some(first) = args.first() { + return Ok(Value::Number(len_of_value(first, replay_error)?)); + } + if let Some(items) = kwargs.get("items") { + return Ok(Value::Number(len_of_value(items, replay_error)?)); + } + Err(ReplayError("len() missing argument".to_string())) + } + Some(ir::GlobalFunction::Enumerate) => { + let items = if let Some(first) = args.first() { + first.clone() + } else if let Some(items) = kwargs.get("items") { + items.clone() + } else { + return Err(ReplayError("enumerate() missing argument".to_string())); + }; + let list = match items { + Value::Array(items) => items, + _ => return Err(ReplayError("enumerate() expects list".to_string())), + }; + let pairs: Vec<Value> = list + .into_iter() + .enumerate() + .map(|(idx, item)| Value::Array(vec![Value::Number((idx as i64).into()), item])) + .collect(); + Ok(Value::Array(pairs)) + } + Some(ir::GlobalFunction::Isexception) => { + if let Some(first) = args.first() { + return Ok(Value::Bool(is_exception_value(first))); + } + if let Some(value) = kwargs.get("value") { + return Ok(Value::Bool(is_exception_value(value))); + } + Err(ReplayError("isexception() missing argument".to_string())) + } + Some(ir::GlobalFunction::Unspecified) | None => { + Err(ReplayError("global function unspecified".to_string())) + } + } +} + +/// Build a reverse index of incoming data-flow edges. +/// +/// Sources are sorted from most-recent to oldest by timeline index so +/// lookups can short-circuit on the first viable definition. 
+fn build_incoming_data_map( + state: &RunnerState, + index: &HashMap, +) -> HashMap> { + let mut incoming: HashMap> = HashMap::new(); + for edge in &state.edges { + if edge.edge_type != EdgeType::DataFlow { + continue; + } + incoming.entry(edge.target).or_default().push(edge.source); + } + for (_target, sources) in incoming.iter_mut() { + sources.sort_by_key(|node_id| { + ( + index.get(node_id).copied().unwrap_or(0), + node_id.to_string(), + ) + }); + sources.reverse(); + } + incoming +} + +/// Replay variable values from a runner state snapshot. +/// +/// This is a convenience wrapper around ReplayEngine that prefers the latest +/// assignment for each variable and returns a fully materialized mapping. +pub fn replay_variables( + state: &RunnerState, + action_results: &HashMap, +) -> Result { + ReplayEngine::new(state, action_results).replay_variables() +} + +/// Replay concrete kwargs for a specific action node from a state snapshot. +pub fn replay_action_kwargs( + state: &RunnerState, + action_results: &HashMap, + node_id: Uuid, +) -> Result, ReplayError> { + ReplayEngine::new(state, action_results).replay_action_kwargs(node_id) +} + +#[cfg(test)] +mod tests { + use super::*; + use waymark_proto::ast as ir; + use waymark_runner_state::{RunnerState, VariableValue, value_visitor::ValueExpr}; + + fn action_plus_two_expr() -> ir::Expr { + ir::Expr { + kind: Some(ir::expr::Kind::BinaryOp(Box::new(ir::BinaryOp { + left: Some(Box::new(ir::Expr { + kind: Some(ir::expr::Kind::Variable(ir::Variable { + name: "action_result".to_string(), + })), + span: None, + })), + op: ir::BinaryOperator::BinaryOpAdd as i32, + right: Some(Box::new(ir::Expr { + kind: Some(ir::expr::Kind::Literal(ir::Literal { + value: Some(ir::literal::Value::IntValue(2)), + })), + span: None, + })), + }))), + span: None, + } + } + + #[test] + fn test_replay_variables_resolves_action_results() { + let mut state = RunnerState::new(None, None, None, true); + + let action0 = state + .queue_action( + 
"action", + Some(vec!["action_result".to_string()]), + None, + None, + Some(0), + ) + .expect("queue action"); + let first_list = ir::Expr { + kind: Some(ir::expr::Kind::List(ir::ListExpr { + elements: vec![action_plus_two_expr()], + })), + span: None, + }; + state + .record_assignment(vec!["results".to_string()], &first_list, None, None) + .expect("record assignment"); + + let action1 = state + .queue_action( + "action", + Some(vec!["action_result".to_string()]), + None, + None, + Some(1), + ) + .expect("queue action"); + let second_list = ir::Expr { + kind: Some(ir::expr::Kind::List(ir::ListExpr { + elements: vec![action_plus_two_expr()], + })), + span: None, + }; + let concat_expr = ir::Expr { + kind: Some(ir::expr::Kind::BinaryOp(Box::new(ir::BinaryOp { + left: Some(Box::new(ir::Expr { + kind: Some(ir::expr::Kind::Variable(ir::Variable { + name: "results".to_string(), + })), + span: None, + })), + op: ir::BinaryOperator::BinaryOpAdd as i32, + right: Some(Box::new(second_list)), + }))), + span: None, + }; + state + .record_assignment(vec!["results".to_string()], &concat_expr, None, None) + .expect("record assignment"); + + let replayed = replay_variables( + &state, + &HashMap::from([ + (action0.node_id, Value::Number(1.into())), + (action1.node_id, Value::Number(2.into())), + ]), + ) + .expect("replay"); + + assert_eq!( + replayed.variables.get("results"), + Some(&Value::Array(vec![3.into(), 4.into()])), + ); + } + + #[test] + fn test_replay_action_kwargs_resolves_variable_inputs() { + let mut state = RunnerState::new(None, None, None, true); + + let number_expr = ir::Expr { + kind: Some(ir::expr::Kind::Literal(ir::Literal { + value: Some(ir::literal::Value::IntValue(7)), + })), + span: None, + }; + state + .record_assignment( + vec!["number".to_string()], + &number_expr, + None, + Some("number = 7".to_string()), + ) + .expect("record assignment"); + + let kwargs = HashMap::from([( + "value".to_string(), + ValueExpr::Variable(VariableValue { + name: 
"number".to_string(), + }), + )]); + + let action = state + .queue_action( + "compute", + Some(vec!["result".to_string()]), + Some(kwargs), + Some("tests".to_string()), + None, + ) + .expect("queue action"); + + let kwargs = replay_action_kwargs( + &state, + &HashMap::from([(action.node_id, Value::Number(14.into()))]), + action.node_id, + ) + .expect("replay kwargs"); + + assert_eq!(kwargs.get("value"), Some(&Value::Number(7.into()))); + } +} diff --git a/crates/runner/src/retry.rs b/crates/runner/src/retry.rs new file mode 100644 index 00000000..a24f7a2d --- /dev/null +++ b/crates/runner/src/retry.rs @@ -0,0 +1,137 @@ +//! Retry/timeout policy helpers shared by runner components. + +use waymark_proto::ast as ir; + +#[derive(Clone, Debug)] +pub(crate) struct RetryDecision { + pub(crate) should_retry: bool, +} + +pub(crate) struct RetryPolicyEvaluator<'a> { + policies: &'a [ir::PolicyBracket], + exception_name: Option<&'a str>, +} + +fn is_synthetic_runtime_exception(exception_name: Option<&str>) -> bool { + matches!(exception_name, Some("ExecutorResume" | "ActionTimeout")) +} + +impl<'a> RetryPolicyEvaluator<'a> { + pub(crate) fn new(policies: &'a [ir::PolicyBracket], exception_name: Option<&'a str>) -> Self { + Self { + policies, + exception_name, + } + } + + pub(crate) fn decision(&self, attempt: i32) -> RetryDecision { + let mut max_retries: i32 = 0; + let mut matched_policy = false; + + for policy in self.policies { + let Some(ir::policy_bracket::Kind::Retry(retry)) = policy.kind.as_ref() else { + continue; + }; + let matches_exception = if retry.exception_types.is_empty() { + // Synthetic runtime exceptions (resume/timeout) can represent in-flight + // work that may still be running out-of-band. Require explicit opt-in + // exception filters before retrying these cases. 
+ !is_synthetic_runtime_exception(self.exception_name) + } else if let Some(name) = self.exception_name { + retry.exception_types.iter().any(|value| value == name) + } else { + false + }; + if !matches_exception { + continue; + } + matched_policy = true; + max_retries = max_retries.max(retry.max_retries as i32); + } + + let should_retry = matched_policy && attempt - 1 < max_retries; + + RetryDecision { should_retry } + } +} + +pub(crate) fn timeout_seconds_from_policies(policies: &[ir::PolicyBracket]) -> Option<u32> { + let mut timeout_seconds: Option<u64> = None; + for policy in policies { + let Some(ir::policy_bracket::Kind::Timeout(timeout)) = policy.kind.as_ref() else { + continue; + }; + let seconds = timeout + .timeout + .as_ref() + .map(|duration| duration.seconds) + .unwrap_or(0); + if seconds == 0 { + continue; + } + timeout_seconds = Some(match timeout_seconds { + Some(existing) => existing.min(seconds), + None => seconds, + }); + } + timeout_seconds.map(|seconds| seconds.min(u64::from(u32::MAX)) as u32) +} + +#[cfg(test)] +mod tests { + use super::*; + + fn retry_policy(max_retries: u32, exception_types: Vec<&str>) -> ir::PolicyBracket { + ir::PolicyBracket { + kind: Some(ir::policy_bracket::Kind::Retry(ir::RetryPolicy { + exception_types: exception_types + .into_iter() + .map(ToString::to_string) + .collect(), + max_retries, + backoff: None, + })), + } + } + + fn timeout_policy(seconds: u64) -> ir::PolicyBracket { + ir::PolicyBracket { + kind: Some(ir::policy_bracket::Kind::Timeout(ir::TimeoutPolicy { + timeout: Some(ir::Duration { seconds }), + })), + } + } + + #[test] + fn retry_policy_evaluator_happy_path() { + let policies = vec![ + retry_policy(1, vec!["ValueError"]), + retry_policy(3, Vec::new()), + ]; + let decision = RetryPolicyEvaluator::new(&policies, Some("ValueError")).decision(2); + assert!(decision.should_retry); + + let exhausted = RetryPolicyEvaluator::new(&policies, Some("ValueError")).decision(4); + assert!(!exhausted.should_retry); + } + + 
#[test] + fn retry_policy_evaluator_wildcard_does_not_retry_synthetic_timeout() { + let policies = vec![retry_policy(3, Vec::new())]; + let decision = RetryPolicyEvaluator::new(&policies, Some("ActionTimeout")).decision(1); + assert!(!decision.should_retry); + } + + #[test] + fn retry_policy_evaluator_explicit_timeout_retry_happy_path() { + let policies = vec![retry_policy(2, vec!["ActionTimeout"])]; + let decision = RetryPolicyEvaluator::new(&policies, Some("ActionTimeout")).decision(1); + assert!(decision.should_retry); + } + + #[test] + fn timeout_seconds_from_policies_happy_path() { + let policies = vec![timeout_policy(30), timeout_policy(10), timeout_policy(0)]; + assert_eq!(timeout_seconds_from_policies(&policies), Some(10)); + } +} diff --git a/crates/runner/src/synthetic_exceptions.rs b/crates/runner/src/synthetic_exceptions.rs new file mode 100644 index 00000000..df89b71f --- /dev/null +++ b/crates/runner/src/synthetic_exceptions.rs @@ -0,0 +1,90 @@ +//! Synthetic exception helpers produced by Rust runtime coordination paths. 
+ +use serde_json::Value; + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub(crate) enum SyntheticExceptionType { + ExecutorResume, + ActionTimeout, +} + +impl SyntheticExceptionType { + pub(crate) fn as_type_str(self) -> &'static str { + match self { + Self::ExecutorResume => "ExecutorResume", + Self::ActionTimeout => "ActionTimeout", + } + } + + fn from_type_str(value: &str) -> Option<Self> { + match value { + "ExecutorResume" => Some(Self::ExecutorResume), + "ActionTimeout" => Some(Self::ActionTimeout), + _ => None, + } + } + + pub(crate) fn from_value(value: &Value) -> Option<Self> { + let Value::Object(map) = value else { + return None; + }; + map.get("type") + .and_then(Value::as_str) + .and_then(Self::from_type_str) + } +} + +pub(crate) fn build_synthetic_exception_value( + exception_type: SyntheticExceptionType, + message: impl Into<String>, + fields: Vec<(String, Value)>, +) -> Value { + let mut map = serde_json::Map::new(); + map.insert( + "type".to_string(), + Value::String(exception_type.as_type_str().to_string()), + ); + map.insert("message".to_string(), Value::String(message.into())); + for (key, value) in fields { + map.insert(key, value); + } + Value::Object(map) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn synthetic_exception_from_value_happy_path() { + let value = serde_json::json!({"type": "ActionTimeout", "message": "x"}); + assert_eq!( + SyntheticExceptionType::from_value(&value), + Some(SyntheticExceptionType::ActionTimeout) + ); + } + + #[test] + fn build_synthetic_exception_value_happy_path() { + let value = build_synthetic_exception_value( + SyntheticExceptionType::ExecutorResume, + "resume", + vec![( + "attempt".to_string(), + Value::Number(serde_json::Number::from(2)), + )], + ); + let Value::Object(map) = value else { + panic!("expected object value"); + }; + assert_eq!( + map.get("type"), + Some(&Value::String("ExecutorResume".to_string())) + ); + assert_eq!( + map.get("message"), + Some(&Value::String("resume".to_string())) + ); + 
assert_eq!(map.get("attempt"), Some(&Value::Number(2.into()))); + } +} diff --git a/crates/scheduler-backend/Cargo.toml b/crates/scheduler-backend/Cargo.toml new file mode 100644 index 00000000..6af1c2bb --- /dev/null +++ b/crates/scheduler-backend/Cargo.toml @@ -0,0 +1,10 @@ +[package] +name = "waymark-scheduler-backend" +version = "0.1.0" +edition = "2024" + +[dependencies] +async-trait = { workspace = true } +uuid = { workspace = true } +waymark-backends-core = { workspace = true } +waymark-scheduler-core = { workspace = true } diff --git a/crates/scheduler-backend/src/lib.rs b/crates/scheduler-backend/src/lib.rs new file mode 100644 index 00000000..613cc4ac --- /dev/null +++ b/crates/scheduler-backend/src/lib.rs @@ -0,0 +1,29 @@ +use uuid::Uuid; + +pub use waymark_backends_core::{BackendError, BackendResult}; +use waymark_scheduler_core::{CreateScheduleParams, ScheduleId, WorkflowSchedule}; + +/// Backend capability for workflow schedule persistence. +#[async_trait::async_trait] +pub trait SchedulerBackend: Send + Sync { + async fn upsert_schedule(&self, params: &CreateScheduleParams) -> BackendResult; + async fn get_schedule(&self, id: ScheduleId) -> BackendResult; + async fn get_schedule_by_name( + &self, + workflow_name: &str, + schedule_name: &str, + ) -> BackendResult>; + async fn list_schedules(&self, limit: i64, offset: i64) + -> BackendResult>; + async fn count_schedules(&self) -> BackendResult; + async fn update_schedule_status(&self, id: ScheduleId, status: &str) -> BackendResult; + async fn delete_schedule(&self, id: ScheduleId) -> BackendResult; + async fn find_due_schedules(&self, limit: i32) -> BackendResult>; + async fn has_running_instance(&self, schedule_id: ScheduleId) -> BackendResult; + async fn mark_schedule_executed( + &self, + schedule_id: ScheduleId, + instance_id: Uuid, + ) -> BackendResult<()>; + async fn skip_schedule_run(&self, schedule_id: ScheduleId) -> BackendResult<()>; +} diff --git a/crates/scheduler-core/Cargo.toml 
b/crates/scheduler-core/Cargo.toml new file mode 100644 index 00000000..9659e878 --- /dev/null +++ b/crates/scheduler-core/Cargo.toml @@ -0,0 +1,14 @@ +[package] +name = "waymark-scheduler-core" +version = "0.1.0" +edition = "2024" + +[dependencies] +uuid = { workspace = true, features = ["serde", "v4"] } +chrono = { workspace = true, features = ["serde"] } +serde = { workspace = true, features = ["derive"] } +cron = { workspace = true } +rand = { workspace = true } + +[dev-dependencies] +chrono = { workspace = true, features = ["clock"] } diff --git a/crates/scheduler-core/src/lib.rs b/crates/scheduler-core/src/lib.rs new file mode 100644 index 00000000..02d2783b --- /dev/null +++ b/crates/scheduler-core/src/lib.rs @@ -0,0 +1,6 @@ +mod types; +mod utils; + +pub use self::types::*; + +pub use self::utils::*; diff --git a/crates/scheduler-core/src/types.rs b/crates/scheduler-core/src/types.rs new file mode 100644 index 00000000..4f8c9104 --- /dev/null +++ b/crates/scheduler-core/src/types.rs @@ -0,0 +1,139 @@ +//! Schedule types. + +use chrono::{DateTime, Utc}; +use serde::{Deserialize, Serialize}; +use uuid::Uuid; + +/// Unique identifier for a schedule. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)] +pub struct ScheduleId(pub Uuid); + +impl ScheduleId { + pub fn new() -> Self { + Self(Uuid::new_v4()) + } +} + +impl Default for ScheduleId { + fn default() -> Self { + Self::new() + } +} + +impl std::fmt::Display for ScheduleId { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.0) + } +} + +/// Type of schedule. 
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "lowercase")] +pub enum ScheduleType { + Cron, + Interval, +} + +impl ScheduleType { + pub fn as_str(&self) -> &'static str { + match self { + Self::Cron => "cron", + Self::Interval => "interval", + } + } + + pub fn parse(s: &str) -> Option { + match s { + "cron" => Some(Self::Cron), + "interval" => Some(Self::Interval), + _ => None, + } + } +} + +impl std::fmt::Display for ScheduleType { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.as_str()) + } +} + +/// Status of a workflow schedule. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "lowercase")] +pub enum ScheduleStatus { + Active, + Paused, + Deleted, +} + +impl ScheduleStatus { + pub fn as_str(&self) -> &'static str { + match self { + Self::Active => "active", + Self::Paused => "paused", + Self::Deleted => "deleted", + } + } + + pub fn parse(s: &str) -> Option { + match s { + "active" => Some(Self::Active), + "paused" => Some(Self::Paused), + "deleted" => Some(Self::Deleted), + _ => None, + } + } +} + +impl std::fmt::Display for ScheduleStatus { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.as_str()) + } +} + +/// A workflow schedule (recurring execution). +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct WorkflowSchedule { + pub id: Uuid, + pub workflow_name: String, + pub schedule_name: String, + pub schedule_type: String, + pub cron_expression: Option, + pub interval_seconds: Option, + pub jitter_seconds: i64, + pub input_payload: Option>, + pub status: String, + pub next_run_at: Option>, + pub last_run_at: Option>, + pub last_instance_id: Option, + pub created_at: DateTime, + pub updated_at: DateTime, + pub priority: i32, + pub allow_duplicate: bool, +} + +impl WorkflowSchedule { + /// Get the schedule type as an enum. 
+ pub fn schedule_type_enum(&self) -> Option { + ScheduleType::parse(&self.schedule_type) + } + + /// Get the status as an enum. + pub fn status_enum(&self) -> Option { + ScheduleStatus::parse(&self.status) + } +} + +/// Parameters for creating a schedule. +#[derive(Debug, Clone)] +pub struct CreateScheduleParams { + pub workflow_name: String, + pub schedule_name: String, + pub schedule_type: ScheduleType, + pub cron_expression: Option, + pub interval_seconds: Option, + pub jitter_seconds: i64, + pub input_payload: Option>, + pub priority: i32, + pub allow_duplicate: bool, +} diff --git a/crates/scheduler-core/src/utils.rs b/crates/scheduler-core/src/utils.rs new file mode 100644 index 00000000..4530329f --- /dev/null +++ b/crates/scheduler-core/src/utils.rs @@ -0,0 +1,181 @@ +//! Cron and interval schedule utilities. +//! +//! This module provides utilities for computing the next run time for +//! cron expressions and fixed intervals. +//! +//! Note: This module accepts standard 5-field Unix cron expressions +//! (minute, hour, day-of-month, month, day-of-week) and converts them +//! to 6-field format (with seconds) for the `cron` crate. + +use chrono::{DateTime, Utc}; +use cron::Schedule; +use rand::Rng; +use std::str::FromStr; + +use super::ScheduleType; + +/// Convert a 5-field Unix cron expression to 6-field format. +/// +/// The `cron` crate requires 6 fields (sec min hour dom month dow), +/// but standard Unix cron uses 5 fields (min hour dom month dow). +/// This function prepends "0 " to run at second 0 of each match. +fn normalize_cron_expr(cron_expr: &str) -> String { + let fields: Vec<&str> = cron_expr.split_whitespace().collect(); + if fields.len() == 5 { + // Standard 5-field cron: prepend "0" for seconds + format!("0 {}", cron_expr) + } else { + // Already 6+ fields, use as-is + cron_expr.to_string() + } +} + +/// Compute the next run time for a cron expression. 
+/// +/// Accepts standard 5-field Unix cron expressions (e.g., "0 * * * *" for hourly) +/// or 6-field expressions with seconds. +/// +/// Returns the next occurrence after the current time (UTC). +pub fn next_cron_run(cron_expr: &str) -> Result<DateTime<Utc>, String> { + let normalized = normalize_cron_expr(cron_expr); + let schedule = Schedule::from_str(&normalized) + .map_err(|e| format!("Invalid cron expression '{}': {}", cron_expr, e))?; + schedule + .upcoming(Utc) + .next() + .ok_or_else(|| "No upcoming schedule found".to_string()) +} + +/// Compute the next run time for an interval-based schedule. +/// +/// If `last_run_at` is provided, the next run is `last_run_at + interval_seconds`. +/// Otherwise, the next run is `now + interval_seconds`. +pub fn next_interval_run( + interval_seconds: i64, + last_run_at: Option<DateTime<Utc>>, +) -> DateTime<Utc> { + let base = last_run_at.unwrap_or_else(Utc::now); + base + chrono::Duration::seconds(interval_seconds) +} + +/// Validate a cron expression without computing the next run. +/// +/// Accepts standard 5-field Unix cron expressions or 6-field expressions. +pub fn validate_cron(cron_expr: &str) -> Result<(), String> { + let normalized = normalize_cron_expr(cron_expr); + Schedule::from_str(&normalized) + .map(|_| ()) + .map_err(|e| format!("Invalid cron expression '{}': {}", cron_expr, e)) +} + +/// Apply a random jitter delay (in seconds) to a scheduled time. +/// +/// If `jitter_seconds` is 0, the base time is returned unchanged. +pub fn apply_jitter(base: DateTime<Utc>, jitter_seconds: i64) -> Result<DateTime<Utc>, String> { + if jitter_seconds < 0 { + return Err("jitter_seconds must be non-negative".to_string()); + } + if jitter_seconds == 0 { + return Ok(base); + } + let jitter = rand::thread_rng().gen_range(0..=jitter_seconds); + Ok(base + chrono::Duration::seconds(jitter)) +} + +/// Compute the next run time for a schedule type with optional jitter. 
+pub fn compute_next_run( + schedule_type: ScheduleType, + cron_expression: Option<&str>, + interval_seconds: Option<i64>, + jitter_seconds: i64, + last_run_at: Option<DateTime<Utc>>, +) -> Result<DateTime<Utc>, String> { + let base = match schedule_type { + ScheduleType::Cron => { + let expr = cron_expression.ok_or_else(|| "cron expression required".to_string())?; + next_cron_run(expr)? + } + ScheduleType::Interval => { + let seconds = + interval_seconds.ok_or_else(|| "interval_seconds required".to_string())?; + next_interval_run(seconds, last_run_at) + } + }; + + apply_jitter(base, jitter_seconds) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_normalize_cron_expr() { + // 5-field should get "0 " prepended + assert_eq!(normalize_cron_expr("* * * * *"), "0 * * * * *"); + assert_eq!(normalize_cron_expr("0 * * * *"), "0 0 * * * *"); + + // 6-field should remain unchanged + assert_eq!(normalize_cron_expr("0 0 * * * *"), "0 0 * * * *"); + } + + #[test] + fn test_valid_cron_expression() { + // Standard 5-field Unix cron expressions + assert!(validate_cron("0 * * * *").is_ok()); + assert!(validate_cron("0 0 * * *").is_ok()); + assert!(validate_cron("* * * * *").is_ok()); + + // 6-field expression with seconds + assert!(validate_cron("0 0 * * * *").is_ok()); + } + + #[test] + fn test_invalid_cron_expression() { + assert!(validate_cron("invalid").is_err()); + assert!(validate_cron("").is_err()); + } + + #[test] + fn test_next_cron_run() { + // Every minute should return a time in the future + let next = next_cron_run("* * * * *").unwrap(); + assert!(next > Utc::now()); + } + + #[test] + fn test_next_interval_run_from_now() { + let before = Utc::now(); + let next = next_interval_run(3600, None); + let after = Utc::now(); + + // Should be approximately 1 hour from now + assert!(next >= before + chrono::Duration::seconds(3600)); + assert!(next <= after + chrono::Duration::seconds(3600)); + } + + #[test] + fn test_next_interval_run_from_last() { + let last_run = Utc::now() - 
chrono::Duration::seconds(1800); + let next = next_interval_run(3600, Some(last_run)); + + // Should be 1 hour after last_run (30 minutes from now) + let expected = last_run + chrono::Duration::seconds(3600); + assert_eq!(next, expected); + } + + #[test] + fn test_apply_jitter_zero() { + let base = Utc::now(); + let jittered = apply_jitter(base, 0).unwrap(); + assert_eq!(jittered, base); + } + + #[test] + fn test_apply_jitter_range() { + let base = Utc::now(); + let jittered = apply_jitter(base, 5).unwrap(); + assert!(jittered >= base); + assert!(jittered <= base + chrono::Duration::seconds(5)); + } +} diff --git a/crates/webapp-backend/Cargo.toml b/crates/webapp-backend/Cargo.toml new file mode 100644 index 00000000..735810b3 --- /dev/null +++ b/crates/webapp-backend/Cargo.toml @@ -0,0 +1,10 @@ +[package] +name = "waymark-webapp-backend" +version = "0.1.0" +edition = "2024" + +[dependencies] +async-trait = { workspace = true } +uuid = { workspace = true } +waymark-backends-core = { workspace = true } +waymark-webapp-core = { workspace = true } diff --git a/crates/webapp-backend/src/lib.rs b/crates/webapp-backend/src/lib.rs new file mode 100644 index 00000000..354e0e67 --- /dev/null +++ b/crates/webapp-backend/src/lib.rs @@ -0,0 +1,54 @@ +use uuid::Uuid; +pub use waymark_backends_core::{BackendError, BackendResult}; +use waymark_webapp_core::{ + ExecutionGraphView, InstanceDetail, InstanceSummary, ScheduleDetail, ScheduleInvocationSummary, + ScheduleSummary, TimelineEntry, WorkerActionRow, WorkerAggregateStats, WorkerStatus, +}; + +/// Backend capability for webapp-specific queries. 
+#[async_trait::async_trait] +pub trait WebappBackend: Send + Sync { + async fn count_instances(&self, search: Option<&str>) -> BackendResult; + async fn list_instances( + &self, + search: Option<&str>, + limit: i64, + offset: i64, + ) -> BackendResult>; + async fn get_instance(&self, instance_id: Uuid) -> BackendResult; + async fn get_execution_graph( + &self, + instance_id: Uuid, + ) -> BackendResult>; + async fn get_workflow_graph( + &self, + instance_id: Uuid, + ) -> BackendResult>; + async fn get_action_results(&self, instance_id: Uuid) -> BackendResult>; + async fn get_distinct_workflows(&self) -> BackendResult>; + async fn get_distinct_statuses(&self) -> BackendResult>; + async fn count_schedules(&self) -> BackendResult; + async fn list_schedules(&self, limit: i64, offset: i64) -> BackendResult>; + async fn get_schedule(&self, schedule_id: Uuid) -> BackendResult; + async fn count_schedule_invocations(&self, schedule_id: Uuid) -> BackendResult; + async fn list_schedule_invocations( + &self, + schedule_id: Uuid, + limit: i64, + offset: i64, + ) -> BackendResult>; + async fn update_schedule_status(&self, schedule_id: Uuid, status: &str) -> BackendResult; + async fn get_distinct_schedule_statuses(&self) -> BackendResult>; + async fn get_distinct_schedule_types(&self) -> BackendResult>; + async fn get_worker_action_stats( + &self, + window_minutes: i64, + ) -> BackendResult>; + async fn get_worker_aggregate_stats( + &self, + window_minutes: i64, + ) -> BackendResult; + async fn worker_status_table_exists(&self) -> bool; + async fn schedules_table_exists(&self) -> bool; + async fn get_worker_statuses(&self, window_minutes: i64) -> BackendResult>; +} diff --git a/crates/webapp-core/Cargo.toml b/crates/webapp-core/Cargo.toml new file mode 100644 index 00000000..2b51dc6d --- /dev/null +++ b/crates/webapp-core/Cargo.toml @@ -0,0 +1,9 @@ +[package] +name = "waymark-webapp-core" +version = "0.1.0" +edition = "2024" + +[dependencies] +uuid = { workspace = true, features 
= ["serde"] } +chrono = { workspace = true, features = ["serde"] } +serde = { workspace = true, features = ["derive"] } diff --git a/crates/webapp-core/src/lib.rs b/crates/webapp-core/src/lib.rs new file mode 100644 index 00000000..7805c428 --- /dev/null +++ b/crates/webapp-core/src/lib.rs @@ -0,0 +1,299 @@ +//! Shared types for the webapp. + +use chrono::{DateTime, Utc}; +use serde::{Deserialize, Serialize}; +use uuid::Uuid; + +/// Configuration for the webapp server. +#[derive(Debug, Clone)] +pub struct WebappConfig { + pub enabled: bool, + pub host: String, + pub port: u16, +} + +impl Default for WebappConfig { + fn default() -> Self { + Self { + enabled: false, + host: "0.0.0.0".to_string(), + port: 24119, + } + } +} + +impl WebappConfig { + /// Create config from environment variables. + pub fn from_env() -> Self { + let enabled = std::env::var("WAYMARK_WEBAPP_ENABLED") + .map(|v| v == "true" || v == "1") + .unwrap_or(false); + + let (host, port) = std::env::var("WAYMARK_WEBAPP_ADDR") + .ok() + .and_then(|addr| { + let parts: Vec<&str> = addr.split(':').collect(); + if parts.len() == 2 { + let host = parts[0].to_string(); + let port = parts[1].parse().ok()?; + Some((host, port)) + } else { + None + } + }) + .unwrap_or_else(|| ("0.0.0.0".to_string(), 24119)); + + Self { + enabled, + host, + port, + } + } + + /// Get the bind address. + pub fn bind_addr(&self) -> String { + format!("{}:{}", self.host, self.port) + } +} + +/// Instance status. 
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "lowercase")] +pub enum InstanceStatus { + Queued, + Running, + Completed, + Failed, +} + +impl std::fmt::Display for InstanceStatus { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::Queued => write!(f, "queued"), + Self::Running => write!(f, "running"), + Self::Completed => write!(f, "completed"), + Self::Failed => write!(f, "failed"), + } + } +} + +/// Summary of a workflow instance for listing. +#[derive(Debug, Clone, Serialize)] +pub struct InstanceSummary { + pub id: Uuid, + pub entry_node: Uuid, + pub created_at: DateTime, + pub status: InstanceStatus, + pub workflow_name: Option, + pub input_preview: String, +} + +/// Full details of a workflow instance. +#[derive(Debug, Clone, Serialize)] +pub struct InstanceDetail { + pub id: Uuid, + pub entry_node: Uuid, + pub created_at: DateTime, + pub status: InstanceStatus, + pub workflow_name: Option, + pub input_payload: String, + pub result_payload: String, + pub error_payload: Option, +} + +/// Node in the execution graph for display. +#[derive(Debug, Clone, Serialize)] +pub struct ExecutionNodeView { + pub id: String, + pub node_type: String, + pub label: String, + pub status: String, + pub action_name: Option, + pub module_name: Option, +} + +/// Edge in the execution graph for display. +#[derive(Debug, Clone, Serialize)] +pub struct ExecutionEdgeView { + pub source: String, + pub target: String, + pub edge_type: String, +} + +/// Execution graph data for rendering. +#[derive(Debug, Clone, Serialize)] +pub struct ExecutionGraphView { + pub nodes: Vec, + pub edges: Vec, +} + +/// Timeline entry for an action execution. 
+#[derive(Debug, Clone, Serialize)] +pub struct TimelineEntry { + pub action_id: String, + pub action_name: String, + pub module_name: Option, + pub status: String, + pub attempt_number: i32, + pub dispatched_at: Option, + pub completed_at: Option, + pub duration_ms: Option, + pub request_preview: String, + pub response_preview: String, + pub error: Option, +} + +/// Action log entry with full details. +#[derive(Debug, Clone, Serialize)] +pub struct ActionLogEntry { + pub action_id: String, + pub action_name: String, + pub module_name: Option, + pub status: String, + pub attempt_number: i32, + pub dispatched_at: Option, + pub completed_at: Option, + pub duration_ms: Option, + pub request: String, + pub response: String, + pub error: Option, +} + +/// Response for the workflow run data API. +#[derive(Debug, Serialize)] +pub struct WorkflowRunDataResponse { + pub nodes: Vec, + pub timeline: Vec, + pub page: i64, + pub per_page: i64, + pub total: i64, + pub has_more: bool, +} + +/// Response for action logs API. +#[derive(Debug, Serialize)] +pub struct ActionLogsResponse { + pub logs: Vec, +} + +/// Filter values response. +#[derive(Debug, Serialize)] +pub struct FilterValuesResponse { + pub values: Vec, +} + +/// Health check response. +#[derive(Debug, Serialize)] +pub struct HealthResponse { + pub status: &'static str, + pub service: &'static str, +} + +/// Export format for a workflow instance. +#[derive(Debug, Serialize)] +pub struct WorkflowInstanceExport { + pub export_version: &'static str, + pub exported_at: String, + pub instance: InstanceExportInfo, + pub nodes: Vec, + pub timeline: Vec, +} + +/// Full worker status for webapp display. 
+#[derive(Debug, Clone)] +pub struct WorkerStatus { + pub pool_id: Uuid, + pub active_workers: i32, + pub throughput_per_min: f64, + pub actions_per_sec: f64, + pub total_completed: i64, + pub last_action_at: Option>, + pub updated_at: DateTime, + pub median_dequeue_ms: Option, + pub median_handling_ms: Option, + pub dispatch_queue_size: Option, + pub total_in_flight: Option, + pub median_instance_duration_secs: Option, + pub active_instance_count: i32, + pub total_instances_completed: i64, + pub instances_per_sec: f64, + pub instances_per_min: f64, + pub time_series: Option>, +} + +/// Worker action stats row for display. +#[derive(Debug, Clone)] +pub struct WorkerActionRow { + pub pool_id: String, + pub active_workers: i64, + pub actions_per_sec: String, + pub throughput_per_min: i64, + pub total_completed: i64, + pub median_dequeue_ms: Option, + pub median_handling_ms: Option, + pub last_action_at: Option, + pub updated_at: String, +} + +/// Aggregate worker stats for overview cards. +#[derive(Debug, Clone)] +pub struct WorkerAggregateStats { + pub active_worker_count: i64, + pub actions_per_sec: String, + pub total_in_flight: i64, + pub total_queue_depth: i64, +} + +/// Instance info for export. +#[derive(Debug, Serialize)] +pub struct InstanceExportInfo { + pub id: String, + pub status: String, + pub created_at: String, + pub input_payload: String, + pub result_payload: String, +} + +/// Schedule summary for listing. +#[derive(Debug, Clone, Serialize)] +pub struct ScheduleSummary { + pub id: String, + pub workflow_name: String, + pub schedule_name: String, + pub schedule_type: String, + pub cron_expression: Option, + pub interval_seconds: Option, + pub status: String, + pub next_run_at: Option, + pub last_run_at: Option, + pub created_at: String, +} + +/// Full schedule details. 
+#[derive(Debug, Clone, Serialize)] +pub struct ScheduleDetail { + pub id: String, + pub workflow_name: String, + pub schedule_name: String, + pub schedule_type: String, + pub cron_expression: Option, + pub interval_seconds: Option, + pub jitter_seconds: i64, + pub status: String, + pub next_run_at: Option, + pub last_run_at: Option, + pub last_instance_id: Option, + pub created_at: String, + pub updated_at: String, + pub priority: i32, + pub allow_duplicate: bool, + pub input_payload: Option, +} + +/// Invocation summary row for schedule detail pages. +#[derive(Debug, Clone, Serialize)] +pub struct ScheduleInvocationSummary { + pub id: Uuid, + pub created_at: DateTime, + pub status: InstanceStatus, +} diff --git a/crates/worker-status-backend/Cargo.toml b/crates/worker-status-backend/Cargo.toml new file mode 100644 index 00000000..ff50466a --- /dev/null +++ b/crates/worker-status-backend/Cargo.toml @@ -0,0 +1,10 @@ +[package] +name = "waymark-worker-status-backend" +version = "0.1.0" +edition = "2024" + +[dependencies] +async-trait = { workspace = true } +uuid = { workspace = true } +chrono = { workspace = true } +waymark-backends-core = { workspace = true } diff --git a/crates/worker-status-backend/src/lib.rs b/crates/worker-status-backend/src/lib.rs new file mode 100644 index 00000000..bc23eb4e --- /dev/null +++ b/crates/worker-status-backend/src/lib.rs @@ -0,0 +1,32 @@ +//! Worker status backend. + +use uuid::Uuid; + +pub use waymark_backends_core::{BackendError, BackendResult}; + +/// Worker status update for persistence. 
+#[derive(Clone, Debug)] +pub struct WorkerStatusUpdate { + pub pool_id: Uuid, + pub throughput_per_min: f64, + pub total_completed: i64, + pub last_action_at: Option>, + pub median_dequeue_ms: Option, + pub median_handling_ms: Option, + pub dispatch_queue_size: i64, + pub total_in_flight: i64, + pub active_workers: i32, + pub actions_per_sec: f64, + pub median_instance_duration_secs: Option, + pub active_instance_count: i32, + pub total_instances_completed: i64, + pub instances_per_sec: f64, + pub instances_per_min: f64, + pub time_series: Option>, +} + +/// Backend capability for recording worker status metrics. +#[async_trait::async_trait] +pub trait WorkerStatusBackend: Send + Sync { + async fn upsert_worker_status(&self, status: &WorkerStatusUpdate) -> BackendResult<()>; +} diff --git a/crates/workflow-registry-backend/Cargo.toml b/crates/workflow-registry-backend/Cargo.toml new file mode 100644 index 00000000..2dc85a4d --- /dev/null +++ b/crates/workflow-registry-backend/Cargo.toml @@ -0,0 +1,9 @@ +[package] +name = "waymark-workflow-registry-backend" +version = "0.1.0" +edition = "2024" + +[dependencies] +async-trait = { workspace = true } +uuid = { workspace = true } +waymark-backends-core = { workspace = true } diff --git a/crates/workflow-registry-backend/src/lib.rs b/crates/workflow-registry-backend/src/lib.rs new file mode 100644 index 00000000..041c8482 --- /dev/null +++ b/crates/workflow-registry-backend/src/lib.rs @@ -0,0 +1,35 @@ +use uuid::Uuid; + +pub use waymark_backends_core::{BackendError, BackendResult}; + +/// Registration payload for storing workflow DAG metadata. +#[derive(Clone, Debug)] +pub struct WorkflowRegistration { + pub workflow_name: String, + pub workflow_version: String, + pub ir_hash: String, + pub program_proto: Vec, + pub concurrent: bool, +} + +#[derive(Clone, Debug)] +/// Stored workflow version metadata and IR payload. 
+pub struct WorkflowVersion { + pub id: Uuid, + pub workflow_name: String, + pub workflow_version: String, + pub ir_hash: String, + pub program_proto: Vec, + pub concurrent: bool, +} + +/// Backend capability for registering workflow DAGs. +#[async_trait::async_trait] +pub trait WorkflowRegistryBackend: Send + Sync { + async fn upsert_workflow_version( + &self, + registration: &WorkflowRegistration, + ) -> BackendResult; + + async fn get_workflow_versions(&self, ids: &[Uuid]) -> BackendResult>; +} From 6105f1679cab71aff3e33ac96241266372c40654 Mon Sep 17 00:00:00 2001 From: MOZGIII Date: Wed, 25 Feb 2026 21:23:46 +0400 Subject: [PATCH 4/5] Extract the rest of the crates --- Cargo.lock | 74 +- Cargo.toml | 6 + crates/backend-fault-injection/Cargo.toml | 12 + crates/backend-fault-injection/src/lib.rs | 128 + crates/backend-memory/Cargo.toml | 1 + crates/backend-memory/src/core_backend.rs | 5 +- .../src/garbage_collector_backend.rs | 5 +- .../backend-memory/src/scheduler_backend.rs | 2 +- crates/backend-memory/src/webapp_backend.rs | 3 +- crates/backend-postgres-migrations/Cargo.toml | 7 + crates/backend-postgres-migrations/build.rs | 3 + .../migrations/0001_init.sql | 115 + .../0002_runner_actions_done_execution_id.sql | 7 + .../migrations/0003_instance_locks.sql | 12 + .../migrations/0004_workflow_versions.sql | 21 + ...5_runner_instances_workflow_version_id.sql | 7 + .../0006_drop_unused_runner_tables.sql | 4 + .../0007_runner_instances_schedule_id.sql | 5 + .../0008_runner_actions_done_timing.sql | 14 + .../0009_instance_search_columns.sql | 63 + crates/backend-postgres-migrations/src/lib.rs | 8 + crates/backend-postgres/Cargo.toml | 39 + crates/backend-postgres/src/core.rs | 1993 ++++++++++++++ crates/backend-postgres/src/lib.rs | 115 + crates/backend-postgres/src/registry.rs | 146 ++ crates/backend-postgres/src/scheduler.rs | 605 +++++ crates/backend-postgres/src/test_helpers.rs | 27 + crates/backend-postgres/src/webapp.rs | 2329 +++++++++++++++++ 
crates/backends-core/src/lib.rs | 7 + crates/core-backend/src/lib.rs | 2 +- crates/dag/Cargo.toml | 2 +- crates/dag/src/builder/test_helpers.rs | 2 +- crates/dag/src/validate.rs | 2 +- crates/garbage-collector-backend/src/lib.rs | 2 +- crates/integration-support/Cargo.toml | 10 + crates/integration-support/src/lib.rs | 5 + crates/integration-support/src/postgres.rs | 103 + crates/runner/src/lib.rs | 4 +- crates/runner/src/synthetic_exceptions.rs | 8 +- crates/test-support/Cargo.toml | 8 + crates/test-support/src/lib.rs | 5 + crates/test-support/src/postgres.rs | 15 + crates/webapp-backend/src/lib.rs | 2 +- crates/webapp-core/src/lib.rs | 52 - 44 files changed, 5914 insertions(+), 71 deletions(-) create mode 100644 crates/backend-fault-injection/Cargo.toml create mode 100644 crates/backend-fault-injection/src/lib.rs create mode 100644 crates/backend-postgres-migrations/Cargo.toml create mode 100644 crates/backend-postgres-migrations/build.rs create mode 100644 crates/backend-postgres-migrations/migrations/0001_init.sql create mode 100644 crates/backend-postgres-migrations/migrations/0002_runner_actions_done_execution_id.sql create mode 100644 crates/backend-postgres-migrations/migrations/0003_instance_locks.sql create mode 100644 crates/backend-postgres-migrations/migrations/0004_workflow_versions.sql create mode 100644 crates/backend-postgres-migrations/migrations/0005_runner_instances_workflow_version_id.sql create mode 100644 crates/backend-postgres-migrations/migrations/0006_drop_unused_runner_tables.sql create mode 100644 crates/backend-postgres-migrations/migrations/0007_runner_instances_schedule_id.sql create mode 100644 crates/backend-postgres-migrations/migrations/0008_runner_actions_done_timing.sql create mode 100644 crates/backend-postgres-migrations/migrations/0009_instance_search_columns.sql create mode 100644 crates/backend-postgres-migrations/src/lib.rs create mode 100644 crates/backend-postgres/Cargo.toml create mode 100644 
crates/backend-postgres/src/core.rs create mode 100644 crates/backend-postgres/src/lib.rs create mode 100644 crates/backend-postgres/src/registry.rs create mode 100644 crates/backend-postgres/src/scheduler.rs create mode 100644 crates/backend-postgres/src/test_helpers.rs create mode 100644 crates/backend-postgres/src/webapp.rs create mode 100644 crates/integration-support/Cargo.toml create mode 100644 crates/integration-support/src/lib.rs create mode 100644 crates/integration-support/src/postgres.rs create mode 100644 crates/test-support/Cargo.toml create mode 100644 crates/test-support/src/lib.rs create mode 100644 crates/test-support/src/postgres.rs diff --git a/Cargo.lock b/Cargo.lock index 4b31811b..9737bb86 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3372,6 +3372,18 @@ dependencies = [ "waymark-proto", ] +[[package]] +name = "waymark-backend-fault-injection" +version = "0.1.0" +dependencies = [ + "async-trait", + "uuid", + "waymark-backend-memory", + "waymark-backends-core", + "waymark-core-backend", + "waymark-workflow-registry-backend", +] + [[package]] name = "waymark-backend-memory" version = "0.1.0" @@ -3381,16 +3393,58 @@ dependencies = [ "rmp-serde", "serde_json", "uuid", + "waymark-backends-core", + "waymark-core-backend", + "waymark-garbage-collector-backend", + "waymark-scheduler-backend", + "waymark-scheduler-core", + "waymark-webapp-backend", + "waymark-webapp-core", + "waymark-worker-status-backend", + "waymark-workflow-registry-backend", +] + +[[package]] +name = "waymark-backend-postgres" +version = "0.1.0" +dependencies = [ + "async-trait", + "chrono", + "prost 0.12.6", + "rmp-serde", + "serde", + "serde_json", + "serial_test", + "sqlx", + "tokio", + "tracing", + "uuid", + "waymark-backend-postgres-migrations", + "waymark-backends-core", "waymark-core-backend", + "waymark-dag", "waymark-garbage-collector-backend", + "waymark-ir-parser", + "waymark-observability", + "waymark-proto", + "waymark-runner", + "waymark-runner-state", 
"waymark-scheduler-backend", "waymark-scheduler-core", + "waymark-test-support", "waymark-webapp-backend", "waymark-webapp-core", "waymark-worker-status-backend", "waymark-workflow-registry-backend", ] +[[package]] +name = "waymark-backend-postgres-migrations" +version = "0.1.0" +dependencies = [ + "sqlx", +] + [[package]] name = "waymark-backends-core" version = "0.1.0" @@ -3422,7 +3476,7 @@ dependencies = [ "serde", "thiserror", "uuid", - "waymark", + "waymark-ir-parser", "waymark-proto", ] @@ -3452,6 +3506,16 @@ dependencies = [ "waymark-backends-core", ] +[[package]] +name = "waymark-integration-support" +version = "0.1.0" +dependencies = [ + "anyhow", + "sqlx", + "tokio", + "waymark-backend-postgres-migrations", +] + [[package]] name = "waymark-ir-parser" version = "0.1.0" @@ -3541,6 +3605,14 @@ dependencies = [ "uuid", ] +[[package]] +name = "waymark-test-support" +version = "0.1.0" +dependencies = [ + "sqlx", + "waymark-integration-support", +] + [[package]] name = "waymark-webapp-backend" version = "0.1.0" diff --git a/Cargo.toml b/Cargo.toml index aa2f4ab0..4c881345 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -4,11 +4,15 @@ members = ["crates/*"] [workspace.dependencies] waymark = { path = "crates/waymark" } +waymark-backend-fault-injection = { path = "crates/backend-fault-injection" } waymark-backend-memory = { path = "crates/backend-memory" } +waymark-backend-postgres = { path = "crates/backend-postgres" } +waymark-backend-postgres-migrations = { path = "crates/backend-postgres-migrations" } waymark-backends-core = { path = "crates/backends-core" } waymark-core-backend = { path = "crates/core-backend" } waymark-dag = { path = "crates/dag" } waymark-garbage-collector-backend = { path = "crates/garbage-collector-backend" } +waymark-integration-support = { path = "crates/integration-support" } waymark-ir-parser = { path = "crates/ir-parser" } waymark-observability = { path = "crates/observability" } waymark-observability-macros = { path = 
"crates/observability-macros" } @@ -17,6 +21,7 @@ waymark-runner = { path = "crates/runner" } waymark-runner-state = { path = "crates/runner-state" } waymark-scheduler-backend = { path = "crates/scheduler-backend" } waymark-scheduler-core = { path = "crates/scheduler-core" } +waymark-test-support = { path = "crates/test-support" } waymark-webapp-backend = { path = "crates/webapp-backend" } waymark-webapp-core = { path = "crates/webapp-core" } waymark-worker-status-backend = { path = "crates/worker-status-backend" } @@ -36,6 +41,7 @@ rmp-serde = "1" rustc-hash = "2" serde = "1" serde_json = "1" +serial_test = "2" sha2 = "0.10" sqlx = { version = "0.8", default-features = false } thiserror = "2" diff --git a/crates/backend-fault-injection/Cargo.toml b/crates/backend-fault-injection/Cargo.toml new file mode 100644 index 00000000..1b592ba1 --- /dev/null +++ b/crates/backend-fault-injection/Cargo.toml @@ -0,0 +1,12 @@ +[package] +name = "waymark-backend-fault-injection" +version = "0.1.0" +edition = "2024" + +[dependencies] +async-trait = { workspace = true } +uuid = { workspace = true } +waymark-backends-core = { workspace = true } +waymark-backend-memory = { workspace = true } +waymark-core-backend = { workspace = true } +waymark-workflow-registry-backend = { workspace = true } diff --git a/crates/backend-fault-injection/src/lib.rs b/crates/backend-fault-injection/src/lib.rs new file mode 100644 index 00000000..4a43d305 --- /dev/null +++ b/crates/backend-fault-injection/src/lib.rs @@ -0,0 +1,128 @@ +use std::sync::{ + Arc, + atomic::{AtomicBool, AtomicUsize, Ordering as AtomicOrdering}, +}; + +use uuid::Uuid; +use waymark_backend_memory::MemoryBackend; +use waymark_backends_core::{BackendError, BackendResult}; +use waymark_core_backend::{ + CoreBackend, GraphUpdate, InstanceDone, InstanceLockStatus, LockClaim, QueuedInstanceBatch, +}; +use waymark_workflow_registry_backend::{ + WorkflowRegistration, WorkflowRegistryBackend, WorkflowVersion, +}; + +#[derive(Clone)] 
+pub struct FaultInjectingBackend { + inner: MemoryBackend, + fail_get_queued_instances_with_depth_limit: Arc, + get_queued_instances_calls: Arc, +} + +impl FaultInjectingBackend { + pub fn with_depth_limit_poll_failures(inner: MemoryBackend) -> Self { + Self { + inner, + fail_get_queued_instances_with_depth_limit: Arc::new(AtomicBool::new(true)), + get_queued_instances_calls: Arc::new(AtomicUsize::new(0)), + } + } + + pub fn get_queued_instances_calls(&self) -> usize { + self.get_queued_instances_calls.load(AtomicOrdering::SeqCst) + } + + pub fn queue_len(&self) -> usize { + self.inner + .instance_queue() + .as_ref() + .map(|queue| queue.lock().expect("queue poisoned").len()) + .unwrap_or(0) + } + + pub fn instances_done_len(&self) -> usize { + self.inner.instances_done().len() + } +} + +#[async_trait::async_trait] +impl CoreBackend for FaultInjectingBackend { + fn clone_box(&self) -> Box { + Box::new(self.clone()) + } + + async fn save_graphs( + &self, + claim: LockClaim, + graphs: &[GraphUpdate], + ) -> BackendResult> { + self.inner.save_graphs(claim, graphs).await + } + + async fn save_actions_done( + &self, + actions: &[waymark_core_backend::ActionDone], + ) -> BackendResult<()> { + self.inner.save_actions_done(actions).await + } + + async fn save_instances_done(&self, instances: &[InstanceDone]) -> BackendResult<()> { + self.inner.save_instances_done(instances).await + } + + async fn get_queued_instances( + &self, + size: usize, + claim: LockClaim, + ) -> BackendResult { + self.get_queued_instances_calls + .fetch_add(1, AtomicOrdering::SeqCst); + if self + .fail_get_queued_instances_with_depth_limit + .load(AtomicOrdering::SeqCst) + { + return Err(BackendError::Message("depth limit exceeded".to_string())); + } + self.inner.get_queued_instances(size, claim).await + } + + async fn queue_instances( + &self, + instances: &[waymark_core_backend::QueuedInstance], + ) -> BackendResult<()> { + self.inner.queue_instances(instances).await + } + + async fn 
refresh_instance_locks( + &self, + claim: LockClaim, + instance_ids: &[Uuid], + ) -> BackendResult> { + self.inner.refresh_instance_locks(claim, instance_ids).await + } + + async fn release_instance_locks( + &self, + lock_uuid: Uuid, + instance_ids: &[Uuid], + ) -> BackendResult<()> { + self.inner + .release_instance_locks(lock_uuid, instance_ids) + .await + } +} + +#[async_trait::async_trait] +impl WorkflowRegistryBackend for FaultInjectingBackend { + async fn upsert_workflow_version( + &self, + registration: &WorkflowRegistration, + ) -> BackendResult { + self.inner.upsert_workflow_version(registration).await + } + + async fn get_workflow_versions(&self, ids: &[Uuid]) -> BackendResult> { + self.inner.get_workflow_versions(ids).await + } +} diff --git a/crates/backend-memory/Cargo.toml b/crates/backend-memory/Cargo.toml index 4346bbda..203e0f35 100644 --- a/crates/backend-memory/Cargo.toml +++ b/crates/backend-memory/Cargo.toml @@ -9,6 +9,7 @@ chrono = { workspace = true } rmp-serde = { workspace = true } serde_json = { workspace = true } uuid = { workspace = true } +waymark-backends-core = { workspace = true } waymark-core-backend = { workspace = true } waymark-garbage-collector-backend = { workspace = true, optional = true } waymark-scheduler-backend = { workspace = true, optional = true } diff --git a/crates/backend-memory/src/core_backend.rs b/crates/backend-memory/src/core_backend.rs index d6e2da1e..49a40330 100644 --- a/crates/backend-memory/src/core_backend.rs +++ b/crates/backend-memory/src/core_backend.rs @@ -1,8 +1,9 @@ use chrono::Utc; use uuid::Uuid; +use waymark_backends_core::{BackendError, BackendResult}; use waymark_core_backend::{ - ActionDone, BackendError, BackendResult, GraphUpdate, InstanceDone, InstanceLockStatus, - LockClaim, QueuedInstance, QueuedInstanceBatch, + ActionDone, GraphUpdate, InstanceDone, InstanceLockStatus, LockClaim, QueuedInstance, + QueuedInstanceBatch, }; #[async_trait::async_trait] diff --git 
a/crates/backend-memory/src/garbage_collector_backend.rs b/crates/backend-memory/src/garbage_collector_backend.rs index a1274935..6a4cda66 100644 --- a/crates/backend-memory/src/garbage_collector_backend.rs +++ b/crates/backend-memory/src/garbage_collector_backend.rs @@ -1,7 +1,6 @@ use chrono::{DateTime, Utc}; -use waymark_garbage_collector_backend::{ - BackendResult, GarbageCollectionResult, GarbageCollectorBackend, -}; +use waymark_backends_core::BackendResult; +use waymark_garbage_collector_backend::{GarbageCollectionResult, GarbageCollectorBackend}; #[async_trait::async_trait] impl GarbageCollectorBackend for crate::MemoryBackend { diff --git a/crates/backend-memory/src/scheduler_backend.rs b/crates/backend-memory/src/scheduler_backend.rs index a69aa94e..3764f489 100644 --- a/crates/backend-memory/src/scheduler_backend.rs +++ b/crates/backend-memory/src/scheduler_backend.rs @@ -1,6 +1,6 @@ use chrono::Utc; use uuid::Uuid; -use waymark_core_backend::{BackendError, BackendResult}; +use waymark_backends_core::{BackendError, BackendResult}; use waymark_scheduler_backend::SchedulerBackend; use waymark_scheduler_core::{ CreateScheduleParams, ScheduleId, ScheduleType, WorkflowSchedule, compute_next_run, diff --git a/crates/backend-memory/src/webapp_backend.rs b/crates/backend-memory/src/webapp_backend.rs index 883f4076..5bcca7c1 100644 --- a/crates/backend-memory/src/webapp_backend.rs +++ b/crates/backend-memory/src/webapp_backend.rs @@ -2,7 +2,8 @@ use std::collections::HashMap; use chrono::Utc; use uuid::Uuid; -use waymark_webapp_backend::{BackendError, BackendResult, WebappBackend}; +use waymark_backends_core::{BackendError, BackendResult}; +use waymark_webapp_backend::WebappBackend; use waymark_webapp_core::{ ExecutionGraphView, InstanceDetail, InstanceStatus, InstanceSummary, ScheduleDetail, ScheduleInvocationSummary, ScheduleSummary, TimelineEntry, WorkerActionRow, diff --git a/crates/backend-postgres-migrations/Cargo.toml 
b/crates/backend-postgres-migrations/Cargo.toml new file mode 100644 index 00000000..f84ad14c --- /dev/null +++ b/crates/backend-postgres-migrations/Cargo.toml @@ -0,0 +1,7 @@ +[package] +name = "waymark-backend-postgres-migrations" +version = "0.1.0" +edition = "2024" + +[dependencies] +sqlx = { workspace = true, features = ["postgres", "macros", "migrate"] } diff --git a/crates/backend-postgres-migrations/build.rs b/crates/backend-postgres-migrations/build.rs new file mode 100644 index 00000000..3a8149ef --- /dev/null +++ b/crates/backend-postgres-migrations/build.rs @@ -0,0 +1,3 @@ +fn main() { + println!("cargo:rerun-if-changed=migrations"); +} diff --git a/crates/backend-postgres-migrations/migrations/0001_init.sql b/crates/backend-postgres-migrations/migrations/0001_init.sql new file mode 100644 index 00000000..dbb6b7da --- /dev/null +++ b/crates/backend-postgres-migrations/migrations/0001_init.sql @@ -0,0 +1,115 @@ +-- Waymark core schema (baseline) + +CREATE EXTENSION IF NOT EXISTS pgcrypto; + +-- --------------------------------------------------------------------------- +-- Workflow definitions +-- --------------------------------------------------------------------------- + +CREATE TABLE workflow_versions ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + workflow_name TEXT NOT NULL, + dag_hash TEXT NOT NULL, + program_proto BYTEA NOT NULL, + concurrent BOOLEAN NOT NULL DEFAULT false, + created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), + UNIQUE(workflow_name, dag_hash) +); + +CREATE INDEX idx_workflow_versions_name ON workflow_versions(workflow_name); + +-- --------------------------------------------------------------------------- +-- Runner persistence tables +-- --------------------------------------------------------------------------- + +CREATE TABLE runner_graph_updates ( + id BIGSERIAL PRIMARY KEY, + created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), + state BYTEA NOT NULL +); + +CREATE TABLE runner_actions_done ( + id BIGSERIAL PRIMARY KEY, + 
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), + node_id UUID NOT NULL, + action_name TEXT NOT NULL, + attempt INTEGER NOT NULL, + result BYTEA +); + +CREATE TABLE runner_instances ( + instance_id UUID PRIMARY KEY, + created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), + entry_node UUID NOT NULL, + state BYTEA, + result BYTEA, + error BYTEA +); + +CREATE TABLE runner_instances_done ( + id BIGSERIAL PRIMARY KEY, + created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), + executor_id UUID NOT NULL, + entry_node UUID NOT NULL, + result BYTEA, + error BYTEA +); + +CREATE TABLE queued_instances ( + instance_id UUID PRIMARY KEY, + created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), + payload BYTEA NOT NULL +); + +-- --------------------------------------------------------------------------- +-- Scheduler +-- --------------------------------------------------------------------------- + +CREATE TABLE workflow_schedules ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + workflow_name TEXT NOT NULL, + schedule_name TEXT NOT NULL, + schedule_type TEXT NOT NULL, + cron_expression TEXT, + interval_seconds BIGINT, + jitter_seconds BIGINT NOT NULL DEFAULT 0, + input_payload BYTEA, + status TEXT NOT NULL DEFAULT 'active', + next_run_at TIMESTAMPTZ, + last_run_at TIMESTAMPTZ, + last_instance_id UUID, + created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), + updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), + priority INT NOT NULL DEFAULT 0, + allow_duplicate BOOLEAN NOT NULL DEFAULT false, + UNIQUE(workflow_name, schedule_name) +); + +CREATE INDEX idx_schedules_due ON workflow_schedules(next_run_at) + WHERE status = 'active' AND next_run_at IS NOT NULL; + +-- --------------------------------------------------------------------------- +-- Worker status metrics +-- --------------------------------------------------------------------------- + +CREATE TABLE worker_status ( + pool_id UUID NOT NULL, + worker_id BIGINT NOT NULL, + throughput_per_min DOUBLE PRECISION NOT NULL DEFAULT 0, + total_completed BIGINT NOT 
NULL DEFAULT 0, + last_action_at TIMESTAMPTZ, + updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), + median_dequeue_ms BIGINT, + median_handling_ms BIGINT, + dispatch_queue_size BIGINT, + total_in_flight BIGINT, + active_workers INT NOT NULL DEFAULT 0, + actions_per_sec DOUBLE PRECISION NOT NULL DEFAULT 0, + median_instance_duration_secs DOUBLE PRECISION, + active_instance_count INT NOT NULL DEFAULT 0, + total_instances_completed BIGINT NOT NULL DEFAULT 0, + instances_per_sec DOUBLE PRECISION NOT NULL DEFAULT 0, + instances_per_min DOUBLE PRECISION NOT NULL DEFAULT 0, + time_series BYTEA, + PRIMARY KEY (pool_id, worker_id) +); diff --git a/crates/backend-postgres-migrations/migrations/0002_runner_actions_done_execution_id.sql b/crates/backend-postgres-migrations/migrations/0002_runner_actions_done_execution_id.sql new file mode 100644 index 00000000..b4bce178 --- /dev/null +++ b/crates/backend-postgres-migrations/migrations/0002_runner_actions_done_execution_id.sql @@ -0,0 +1,7 @@ +-- Rename runner action identifier to execution_id and drop stored action name. + +ALTER TABLE runner_actions_done + RENAME COLUMN node_id TO execution_id; + +ALTER TABLE runner_actions_done + DROP COLUMN action_name; diff --git a/crates/backend-postgres-migrations/migrations/0003_instance_locks.sql b/crates/backend-postgres-migrations/migrations/0003_instance_locks.sql new file mode 100644 index 00000000..6b826d18 --- /dev/null +++ b/crates/backend-postgres-migrations/migrations/0003_instance_locks.sql @@ -0,0 +1,12 @@ +-- Add scheduling and locking for queued instances. 
+ +ALTER TABLE queued_instances + ADD COLUMN scheduled_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), + ADD COLUMN lock_uuid UUID, + ADD COLUMN lock_expires_at TIMESTAMPTZ; + +CREATE INDEX IF NOT EXISTS idx_queued_instances_scheduled_at + ON queued_instances(scheduled_at); + +CREATE INDEX IF NOT EXISTS idx_queued_instances_lock_expires_at + ON queued_instances(lock_expires_at); diff --git a/crates/backend-postgres-migrations/migrations/0004_workflow_versions.sql b/crates/backend-postgres-migrations/migrations/0004_workflow_versions.sql new file mode 100644 index 00000000..daf3b54d --- /dev/null +++ b/crates/backend-postgres-migrations/migrations/0004_workflow_versions.sql @@ -0,0 +1,21 @@ +-- Workflow versions: replace dag_hash with workflow_version + ir_hash + +ALTER TABLE workflow_versions + RENAME COLUMN dag_hash TO workflow_version; + +ALTER TABLE workflow_versions + ADD COLUMN ir_hash TEXT; + +UPDATE workflow_versions +SET ir_hash = workflow_version +WHERE ir_hash IS NULL; + +ALTER TABLE workflow_versions + ALTER COLUMN ir_hash SET NOT NULL; + +ALTER TABLE workflow_versions + DROP CONSTRAINT IF EXISTS workflow_versions_workflow_name_dag_hash_key; + +ALTER TABLE workflow_versions + ADD CONSTRAINT workflow_versions_workflow_name_version_key + UNIQUE (workflow_name, workflow_version); diff --git a/crates/backend-postgres-migrations/migrations/0005_runner_instances_workflow_version_id.sql b/crates/backend-postgres-migrations/migrations/0005_runner_instances_workflow_version_id.sql new file mode 100644 index 00000000..6d09937b --- /dev/null +++ b/crates/backend-postgres-migrations/migrations/0005_runner_instances_workflow_version_id.sql @@ -0,0 +1,7 @@ +-- Persist workflow version on instances so webapp can show workflow names. 
+ +ALTER TABLE runner_instances + ADD COLUMN workflow_version_id UUID; + +CREATE INDEX IF NOT EXISTS idx_runner_instances_workflow_version_id + ON runner_instances(workflow_version_id); diff --git a/crates/backend-postgres-migrations/migrations/0006_drop_unused_runner_tables.sql b/crates/backend-postgres-migrations/migrations/0006_drop_unused_runner_tables.sql new file mode 100644 index 00000000..d3b1f272 --- /dev/null +++ b/crates/backend-postgres-migrations/migrations/0006_drop_unused_runner_tables.sql @@ -0,0 +1,4 @@ +-- Remove legacy tables no longer used by runtime or webapp. + +DROP TABLE IF EXISTS runner_graph_updates; +DROP TABLE IF EXISTS runner_instances_done; diff --git a/crates/backend-postgres-migrations/migrations/0007_runner_instances_schedule_id.sql b/crates/backend-postgres-migrations/migrations/0007_runner_instances_schedule_id.sql new file mode 100644 index 00000000..06cb1385 --- /dev/null +++ b/crates/backend-postgres-migrations/migrations/0007_runner_instances_schedule_id.sql @@ -0,0 +1,5 @@ +ALTER TABLE runner_instances +ADD COLUMN IF NOT EXISTS schedule_id UUID; + +CREATE INDEX IF NOT EXISTS idx_runner_instances_schedule_id_created_at + ON runner_instances(schedule_id, created_at DESC); diff --git a/crates/backend-postgres-migrations/migrations/0008_runner_actions_done_timing.sql b/crates/backend-postgres-migrations/migrations/0008_runner_actions_done_timing.sql new file mode 100644 index 00000000..b1b5551d --- /dev/null +++ b/crates/backend-postgres-migrations/migrations/0008_runner_actions_done_timing.sql @@ -0,0 +1,14 @@ +-- Persist per-attempt lifecycle metadata for action history and timeline rendering. 
+ +ALTER TABLE runner_actions_done + ADD COLUMN status TEXT, + ADD COLUMN started_at TIMESTAMPTZ, + ADD COLUMN completed_at TIMESTAMPTZ, + ADD COLUMN duration_ms BIGINT; + +ALTER TABLE runner_actions_done + ADD CONSTRAINT runner_actions_done_status_check + CHECK (status IS NULL OR status IN ('completed', 'failed', 'timed_out')); + +CREATE INDEX idx_runner_actions_done_execution_attempt + ON runner_actions_done (execution_id, attempt); diff --git a/crates/backend-postgres-migrations/migrations/0009_instance_search_columns.sql b/crates/backend-postgres-migrations/migrations/0009_instance_search_columns.sql new file mode 100644 index 00000000..948c6aca --- /dev/null +++ b/crates/backend-postgres-migrations/migrations/0009_instance_search_columns.sql @@ -0,0 +1,63 @@ +-- Persist workflow/status instance metadata for indexed search in webapp queries. + +ALTER TABLE runner_instances + ADD COLUMN IF NOT EXISTS workflow_name TEXT, + ADD COLUMN IF NOT EXISTS current_status TEXT; + +ALTER TABLE queued_instances + ADD COLUMN IF NOT EXISTS workflow_name TEXT, + ADD COLUMN IF NOT EXISTS current_status TEXT; + +UPDATE runner_instances AS ri +SET workflow_name = wv.workflow_name +FROM workflow_versions wv +WHERE ri.workflow_name IS NULL + AND ri.workflow_version_id = wv.id; + +UPDATE runner_instances +SET current_status = CASE + WHEN error IS NOT NULL THEN 'failed' + WHEN result IS NOT NULL THEN 'completed' + WHEN state IS NOT NULL THEN 'running' + ELSE 'queued' +END +WHERE current_status IS NULL; + +UPDATE queued_instances AS qi +SET workflow_name = ri.workflow_name +FROM runner_instances ri +WHERE qi.workflow_name IS NULL + AND qi.instance_id = ri.instance_id; + +UPDATE queued_instances +SET current_status = CASE + WHEN lock_uuid IS NULL THEN 'queued' + ELSE 'running' +END +WHERE current_status IS NULL; + +ALTER TABLE runner_instances + ADD CONSTRAINT runner_instances_current_status_check + CHECK ( + current_status IS NULL + OR current_status IN ('queued', 'running', 
'completed', 'failed') + ); + +ALTER TABLE queued_instances + ADD CONSTRAINT queued_instances_current_status_check + CHECK ( + current_status IS NULL + OR current_status IN ('queued', 'running') + ); + +CREATE INDEX IF NOT EXISTS idx_runner_instances_workflow_name + ON runner_instances(workflow_name); + +CREATE INDEX IF NOT EXISTS idx_runner_instances_current_status + ON runner_instances(current_status); + +CREATE INDEX IF NOT EXISTS idx_queued_instances_workflow_name + ON queued_instances(workflow_name); + +CREATE INDEX IF NOT EXISTS idx_queued_instances_current_status + ON queued_instances(current_status); diff --git a/crates/backend-postgres-migrations/src/lib.rs b/crates/backend-postgres-migrations/src/lib.rs new file mode 100644 index 00000000..82495aeb --- /dev/null +++ b/crates/backend-postgres-migrations/src/lib.rs @@ -0,0 +1,8 @@ +//! Migrations for the postgres backend. + +use sqlx::PgPool; + +/// Run the embedded SQLx migrations. +pub async fn run(pool: &PgPool) -> Result<(), sqlx::migrate::MigrateError> { + sqlx::migrate!().run(pool).await +} diff --git a/crates/backend-postgres/Cargo.toml b/crates/backend-postgres/Cargo.toml new file mode 100644 index 00000000..f61f582f --- /dev/null +++ b/crates/backend-postgres/Cargo.toml @@ -0,0 +1,39 @@ +[package] +name = "waymark-backend-postgres" +version = "0.1.0" +edition = "2024" + +[dependencies] +async-trait = { workspace = true } +chrono = { workspace = true } +rmp-serde = { workspace = true } +serde = { workspace = true, features = ["derive"] } +serde_json = { workspace = true } +sqlx = { workspace = true, features = ["uuid", "chrono"] } +tokio = { workspace = true, features = ["macros"] } +tracing = { workspace = true } +uuid = { workspace = true } +waymark-backend-postgres-migrations = { workspace = true } +waymark-backends-core = { workspace = true } +waymark-core-backend = { workspace = true } +waymark-dag = { workspace = true } +waymark-proto = { workspace = true } +waymark-garbage-collector-backend = 
{ workspace = true } +waymark-observability = { workspace = true } +waymark-runner = { workspace = true } +waymark-runner-state = { workspace = true } +waymark-scheduler-backend = { workspace = true } +waymark-scheduler-core = { workspace = true } +waymark-webapp-backend = { workspace = true } +waymark-webapp-core = { workspace = true } +waymark-worker-status-backend = { workspace = true } +waymark-workflow-registry-backend = { workspace = true } +prost = { workspace = true } + +[dev-dependencies] +serial_test = { workspace = true } +waymark-test-support = { workspace = true } +waymark-ir-parser = { workspace = true } + +[features] +trace = [] diff --git a/crates/backend-postgres/src/core.rs b/crates/backend-postgres/src/core.rs new file mode 100644 index 00000000..bc46747e --- /dev/null +++ b/crates/backend-postgres/src/core.rs @@ -0,0 +1,1993 @@ +use std::collections::HashMap; +use std::future::Future; +use std::time::Duration as StdDuration; + +use chrono::{DateTime, Utc}; +use sqlx::{Postgres, QueryBuilder, Row}; +use tracing::warn; +use uuid::Uuid; +use waymark_garbage_collector_backend::{GarbageCollectionResult, GarbageCollectorBackend}; +use waymark_scheduler_backend::{BackendError, BackendResult}; +use waymark_worker_status_backend::{WorkerStatusBackend, WorkerStatusUpdate}; + +use super::PostgresBackend; +use waymark_core_backend::{ + ActionDone, GraphUpdate, InstanceDone, InstanceLockStatus, LockClaim, QueuedInstance, + QueuedInstanceBatch, +}; +use waymark_observability::obs; +use waymark_runner_state::RunnerState; + +const INSTANCE_STATUS_QUEUED: &str = "queued"; +const INSTANCE_STATUS_RUNNING: &str = "running"; +const INSTANCE_STATUS_COMPLETED: &str = "completed"; +const INSTANCE_STATUS_FAILED: &str = "failed"; +const TRANSIENT_DEADLOCK_SQLSTATE: &str = "40P01"; +const TRANSIENT_SERIALIZATION_SQLSTATE: &str = "40001"; +const TRANSIENT_RETRY_MAX_ATTEMPTS: usize = 3; +const TRANSIENT_RETRY_INITIAL_BACKOFF_MS: u64 = 25; +const 
TRANSIENT_RETRY_MAX_BACKOFF_MS: u64 = 250; + +fn instance_result_is_error_wrapper(result: &serde_json::Value) -> bool { + let serde_json::Value::Object(map) = result else { + return false; + }; + map.len() == 1 + && (map.contains_key("error") + || map.contains_key("__exception__") + || map.contains_key("exception")) +} + +fn instance_done_status(instance: &InstanceDone) -> &'static str { + if instance.error.is_some() + || instance + .result + .as_ref() + .is_some_and(instance_result_is_error_wrapper) + { + INSTANCE_STATUS_FAILED + } else { + INSTANCE_STATUS_COMPLETED + } +} + +fn is_transient_sqlstate(code: &str) -> bool { + matches!( + code, + TRANSIENT_DEADLOCK_SQLSTATE | TRANSIENT_SERIALIZATION_SQLSTATE + ) +} + +fn is_transient_backend_error(err: &BackendError) -> bool { + match err { + BackendError::Inner(sqlx::Error::Database(db_err)) => { + db_err.code().as_deref().is_some_and(is_transient_sqlstate) + } + // Fallback for cases where sqlstate is not preserved in wrapping. + BackendError::Message(message) => { + message.contains("deadlock detected") + || message.contains("could not serialize access due to") + } + _ => false, + } +} + +async fn retry_transient_backend( + operation: &'static str, + mut op: Op, +) -> BackendResult +where + Op: FnMut() -> Fut, + Fut: Future>, +{ + let mut attempt = 0usize; + let mut backoff_ms = TRANSIENT_RETRY_INITIAL_BACKOFF_MS; + loop { + match op().await { + Ok(value) => return Ok(value), + Err(err) + if attempt < TRANSIENT_RETRY_MAX_ATTEMPTS && is_transient_backend_error(&err) => + { + attempt += 1; + warn!( + operation, + attempt, + error = %err, + "transient database error; retrying" + ); + tokio::time::sleep(StdDuration::from_millis(backoff_ms)).await; + backoff_ms = + std::cmp::min(backoff_ms.saturating_mul(2), TRANSIENT_RETRY_MAX_BACKOFF_MS); + } + Err(err) => return Err(err), + } + } +} + +impl PostgresBackend { + /// Insert queued instances for run-loop consumption. 
+ #[obs] + pub async fn queue_instances(&self, instances: &[QueuedInstance]) -> BackendResult<()> { + if instances.is_empty() { + return Ok(()); + } + let workflow_version_ids: Vec = instances + .iter() + .map(|instance| instance.workflow_version_id) + .collect(); + let workflow_rows = + sqlx::query("SELECT id, workflow_name FROM workflow_versions WHERE id = ANY($1)") + .bind(&workflow_version_ids) + .fetch_all(&self.pool) + .await?; + let mut workflow_names_by_version_id: HashMap = + HashMap::with_capacity(workflow_rows.len()); + for row in workflow_rows { + workflow_names_by_version_id.insert(row.get("id"), row.get("workflow_name")); + } + + let mut queued_payloads = Vec::new(); + let mut runner_payloads = Vec::new(); + for instance in instances { + let state = instance.state.as_ref().ok_or_else(|| { + BackendError::Message("queued instance missing runner state".to_string()) + })?; + let scheduled_at = instance.scheduled_at.unwrap_or_else(Utc::now); + let workflow_name = workflow_names_by_version_id + .get(&instance.workflow_version_id) + .cloned(); + let mut payload_instance = instance.clone(); + payload_instance.scheduled_at = Some(scheduled_at); + queued_payloads.push(( + payload_instance.instance_id, + scheduled_at, + workflow_name.clone(), + INSTANCE_STATUS_QUEUED, + Self::serialize(&payload_instance)?, + )); + let graph = GraphUpdate::from_state(instance.instance_id, state); + runner_payloads.push(( + instance.instance_id, + instance.entry_node, + instance.workflow_version_id, + instance.schedule_id, + workflow_name, + INSTANCE_STATUS_QUEUED, + Self::serialize(&graph)?, + )); + } + + let mut queued_builder: QueryBuilder = QueryBuilder::new( + "INSERT INTO queued_instances (instance_id, scheduled_at, workflow_name, current_status, payload) ", + ); + queued_builder.push_values( + queued_payloads.iter(), + |mut builder, (id, scheduled_at, workflow_name, current_status, payload)| { + builder + .push_bind(*id) + .push_bind(*scheduled_at) + 
.push_bind(workflow_name.as_deref()) + .push_bind(*current_status) + .push_bind(payload.as_slice()); + }, + ); + + let mut runner_builder: QueryBuilder = QueryBuilder::new( + "INSERT INTO runner_instances (instance_id, entry_node, workflow_version_id, schedule_id, workflow_name, current_status, state) ", + ); + runner_builder.push_values( + runner_payloads.iter(), + |mut builder, + ( + id, + entry, + workflow_version_id, + schedule_id, + workflow_name, + current_status, + payload, + )| { + builder + .push_bind(*id) + .push_bind(*entry) + .push_bind(*workflow_version_id) + .push_bind(*schedule_id) + .push_bind(workflow_name.as_deref()) + .push_bind(*current_status) + .push_bind(payload.as_slice()); + }, + ); + + let mut tx = self.pool.begin().await?; + Self::count_query(&self.query_counts, "insert:queued_instances"); + Self::count_batch_size( + &self.batch_size_counts, + "insert:queued_instances", + instances.len(), + ); + queued_builder.build().execute(&mut *tx).await?; + Self::count_query(&self.query_counts, "insert:runner_instances"); + Self::count_batch_size( + &self.batch_size_counts, + "insert:runner_instances", + instances.len(), + ); + runner_builder.build().execute(&mut *tx).await?; + tx.commit().await?; + Ok(()) + } + + /// Upsert worker status for monitoring and activity graphs. 
+ #[obs] + pub async fn upsert_worker_status(&self, status: &WorkerStatusUpdate) -> BackendResult<()> { + Self::count_query(&self.query_counts, "upsert:worker_status"); + sqlx::query( + r#" + INSERT INTO worker_status ( + pool_id, + worker_id, + throughput_per_min, + total_completed, + last_action_at, + updated_at, + median_dequeue_ms, + median_handling_ms, + dispatch_queue_size, + total_in_flight, + active_workers, + actions_per_sec, + median_instance_duration_secs, + active_instance_count, + total_instances_completed, + instances_per_sec, + instances_per_min, + time_series + ) + VALUES ($1, 0, $2, $3, $4, NOW(), $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16) + ON CONFLICT (pool_id, worker_id) + DO UPDATE SET + throughput_per_min = EXCLUDED.throughput_per_min, + total_completed = EXCLUDED.total_completed, + last_action_at = EXCLUDED.last_action_at, + updated_at = EXCLUDED.updated_at, + median_dequeue_ms = EXCLUDED.median_dequeue_ms, + median_handling_ms = EXCLUDED.median_handling_ms, + dispatch_queue_size = EXCLUDED.dispatch_queue_size, + total_in_flight = EXCLUDED.total_in_flight, + active_workers = EXCLUDED.active_workers, + actions_per_sec = EXCLUDED.actions_per_sec, + median_instance_duration_secs = EXCLUDED.median_instance_duration_secs, + active_instance_count = EXCLUDED.active_instance_count, + total_instances_completed = EXCLUDED.total_instances_completed, + instances_per_sec = EXCLUDED.instances_per_sec, + instances_per_min = EXCLUDED.instances_per_min, + time_series = EXCLUDED.time_series + "#, + ) + .bind(status.pool_id) + .bind(status.throughput_per_min) + .bind(status.total_completed) + .bind(status.last_action_at) + .bind(status.median_dequeue_ms) + .bind(status.median_handling_ms) + .bind(status.dispatch_queue_size) + .bind(status.total_in_flight) + .bind(status.active_workers) + .bind(status.actions_per_sec) + .bind(status.median_instance_duration_secs) + .bind(status.active_instance_count) + .bind(status.total_instances_completed) + 
.bind(status.instances_per_sec) + .bind(status.instances_per_min) + .bind(&status.time_series) + .execute(&self.pool) + .await?; + + Ok(()) + } + + /// Clear expired queue locks so they can be claimed again by the runloop. + /// + /// This uses the same `FOR UPDATE SKIP LOCKED` claim pattern as dequeue to + /// avoid blocking under concurrent sweepers. + #[obs] + pub async fn reclaim_expired_instance_locks(&self, size: usize) -> BackendResult { + if size == 0 { + return Ok(0); + } + + let now = Utc::now(); + let mut tx = self.pool.begin().await?; + Self::count_query(&self.query_counts, "update:queued_instances_expired_unlock"); + let rows = sqlx::query( + r#" + WITH expired AS ( + SELECT instance_id + FROM queued_instances + WHERE lock_uuid IS NOT NULL + AND lock_expires_at <= $1 + ORDER BY lock_expires_at, scheduled_at, created_at + LIMIT $2 + FOR UPDATE SKIP LOCKED + ) + UPDATE queued_instances AS qi + SET lock_uuid = NULL, + lock_expires_at = NULL + FROM expired + WHERE qi.instance_id = expired.instance_id + RETURNING qi.instance_id + "#, + ) + .bind(now) + .bind(size as i64) + .fetch_all(&mut *tx) + .await?; + + if !rows.is_empty() { + let instance_ids: Vec = rows.iter().map(|row| row.get("instance_id")).collect(); + sqlx::query( + "UPDATE runner_instances SET current_status = $2 WHERE instance_id = ANY($1) AND result IS NULL AND error IS NULL", + ) + .bind(&instance_ids) + .bind(INSTANCE_STATUS_QUEUED) + .execute(&mut *tx) + .await?; + } + + tx.commit().await?; + + if !rows.is_empty() { + Self::count_batch_size( + &self.batch_size_counts, + "update:queued_instances_expired_unlock", + rows.len(), + ); + } + + Ok(rows.len()) + } + + /// Delete old finished instances and their action attempt rows. 
+ #[obs] + pub async fn collect_done_instances_impl( + &self, + older_than: DateTime, + limit: usize, + ) -> BackendResult { + if limit == 0 { + return Ok(GarbageCollectionResult::default()); + } + + let mut tx = self.pool.begin().await?; + Self::count_query(&self.query_counts, "select:runner_instances_gc_candidates"); + let candidate_rows = sqlx::query( + r#" + SELECT instance_id, state + FROM runner_instances + WHERE created_at < $1 + AND (result IS NOT NULL OR error IS NOT NULL) + ORDER BY created_at, instance_id + LIMIT $2 + FOR UPDATE SKIP LOCKED + "#, + ) + .bind(older_than) + .bind(limit as i64) + .fetch_all(&mut *tx) + .await?; + + if candidate_rows.is_empty() { + tx.commit().await?; + return Ok(GarbageCollectionResult::default()); + } + + let mut instance_ids = Vec::with_capacity(candidate_rows.len()); + let mut action_execution_ids = Vec::new(); + for row in candidate_rows { + let instance_id: Uuid = row.get("instance_id"); + let state_payload: Option> = row.get("state"); + instance_ids.push(instance_id); + + let Some(state_payload) = state_payload else { + continue; + }; + match Self::deserialize::(&state_payload) { + Ok(graph) => { + for (execution_id, node) in graph.nodes { + if node.is_action_call() { + action_execution_ids.push(execution_id); + } + } + } + Err(err) => { + warn!( + %instance_id, + error = %err, + "failed to decode runner state while collecting garbage" + ); + } + } + } + + action_execution_ids.sort_unstable(); + action_execution_ids.dedup(); + let deleted_actions = if action_execution_ids.is_empty() { + 0 + } else { + Self::count_query(&self.query_counts, "delete:runner_actions_done_gc"); + let result = + sqlx::query("DELETE FROM runner_actions_done WHERE execution_id = ANY($1)") + .bind(&action_execution_ids) + .execute(&mut *tx) + .await?; + let rows = result.rows_affected() as usize; + Self::count_batch_size( + &self.batch_size_counts, + "delete:runner_actions_done_gc", + rows, + ); + rows + }; + + 
Self::count_query(&self.query_counts, "delete:queued_instances_gc"); + let _ = sqlx::query("DELETE FROM queued_instances WHERE instance_id = ANY($1)") + .bind(&instance_ids) + .execute(&mut *tx) + .await?; + + Self::count_query(&self.query_counts, "delete:runner_instances_gc"); + let deleted_instances_result = + sqlx::query("DELETE FROM runner_instances WHERE instance_id = ANY($1)") + .bind(&instance_ids) + .execute(&mut *tx) + .await?; + let deleted_instances = deleted_instances_result.rows_affected() as usize; + Self::count_batch_size( + &self.batch_size_counts, + "delete:runner_instances_gc", + deleted_instances, + ); + tx.commit().await?; + + Ok(GarbageCollectionResult { + deleted_instances, + deleted_actions, + }) + } + + #[obs] + async fn save_graphs_impl( + &self, + claim: LockClaim, + graphs: &[GraphUpdate], + ) -> BackendResult> { + retry_transient_backend("save_graphs_impl", || { + let claim = claim.clone(); + async move { self.save_graphs_once(claim, graphs).await } + }) + .await + } + + async fn save_graphs_once( + &self, + claim: LockClaim, + graphs: &[GraphUpdate], + ) -> BackendResult> { + if graphs.is_empty() { + return Ok(Vec::new()); + } + let mut payloads = Vec::with_capacity(graphs.len()); + for graph in graphs { + payloads.push(( + graph.instance_id, + graph.next_scheduled_at(), + claim.lock_expires_at, + Self::serialize(graph)?, + )); + } + + Self::count_query(&self.query_counts, "update:queued_instances_scheduled_at"); + Self::count_batch_size( + &self.batch_size_counts, + "update:queued_instances_scheduled_at", + payloads.len(), + ); + let now = Utc::now(); + let mut schedule_builder: QueryBuilder = QueryBuilder::new( + "UPDATE queued_instances AS qi SET scheduled_at = v.scheduled_at, lock_expires_at = CASE WHEN qi.lock_expires_at IS NULL OR qi.lock_expires_at < v.lock_expires_at THEN v.lock_expires_at ELSE qi.lock_expires_at END FROM (", + ); + schedule_builder.push_values( + payloads.iter(), + |mut b, (instance_id, scheduled_at, 
lock_expires_at, _payload)| { + b.push_bind(*instance_id) + .push_bind(*scheduled_at) + .push_bind(*lock_expires_at); + }, + ); + schedule_builder.push( + ") AS v(instance_id, scheduled_at, lock_expires_at) + WHERE qi.instance_id = v.instance_id + AND qi.lock_uuid = ", + ); + schedule_builder.push_bind(claim.lock_uuid); + schedule_builder.push(" AND (qi.lock_expires_at IS NULL OR qi.lock_expires_at > "); + schedule_builder.push_bind(now); + schedule_builder.push(")"); + schedule_builder.build().execute(&self.pool).await?; + + Self::count_query(&self.query_counts, "update:runner_instances_state"); + Self::count_batch_size( + &self.batch_size_counts, + "update:runner_instances_state", + payloads.len(), + ); + let mut runner_builder: QueryBuilder = + QueryBuilder::new("UPDATE runner_instances AS ri SET state = v.state FROM ("); + runner_builder.push_values( + payloads.iter(), + |mut b, (instance_id, _scheduled_at, _lock_expires_at, payload)| { + b.push_bind(*instance_id).push_bind(payload.as_slice()); + }, + ); + runner_builder.push( + ") AS v(instance_id, state) + JOIN queued_instances qi ON qi.instance_id = v.instance_id + WHERE ri.instance_id = v.instance_id + AND qi.lock_uuid = ", + ); + runner_builder.push_bind(claim.lock_uuid); + runner_builder.push(" AND (qi.lock_expires_at IS NULL OR qi.lock_expires_at > "); + runner_builder.push_bind(now); + runner_builder.push(")"); + runner_builder.build().execute(&self.pool).await?; + + let ids: Vec = graphs.iter().map(|graph| graph.instance_id).collect(); + let lock_rows = sqlx::query( + "SELECT instance_id, lock_uuid, lock_expires_at FROM queued_instances WHERE instance_id = ANY($1)", + ) + .bind(&ids) + .fetch_all(&self.pool) + .await?; + + let mut lock_map: HashMap = HashMap::new(); + for row in lock_rows { + let instance_id: Uuid = row.get(0); + lock_map.insert( + instance_id, + InstanceLockStatus { + instance_id, + lock_uuid: row.get(1), + lock_expires_at: row.get(2), + }, + ); + } + + let mut locks = 
Vec::with_capacity(ids.len()); + for instance_id in ids { + locks.push( + lock_map + .get(&instance_id) + .cloned() + .unwrap_or(InstanceLockStatus { + instance_id, + lock_uuid: None, + lock_expires_at: None, + }), + ); + } + Ok(locks) + } + + #[obs] + async fn save_actions_done_impl(&self, actions: &[ActionDone]) -> BackendResult<()> { + if actions.is_empty() { + return Ok(()); + } + Self::count_query(&self.query_counts, "insert:runner_actions_done"); + Self::count_batch_size( + &self.batch_size_counts, + "insert:runner_actions_done", + actions.len(), + ); + let mut payloads = Vec::new(); + for action in actions { + payloads.push(( + action.execution_id, + action.attempt, + action.status.to_string(), + action.started_at, + action.completed_at, + action.duration_ms, + Self::serialize(&action.result)?, + )); + } + let mut builder: QueryBuilder = QueryBuilder::new( + "INSERT INTO runner_actions_done (execution_id, attempt, status, started_at, completed_at, duration_ms, result) ", + ); + builder.push_values( + payloads.iter(), + |mut b, (execution_id, attempt, status, started_at, completed_at, duration_ms, payload)| { + b.push_bind(*execution_id) + .push_bind(*attempt) + .push_bind(status.as_str()) + .push_bind(*started_at) + .push_bind(*completed_at) + .push_bind(*duration_ms) + .push_bind(payload.as_slice()); + }, + ); + builder.build().execute(&self.pool).await?; + Ok(()) + } + + #[obs] + async fn get_queued_instances_impl( + &self, + size: usize, + claim: LockClaim, + ) -> BackendResult { + retry_transient_backend("get_queued_instances_impl", || { + let claim = claim.clone(); + async move { self.get_queued_instances_once(size, claim).await } + }) + .await + } + + async fn get_queued_instances_once( + &self, + size: usize, + claim: LockClaim, + ) -> BackendResult { + if size == 0 { + return Ok(QueuedInstanceBatch { + instances: Vec::new(), + }); + } + let now = Utc::now(); + let mut tx = self.pool.begin().await?; + Self::count_query(&self.query_counts, 
"select:queued_instances"); + let rows = sqlx::query( + r#" + WITH claimed AS ( + SELECT instance_id, payload + FROM queued_instances + WHERE scheduled_at <= $1 + AND (lock_uuid IS NULL OR lock_expires_at <= $1) + ORDER BY scheduled_at, created_at + LIMIT $2 + FOR UPDATE SKIP LOCKED + ), + updated AS ( + UPDATE queued_instances AS qi + SET lock_uuid = $3, + lock_expires_at = $4 + FROM claimed + WHERE qi.instance_id = claimed.instance_id + RETURNING qi.instance_id, claimed.payload + ) + SELECT updated.instance_id, updated.payload, ri.state + FROM updated + JOIN runner_instances ri ON ri.instance_id = updated.instance_id + "#, + ) + .bind(now) + .bind(size as i64) + .bind(claim.lock_uuid) + .bind(claim.lock_expires_at) + .fetch_all(&mut *tx) + .await?; + + if rows.is_empty() { + tx.commit().await?; + return Ok(QueuedInstanceBatch { + instances: Vec::new(), + }); + } + + let claimed_instance_ids: Vec = + rows.iter().map(|row| row.get("instance_id")).collect(); + sqlx::query("UPDATE runner_instances SET current_status = $2 WHERE instance_id = ANY($1)") + .bind(&claimed_instance_ids) + .bind(INSTANCE_STATUS_RUNNING) + .execute(&mut *tx) + .await?; + + Self::count_batch_size( + &self.batch_size_counts, + "select:queued_instances", + rows.len(), + ); + tx.commit().await?; + + let mut instances = Vec::new(); + let mut action_node_ids_by_instance: HashMap> = HashMap::new(); + let mut all_action_node_ids: Vec = Vec::new(); + for row in rows { + let instance_id: Uuid = row.get(0); + let payload: Vec = row.get(1); + let state_payload: Option> = row.get(2); + let mut instance: QueuedInstance = Self::deserialize(&payload)?; + instance.instance_id = instance_id; + if let Some(state_payload) = state_payload { + let graph: GraphUpdate = Self::deserialize(&state_payload)?; + let action_node_ids: Vec = graph + .nodes + .iter() + .filter_map(|(node_id, node)| node.is_action_call().then_some(*node_id)) + .collect(); + if !action_node_ids.is_empty() { + 
all_action_node_ids.extend(action_node_ids.iter().copied()); + action_node_ids_by_instance.insert(instance_id, action_node_ids); + } + instance.state = Some(RunnerState::new( + None, + Some(graph.nodes), + Some(graph.edges), + false, + )); + } + instances.push(instance); + } + + if !all_action_node_ids.is_empty() { + all_action_node_ids.sort_unstable(); + all_action_node_ids.dedup(); + + Self::count_query( + &self.query_counts, + "select:runner_actions_done_by_execution_id", + ); + let rows = sqlx::query( + r#" + SELECT DISTINCT ON (execution_id) + execution_id, + result + FROM runner_actions_done + WHERE execution_id = ANY($1) + ORDER BY execution_id, attempt DESC, id DESC + "#, + ) + .bind(&all_action_node_ids) + .fetch_all(&self.pool) + .await?; + + let mut action_results_by_execution_id: HashMap = + HashMap::new(); + for row in rows { + let execution_id: Uuid = row.get("execution_id"); + let result_payload: Option> = row.get("result"); + let Some(result_payload) = result_payload else { + continue; + }; + let result: serde_json::Value = Self::deserialize(&result_payload)?; + action_results_by_execution_id.insert(execution_id, result); + } + + for instance in &mut instances { + let Some(action_node_ids) = action_node_ids_by_instance.get(&instance.instance_id) + else { + continue; + }; + for node_id in action_node_ids { + if let Some(result) = action_results_by_execution_id.get(node_id) { + instance.action_results.insert(*node_id, result.clone()); + } + } + } + } + + Ok(QueuedInstanceBatch { instances }) + } + + #[obs] + async fn save_instances_done_impl(&self, instances: &[InstanceDone]) -> BackendResult<()> { + retry_transient_backend("save_instances_done_impl", || async move { + self.save_instances_done_once(instances).await + }) + .await + } + + async fn save_instances_done_once(&self, instances: &[InstanceDone]) -> BackendResult<()> { + if instances.is_empty() { + return Ok(()); + } + let ids: Vec = instances + .iter() + .map(|instance| instance.executor_id) 
+ .collect(); + + let mut tx = self.pool.begin().await?; + Self::count_query(&self.query_counts, "delete:queued_instances_by_id"); + sqlx::query("DELETE FROM queued_instances WHERE instance_id = ANY($1)") + .bind(&ids) + .execute(&mut *tx) + .await?; + + Self::count_query(&self.query_counts, "update:runner_instances_result"); + Self::count_batch_size( + &self.batch_size_counts, + "update:runner_instances_result", + instances.len(), + ); + let mut payloads = Vec::with_capacity(instances.len()); + for instance in instances { + let current_status = instance_done_status(instance); + let result = match &instance.result { + Some(value) => Some(Self::serialize(value)?), + None => None, + }; + let error = match &instance.error { + Some(value) => Some(Self::serialize(value)?), + None => None, + }; + payloads.push((instance.executor_id, current_status, result, error)); + } + let mut builder: QueryBuilder = QueryBuilder::new( + "UPDATE runner_instances AS ri SET result = v.result, error = v.error, current_status = v.current_status FROM (", + ); + builder.push_values( + payloads.iter(), + |mut b, (instance_id, current_status, result, error)| { + b.push_bind(*instance_id) + .push_bind(*current_status) + .push_bind(result.as_deref()) + .push_bind(error.as_deref()); + }, + ); + builder.push( + ") AS v(instance_id, current_status, result, error) WHERE ri.instance_id = v.instance_id", + ); + builder.build().execute(&mut *tx).await?; + tx.commit().await?; + Ok(()) + } +} + +#[async_trait::async_trait] +impl waymark_core_backend::CoreBackend for PostgresBackend { + fn clone_box(&self) -> Box { + Box::new(self.clone()) + } + + async fn save_graphs( + &self, + claim: waymark_core_backend::LockClaim, + graphs: &[waymark_core_backend::GraphUpdate], + ) -> BackendResult> { + self.save_graphs_impl(claim, graphs).await + } + + async fn save_actions_done(&self, actions: &[ActionDone]) -> BackendResult<()> { + self.save_actions_done_impl(actions).await + } + + async fn get_queued_instances( + 
&self, + size: usize, + claim: LockClaim, + ) -> BackendResult { + self.get_queued_instances_impl(size, claim).await + } + + async fn save_instances_done(&self, instances: &[InstanceDone]) -> BackendResult<()> { + self.save_instances_done_impl(instances).await + } + + async fn refresh_instance_locks( + &self, + claim: LockClaim, + instance_ids: &[Uuid], + ) -> BackendResult> { + retry_transient_backend("refresh_instance_locks", || { + let claim = claim.clone(); + async move { self.refresh_instance_locks_once(claim, instance_ids).await } + }) + .await + } + + async fn release_instance_locks( + &self, + lock_uuid: Uuid, + instance_ids: &[Uuid], + ) -> BackendResult<()> { + if instance_ids.is_empty() { + return Ok(()); + } + Self::count_query(&self.query_counts, "update:queued_instances_release"); + let released_rows = sqlx::query( + r#" + WITH releasable AS ( + SELECT instance_id + FROM queued_instances + WHERE instance_id = ANY($1) + AND lock_uuid = $2 + FOR UPDATE SKIP LOCKED + ), + released AS ( + UPDATE queued_instances AS qi + SET lock_uuid = NULL, + lock_expires_at = NULL + FROM releasable + WHERE qi.instance_id = releasable.instance_id + RETURNING qi.instance_id + ) + SELECT instance_id FROM released + "#, + ) + .bind(instance_ids) + .bind(lock_uuid) + .fetch_all(&self.pool) + .await?; + + if !released_rows.is_empty() { + let released_instance_ids: Vec = released_rows + .iter() + .map(|row| row.get("instance_id")) + .collect(); + sqlx::query( + "UPDATE runner_instances SET current_status = $2 WHERE instance_id = ANY($1) AND result IS NULL AND error IS NULL", + ) + .bind(&released_instance_ids) + .bind(INSTANCE_STATUS_QUEUED) + .execute(&self.pool) + .await?; + } + + Ok(()) + } + + async fn queue_instances( + &self, + instances: &[waymark_core_backend::QueuedInstance], + ) -> BackendResult<()> { + PostgresBackend::queue_instances(self, instances).await + } +} + +impl PostgresBackend { + async fn refresh_instance_locks_once( + &self, + claim: LockClaim, + 
instance_ids: &[Uuid], + ) -> BackendResult> { + if instance_ids.is_empty() { + return Ok(Vec::new()); + } + Self::count_query(&self.query_counts, "update:queued_instances_lock"); + sqlx::query( + r#" + WITH claimable AS ( + SELECT instance_id + FROM queued_instances + WHERE instance_id = ANY($2) + AND lock_uuid = $3 + FOR UPDATE SKIP LOCKED + ) + UPDATE queued_instances AS qi + SET lock_expires_at = $1 + FROM claimable + WHERE qi.instance_id = claimable.instance_id + "#, + ) + .bind(claim.lock_expires_at) + .bind(instance_ids) + .bind(claim.lock_uuid) + .execute(&self.pool) + .await?; + let rows = sqlx::query( + "SELECT instance_id, lock_uuid, lock_expires_at FROM queued_instances WHERE instance_id = ANY($1)", + ) + .bind(instance_ids) + .fetch_all(&self.pool) + .await?; + let mut locks = Vec::with_capacity(rows.len()); + for row in rows { + locks.push(InstanceLockStatus { + instance_id: row.get(0), + lock_uuid: row.get(1), + lock_expires_at: row.get(2), + }); + } + Ok(locks) + } +} + +#[async_trait::async_trait] +impl GarbageCollectorBackend for PostgresBackend { + async fn collect_done_instances( + &self, + older_than: DateTime, + limit: usize, + ) -> BackendResult { + self.collect_done_instances_impl(older_than, limit).await + } +} + +#[async_trait::async_trait] +impl WorkerStatusBackend for PostgresBackend { + async fn upsert_worker_status(&self, status: &WorkerStatusUpdate) -> BackendResult<()> { + PostgresBackend::upsert_worker_status(self, status).await + } +} + +#[cfg(test)] +mod tests { + use std::collections::{HashMap, HashSet}; + use std::sync::Arc; + use std::sync::atomic::{AtomicUsize, Ordering}; + use std::time::Duration as StdDuration; + + use chrono::{DateTime, Duration, Utc}; + use serial_test::serial; + use sqlx::Row; + use uuid::Uuid; + use waymark_core_backend::{ActionAttemptStatus, CoreBackend}; + + use super::super::test_helpers::setup_backend; + use super::*; + + use waymark_dag::EdgeType; + use waymark_runner_state::{ActionCallSpec, 
ExecutionNode, NodeStatus}; + + fn sample_runner_state() -> RunnerState { + RunnerState::new(None, None, None, false) + } + + fn sample_queued_instance(instance_id: Uuid, entry_node: Uuid) -> QueuedInstance { + QueuedInstance { + workflow_version_id: Uuid::new_v4(), + schedule_id: None, + dag: None, + entry_node, + state: Some(sample_runner_state()), + action_results: HashMap::new(), + instance_id, + scheduled_at: Some(Utc::now() - Duration::seconds(1)), + } + } + + fn sample_execution_node(node_id: Uuid) -> ExecutionNode { + ExecutionNode { + node_id, + node_type: "action_call".to_string(), + label: "@tests.action()".to_string(), + status: NodeStatus::Queued, + template_id: Some("n0".to_string()), + targets: Vec::new(), + action: Some(ActionCallSpec { + action_name: "tests.action".to_string(), + module_name: Some("tests".to_string()), + kwargs: HashMap::new(), + }), + value_expr: None, + assignments: HashMap::new(), + action_attempt: 1, + started_at: None, + completed_at: None, + scheduled_at: Some(Utc::now() + Duration::seconds(15)), + } + } + + fn sample_lock_claim() -> LockClaim { + LockClaim { + lock_uuid: Uuid::new_v4(), + lock_expires_at: Utc::now() + Duration::seconds(30), + } + } + + async fn insert_workflow_version_row( + backend: &PostgresBackend, + workflow_version_id: Uuid, + workflow_name: &str, + ) { + sqlx::query( + "INSERT INTO workflow_versions (id, workflow_name, workflow_version, ir_hash, program_proto, concurrent) VALUES ($1, $2, $3, $4, $5, $6)", + ) + .bind(workflow_version_id) + .bind(workflow_name) + .bind("v1") + .bind(format!("hash-{workflow_name}")) + .bind(vec![0_u8]) + .bind(false) + .execute(backend.pool()) + .await + .expect("insert workflow version row"); + } + + async fn claim_instance(backend: &PostgresBackend, instance_id: Uuid) -> LockClaim { + let claim = sample_lock_claim(); + let batch = CoreBackend::get_queued_instances(backend, 10, claim.clone()) + .await + .expect("claim queued instance"); + 
assert_eq!(batch.instances.len(), 1); + assert_eq!(batch.instances[0].instance_id, instance_id); + claim + } + + #[serial(postgres)] + #[tokio::test] + async fn core_queue_instances_happy_path() { + let backend = setup_backend().await; + let instance_id = Uuid::new_v4(); + let entry_node = Uuid::new_v4(); + let queued = sample_queued_instance(instance_id, entry_node); + let expected_workflow_version_id = queued.workflow_version_id; + + CoreBackend::queue_instances(&backend, &[queued]) + .await + .expect("queue instances"); + + let queued_count: i64 = + sqlx::query_scalar("SELECT COUNT(*) FROM queued_instances WHERE instance_id = $1") + .bind(instance_id) + .fetch_one(backend.pool()) + .await + .expect("queued count"); + assert_eq!(queued_count, 1); + + let runner_count: i64 = + sqlx::query_scalar("SELECT COUNT(*) FROM runner_instances WHERE instance_id = $1") + .bind(instance_id) + .fetch_one(backend.pool()) + .await + .expect("runner count"); + assert_eq!(runner_count, 1); + + let workflow_version_id: Option = sqlx::query_scalar( + "SELECT workflow_version_id FROM runner_instances WHERE instance_id = $1", + ) + .bind(instance_id) + .fetch_one(backend.pool()) + .await + .expect("runner workflow version"); + assert_eq!(workflow_version_id, Some(expected_workflow_version_id)); + + let runner_status: Option = sqlx::query_scalar( + "SELECT current_status FROM runner_instances WHERE instance_id = $1", + ) + .bind(instance_id) + .fetch_one(backend.pool()) + .await + .expect("runner current status"); + assert_eq!(runner_status.as_deref(), Some(INSTANCE_STATUS_QUEUED)); + + let queued_status: Option = sqlx::query_scalar( + "SELECT current_status FROM queued_instances WHERE instance_id = $1", + ) + .bind(instance_id) + .fetch_one(backend.pool()) + .await + .expect("queued current status"); + assert_eq!(queued_status.as_deref(), Some(INSTANCE_STATUS_QUEUED)); + } + + #[serial(postgres)] + #[tokio::test] + async fn core_queue_instances_persists_workflow_name_when_registered() 
{ + let backend = setup_backend().await; + let instance_id = Uuid::new_v4(); + let entry_node = Uuid::new_v4(); + let workflow_version_id = Uuid::new_v4(); + insert_workflow_version_row(&backend, workflow_version_id, "tests.searchable").await; + + let queued = QueuedInstance { + workflow_version_id, + schedule_id: None, + dag: None, + entry_node, + state: Some(sample_runner_state()), + action_results: HashMap::new(), + instance_id, + scheduled_at: Some(Utc::now()), + }; + + CoreBackend::queue_instances(&backend, &[queued]) + .await + .expect("queue instances"); + + let runner_workflow_name: Option = + sqlx::query_scalar("SELECT workflow_name FROM runner_instances WHERE instance_id = $1") + .bind(instance_id) + .fetch_one(backend.pool()) + .await + .expect("runner workflow_name"); + assert_eq!(runner_workflow_name.as_deref(), Some("tests.searchable")); + + let queued_workflow_name: Option = + sqlx::query_scalar("SELECT workflow_name FROM queued_instances WHERE instance_id = $1") + .bind(instance_id) + .fetch_one(backend.pool()) + .await + .expect("queued workflow_name"); + assert_eq!(queued_workflow_name.as_deref(), Some("tests.searchable")); + } + + #[serial(postgres)] + #[tokio::test] + async fn core_get_queued_instances_updates_runner_status_without_mutating_queue_status() { + let backend = setup_backend().await; + let instance_id = Uuid::new_v4(); + let entry_node = Uuid::new_v4(); + let queued = sample_queued_instance(instance_id, entry_node); + CoreBackend::queue_instances(&backend, &[queued]) + .await + .expect("queue instances"); + + let claim = sample_lock_claim(); + let batch = CoreBackend::get_queued_instances(&backend, 1, claim.clone()) + .await + .expect("get queued instances"); + assert_eq!(batch.instances.len(), 1); + assert_eq!(batch.instances[0].instance_id, instance_id); + + let row = sqlx::query("SELECT lock_uuid FROM queued_instances WHERE instance_id = $1") + .bind(instance_id) + .fetch_one(backend.pool()) + .await + .expect("queued lock row"); 
+ let lock_uuid: Option = row.get("lock_uuid"); + assert_eq!(lock_uuid, Some(claim.lock_uuid)); + + let queued_status: Option = sqlx::query_scalar( + "SELECT current_status FROM queued_instances WHERE instance_id = $1", + ) + .bind(instance_id) + .fetch_one(backend.pool()) + .await + .expect("queued current status"); + assert_eq!(queued_status.as_deref(), Some(INSTANCE_STATUS_QUEUED)); + + let runner_status: Option = sqlx::query_scalar( + "SELECT current_status FROM runner_instances WHERE instance_id = $1", + ) + .bind(instance_id) + .fetch_one(backend.pool()) + .await + .expect("runner current status"); + assert_eq!(runner_status.as_deref(), Some(INSTANCE_STATUS_RUNNING)); + } + + #[serial(postgres)] + #[tokio::test] + async fn core_get_queued_instances_restores_action_results_from_actions_done() { + let backend = setup_backend().await; + let instance_id = Uuid::new_v4(); + let entry_node = Uuid::new_v4(); + CoreBackend::queue_instances(&backend, &[sample_queued_instance(instance_id, entry_node)]) + .await + .expect("queue instances"); + + let initial_claim = sample_lock_claim(); + let initial_batch = CoreBackend::get_queued_instances(&backend, 1, initial_claim.clone()) + .await + .expect("initial claim"); + assert_eq!(initial_batch.instances.len(), 1); + + let execution_id = Uuid::new_v4(); + let mut completed_action_node = sample_execution_node(execution_id); + completed_action_node.status = NodeStatus::Completed; + completed_action_node.scheduled_at = None; + + let graph = GraphUpdate { + instance_id, + nodes: HashMap::from([(execution_id, completed_action_node)]), + edges: std::collections::HashSet::new(), + }; + CoreBackend::save_graphs( + &backend, + initial_claim.clone(), + std::slice::from_ref(&graph), + ) + .await + .expect("persist graph"); + + CoreBackend::save_actions_done( + &backend, + &[ActionDone { + execution_id, + attempt: 1, + status: ActionAttemptStatus::Completed, + started_at: None, + completed_at: Some(Utc::now()), + duration_ms: None, + 
result: serde_json::json!({"ok": true}), + }], + ) + .await + .expect("persist action result"); + + CoreBackend::release_instance_locks(&backend, initial_claim.lock_uuid, &[instance_id]) + .await + .expect("release initial lock"); + + let queued_status: Option = sqlx::query_scalar( + "SELECT current_status FROM queued_instances WHERE instance_id = $1", + ) + .bind(instance_id) + .fetch_one(backend.pool()) + .await + .expect("queued current status after release"); + assert_eq!(queued_status.as_deref(), Some(INSTANCE_STATUS_QUEUED)); + + let runner_status: Option = sqlx::query_scalar( + "SELECT current_status FROM runner_instances WHERE instance_id = $1", + ) + .bind(instance_id) + .fetch_one(backend.pool()) + .await + .expect("runner current status after release"); + assert_eq!(runner_status.as_deref(), Some(INSTANCE_STATUS_QUEUED)); + + let second_claim = sample_lock_claim(); + let batch = CoreBackend::get_queued_instances(&backend, 1, second_claim) + .await + .expect("rehydrate instance"); + assert_eq!(batch.instances.len(), 1); + assert_eq!( + batch.instances[0].action_results.get(&execution_id), + Some(&serde_json::json!({"ok": true})) + ); + } + + #[serial(postgres)] + #[tokio::test] + async fn core_save_graphs_happy_path() { + let backend = setup_backend().await; + let instance_id = Uuid::new_v4(); + let entry_node = Uuid::new_v4(); + CoreBackend::queue_instances(&backend, &[sample_queued_instance(instance_id, entry_node)]) + .await + .expect("queue instances"); + let claim = claim_instance(&backend, instance_id).await; + + let execution_id = Uuid::new_v4(); + let mut nodes = HashMap::new(); + nodes.insert(execution_id, sample_execution_node(execution_id)); + let graph = GraphUpdate { + instance_id, + nodes, + edges: std::collections::HashSet::from([waymark_runner_state::ExecutionEdge { + source: execution_id, + target: execution_id, + edge_type: EdgeType::StateMachine, + }]), + }; + let extended_claim = LockClaim { + lock_uuid: claim.lock_uuid, + 
lock_expires_at: claim.lock_expires_at + Duration::seconds(120), + }; + + let locks = CoreBackend::save_graphs( + &backend, + extended_claim.clone(), + std::slice::from_ref(&graph), + ) + .await + .expect("save graphs"); + assert_eq!(locks.len(), 1); + assert_eq!(locks[0].instance_id, instance_id); + assert_eq!(locks[0].lock_uuid, Some(claim.lock_uuid)); + assert_eq!( + locks[0] + .lock_expires_at + .map(|value| value.timestamp_micros()), + Some(extended_claim.lock_expires_at.timestamp_micros()), + ); + + let state_payload: Option> = + sqlx::query_scalar("SELECT state FROM runner_instances WHERE instance_id = $1") + .bind(instance_id) + .fetch_one(backend.pool()) + .await + .expect("runner state payload"); + let decoded: GraphUpdate = rmp_serde::from_slice(&state_payload.expect("state payload")) + .expect("decode graph update"); + assert_eq!(decoded.nodes.len(), 1); + assert_eq!(decoded.edges.len(), 1); + } + + #[serial(postgres)] + #[tokio::test] + async fn core_save_graphs_returns_lock_status_for_duplicate_instance_updates() { + let backend = setup_backend().await; + let instance_id = Uuid::new_v4(); + let entry_node = Uuid::new_v4(); + CoreBackend::queue_instances(&backend, &[sample_queued_instance(instance_id, entry_node)]) + .await + .expect("queue instances"); + let claim = claim_instance(&backend, instance_id).await; + + let first_node_id = Uuid::new_v4(); + let second_node_id = Uuid::new_v4(); + let first_graph = GraphUpdate { + instance_id, + nodes: HashMap::from([(first_node_id, sample_execution_node(first_node_id))]), + edges: HashSet::new(), + }; + let second_graph = GraphUpdate { + instance_id, + nodes: HashMap::from([(second_node_id, sample_execution_node(second_node_id))]), + edges: HashSet::new(), + }; + + let locks = CoreBackend::save_graphs( + &backend, + claim.clone(), + &[first_graph.clone(), second_graph.clone()], + ) + .await + .expect("save duplicate instance graphs"); + assert_eq!(locks.len(), 2); + assert_eq!(locks[0].instance_id, 
instance_id); + assert_eq!(locks[1].instance_id, instance_id); + assert_eq!(locks[0].lock_uuid, Some(claim.lock_uuid)); + assert_eq!(locks[1].lock_uuid, Some(claim.lock_uuid)); + } + + #[serial(postgres)] + #[tokio::test] + async fn core_save_actions_done_happy_path() { + let backend = setup_backend().await; + let execution_id = Uuid::new_v4(); + CoreBackend::save_actions_done( + &backend, + &[ActionDone { + execution_id, + attempt: 1, + status: ActionAttemptStatus::Completed, + started_at: None, + completed_at: Some(Utc::now()), + duration_ms: None, + result: serde_json::json!({"ok": true}), + }], + ) + .await + .expect("save actions done"); + + let row = sqlx::query( + "SELECT execution_id, attempt, status, started_at, completed_at, duration_ms, result FROM runner_actions_done WHERE execution_id = $1", + ) + .bind(execution_id) + .fetch_one(backend.pool()) + .await + .expect("action row"); + + assert_eq!(row.get::("execution_id"), execution_id); + assert_eq!(row.get::("attempt"), 1); + assert_eq!(row.get::("status"), "completed"); + assert!( + row.get::>, _>("completed_at") + .is_some() + ); + let payload: Option> = row.get("result"); + let decoded: serde_json::Value = + rmp_serde::from_slice(&payload.expect("action payload")).expect("decode action"); + assert_eq!(decoded, serde_json::json!({"ok": true})); + } + + #[serial(postgres)] + #[tokio::test] + async fn core_refresh_instance_locks_happy_path() { + let backend = setup_backend().await; + let instance_id = Uuid::new_v4(); + let entry_node = Uuid::new_v4(); + CoreBackend::queue_instances(&backend, &[sample_queued_instance(instance_id, entry_node)]) + .await + .expect("queue instances"); + let claim = claim_instance(&backend, instance_id).await; + + let refreshed_expiry = Utc::now() + Duration::seconds(120); + let refreshed = CoreBackend::refresh_instance_locks( + &backend, + LockClaim { + lock_uuid: claim.lock_uuid, + lock_expires_at: refreshed_expiry, + }, + &[instance_id], + ) + .await + .expect("refresh 
locks"); + + assert_eq!(refreshed.len(), 1); + assert_eq!(refreshed[0].instance_id, instance_id); + assert_eq!(refreshed[0].lock_uuid, Some(claim.lock_uuid)); + assert_eq!( + refreshed[0] + .lock_expires_at + .map(|value| value.timestamp_micros()), + Some(refreshed_expiry.timestamp_micros()), + ); + } + + #[serial(postgres)] + #[tokio::test] + async fn core_refresh_instance_locks_skip_locked_does_not_block_or_override() { + let backend = setup_backend().await; + let instance_id = Uuid::new_v4(); + let entry_node = Uuid::new_v4(); + CoreBackend::queue_instances(&backend, &[sample_queued_instance(instance_id, entry_node)]) + .await + .expect("queue instances"); + let claim = claim_instance(&backend, instance_id).await; + + let mut tx = backend.pool().begin().await.expect("begin lock tx"); + sqlx::query("SELECT instance_id FROM queued_instances WHERE instance_id = $1 FOR UPDATE") + .bind(instance_id) + .fetch_one(&mut *tx) + .await + .expect("lock queued row"); + + let refreshed_expiry = Utc::now() + Duration::seconds(120); + let refreshed = tokio::time::timeout( + StdDuration::from_millis(300), + CoreBackend::refresh_instance_locks( + &backend, + LockClaim { + lock_uuid: claim.lock_uuid, + lock_expires_at: refreshed_expiry, + }, + &[instance_id], + ), + ) + .await + .expect("refresh should not block") + .expect("refresh locks"); + + assert_eq!(refreshed.len(), 1); + assert_eq!(refreshed[0].instance_id, instance_id); + assert_eq!(refreshed[0].lock_uuid, Some(claim.lock_uuid)); + assert_eq!( + refreshed[0] + .lock_expires_at + .map(|value| value.timestamp_micros()), + Some(claim.lock_expires_at.timestamp_micros()), + ); + } + + #[serial(postgres)] + #[tokio::test] + async fn core_release_instance_locks_happy_path() { + let backend = setup_backend().await; + let instance_id = Uuid::new_v4(); + let entry_node = Uuid::new_v4(); + CoreBackend::queue_instances(&backend, &[sample_queued_instance(instance_id, entry_node)]) + .await + .expect("queue instances"); + let claim = 
claim_instance(&backend, instance_id).await; + + CoreBackend::release_instance_locks(&backend, claim.lock_uuid, &[instance_id]) + .await + .expect("release locks"); + + let row = sqlx::query( + "SELECT lock_uuid, lock_expires_at FROM queued_instances WHERE instance_id = $1", + ) + .bind(instance_id) + .fetch_one(backend.pool()) + .await + .expect("lock row"); + let lock_uuid: Option = row.get("lock_uuid"); + let lock_expires_at: Option> = row.get("lock_expires_at"); + assert!(lock_uuid.is_none()); + assert!(lock_expires_at.is_none()); + + let queued_status: Option = sqlx::query_scalar( + "SELECT current_status FROM queued_instances WHERE instance_id = $1", + ) + .bind(instance_id) + .fetch_one(backend.pool()) + .await + .expect("queued current status after release"); + assert_eq!(queued_status.as_deref(), Some(INSTANCE_STATUS_QUEUED)); + + let runner_status: Option = sqlx::query_scalar( + "SELECT current_status FROM runner_instances WHERE instance_id = $1", + ) + .bind(instance_id) + .fetch_one(backend.pool()) + .await + .expect("runner current status after release"); + assert_eq!(runner_status.as_deref(), Some(INSTANCE_STATUS_QUEUED)); + } + + #[serial(postgres)] + #[tokio::test] + async fn core_reclaim_expired_instance_locks_happy_path() { + let backend = setup_backend().await; + let expired_id = Uuid::new_v4(); + let live_id = Uuid::new_v4(); + let entry_node = Uuid::new_v4(); + CoreBackend::queue_instances( + &backend, + &[ + sample_queued_instance(expired_id, entry_node), + sample_queued_instance(live_id, entry_node), + ], + ) + .await + .expect("queue instances"); + + let claim = sample_lock_claim(); + let claimed = CoreBackend::get_queued_instances(&backend, 10, claim.clone()) + .await + .expect("claim queued instances"); + assert_eq!(claimed.instances.len(), 2); + + let expired_at = Utc::now() - Duration::seconds(1); + let live_at = Utc::now() + Duration::seconds(60); + sqlx::query( + r#" + UPDATE queued_instances + SET lock_expires_at = CASE + WHEN 
instance_id = $1 THEN $3 + ELSE $4 + END + WHERE instance_id IN ($1, $2) + "#, + ) + .bind(expired_id) + .bind(live_id) + .bind(expired_at) + .bind(live_at) + .execute(backend.pool()) + .await + .expect("set lock expiries"); + + let reclaimed = backend + .reclaim_expired_instance_locks(10) + .await + .expect("reclaim expired locks"); + assert_eq!(reclaimed, 1); + + let rows = sqlx::query( + "SELECT instance_id, lock_uuid, lock_expires_at FROM queued_instances WHERE instance_id IN ($1, $2)", + ) + .bind(expired_id) + .bind(live_id) + .fetch_all(backend.pool()) + .await + .expect("fetch lock rows"); + let mut lock_rows: HashMap, Option>)> = + HashMap::new(); + for row in rows { + let instance_id: Uuid = row.get("instance_id"); + let lock_uuid: Option = row.get("lock_uuid"); + let lock_expires_at: Option> = row.get("lock_expires_at"); + lock_rows.insert(instance_id, (lock_uuid, lock_expires_at)); + } + + let expired_lock = lock_rows.get(&expired_id).expect("expired lock row"); + assert_eq!(*expired_lock, (None, None)); + + let expired_runner_status: Option = sqlx::query_scalar( + "SELECT current_status FROM runner_instances WHERE instance_id = $1", + ) + .bind(expired_id) + .fetch_one(backend.pool()) + .await + .expect("expired runner status"); + assert_eq!( + expired_runner_status.as_deref(), + Some(INSTANCE_STATUS_QUEUED) + ); + + let live_lock = lock_rows.get(&live_id).expect("live lock row"); + assert_eq!(live_lock.0, Some(claim.lock_uuid)); + assert_eq!( + live_lock.1.map(|value| value.timestamp_micros()), + Some(live_at.timestamp_micros()), + ); + + let live_runner_status: Option = sqlx::query_scalar( + "SELECT current_status FROM runner_instances WHERE instance_id = $1", + ) + .bind(live_id) + .fetch_one(backend.pool()) + .await + .expect("live runner status"); + assert_eq!(live_runner_status.as_deref(), Some(INSTANCE_STATUS_RUNNING)); + } + + #[serial(postgres)] + #[tokio::test] + async fn core_save_instances_done_happy_path() { + let backend = 
setup_backend().await; + let instance_id = Uuid::new_v4(); + let entry_node = Uuid::new_v4(); + CoreBackend::queue_instances(&backend, &[sample_queued_instance(instance_id, entry_node)]) + .await + .expect("queue instances"); + + CoreBackend::save_instances_done( + &backend, + &[InstanceDone { + executor_id: instance_id, + entry_node, + result: Some(serde_json::json!({"value": 3})), + error: None, + }], + ) + .await + .expect("save instances done"); + + let result_payload: Option> = + sqlx::query_scalar("SELECT result FROM runner_instances WHERE instance_id = $1") + .bind(instance_id) + .fetch_one(backend.pool()) + .await + .expect("result payload"); + let decoded: serde_json::Value = + rmp_serde::from_slice(&result_payload.expect("stored result")).expect("decode result"); + assert_eq!(decoded, serde_json::json!({"value": 3})); + + let queued_count: i64 = + sqlx::query_scalar("SELECT COUNT(*) FROM queued_instances WHERE instance_id = $1") + .bind(instance_id) + .fetch_one(backend.pool()) + .await + .expect("queued count"); + assert_eq!(queued_count, 0); + + let runner_status: Option = sqlx::query_scalar( + "SELECT current_status FROM runner_instances WHERE instance_id = $1", + ) + .bind(instance_id) + .fetch_one(backend.pool()) + .await + .expect("runner status"); + assert_eq!(runner_status.as_deref(), Some(INSTANCE_STATUS_COMPLETED)); + } + + #[serial(postgres)] + #[tokio::test] + async fn core_save_instances_done_updates_runner_even_if_queue_row_missing() { + let backend = setup_backend().await; + let instance_id = Uuid::new_v4(); + let entry_node = Uuid::new_v4(); + CoreBackend::queue_instances(&backend, &[sample_queued_instance(instance_id, entry_node)]) + .await + .expect("queue instances"); + + sqlx::query("DELETE FROM queued_instances WHERE instance_id = $1") + .bind(instance_id) + .execute(backend.pool()) + .await + .expect("delete queued row"); + + CoreBackend::save_instances_done( + &backend, + &[InstanceDone { + executor_id: instance_id, + entry_node, + 
result: Some(serde_json::json!({"value": 11})), + error: None, + }], + ) + .await + .expect("save instances done without queue row"); + + let runner_status: Option = sqlx::query_scalar( + "SELECT current_status FROM runner_instances WHERE instance_id = $1", + ) + .bind(instance_id) + .fetch_one(backend.pool()) + .await + .expect("runner status"); + assert_eq!(runner_status.as_deref(), Some(INSTANCE_STATUS_COMPLETED)); + } + + #[serial(postgres)] + #[tokio::test] + async fn core_retry_transient_deadlock_sqlstate_happy_path() { + let backend = setup_backend().await; + let pool = backend.pool().clone(); + let attempts = Arc::new(AtomicUsize::new(0)); + let result = retry_transient_backend("core_retry_test", || { + let pool = pool.clone(); + let attempts = Arc::clone(&attempts); + async move { + let attempt = attempts.fetch_add(1, Ordering::SeqCst); + if attempt < 2 { + sqlx::query( + "DO $$ BEGIN RAISE EXCEPTION 'simulated deadlock' USING ERRCODE='40P01'; END $$;", + ) + .execute(&pool) + .await?; + } + Ok(()) + } + }) + .await; + + assert!(result.is_ok()); + assert_eq!(attempts.load(Ordering::SeqCst), 3); + } + + #[serial(postgres)] + #[tokio::test] + async fn core_retry_non_transient_sqlstate_fails_without_retry() { + let backend = setup_backend().await; + let pool = backend.pool().clone(); + let attempts = Arc::new(AtomicUsize::new(0)); + let result = retry_transient_backend("core_retry_non_transient_test", || { + let pool = pool.clone(); + let attempts = Arc::clone(&attempts); + async move { + attempts.fetch_add(1, Ordering::SeqCst); + sqlx::query( + "DO $$ BEGIN RAISE EXCEPTION 'simulated unique violation' USING ERRCODE='23505'; END $$;", + ) + .execute(&pool) + .await?; + Ok::<(), BackendError>(()) + } + }) + .await; + + assert!(result.is_err()); + assert_eq!(attempts.load(Ordering::SeqCst), 1); + } + + #[serial(postgres)] + #[tokio::test] + async fn garbage_collector_deletes_old_done_instances_and_actions() { + let backend = setup_backend().await; + let 
instance_id = Uuid::new_v4(); + let execution_id = Uuid::new_v4(); + let entry_node = Uuid::new_v4(); + let workflow_version_id = Uuid::new_v4(); + + let state = GraphUpdate { + instance_id, + nodes: HashMap::from([(execution_id, sample_execution_node(execution_id))]), + edges: HashSet::new(), + }; + let state_payload = PostgresBackend::serialize(&state).expect("serialize state"); + let result_payload = + PostgresBackend::serialize(&serde_json::json!({"ok": true})).expect("serialize done"); + let action_payload = + PostgresBackend::serialize(&serde_json::json!({"value": 1})).expect("serialize action"); + + sqlx::query( + "INSERT INTO runner_instances (instance_id, entry_node, workflow_version_id, created_at, state, result) VALUES ($1, $2, $3, $4, $5, $6)", + ) + .bind(instance_id) + .bind(entry_node) + .bind(workflow_version_id) + .bind(Utc::now() - Duration::hours(30)) + .bind(state_payload) + .bind(result_payload) + .execute(backend.pool()) + .await + .expect("insert old done instance"); + + sqlx::query( + "INSERT INTO runner_actions_done (execution_id, attempt, status, result) VALUES ($1, $2, $3, $4)", + ) + .bind(execution_id) + .bind(1_i32) + .bind("completed") + .bind(action_payload) + .execute(backend.pool()) + .await + .expect("insert action row"); + + let result = GarbageCollectorBackend::collect_done_instances( + &backend, + Utc::now() - Duration::hours(24), + 100, + ) + .await + .expect("collect done instances"); + + assert_eq!(result.deleted_instances, 1); + assert_eq!(result.deleted_actions, 1); + + let remaining_instances: i64 = + sqlx::query_scalar("SELECT COUNT(*) FROM runner_instances WHERE instance_id = $1") + .bind(instance_id) + .fetch_one(backend.pool()) + .await + .expect("count instances"); + assert_eq!(remaining_instances, 0); + + let remaining_actions: i64 = + sqlx::query_scalar("SELECT COUNT(*) FROM runner_actions_done WHERE execution_id = $1") + .bind(execution_id) + .fetch_one(backend.pool()) + .await + .expect("count actions"); + 
assert_eq!(remaining_actions, 0); + } + + #[serial(postgres)] + #[tokio::test] + async fn garbage_collector_keeps_recent_done_instances() { + let backend = setup_backend().await; + let instance_id = Uuid::new_v4(); + let entry_node = Uuid::new_v4(); + let workflow_version_id = Uuid::new_v4(); + let state_payload = PostgresBackend::serialize(&GraphUpdate { + instance_id, + nodes: HashMap::new(), + edges: HashSet::new(), + }) + .expect("serialize state"); + let result_payload = + PostgresBackend::serialize(&serde_json::json!({"ok": true})).expect("serialize done"); + + sqlx::query( + "INSERT INTO runner_instances (instance_id, entry_node, workflow_version_id, created_at, state, result) VALUES ($1, $2, $3, $4, $5, $6)", + ) + .bind(instance_id) + .bind(entry_node) + .bind(workflow_version_id) + .bind(Utc::now() - Duration::hours(1)) + .bind(state_payload) + .bind(result_payload) + .execute(backend.pool()) + .await + .expect("insert recent done instance"); + + let result = GarbageCollectorBackend::collect_done_instances( + &backend, + Utc::now() - Duration::hours(24), + 100, + ) + .await + .expect("collect done instances"); + + assert_eq!(result.deleted_instances, 0); + assert_eq!(result.deleted_actions, 0); + + let remaining_instances: i64 = + sqlx::query_scalar("SELECT COUNT(*) FROM runner_instances WHERE instance_id = $1") + .bind(instance_id) + .fetch_one(backend.pool()) + .await + .expect("count instances"); + assert_eq!(remaining_instances, 1); + } + + #[serial(postgres)] + #[tokio::test] + async fn worker_status_backend_upsert_worker_status_happy_path() { + let backend = setup_backend().await; + let pool_id = Uuid::new_v4(); + + WorkerStatusBackend::upsert_worker_status( + &backend, + &WorkerStatusUpdate { + pool_id, + throughput_per_min: 180.0, + total_completed: 20, + last_action_at: Some(Utc::now()), + median_dequeue_ms: Some(5), + median_handling_ms: Some(12), + dispatch_queue_size: 3, + total_in_flight: 2, + active_workers: 4, + actions_per_sec: 3.0, + 
median_instance_duration_secs: Some(0.2), + active_instance_count: 1, + total_instances_completed: 8, + instances_per_sec: 0.5, + instances_per_min: 30.0, + time_series: None, + }, + ) + .await + .expect("upsert worker status"); + + let row = sqlx::query( + "SELECT total_completed, active_workers, actions_per_sec FROM worker_status WHERE pool_id = $1", + ) + .bind(pool_id) + .fetch_one(backend.pool()) + .await + .expect("worker status row"); + assert_eq!(row.get::("total_completed"), 20); + assert_eq!(row.get::("active_workers"), 4); + assert_eq!(row.get::("actions_per_sec"), 3.0); + } +} diff --git a/crates/backend-postgres/src/lib.rs b/crates/backend-postgres/src/lib.rs new file mode 100644 index 00000000..2b4e3821 --- /dev/null +++ b/crates/backend-postgres/src/lib.rs @@ -0,0 +1,115 @@ +//! Postgres backend for persisting runner state and action results. + +mod core; +mod registry; +mod scheduler; +#[cfg(test)] +mod test_helpers; +mod webapp; + +use std::collections::HashMap; +use std::sync::{Arc, Mutex}; + +use sqlx::PgPool; +use waymark_backends_core::{BackendError, BackendResult}; +use waymark_observability::obs; + +/// Persist runner state and action results in Postgres. +#[derive(Clone)] +pub struct PostgresBackend { + pool: PgPool, + query_counts: Arc>>, + batch_size_counts: Arc>>>, +} + +impl PostgresBackend { + pub fn new(pool: PgPool) -> Self { + Self { + pool, + query_counts: Arc::new(Mutex::new(HashMap::new())), + batch_size_counts: Arc::new(Mutex::new(HashMap::new())), + } + } + + #[obs] + pub async fn connect(dsn: &str) -> BackendResult { + let pool = PgPool::connect(dsn).await?; + waymark_backend_postgres_migrations::run(&pool) + .await + .map_err(|err| BackendError::Message(err.to_string()))?; + Ok(Self::new(pool)) + } + + pub fn pool(&self) -> &PgPool { + &self.pool + } + + /// Delete all queued instances from the backing table. 
+ #[obs] + pub async fn clear_queue(&self) -> BackendResult<()> { + Self::count_query(&self.query_counts, "delete:queued_instances_all"); + sqlx::query("DELETE FROM queued_instances") + .execute(&self.pool) + .await?; + Ok(()) + } + + /// Delete all persisted runner data for a clean benchmark run. + #[obs] + pub async fn clear_all(&self) -> BackendResult<()> { + Self::count_query(&self.query_counts, "truncate:runner_tables"); + sqlx::query( + r#" + TRUNCATE runner_actions_done, + runner_instances, + queued_instances + RESTART IDENTITY + "#, + ) + .execute(&self.pool) + .await?; + Ok(()) + } + + pub fn query_counts(&self) -> HashMap { + self.query_counts + .lock() + .expect("query counts poisoned") + .clone() + } + + pub fn batch_size_counts(&self) -> HashMap> { + self.batch_size_counts + .lock() + .expect("batch size counts poisoned") + .clone() + } + + pub(crate) fn count_query(counts: &Arc>>, label: &str) { + let mut guard = counts.lock().expect("query counts poisoned"); + *guard.entry(label.to_string()).or_insert(0) += 1; + } + + pub(crate) fn count_batch_size( + counts: &Arc>>>, + label: &str, + size: usize, + ) { + if size == 0 { + return; + } + let mut guard = counts.lock().expect("batch size counts poisoned"); + let entry = guard.entry(label.to_string()).or_default(); + *entry.entry(size).or_insert(0) += 1; + } + + pub(crate) fn serialize(value: &T) -> Result, BackendError> { + rmp_serde::to_vec_named(value).map_err(|e| BackendError::Message(e.to_string())) + } + + pub(crate) fn deserialize( + payload: &[u8], + ) -> Result { + rmp_serde::from_slice(payload).map_err(|e| BackendError::Message(e.to_string())) + } +} diff --git a/crates/backend-postgres/src/registry.rs b/crates/backend-postgres/src/registry.rs new file mode 100644 index 00000000..94fc1e2c --- /dev/null +++ b/crates/backend-postgres/src/registry.rs @@ -0,0 +1,146 @@ +use sqlx::Row; +use uuid::Uuid; +use waymark_backends_core::{BackendError, BackendResult}; +use 
waymark_workflow_registry_backend::{ + WorkflowRegistration, WorkflowRegistryBackend, WorkflowVersion, +}; + +use super::PostgresBackend; + +#[async_trait::async_trait] +impl WorkflowRegistryBackend for PostgresBackend { + async fn upsert_workflow_version( + &self, + registration: &WorkflowRegistration, + ) -> BackendResult { + let inserted = sqlx::query( + r#" + INSERT INTO workflow_versions + (workflow_name, workflow_version, ir_hash, program_proto, concurrent) + VALUES ($1, $2, $3, $4, $5) + ON CONFLICT (workflow_name, workflow_version) + DO NOTHING + RETURNING id + "#, + ) + .bind(®istration.workflow_name) + .bind(®istration.workflow_version) + .bind(®istration.ir_hash) + .bind(®istration.program_proto) + .bind(registration.concurrent) + .fetch_optional(&self.pool) + .await?; + + if let Some(row) = inserted { + let id: Uuid = row.get("id"); + return Ok(id); + } + + let row = sqlx::query( + r#" + SELECT id, ir_hash + FROM workflow_versions + WHERE workflow_name = $1 AND workflow_version = $2 + "#, + ) + .bind(®istration.workflow_name) + .bind(®istration.workflow_version) + .fetch_one(&self.pool) + .await?; + + let id: Uuid = row.get("id"); + let existing_hash: String = row.get("ir_hash"); + if existing_hash != registration.ir_hash { + return Err(BackendError::Message(format!( + "workflow version already exists with different IR hash: {}@{}", + registration.workflow_name, registration.workflow_version + ))); + } + + Ok(id) + } + + async fn get_workflow_versions(&self, ids: &[Uuid]) -> BackendResult> { + if ids.is_empty() { + return Ok(Vec::new()); + } + let rows = sqlx::query( + r#" + SELECT id, workflow_name, workflow_version, ir_hash, program_proto, concurrent + FROM workflow_versions + WHERE id = ANY($1) + "#, + ) + .bind(ids) + .fetch_all(&self.pool) + .await?; + + let mut versions = Vec::with_capacity(rows.len()); + for row in rows { + versions.push(WorkflowVersion { + id: row.get("id"), + workflow_name: row.get("workflow_name"), + workflow_version: 
row.get("workflow_version"), + ir_hash: row.get("ir_hash"), + program_proto: row.get("program_proto"), + concurrent: row.get("concurrent"), + }); + } + Ok(versions) + } +} + +#[cfg(test)] +mod tests { + use serial_test::serial; + + use super::super::test_helpers::setup_backend; + use waymark_workflow_registry_backend::{WorkflowRegistration, WorkflowRegistryBackend}; + + fn sample_registration(version: &str) -> WorkflowRegistration { + WorkflowRegistration { + workflow_name: "tests.workflow".to_string(), + workflow_version: version.to_string(), + ir_hash: format!("hash-{version}"), + program_proto: vec![1, 2, 3, 4], + concurrent: true, + } + } + + #[serial(postgres)] + #[tokio::test] + async fn workflow_registry_upsert_workflow_version_happy_path() { + let backend = setup_backend().await; + let registration = sample_registration("v1"); + + let id = WorkflowRegistryBackend::upsert_workflow_version(&backend, ®istration) + .await + .expect("insert workflow version"); + let repeat_id = WorkflowRegistryBackend::upsert_workflow_version(&backend, ®istration) + .await + .expect("idempotent workflow upsert"); + + assert_eq!(id, repeat_id); + } + + #[serial(postgres)] + #[tokio::test] + async fn workflow_registry_get_workflow_versions_happy_path() { + let backend = setup_backend().await; + let registration = sample_registration("v2"); + let id = WorkflowRegistryBackend::upsert_workflow_version(&backend, ®istration) + .await + .expect("insert workflow version"); + + let versions = WorkflowRegistryBackend::get_workflow_versions(&backend, &[id]) + .await + .expect("get workflow versions"); + assert_eq!(versions.len(), 1); + assert_eq!(versions[0].id, id); + assert_eq!(versions[0].workflow_name, registration.workflow_name); + assert_eq!(versions[0].workflow_version, registration.workflow_version); + assert_eq!(versions[0].ir_hash, registration.ir_hash); + assert_eq!(versions[0].program_proto, registration.program_proto); + assert_eq!(versions[0].concurrent, 
registration.concurrent); + } +} diff --git a/crates/backend-postgres/src/scheduler.rs b/crates/backend-postgres/src/scheduler.rs new file mode 100644 index 00000000..e47f2114 --- /dev/null +++ b/crates/backend-postgres/src/scheduler.rs @@ -0,0 +1,605 @@ +use chrono::{DateTime, Utc}; +use sqlx::Row; +use uuid::Uuid; +use waymark_backends_core::{BackendError, BackendResult}; +use waymark_scheduler_backend::SchedulerBackend; + +use waymark_scheduler_core::compute_next_run; +use waymark_scheduler_core::{CreateScheduleParams, ScheduleId, ScheduleType, WorkflowSchedule}; + +#[async_trait::async_trait] +impl SchedulerBackend for crate::PostgresBackend { + async fn upsert_schedule(&self, params: &CreateScheduleParams) -> BackendResult { + let next_run_at = compute_next_run( + params.schedule_type, + params.cron_expression.as_deref(), + params.interval_seconds, + params.jitter_seconds, + None, + ) + .map_err(BackendError::Message)?; + + let row = sqlx::query( + r#" + INSERT INTO workflow_schedules + (workflow_name, schedule_name, schedule_type, cron_expression, interval_seconds, + jitter_seconds, input_payload, next_run_at, priority, allow_duplicate) + VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10) + ON CONFLICT (workflow_name, schedule_name) + DO UPDATE SET + schedule_type = EXCLUDED.schedule_type, + cron_expression = EXCLUDED.cron_expression, + interval_seconds = EXCLUDED.interval_seconds, + jitter_seconds = EXCLUDED.jitter_seconds, + input_payload = EXCLUDED.input_payload, + next_run_at = COALESCE(workflow_schedules.next_run_at, EXCLUDED.next_run_at), + priority = EXCLUDED.priority, + allow_duplicate = EXCLUDED.allow_duplicate, + status = 'active', + updated_at = NOW() + RETURNING id + "#, + ) + .bind(¶ms.workflow_name) + .bind(¶ms.schedule_name) + .bind(params.schedule_type.as_str()) + .bind(¶ms.cron_expression) + .bind(params.interval_seconds) + .bind(params.jitter_seconds) + .bind(¶ms.input_payload) + .bind(next_run_at) + .bind(params.priority) + 
.bind(params.allow_duplicate) + .fetch_one(&self.pool) + .await?; + + let id: Uuid = row.get("id"); + Ok(ScheduleId(id)) + } + + async fn get_schedule(&self, id: ScheduleId) -> BackendResult { + let schedule = sqlx::query_as::<_, ScheduleRow>( + r#" + SELECT id, workflow_name, schedule_name, schedule_type, cron_expression, interval_seconds, + jitter_seconds, input_payload, status, next_run_at, last_run_at, last_instance_id, + created_at, updated_at, priority, allow_duplicate + FROM workflow_schedules + WHERE id = $1 + "#, + ) + .bind(id.0) + .fetch_optional(&self.pool) + .await? + .ok_or_else(|| BackendError::Message(format!("schedule not found: {}", id)))?; + + Ok(schedule.into()) + } + + async fn get_schedule_by_name( + &self, + workflow_name: &str, + schedule_name: &str, + ) -> BackendResult> { + let schedule = sqlx::query_as::<_, ScheduleRow>( + r#" + SELECT id, workflow_name, schedule_name, schedule_type, cron_expression, interval_seconds, + jitter_seconds, input_payload, status, next_run_at, last_run_at, last_instance_id, + created_at, updated_at, priority, allow_duplicate + FROM workflow_schedules + WHERE workflow_name = $1 AND schedule_name = $2 AND status != 'deleted' + "#, + ) + .bind(workflow_name) + .bind(schedule_name) + .fetch_optional(&self.pool) + .await?; + + Ok(schedule.map(Into::into)) + } + + async fn list_schedules( + &self, + limit: i64, + offset: i64, + ) -> BackendResult> { + let rows = sqlx::query_as::<_, ScheduleRow>( + r#" + SELECT id, workflow_name, schedule_name, schedule_type, cron_expression, interval_seconds, + jitter_seconds, input_payload, status, next_run_at, last_run_at, last_instance_id, + created_at, updated_at, priority, allow_duplicate + FROM workflow_schedules + WHERE status != 'deleted' + ORDER BY workflow_name, schedule_name + LIMIT $1 OFFSET $2 + "#, + ) + .bind(limit) + .bind(offset) + .fetch_all(&self.pool) + .await?; + + Ok(rows.into_iter().map(Into::into).collect()) + } + + async fn count_schedules(&self) -> 
BackendResult { + let count = sqlx::query_scalar::<_, i64>( + "SELECT COUNT(*) FROM workflow_schedules WHERE status != 'deleted'", + ) + .fetch_one(&self.pool) + .await?; + + Ok(count) + } + + async fn update_schedule_status(&self, id: ScheduleId, status: &str) -> BackendResult { + let result = sqlx::query( + r#" + UPDATE workflow_schedules + SET status = $2, updated_at = NOW() + WHERE id = $1 + "#, + ) + .bind(id.0) + .bind(status) + .execute(&self.pool) + .await?; + + Ok(result.rows_affected() > 0) + } + + async fn delete_schedule(&self, id: ScheduleId) -> BackendResult { + SchedulerBackend::update_schedule_status(self, id, "deleted").await + } + + async fn find_due_schedules(&self, limit: i32) -> BackendResult> { + let rows = sqlx::query_as::<_, ScheduleRow>( + r#" + SELECT id, workflow_name, schedule_name, schedule_type, cron_expression, interval_seconds, + jitter_seconds, input_payload, status, next_run_at, last_run_at, last_instance_id, + created_at, updated_at, priority, allow_duplicate + FROM workflow_schedules + WHERE status = 'active' + AND next_run_at IS NOT NULL + AND next_run_at <= NOW() + ORDER BY next_run_at + FOR UPDATE SKIP LOCKED + LIMIT $1 + "#, + ) + .bind(limit) + .fetch_all(&self.pool) + .await?; + + Ok(rows.into_iter().map(Into::into).collect()) + } + + async fn has_running_instance(&self, schedule_id: ScheduleId) -> BackendResult { + let has_running = sqlx::query_scalar::<_, bool>( + r#" + SELECT EXISTS( + SELECT 1 + FROM runner_instances ri + JOIN queued_instances qi ON qi.instance_id = ri.instance_id + WHERE ri.schedule_id = $1 + ) + "#, + ) + .bind(schedule_id.0) + .fetch_one(&self.pool) + .await?; + + Ok(has_running) + } + + async fn mark_schedule_executed( + &self, + schedule_id: ScheduleId, + instance_id: Uuid, + ) -> BackendResult<()> { + let schedule = SchedulerBackend::get_schedule(self, schedule_id).await?; + let schedule_type = ScheduleType::parse(&schedule.schedule_type) + .ok_or_else(|| BackendError::Message("invalid schedule 
type".to_string()))?; + let next_run_at = compute_next_run( + schedule_type, + schedule.cron_expression.as_deref(), + schedule.interval_seconds, + schedule.jitter_seconds, + Some(Utc::now()), + ) + .map_err(BackendError::Message)?; + + sqlx::query( + r#" + UPDATE workflow_schedules + SET last_run_at = NOW(), + last_instance_id = $2, + next_run_at = $3, + updated_at = NOW() + WHERE id = $1 + "#, + ) + .bind(schedule_id.0) + .bind(instance_id) + .bind(next_run_at) + .execute(&self.pool) + .await?; + + Ok(()) + } + + async fn skip_schedule_run(&self, schedule_id: ScheduleId) -> BackendResult<()> { + let schedule = SchedulerBackend::get_schedule(self, schedule_id).await?; + let schedule_type = ScheduleType::parse(&schedule.schedule_type) + .ok_or_else(|| BackendError::Message("invalid schedule type".to_string()))?; + let next_run_at = compute_next_run( + schedule_type, + schedule.cron_expression.as_deref(), + schedule.interval_seconds, + schedule.jitter_seconds, + Some(Utc::now()), + ) + .map_err(BackendError::Message)?; + + sqlx::query( + r#" + UPDATE workflow_schedules + SET next_run_at = $2, updated_at = NOW() + WHERE id = $1 + "#, + ) + .bind(schedule_id.0) + .bind(next_run_at) + .execute(&self.pool) + .await?; + + Ok(()) + } +} + +#[derive(sqlx::FromRow)] +struct ScheduleRow { + id: Uuid, + workflow_name: String, + schedule_name: String, + schedule_type: String, + cron_expression: Option, + interval_seconds: Option, + jitter_seconds: i64, + input_payload: Option>, + status: String, + next_run_at: Option>, + last_run_at: Option>, + last_instance_id: Option, + created_at: DateTime, + updated_at: DateTime, + priority: i32, + allow_duplicate: bool, +} + +impl From for WorkflowSchedule { + fn from(row: ScheduleRow) -> Self { + Self { + id: row.id, + workflow_name: row.workflow_name, + schedule_name: row.schedule_name, + schedule_type: row.schedule_type, + cron_expression: row.cron_expression, + interval_seconds: row.interval_seconds, + jitter_seconds: 
row.jitter_seconds, + input_payload: row.input_payload, + status: row.status, + next_run_at: row.next_run_at, + last_run_at: row.last_run_at, + last_instance_id: row.last_instance_id, + created_at: row.created_at, + updated_at: row.updated_at, + priority: row.priority, + allow_duplicate: row.allow_duplicate, + } + } +} + +#[cfg(test)] +mod tests { + use chrono::Utc; + use serial_test::serial; + use sqlx::Row; + use uuid::Uuid; + + use crate::PostgresBackend; + + use super::super::test_helpers::setup_backend; + use super::*; + use waymark_scheduler_backend::SchedulerBackend; + use waymark_scheduler_core::CreateScheduleParams; + + fn sample_params(schedule_name: &str) -> CreateScheduleParams { + CreateScheduleParams { + workflow_name: "tests.workflow".to_string(), + schedule_name: schedule_name.to_string(), + schedule_type: ScheduleType::Interval, + cron_expression: None, + interval_seconds: Some(60), + jitter_seconds: 0, + input_payload: Some(vec![1, 2, 3]), + priority: 3, + allow_duplicate: true, + } + } + + async fn insert_schedule(backend: &PostgresBackend, schedule_name: &str) -> ScheduleId { + SchedulerBackend::upsert_schedule(backend, &sample_params(schedule_name)) + .await + .expect("upsert schedule") + } + + #[serial(postgres)] + #[tokio::test] + async fn scheduler_upsert_schedule_happy_path() { + let backend = setup_backend().await; + + let id = insert_schedule(&backend, "upsert").await; + let row = sqlx::query("SELECT id FROM workflow_schedules WHERE id = $1") + .bind(id.0) + .fetch_one(backend.pool()) + .await + .expect("select schedule"); + + assert_eq!(row.get::("id"), id.0); + } + + #[serial(postgres)] + #[tokio::test] + async fn scheduler_upsert_schedule_preserves_existing_next_run_at() { + let backend = setup_backend().await; + + let id = insert_schedule(&backend, "preserve-next-run").await; + sqlx::query( + "UPDATE workflow_schedules SET next_run_at = NOW() + INTERVAL '2 days' WHERE id = $1", + ) + .bind(id.0) + .execute(backend.pool()) + .await + 
.expect("force next_run_at"); + + let before: Option> = + sqlx::query_scalar("SELECT next_run_at FROM workflow_schedules WHERE id = $1") + .bind(id.0) + .fetch_one(backend.pool()) + .await + .expect("select next_run_at before"); + + let upserted_id = + SchedulerBackend::upsert_schedule(&backend, &sample_params("preserve-next-run")) + .await + .expect("upsert existing schedule"); + assert_eq!(upserted_id.0, id.0); + + let after: Option> = + sqlx::query_scalar("SELECT next_run_at FROM workflow_schedules WHERE id = $1") + .bind(id.0) + .fetch_one(backend.pool()) + .await + .expect("select next_run_at after"); + + assert_eq!(after, before); + } + + #[serial(postgres)] + #[tokio::test] + async fn scheduler_get_schedule_happy_path() { + let backend = setup_backend().await; + + let id = insert_schedule(&backend, "get").await; + let schedule = SchedulerBackend::get_schedule(&backend, id) + .await + .expect("get schedule"); + + assert_eq!(schedule.id, id.0); + assert_eq!(schedule.schedule_name, "get"); + assert_eq!(schedule.workflow_name, "tests.workflow"); + } + + #[serial(postgres)] + #[tokio::test] + async fn scheduler_get_schedule_by_name_happy_path() { + let backend = setup_backend().await; + + let id = insert_schedule(&backend, "by-name").await; + let schedule = + SchedulerBackend::get_schedule_by_name(&backend, "tests.workflow", "by-name") + .await + .expect("get schedule by name") + .expect("expected schedule"); + + assert_eq!(schedule.id, id.0); + assert_eq!(schedule.schedule_name, "by-name"); + } + + #[serial(postgres)] + #[tokio::test] + async fn scheduler_list_schedules_happy_path() { + let backend = setup_backend().await; + + insert_schedule(&backend, "a-list").await; + insert_schedule(&backend, "b-list").await; + + let schedules = SchedulerBackend::list_schedules(&backend, 10, 0) + .await + .expect("list schedules"); + + assert_eq!(schedules.len(), 2); + assert_eq!(schedules[0].schedule_name, "a-list"); + assert_eq!(schedules[1].schedule_name, "b-list"); + } + 
+ #[serial(postgres)] + #[tokio::test] + async fn scheduler_count_schedules_happy_path() { + let backend = setup_backend().await; + + insert_schedule(&backend, "count-a").await; + insert_schedule(&backend, "count-b").await; + + let count = SchedulerBackend::count_schedules(&backend) + .await + .expect("count schedules"); + assert_eq!(count, 2); + } + + #[serial(postgres)] + #[tokio::test] + async fn scheduler_update_schedule_status_happy_path() { + let backend = setup_backend().await; + + let id = insert_schedule(&backend, "status").await; + let updated = SchedulerBackend::update_schedule_status(&backend, id, "paused") + .await + .expect("update schedule status"); + assert!(updated); + + let status: String = + sqlx::query_scalar("SELECT status FROM workflow_schedules WHERE id = $1") + .bind(id.0) + .fetch_one(backend.pool()) + .await + .expect("select status"); + assert_eq!(status, "paused"); + } + + #[serial(postgres)] + #[tokio::test] + async fn scheduler_delete_schedule_happy_path() { + let backend = setup_backend().await; + + let id = insert_schedule(&backend, "delete").await; + let deleted = SchedulerBackend::delete_schedule(&backend, id) + .await + .expect("delete schedule"); + assert!(deleted); + + let status: String = + sqlx::query_scalar("SELECT status FROM workflow_schedules WHERE id = $1") + .bind(id.0) + .fetch_one(backend.pool()) + .await + .expect("select status"); + assert_eq!(status, "deleted"); + } + + #[serial(postgres)] + #[tokio::test] + async fn scheduler_find_due_schedules_happy_path() { + let backend = setup_backend().await; + + let id = insert_schedule(&backend, "due").await; + sqlx::query( + "UPDATE workflow_schedules SET next_run_at = NOW() - INTERVAL '1 minute' WHERE id = $1", + ) + .bind(id.0) + .execute(backend.pool()) + .await + .expect("force schedule due"); + + let due = SchedulerBackend::find_due_schedules(&backend, 10) + .await + .expect("find due schedules"); + assert_eq!(due.len(), 1); + assert_eq!(due[0].id, id.0); + } + + 
#[serial(postgres)] + #[tokio::test] + async fn scheduler_has_running_instance_happy_path() { + let backend = setup_backend().await; + + let has_running = SchedulerBackend::has_running_instance(&backend, ScheduleId::new()) + .await + .expect("has running instance"); + assert!(!has_running); + } + + #[serial(postgres)] + #[tokio::test] + async fn scheduler_has_running_instance_true_with_queued_instance() { + let backend = setup_backend().await; + + let schedule_id = insert_schedule(&backend, "running-instance").await; + let instance_id = Uuid::new_v4(); + sqlx::query( + "INSERT INTO runner_instances (instance_id, entry_node, schedule_id) VALUES ($1, $2, $3)", + ) + .bind(instance_id) + .bind(Uuid::new_v4()) + .bind(schedule_id.0) + .execute(backend.pool()) + .await + .expect("insert runner instance"); + sqlx::query("INSERT INTO queued_instances (instance_id, payload) VALUES ($1, $2)") + .bind(instance_id) + .bind(vec![0_u8]) + .execute(backend.pool()) + .await + .expect("insert queued instance"); + + let has_running = SchedulerBackend::has_running_instance(&backend, schedule_id) + .await + .expect("has running instance"); + assert!(has_running); + } + + #[serial(postgres)] + #[tokio::test] + async fn scheduler_mark_schedule_executed_happy_path() { + let backend = setup_backend().await; + + let id = insert_schedule(&backend, "mark-executed").await; + let instance_id = Uuid::new_v4(); + SchedulerBackend::mark_schedule_executed(&backend, id, instance_id) + .await + .expect("mark schedule executed"); + + let row = sqlx::query( + "SELECT last_instance_id, last_run_at, next_run_at FROM workflow_schedules WHERE id = $1", + ) + .bind(id.0) + .fetch_one(backend.pool()) + .await + .expect("select schedule"); + + let last_instance_id: Option = row.get("last_instance_id"); + let last_run_at: Option> = row.get("last_run_at"); + let next_run_at: Option> = row.get("next_run_at"); + + assert_eq!(last_instance_id, Some(instance_id)); + assert!(last_run_at.is_some()); + 
assert!(next_run_at.is_some()); + } + + #[serial(postgres)] + #[tokio::test] + async fn scheduler_skip_schedule_run_happy_path() { + let backend = setup_backend().await; + + let id = insert_schedule(&backend, "skip").await; + sqlx::query( + "UPDATE workflow_schedules SET next_run_at = NOW() - INTERVAL '1 minute' WHERE id = $1", + ) + .bind(id.0) + .execute(backend.pool()) + .await + .expect("force schedule due"); + + SchedulerBackend::skip_schedule_run(&backend, id) + .await + .expect("skip schedule run"); + + let next_run_at: Option> = + sqlx::query_scalar("SELECT next_run_at FROM workflow_schedules WHERE id = $1") + .bind(id.0) + .fetch_one(backend.pool()) + .await + .expect("select next_run_at"); + assert!(next_run_at.expect("next_run_at").gt(&Utc::now())); + } +} diff --git a/crates/backend-postgres/src/test_helpers.rs b/crates/backend-postgres/src/test_helpers.rs new file mode 100644 index 00000000..addb1ad4 --- /dev/null +++ b/crates/backend-postgres/src/test_helpers.rs @@ -0,0 +1,27 @@ +use sqlx::PgPool; + +use super::PostgresBackend; +use waymark_test_support::postgres_setup; + +pub(super) async fn setup_backend() -> PostgresBackend { + let pool = postgres_setup().await; + reset_database(&pool).await; + PostgresBackend::new(pool) +} + +pub(super) async fn reset_database(pool: &PgPool) { + sqlx::query( + r#" + TRUNCATE runner_actions_done, + queued_instances, + runner_instances, + workflow_versions, + workflow_schedules, + worker_status + RESTART IDENTITY CASCADE + "#, + ) + .execute(pool) + .await + .expect("truncate postgres tables"); +} diff --git a/crates/backend-postgres/src/webapp.rs b/crates/backend-postgres/src/webapp.rs new file mode 100644 index 00000000..e3f50ced --- /dev/null +++ b/crates/backend-postgres/src/webapp.rs @@ -0,0 +1,2329 @@ +use std::collections::HashMap; + +use chrono::{DateTime, Utc}; +use prost::Message; +use serde_json::Value; +use sqlx::{Postgres, QueryBuilder, Row}; + +use uuid::Uuid; + +use 
waymark_backends_core::{BackendError, BackendResult}; +use waymark_core_backend::GraphUpdate; +use waymark_dag::{DAGNode, EdgeType, convert_to_dag}; +use waymark_proto::ast as ir; +use waymark_runner::replay_action_kwargs; +use waymark_runner_state::{ + ActionCallSpec, ExecutionNode, NodeStatus, RunnerState, format_value, value_visitor::ValueExpr, +}; +use waymark_webapp_core::{ + ExecutionEdgeView, ExecutionGraphView, ExecutionNodeView, InstanceDetail, InstanceStatus, + InstanceSummary, ScheduleDetail, ScheduleInvocationSummary, ScheduleSummary, TimelineEntry, + WorkerActionRow, WorkerAggregateStats, WorkerStatus, +}; + +const INSTANCE_STATUS_FALLBACK_SQL: &str = r#" +CASE + WHEN ri.error IS NOT NULL THEN 'failed' + WHEN ri.result IS NOT NULL THEN 'completed' + WHEN ri.state IS NOT NULL THEN 'running' + ELSE 'queued' +END +"#; + +#[derive(Debug, Clone, PartialEq, Eq)] +enum InstanceSearchToken { + Term(String), + And, + Or, + LParen, + RParen, +} + +#[derive(Debug, Clone, PartialEq, Eq)] +enum InstanceSearchExpr { + Term(String), + And(Box, Box), + Or(Box, Box), +} + +struct InstanceSearchParser { + tokens: Vec, + position: usize, +} + +impl InstanceSearchParser { + fn new(tokens: Vec) -> Self { + Self { + tokens, + position: 0, + } + } + + fn parse(mut self) -> Option { + let expr = self.parse_or()?; + if self.position == self.tokens.len() { + Some(expr) + } else { + None + } + } + + fn parse_or(&mut self) -> Option { + let mut expr = self.parse_and()?; + while self.consume_or() { + let rhs = self.parse_and()?; + expr = InstanceSearchExpr::Or(Box::new(expr), Box::new(rhs)); + } + Some(expr) + } + + fn parse_and(&mut self) -> Option { + let mut expr = self.parse_primary()?; + loop { + if self.consume_and() || self.peek_is_primary_start() { + let rhs = self.parse_primary()?; + expr = InstanceSearchExpr::And(Box::new(expr), Box::new(rhs)); + continue; + } + break; + } + Some(expr) + } + + fn parse_primary(&mut self) -> Option { + match self.peek()? 
{ + InstanceSearchToken::Term(term) => { + let term = term.clone(); + self.position += 1; + Some(InstanceSearchExpr::Term(term)) + } + InstanceSearchToken::LParen => { + self.position += 1; + let expr = self.parse_or()?; + if !self.consume_rparen() { + return None; + } + Some(expr) + } + InstanceSearchToken::And | InstanceSearchToken::Or | InstanceSearchToken::RParen => { + None + } + } + } + + fn consume_and(&mut self) -> bool { + if matches!(self.peek(), Some(InstanceSearchToken::And)) { + self.position += 1; + true + } else { + false + } + } + + fn consume_or(&mut self) -> bool { + if matches!(self.peek(), Some(InstanceSearchToken::Or)) { + self.position += 1; + true + } else { + false + } + } + + fn consume_rparen(&mut self) -> bool { + if matches!(self.peek(), Some(InstanceSearchToken::RParen)) { + self.position += 1; + true + } else { + false + } + } + + fn peek_is_primary_start(&self) -> bool { + matches!( + self.peek(), + Some(InstanceSearchToken::Term(_)) | Some(InstanceSearchToken::LParen) + ) + } + + fn peek(&self) -> Option<&InstanceSearchToken> { + self.tokens.get(self.position) + } +} + +fn tokenize_instance_search(search: &str) -> Vec { + let mut chars = search.chars().peekable(); + let mut tokens = Vec::new(); + + while let Some(ch) = chars.peek().copied() { + if ch.is_whitespace() { + chars.next(); + continue; + } + if ch == '(' { + chars.next(); + tokens.push(InstanceSearchToken::LParen); + continue; + } + if ch == ')' { + chars.next(); + tokens.push(InstanceSearchToken::RParen); + continue; + } + if ch == '"' { + chars.next(); + let mut quoted = String::new(); + for next in chars.by_ref() { + if next == '"' { + break; + } + quoted.push(next); + } + if !quoted.is_empty() { + tokens.push(InstanceSearchToken::Term(quoted)); + } + continue; + } + + let mut term = String::new(); + while let Some(next) = chars.peek().copied() { + if next.is_whitespace() || next == '(' || next == ')' { + break; + } + term.push(next); + chars.next(); + } + if 
term.is_empty() { + continue; + } + + match term.to_ascii_uppercase().as_str() { + "AND" => tokens.push(InstanceSearchToken::And), + "OR" => tokens.push(InstanceSearchToken::Or), + _ => tokens.push(InstanceSearchToken::Term(term)), + } + } + + tokens +} + +fn parse_instance_search_expr(search: &str) -> Option { + let trimmed = search.trim(); + if trimmed.is_empty() { + return None; + } + + let tokens = tokenize_instance_search(trimmed); + if tokens.is_empty() { + return None; + } + + InstanceSearchParser::new(tokens) + .parse() + .or_else(|| Some(InstanceSearchExpr::Term(trimmed.to_string()))) +} + +fn push_instance_search_expr_sql( + builder: &mut QueryBuilder<'_, Postgres>, + expr: &InstanceSearchExpr, +) { + match expr { + InstanceSearchExpr::Term(term) => { + let pattern = format!("%{term}%"); + builder.push("("); + builder.push("COALESCE(ri.workflow_name, wv.workflow_name, '') ILIKE "); + builder.push_bind(pattern.clone()); + builder.push(" OR COALESCE(ri.current_status, "); + builder.push(INSTANCE_STATUS_FALLBACK_SQL); + builder.push(", '') ILIKE "); + builder.push_bind(pattern); + builder.push(")"); + } + InstanceSearchExpr::And(left, right) => { + builder.push("("); + push_instance_search_expr_sql(builder, left); + builder.push(" AND "); + push_instance_search_expr_sql(builder, right); + builder.push(")"); + } + InstanceSearchExpr::Or(left, right) => { + builder.push("("); + push_instance_search_expr_sql(builder, left); + builder.push(" OR "); + push_instance_search_expr_sql(builder, right); + builder.push(")"); + } + } +} + +fn parse_instance_status(status: &str) -> Option { + match status { + "queued" => Some(InstanceStatus::Queued), + "running" => Some(InstanceStatus::Running), + "completed" => Some(InstanceStatus::Completed), + "failed" => Some(InstanceStatus::Failed), + _ => None, + } +} + +#[async_trait::async_trait] +impl waymark_webapp_backend::WebappBackend for crate::PostgresBackend { + async fn count_instances(&self, search: Option<&str>) -> 
BackendResult { + let mut builder: QueryBuilder = QueryBuilder::new( + r#" + SELECT COUNT(*)::BIGINT + FROM runner_instances ri + LEFT JOIN workflow_versions wv ON wv.id = ri.workflow_version_id + "#, + ); + + if let Some(search_expr) = search.and_then(parse_instance_search_expr) { + builder.push(" WHERE "); + push_instance_search_expr_sql(&mut builder, &search_expr); + } + + let count: i64 = builder.build_query_scalar().fetch_one(&self.pool).await?; + Ok(count) + } + + async fn list_instances( + &self, + search: Option<&str>, + limit: i64, + offset: i64, + ) -> BackendResult> { + let mut builder: QueryBuilder = QueryBuilder::new( + r#" + SELECT + ri.instance_id, + ri.entry_node, + ri.created_at, + ri.state, + ri.result, + ri.error, + COALESCE(ri.workflow_name, wv.workflow_name) AS workflow_name, + COALESCE(ri.current_status, + CASE + WHEN ri.error IS NOT NULL THEN 'failed' + WHEN ri.result IS NOT NULL THEN 'completed' + WHEN ri.state IS NOT NULL THEN 'running' + ELSE 'queued' + END + ) AS current_status + FROM runner_instances ri + LEFT JOIN workflow_versions wv ON wv.id = ri.workflow_version_id + "#, + ); + if let Some(search_expr) = search.and_then(parse_instance_search_expr) { + builder.push(" WHERE "); + push_instance_search_expr_sql(&mut builder, &search_expr); + } + builder.push(" ORDER BY ri.created_at DESC, ri.instance_id DESC LIMIT "); + builder.push_bind(limit); + builder.push(" OFFSET "); + builder.push_bind(offset); + let rows = builder.build().fetch_all(&self.pool).await?; + + let mut instances = Vec::new(); + for row in rows { + let instance_id: Uuid = row.get("instance_id"); + let entry_node: Uuid = row.get("entry_node"); + let created_at: DateTime = row.get("created_at"); + let state_bytes: Option> = row.get("state"); + let result_bytes: Option> = row.get("result"); + let error_bytes: Option> = row.get("error"); + let workflow_name: Option = row.get("workflow_name"); + let current_status: Option = row.get("current_status"); + + let status = 
current_status + .as_deref() + .and_then(parse_instance_status) + .unwrap_or_else(|| determine_status(&state_bytes, &result_bytes, &error_bytes)); + let input_preview = extract_input_preview(&state_bytes); + + instances.push(InstanceSummary { + id: instance_id, + entry_node, + created_at, + status, + workflow_name, + input_preview, + }); + } + + Ok(instances) + } + + async fn get_instance(&self, instance_id: Uuid) -> BackendResult { + let row = sqlx::query( + r#" + SELECT + ri.instance_id, + ri.entry_node, + ri.created_at, + ri.state, + ri.result, + ri.error, + COALESCE(ri.workflow_name, wv.workflow_name) AS workflow_name, + COALESCE(ri.current_status, + CASE + WHEN ri.error IS NOT NULL THEN 'failed' + WHEN ri.result IS NOT NULL THEN 'completed' + WHEN ri.state IS NOT NULL THEN 'running' + ELSE 'queued' + END + ) AS current_status + FROM runner_instances ri + LEFT JOIN workflow_versions wv ON wv.id = ri.workflow_version_id + WHERE ri.instance_id = $1 + "#, + ) + .bind(instance_id) + .fetch_optional(&self.pool) + .await? 
+ .ok_or_else(|| BackendError::Message(format!("instance not found: {}", instance_id)))?; + + let instance_id: Uuid = row.get("instance_id"); + let entry_node: Uuid = row.get("entry_node"); + let created_at: DateTime = row.get("created_at"); + let state_bytes: Option> = row.get("state"); + let result_bytes: Option> = row.get("result"); + let error_bytes: Option> = row.get("error"); + let workflow_name: Option = row.get("workflow_name"); + let current_status: Option = row.get("current_status"); + + let status = current_status + .as_deref() + .and_then(parse_instance_status) + .unwrap_or_else(|| determine_status(&state_bytes, &result_bytes, &error_bytes)); + let input_payload = format_input_payload(&state_bytes); + let result_payload = format_instance_result_payload(status, &result_bytes, &error_bytes); + let error_payload = format_error(&error_bytes); + + Ok(InstanceDetail { + id: instance_id, + entry_node, + created_at, + status, + workflow_name, + input_payload, + result_payload, + error_payload, + }) + } + + async fn get_execution_graph( + &self, + instance_id: Uuid, + ) -> BackendResult> { + let row = sqlx::query( + r#" + SELECT state FROM runner_instances WHERE instance_id = $1 + "#, + ) + .bind(instance_id) + .fetch_optional(&self.pool) + .await?; + + let Some(row) = row else { + return Ok(None); + }; + + let state_bytes: Option> = row.get("state"); + let Some(state_bytes) = state_bytes else { + return Ok(None); + }; + + let graph_update: GraphUpdate = rmp_serde::from_slice(&state_bytes) + .map_err(|e| BackendError::Message(format!("failed to decode state: {}", e)))?; + + let nodes: Vec = graph_update + .nodes + .values() + .map(|node| ExecutionNodeView { + id: node.node_id.to_string(), + node_type: node.node_type.clone(), + label: node.label.clone(), + status: format_node_status(&node.status), + action_name: node.action.as_ref().map(|a| a.action_name.clone()), + module_name: node.action.as_ref().and_then(|a| a.module_name.clone()), + }) + .collect(); + + let 
edges: Vec = graph_update + .edges + .iter() + .map(|edge| ExecutionEdgeView { + source: edge.source.to_string(), + target: edge.target.to_string(), + edge_type: format!("{:?}", edge.edge_type), + }) + .collect(); + + Ok(Some(ExecutionGraphView { nodes, edges })) + } + + async fn get_workflow_graph( + &self, + instance_id: Uuid, + ) -> BackendResult> { + let row = sqlx::query( + r#" + SELECT ri.state, wv.program_proto + FROM runner_instances ri + JOIN workflow_versions wv ON wv.id = ri.workflow_version_id + WHERE ri.instance_id = $1 + "#, + ) + .bind(instance_id) + .fetch_optional(&self.pool) + .await?; + + let Some(row) = row else { + return Ok(None); + }; + + let program_proto: Vec = row.get("program_proto"); + let program = ir::Program::decode(&program_proto[..]) + .map_err(|err| BackendError::Message(format!("failed to decode workflow IR: {err}")))?; + let dag = convert_to_dag(&program).map_err(|err| { + BackendError::Message(format!("failed to convert workflow DAG: {err}")) + })?; + + let mut template_statuses: HashMap = HashMap::new(); + let state_bytes: Option> = row.get("state"); + if let Some(state_bytes) = state_bytes { + let graph_update: GraphUpdate = rmp_serde::from_slice(&state_bytes) + .map_err(|err| BackendError::Message(format!("failed to decode state: {err}")))?; + + for node in graph_update.nodes.values() { + let Some(template_id) = node.template_id.as_ref() else { + continue; + }; + template_statuses + .entry(template_id.clone()) + .and_modify(|existing| { + *existing = merge_template_status(existing, &node.status); + }) + .or_insert_with(|| node.status.clone()); + } + } + + let mut node_ids: Vec = dag.nodes.keys().cloned().collect(); + node_ids.sort(); + let nodes: Vec = node_ids + .into_iter() + .filter_map(|node_id| { + let node = dag.nodes.get(&node_id)?; + let status = template_statuses + .get(&node_id) + .map(format_node_status) + .unwrap_or_else(|| "pending".to_string()); + let (action_name, module_name) = match node { + 
DAGNode::ActionCall(action) => { + (Some(action.action_name.clone()), action.module_name.clone()) + } + _ => (None, None), + }; + + Some(ExecutionNodeView { + id: node_id, + node_type: node.node_type().to_string(), + label: node.label(), + status, + action_name, + module_name, + }) + }) + .collect(); + + let edges: Vec = dag + .edges + .iter() + .filter(|edge| edge.edge_type == EdgeType::StateMachine) + .map(|edge| ExecutionEdgeView { + source: edge.source.clone(), + target: edge.target.clone(), + edge_type: if edge.is_loop_back { + "state_machine_loop_back".to_string() + } else { + "state_machine".to_string() + }, + }) + .collect(); + + Ok(Some(ExecutionGraphView { nodes, edges })) + } + + async fn get_action_results(&self, instance_id: Uuid) -> BackendResult> { + let row = sqlx::query( + r#" + SELECT state + FROM runner_instances + WHERE instance_id = $1 + "#, + ) + .bind(instance_id) + .fetch_optional(&self.pool) + .await?; + + let Some(row) = row else { + return Ok(Vec::new()); + }; + let state_bytes: Option> = row.get("state"); + let Some(state_bytes) = state_bytes else { + return Ok(Vec::new()); + }; + let graph_update: GraphUpdate = rmp_serde::from_slice(&state_bytes) + .map_err(|e| BackendError::Message(format!("failed to decode state: {}", e)))?; + + let runner_state = RunnerState::new( + None, + Some(graph_update.nodes.clone()), + Some(graph_update.edges), + false, + ); + let action_nodes: HashMap = graph_update + .nodes + .into_iter() + .filter(|(_, node)| node.is_action_call()) + .collect(); + if action_nodes.is_empty() { + return Ok(Vec::new()); + } + let execution_ids: Vec = action_nodes.keys().copied().collect(); + + let rows = sqlx::query( + r#" + SELECT created_at, execution_id, attempt, status, started_at, completed_at, duration_ms, result + FROM runner_actions_done + WHERE execution_id = ANY($1) + ORDER BY created_at ASC, attempt ASC + "#, + ) + .bind(&execution_ids) + .fetch_all(&self.pool) + .await?; + + let mut decoded_rows = 
Vec::with_capacity(rows.len()); + for row in rows { + let created_at: DateTime = row.get("created_at"); + let execution_id: Uuid = row.get("execution_id"); + let attempt: i32 = row.get("attempt"); + let status: Option = row.get("status"); + let started_at: Option> = row.get("started_at"); + let completed_at: Option> = row.get("completed_at"); + let duration_ms: Option = row.get("duration_ms"); + let result_bytes: Option> = row.get("result"); + let result = result_bytes + .as_deref() + .map(decode_msgpack_json) + .transpose()?; + decoded_rows.push(DecodedActionResultRow { + created_at, + execution_id, + attempt, + status, + started_at, + completed_at, + duration_ms, + result, + }); + } + + // Replay needs the current known action outputs by execution id. + let mut action_results = HashMap::new(); + for row in &decoded_rows { + if let Some(result) = &row.result { + action_results.insert(row.execution_id, result.clone()); + } + } + + let mut request_preview_cache: HashMap = HashMap::new(); + let mut entries = Vec::with_capacity(decoded_rows.len()); + for row in decoded_rows { + let node = action_nodes.get(&row.execution_id); + let action_name = node + .and_then(|n| n.action.as_ref().map(|a| a.action_name.clone())) + .unwrap_or_default(); + let module_name = + node.and_then(|n| n.action.as_ref().and_then(|a| a.module_name.clone())); + + let request_preview = + if let Some(existing) = request_preview_cache.get(&row.execution_id) { + existing.clone() + } else { + let rendered = render_action_request_preview( + node.and_then(|n| n.action.as_ref()), + &runner_state, + &action_results, + row.execution_id, + ); + request_preview_cache.insert(row.execution_id, rendered.clone()); + rendered + }; + + let (response_preview, error) = match &row.result { + Some(value) => format_action_result(value), + None => ("(no result)".to_string(), None), + }; + let status = row.status.clone().unwrap_or_else(|| { + if error.is_some() { + "failed".to_string() + } else { + 
"completed".to_string() + } + }); + let (dispatched_at, completed_at, duration_ms) = if row.started_at.is_some() + || row.completed_at.is_some() + || row.duration_ms.is_some() + { + ( + Some(row.started_at.unwrap_or(row.created_at).to_rfc3339()), + Some(row.completed_at.unwrap_or(row.created_at).to_rfc3339()), + row.duration_ms, + ) + } else { + action_timing_from_state(node, row.attempt, row.created_at) + }; + + entries.push(TimelineEntry { + action_id: row.execution_id.to_string(), + action_name, + module_name, + status, + attempt_number: row.attempt, + dispatched_at, + completed_at, + duration_ms, + request_preview, + response_preview, + error, + }); + } + + Ok(entries) + } + + async fn get_distinct_workflows(&self) -> BackendResult> { + let rows = sqlx::query( + r#" + SELECT DISTINCT COALESCE(ri.workflow_name, wv.workflow_name) AS workflow_name + FROM runner_instances ri + LEFT JOIN workflow_versions wv ON wv.id = ri.workflow_version_id + WHERE COALESCE(ri.workflow_name, wv.workflow_name) IS NOT NULL + ORDER BY workflow_name + "#, + ) + .fetch_all(&self.pool) + .await?; + + let mut workflows = Vec::with_capacity(rows.len()); + for row in rows { + let workflow_name: String = row.get("workflow_name"); + workflows.push(workflow_name); + } + Ok(workflows) + } + + async fn get_distinct_statuses(&self) -> BackendResult> { + Ok(vec![ + "queued".to_string(), + "running".to_string(), + "completed".to_string(), + "failed".to_string(), + ]) + } + + async fn count_schedules(&self) -> BackendResult { + let count = sqlx::query_scalar::<_, i64>( + "SELECT COUNT(*) FROM workflow_schedules WHERE status != 'deleted'", + ) + .fetch_one(&self.pool) + .await?; + + Ok(count) + } + + async fn list_schedules(&self, limit: i64, offset: i64) -> BackendResult> { + let rows = sqlx::query( + r#" + SELECT id, workflow_name, schedule_name, schedule_type, cron_expression, interval_seconds, + status, next_run_at, last_run_at, created_at + FROM workflow_schedules + WHERE status != 'deleted' + 
ORDER BY workflow_name, schedule_name + LIMIT $1 OFFSET $2 + "#, + ) + .bind(limit) + .bind(offset) + .fetch_all(&self.pool) + .await?; + + let mut schedules = Vec::new(); + for row in rows { + schedules.push(ScheduleSummary { + id: row.get::("id").to_string(), + workflow_name: row.get("workflow_name"), + schedule_name: row.get("schedule_name"), + schedule_type: row.get("schedule_type"), + cron_expression: row.get("cron_expression"), + interval_seconds: row.get("interval_seconds"), + status: row.get("status"), + next_run_at: row + .get::>, _>("next_run_at") + .map(|dt| dt.to_rfc3339()), + last_run_at: row + .get::>, _>("last_run_at") + .map(|dt| dt.to_rfc3339()), + created_at: row.get::, _>("created_at").to_rfc3339(), + }); + } + + Ok(schedules) + } + + async fn get_schedule(&self, schedule_id: Uuid) -> BackendResult { + let row = sqlx::query( + r#" + SELECT id, workflow_name, schedule_name, schedule_type, cron_expression, interval_seconds, + jitter_seconds, input_payload, status, next_run_at, last_run_at, last_instance_id, + created_at, updated_at, priority, allow_duplicate + FROM workflow_schedules + WHERE id = $1 + "#, + ) + .bind(schedule_id) + .fetch_optional(&self.pool) + .await? 
+ .ok_or_else(|| BackendError::Message(format!("schedule not found: {}", schedule_id)))?; + + let input_payload: Option = row + .get::>, _>("input_payload") + .and_then(|bytes| { + rmp_serde::from_slice::(&bytes) + .ok() + .map(|v| serde_json::to_string_pretty(&v).unwrap_or_default()) + }); + + Ok(ScheduleDetail { + id: row.get::("id").to_string(), + workflow_name: row.get("workflow_name"), + schedule_name: row.get("schedule_name"), + schedule_type: row.get("schedule_type"), + cron_expression: row.get("cron_expression"), + interval_seconds: row.get("interval_seconds"), + jitter_seconds: row.get("jitter_seconds"), + status: row.get("status"), + next_run_at: row + .get::>, _>("next_run_at") + .map(|dt| dt.to_rfc3339()), + last_run_at: row + .get::>, _>("last_run_at") + .map(|dt| dt.to_rfc3339()), + last_instance_id: row + .get::, _>("last_instance_id") + .map(|id| id.to_string()), + created_at: row.get::, _>("created_at").to_rfc3339(), + updated_at: row.get::, _>("updated_at").to_rfc3339(), + priority: row.get("priority"), + allow_duplicate: row.get("allow_duplicate"), + input_payload, + }) + } + + async fn count_schedule_invocations(&self, schedule_id: Uuid) -> BackendResult { + let count = sqlx::query_scalar::<_, i64>( + r#" + SELECT COUNT(*) + FROM runner_instances + WHERE schedule_id = $1 + "#, + ) + .bind(schedule_id) + .fetch_one(&self.pool) + .await?; + Ok(count) + } + + async fn list_schedule_invocations( + &self, + schedule_id: Uuid, + limit: i64, + offset: i64, + ) -> BackendResult> { + let rows = sqlx::query( + r#" + SELECT instance_id, created_at, state, result, error + FROM runner_instances + WHERE schedule_id = $1 + ORDER BY created_at DESC, instance_id DESC + LIMIT $2 OFFSET $3 + "#, + ) + .bind(schedule_id) + .bind(limit) + .bind(offset) + .fetch_all(&self.pool) + .await?; + + let mut invocations = Vec::with_capacity(rows.len()); + for row in rows { + let state_bytes: Option> = row.get("state"); + let result_bytes: Option> = row.get("result"); + let 
error_bytes: Option> = row.get("error"); + + invocations.push(ScheduleInvocationSummary { + id: row.get("instance_id"), + created_at: row.get("created_at"), + status: determine_status(&state_bytes, &result_bytes, &error_bytes), + }); + } + + Ok(invocations) + } + + async fn update_schedule_status(&self, schedule_id: Uuid, status: &str) -> BackendResult { + let result = sqlx::query( + r#" + UPDATE workflow_schedules + SET status = $2, updated_at = NOW() + WHERE id = $1 + "#, + ) + .bind(schedule_id) + .bind(status) + .execute(&self.pool) + .await?; + + Ok(result.rows_affected() > 0) + } + + async fn get_distinct_schedule_statuses(&self) -> BackendResult> { + Ok(vec!["active".to_string(), "paused".to_string()]) + } + + async fn get_distinct_schedule_types(&self) -> BackendResult> { + Ok(vec!["cron".to_string(), "interval".to_string()]) + } + + async fn get_worker_action_stats( + &self, + window_minutes: i64, + ) -> BackendResult> { + let rows = sqlx::query( + r#" + SELECT + pool_id, + COUNT(DISTINCT worker_id) as active_workers, + SUM(throughput_per_min) / 60.0 as actions_per_sec, + SUM(throughput_per_min) as throughput_per_min, + COALESCE(SUM(total_completed), 0)::BIGINT as total_completed, + PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY median_dequeue_ms) as median_dequeue_ms, + PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY median_handling_ms) as median_handling_ms, + MAX(last_action_at) as last_action_at, + MAX(updated_at) as updated_at + FROM worker_status + WHERE updated_at > NOW() - INTERVAL '1 minute' * $1 + GROUP BY pool_id + ORDER BY actions_per_sec DESC + "#, + ) + .bind(window_minutes) + .fetch_all(&self.pool) + .await?; + + let mut stats = Vec::new(); + for row in rows { + stats.push(WorkerActionRow { + pool_id: row.get::("pool_id").to_string(), + active_workers: row.get::("active_workers"), + actions_per_sec: format!("{:.1}", row.get::("actions_per_sec")), + throughput_per_min: row.get::("throughput_per_min") as i64, + total_completed: 
row.get::("total_completed"), + median_dequeue_ms: row + .get::, _>("median_dequeue_ms") + .map(|v| v as i64), + median_handling_ms: row + .get::, _>("median_handling_ms") + .map(|v| v as i64), + last_action_at: row + .get::>, _>("last_action_at") + .map(|dt| dt.to_rfc3339()), + updated_at: row.get::, _>("updated_at").to_rfc3339(), + }); + } + + Ok(stats) + } + + async fn get_worker_aggregate_stats( + &self, + window_minutes: i64, + ) -> BackendResult { + let row = sqlx::query( + r#" + SELECT + COUNT(DISTINCT worker_id) as active_worker_count, + COALESCE(SUM(throughput_per_min) / 60.0, 0) as actions_per_sec, + COALESCE(SUM(total_in_flight), 0)::BIGINT as total_in_flight, + COALESCE(SUM(dispatch_queue_size), 0)::BIGINT as total_queue_depth + FROM worker_status + WHERE updated_at > NOW() - INTERVAL '1 minute' * $1 + "#, + ) + .bind(window_minutes) + .fetch_one(&self.pool) + .await?; + + Ok(WorkerAggregateStats { + active_worker_count: row.get::("active_worker_count"), + actions_per_sec: format!("{:.1}", row.get::("actions_per_sec")), + total_in_flight: row.get::("total_in_flight"), + total_queue_depth: row.get::("total_queue_depth"), + }) + } + + async fn worker_status_table_exists(&self) -> bool { + sqlx::query_scalar::<_, bool>( + r#" + SELECT EXISTS ( + SELECT FROM information_schema.tables + WHERE table_name = 'worker_status' + ) + "#, + ) + .fetch_one(&self.pool) + .await + .unwrap_or(false) + } + + async fn schedules_table_exists(&self) -> bool { + sqlx::query_scalar::<_, bool>( + r#" + SELECT EXISTS ( + SELECT FROM information_schema.tables + WHERE table_name = 'workflow_schedules' + ) + "#, + ) + .fetch_one(&self.pool) + .await + .unwrap_or(false) + } + + async fn get_worker_statuses(&self, window_minutes: i64) -> BackendResult> { + let rows = sqlx::query( + r#" + SELECT + pool_id, + MAX(active_workers) as active_workers, + COALESCE(SUM(throughput_per_min), 0) as throughput_per_min, + COALESCE(SUM(throughput_per_min) / 60.0, 0) as actions_per_sec, + 
COALESCE(SUM(total_completed), 0)::BIGINT as total_completed, + MAX(last_action_at) as last_action_at, + MAX(updated_at) as updated_at, + PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY median_dequeue_ms) as median_dequeue_ms, + PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY median_handling_ms) as median_handling_ms, + MAX(dispatch_queue_size) as dispatch_queue_size, + MAX(total_in_flight) as total_in_flight, + MAX(median_instance_duration_secs) as median_instance_duration_secs, + MAX(active_instance_count) as active_instance_count, + ( + SELECT COUNT(*)::BIGINT + FROM runner_instances ri + WHERE ri.result IS NOT NULL + AND ri.error IS NULL + ) as total_instances_completed, + MAX(instances_per_sec) as instances_per_sec, + MAX(instances_per_min) as instances_per_min, + ( + SELECT time_series FROM worker_status ws2 + WHERE ws2.pool_id = worker_status.pool_id + AND ws2.time_series IS NOT NULL + ORDER BY ws2.updated_at DESC LIMIT 1 + ) as time_series + FROM worker_status + WHERE updated_at > NOW() - INTERVAL '1 minute' * $1 + GROUP BY pool_id + ORDER BY actions_per_sec DESC + "#, + ) + .bind(window_minutes) + .fetch_all(&self.pool) + .await?; + + let mut statuses = Vec::new(); + for row in rows { + statuses.push(WorkerStatus { + pool_id: row.get::("pool_id"), + active_workers: row.get::, _>("active_workers").unwrap_or(0), + throughput_per_min: row.get::("throughput_per_min"), + actions_per_sec: row.get::("actions_per_sec"), + total_completed: row.get::("total_completed"), + last_action_at: row.get::>, _>("last_action_at"), + updated_at: row.get::, _>("updated_at"), + median_dequeue_ms: row + .get::, _>("median_dequeue_ms") + .map(|v| v as i64), + median_handling_ms: row + .get::, _>("median_handling_ms") + .map(|v| v as i64), + dispatch_queue_size: row.get::, _>("dispatch_queue_size"), + total_in_flight: row.get::, _>("total_in_flight"), + median_instance_duration_secs: row + .get::, _>("median_instance_duration_secs"), + active_instance_count: row + .get::, 
_>("active_instance_count") + .unwrap_or(0), + total_instances_completed: row + .get::, _>("total_instances_completed") + .unwrap_or(0), + instances_per_sec: row + .get::, _>("instances_per_sec") + .unwrap_or(0.0), + instances_per_min: row + .get::, _>("instances_per_min") + .unwrap_or(0.0), + time_series: row.get::>, _>("time_series"), + }); + } + + Ok(statuses) + } +} + +struct DecodedActionResultRow { + created_at: DateTime, + execution_id: Uuid, + attempt: i32, + status: Option, + started_at: Option>, + completed_at: Option>, + duration_ms: Option, + result: Option, +} + +fn decode_msgpack_json(bytes: &[u8]) -> BackendResult { + rmp_serde::from_slice::(bytes) + .map_err(|err| BackendError::Message(format!("failed to decode action result: {err}"))) +} + +fn render_action_request_preview( + action: Option<&ActionCallSpec>, + state: &RunnerState, + action_results: &HashMap, + node_id: Uuid, +) -> String { + let Some(action) = action else { + return "{}".to_string(); + }; + + match replay_action_kwargs(state, action_results, node_id) { + Ok(kwargs) => { + let rendered_map: serde_json::Map = kwargs.into_iter().collect(); + pretty_json(&Value::Object(rendered_map)) + } + Err(_) => format_symbolic_kwargs(action), + } +} + +fn format_symbolic_kwargs(action: &ActionCallSpec) -> String { + if action.kwargs.is_empty() { + return "{}".to_string(); + } + let rendered_map: serde_json::Map = action + .kwargs + .iter() + .map(|(name, expr)| (name.clone(), Value::String(format_value(expr)))) + .collect(); + pretty_json(&Value::Object(rendered_map)) +} + +fn action_timing_from_state( + node: Option<&ExecutionNode>, + attempt: i32, + fallback_completed_at: DateTime, +) -> (Option, Option, Option) { + // Node timing fields represent the latest attempt for this execution id. + // For historical retries, fall back to row timestamps from actions_done. 
+ let Some(node) = node else { + let at = fallback_completed_at.to_rfc3339(); + return (Some(at.clone()), Some(at), None); + }; + if node.action_attempt != attempt { + let at = fallback_completed_at.to_rfc3339(); + return (Some(at.clone()), Some(at), None); + } + + let dispatched_at = node + .started_at + .map(|value| value.to_rfc3339()) + .unwrap_or_else(|| fallback_completed_at.to_rfc3339()); + let completed_dt = node.completed_at.unwrap_or(fallback_completed_at); + let completed_at = completed_dt.to_rfc3339(); + let duration_ms = node + .started_at + .map(|started_at| { + completed_dt + .signed_duration_since(started_at) + .num_milliseconds() + }) + .filter(|duration| *duration >= 0); + + (Some(dispatched_at), Some(completed_at), duration_ms) +} + +fn format_action_result(value: &Value) -> (String, Option) { + let preview = pretty_json(value); + let error = extract_action_error(value); + (preview, error) +} + +fn extract_action_error(value: &Value) -> Option { + let Value::Object(map) = value else { + return None; + }; + let message = map.get("message").and_then(Value::as_str); + let is_exception = map.contains_key("type") && map.contains_key("message"); + if is_exception { + return Some(message.unwrap_or("action failed").to_string()); + } + map.get("error") + .and_then(Value::as_str) + .map(|msg| msg.to_string()) +} + +fn pretty_json(value: &Value) -> String { + serde_json::to_string_pretty(value).unwrap_or_else(|_| "{}".to_string()) +} + +fn determine_status( + state_bytes: &Option>, + result_bytes: &Option>, + error_bytes: &Option>, +) -> InstanceStatus { + if error_bytes.is_some() { + return InstanceStatus::Failed; + } + if result_bytes + .as_deref() + .is_some_and(result_payload_is_error_wrapper) + { + return InstanceStatus::Failed; + } + if result_bytes.is_some() { + return InstanceStatus::Completed; + } + if state_bytes.is_some() { + return InstanceStatus::Running; + } + InstanceStatus::Queued +} + +fn extract_input_preview(state_bytes: &Option>) -> 
String { + let Some(bytes) = state_bytes else { + return "{}".to_string(); + }; + + match rmp_serde::from_slice::(bytes) { + Ok(graph) => { + let count = graph.nodes.len(); + format!("{{nodes: {count}}}") + } + Err(_) => "{}".to_string(), + } +} + +fn format_input_payload(state_bytes: &Option>) -> String { + let Some(bytes) = state_bytes else { + return "{}".to_string(); + }; + + match rmp_serde::from_slice::(bytes) { + Ok(graph) => format_extracted_inputs(&graph.nodes), + Err(_) => "{}".to_string(), + } +} + +fn format_extracted_inputs(nodes: &HashMap) -> String { + let mut input_pairs: Vec<(String, Value)> = nodes + .values() + .filter_map(extract_input_assignment) + .collect(); + if input_pairs.is_empty() { + return "{}".to_string(); + } + input_pairs.sort_by(|(left, _), (right, _)| left.cmp(right)); + let input_map: serde_json::Map = input_pairs.into_iter().collect(); + pretty_json(&Value::Object(input_map)) +} + +fn extract_input_assignment(node: &ExecutionNode) -> Option<(String, Value)> { + let (name, raw_value) = parse_input_assignment_label(&node.label)?; + + if let Ok(value) = serde_json::from_str::(raw_value) { + return Some((name.to_string(), value)); + } + + if let Some(value_expr) = node.assignments.get(name) { + return Some((name.to_string(), value_expr_to_json(value_expr))); + } + + Some((name.to_string(), Value::String(raw_value.to_string()))) +} + +fn parse_input_assignment_label(label: &str) -> Option<(&str, &str)> { + let payload = label.strip_prefix("input ")?; + payload.split_once(" = ") +} + +fn value_expr_to_json(value_expr: &ValueExpr) -> Value { + match value_expr { + ValueExpr::Literal(value) => value.value.clone(), + ValueExpr::List(value) => { + Value::Array(value.elements.iter().map(value_expr_to_json).collect()) + } + ValueExpr::Dict(value) => { + let mut map = serde_json::Map::new(); + for entry in &value.entries { + let key = match value_expr_to_json(&entry.key) { + Value::String(key) => key, + other => other.to_string(), + }; + 
map.insert(key, value_expr_to_json(&entry.value)); + } + Value::Object(map) + } + _ => Value::String(format_value(value_expr)), + } +} + +fn format_instance_result_payload( + status: InstanceStatus, + result_bytes: &Option>, + error_bytes: &Option>, +) -> String { + match status { + InstanceStatus::Failed => { + let payload = error_bytes.as_deref().or(result_bytes.as_deref()); + let Some(bytes) = payload else { + return "(failed)".to_string(); + }; + match rmp_serde::from_slice::(bytes) { + Ok(value) => pretty_json(&normalize_error_payload(value)), + Err(_) => "(decode error)".to_string(), + } + } + InstanceStatus::Completed => { + let Some(bytes) = result_bytes else { + return "(pending)".to_string(); + }; + match rmp_serde::from_slice::(bytes) { + Ok(value) => pretty_json(&normalize_success_payload(value)), + Err(_) => "(decode error)".to_string(), + } + } + InstanceStatus::Running | InstanceStatus::Queued => "(pending)".to_string(), + } +} + +fn normalize_success_payload(value: Value) -> Value { + let Value::Object(mut map) = value else { + return value; + }; + map.remove("result").unwrap_or(Value::Object(map)) +} + +fn normalize_error_payload(value: Value) -> Value { + let Value::Object(mut map) = value else { + return value; + }; + + if let Some(error) = map.remove("error") { + return normalize_error_payload(error); + } + if let Some(exception) = map.remove("__exception__") { + return normalize_error_payload(exception); + } + if let Some(exception) = map.remove("exception") { + return normalize_error_payload(exception); + } + + Value::Object(map) +} + +fn result_payload_is_error_wrapper(bytes: &[u8]) -> bool { + let Ok(value) = rmp_serde::from_slice::(bytes) else { + return false; + }; + let Value::Object(map) = value else { + return false; + }; + map.len() == 1 + && (map.contains_key("error") + || map.contains_key("__exception__") + || map.contains_key("exception")) +} + +fn format_error(error_bytes: &Option>) -> Option { + let bytes = error_bytes.as_ref()?; 
+ + match rmp_serde::from_slice::(bytes) { + Ok(value) => Some(pretty_json(&normalize_error_payload(value))), + Err(_) => Some("(decode error)".to_string()), + } +} + +fn format_node_status(status: &NodeStatus) -> String { + match status { + NodeStatus::Queued => "queued".to_string(), + NodeStatus::Running => "running".to_string(), + NodeStatus::Completed => "completed".to_string(), + NodeStatus::Failed => "failed".to_string(), + } +} + +fn merge_template_status(existing: &NodeStatus, new_status: &NodeStatus) -> NodeStatus { + if node_status_rank(new_status) > node_status_rank(existing) { + new_status.clone() + } else { + existing.clone() + } +} + +fn node_status_rank(status: &NodeStatus) -> u8 { + match status { + NodeStatus::Completed => 0, + NodeStatus::Queued => 1, + NodeStatus::Running => 2, + NodeStatus::Failed => 3, + } +} + +#[cfg(test)] +mod tests { + use std::collections::{HashMap, HashSet}; + + use chrono::{Duration as ChronoDuration, Utc}; + use prost::Message; + use serial_test::serial; + use uuid::Uuid; + use waymark_scheduler_backend::SchedulerBackend; + use waymark_webapp_backend::WebappBackend; + use waymark_worker_status_backend::{WorkerStatusBackend, WorkerStatusUpdate}; + use waymark_workflow_registry_backend::{WorkflowRegistration, WorkflowRegistryBackend}; + + use crate::PostgresBackend; + + use super::super::test_helpers::setup_backend; + use super::*; + + use waymark_dag::EdgeType; + use waymark_ir_parser::parse_program; + use waymark_runner_state::{ + ActionCallSpec, ExecutionEdge, ExecutionNode, LiteralValue, NodeStatus, + value_visitor::ValueExpr, + }; + use waymark_scheduler_core::{CreateScheduleParams, ScheduleType}; + + #[test] + fn format_extracted_inputs_happy_path() { + let mut nodes = HashMap::new(); + let mut first_assignments = HashMap::new(); + first_assignments.insert( + "iterations".to_string(), + ValueExpr::Literal(LiteralValue { + value: serde_json::json!(3), + }), + ); + nodes.insert( + Uuid::new_v4(), + ExecutionNode { + 
node_id: Uuid::new_v4(), + node_type: "assignment".to_string(), + label: "input iterations = 3".to_string(), + status: NodeStatus::Completed, + template_id: None, + targets: vec!["iterations".to_string()], + action: None, + value_expr: None, + assignments: first_assignments, + action_attempt: 0, + started_at: None, + completed_at: None, + scheduled_at: None, + }, + ); + + let mut second_assignments = HashMap::new(); + second_assignments.insert( + "sleep_seconds".to_string(), + ValueExpr::Literal(LiteralValue { + value: serde_json::json!(20), + }), + ); + nodes.insert( + Uuid::new_v4(), + ExecutionNode { + node_id: Uuid::new_v4(), + node_type: "assignment".to_string(), + label: "input sleep_seconds = 20".to_string(), + status: NodeStatus::Completed, + template_id: None, + targets: vec!["sleep_seconds".to_string()], + action: None, + value_expr: None, + assignments: second_assignments, + action_attempt: 0, + started_at: None, + completed_at: None, + scheduled_at: None, + }, + ); + + let rendered = format_extracted_inputs(&nodes); + let value: Value = serde_json::from_str(&rendered).expect("decode rendered input payload"); + assert_eq!( + value, + serde_json::json!({ + "iterations": 3, + "sleep_seconds": 20 + }) + ); + } + + #[test] + fn format_instance_result_payload_unwraps_success_result_wrapper() { + let result_bytes = + rmp_serde::to_vec_named(&serde_json::json!({"result": {"total_iterations": 3}})) + .expect("encode result"); + let rendered = + format_instance_result_payload(InstanceStatus::Completed, &Some(result_bytes), &None); + let value: Value = serde_json::from_str(&rendered).expect("decode result payload"); + assert_eq!(value, serde_json::json!({"total_iterations": 3})); + } + + #[test] + fn format_instance_result_payload_unwraps_error_wrapper() { + let error_bytes = rmp_serde::to_vec_named(&serde_json::json!({ + "error": { + "__exception__": { + "type": "ValueError", + "message": "boom" + } + } + })) + .expect("encode error"); + let rendered = + 
format_instance_result_payload(InstanceStatus::Failed, &None, &Some(error_bytes)); + let value: Value = serde_json::from_str(&rendered).expect("decode result payload"); + assert_eq!( + value, + serde_json::json!({ + "type": "ValueError", + "message": "boom" + }) + ); + } + + #[test] + fn determine_status_marks_wrapped_result_errors_as_failed() { + let result_bytes = + rmp_serde::to_vec_named(&serde_json::json!({"error": {"message": "boom"}})) + .expect("encode result error"); + let status = determine_status(&None, &Some(result_bytes), &None); + assert_eq!(status, InstanceStatus::Failed); + } + + #[test] + fn parse_instance_search_expr_handles_boolean_operators() { + let parsed = parse_instance_search_expr("(alpha OR beta) AND running"); + assert_eq!( + parsed, + Some(InstanceSearchExpr::And( + Box::new(InstanceSearchExpr::Or( + Box::new(InstanceSearchExpr::Term("alpha".to_string())), + Box::new(InstanceSearchExpr::Term("beta".to_string())), + )), + Box::new(InstanceSearchExpr::Term("running".to_string())), + )) + ); + } + + #[test] + fn parse_instance_search_expr_falls_back_for_unbalanced_parentheses() { + let parsed = parse_instance_search_expr("(alpha OR beta"); + assert_eq!( + parsed, + Some(InstanceSearchExpr::Term("(alpha OR beta".to_string())) + ); + } + + #[test] + fn action_timing_from_state_uses_state_timestamps_for_latest_attempt() { + let started_at = Utc::now() - ChronoDuration::milliseconds(1500); + let completed_at = started_at + ChronoDuration::milliseconds(450); + let fallback = Utc::now(); + let node = ExecutionNode { + node_id: Uuid::new_v4(), + node_type: "action_call".to_string(), + label: "@tests.action()".to_string(), + status: NodeStatus::Completed, + template_id: Some("n0".to_string()), + targets: Vec::new(), + action: Some(ActionCallSpec { + action_name: "tests.action".to_string(), + module_name: Some("tests".to_string()), + kwargs: HashMap::new(), + }), + value_expr: None, + assignments: HashMap::new(), + action_attempt: 2, + started_at: 
Some(started_at), + completed_at: Some(completed_at), + scheduled_at: None, + }; + + let (dispatched_at, finished_at, duration_ms) = + action_timing_from_state(Some(&node), 2, fallback); + assert_eq!(dispatched_at, Some(started_at.to_rfc3339())); + assert_eq!(finished_at, Some(completed_at.to_rfc3339())); + assert_eq!(duration_ms, Some(450)); + } + + #[test] + fn action_timing_from_state_falls_back_for_prior_attempt_rows() { + let started_at = Utc::now() - ChronoDuration::milliseconds(1200); + let completed_at = started_at + ChronoDuration::milliseconds(600); + let fallback = Utc::now(); + let node = ExecutionNode { + node_id: Uuid::new_v4(), + node_type: "action_call".to_string(), + label: "@tests.action()".to_string(), + status: NodeStatus::Completed, + template_id: Some("n0".to_string()), + targets: Vec::new(), + action: Some(ActionCallSpec { + action_name: "tests.action".to_string(), + module_name: Some("tests".to_string()), + kwargs: HashMap::new(), + }), + value_expr: None, + assignments: HashMap::new(), + action_attempt: 3, + started_at: Some(started_at), + completed_at: Some(completed_at), + scheduled_at: None, + }; + + let (dispatched_at, finished_at, duration_ms) = + action_timing_from_state(Some(&node), 2, fallback); + assert_eq!(dispatched_at, Some(fallback.to_rfc3339())); + assert_eq!(finished_at, Some(fallback.to_rfc3339())); + assert_eq!(duration_ms, None); + } + + fn sample_execution_node(execution_id: Uuid) -> ExecutionNode { + ExecutionNode { + node_id: execution_id, + node_type: "action_call".to_string(), + label: "@tests.action()".to_string(), + status: NodeStatus::Queued, + template_id: Some("n0".to_string()), + targets: Vec::new(), + action: Some(ActionCallSpec { + action_name: "tests.action".to_string(), + module_name: Some("tests".to_string()), + kwargs: HashMap::from([( + "value".to_string(), + ValueExpr::Literal(LiteralValue { + value: serde_json::json!(7), + }), + )]), + }), + value_expr: None, + assignments: HashMap::new(), + 
action_attempt: 1, + started_at: None, + completed_at: None, + scheduled_at: Some(Utc::now()), + } + } + + fn sample_graph(instance_id: Uuid, execution_id: Uuid) -> GraphUpdate { + let mut nodes = HashMap::new(); + nodes.insert(execution_id, sample_execution_node(execution_id)); + + GraphUpdate { + instance_id, + nodes, + edges: HashSet::from([ExecutionEdge { + source: execution_id, + target: execution_id, + edge_type: EdgeType::StateMachine, + }]), + } + } + + async fn insert_instance_with_graph_with_workflow( + backend: &PostgresBackend, + workflow_name: &str, + ) -> (Uuid, Uuid, Uuid) { + let instance_id = Uuid::new_v4(); + let entry_node = Uuid::new_v4(); + let execution_id = Uuid::new_v4(); + let workflow_version_id = insert_workflow_version(backend, workflow_name).await; + let graph = sample_graph(instance_id, execution_id); + let state_payload = rmp_serde::to_vec_named(&graph).expect("encode graph update"); + + sqlx::query( + "INSERT INTO runner_instances (instance_id, entry_node, workflow_version_id, state) VALUES ($1, $2, $3, $4)", + ) + .bind(instance_id) + .bind(entry_node) + .bind(workflow_version_id) + .bind(state_payload) + .execute(backend.pool()) + .await + .expect("insert runner instance"); + + (instance_id, entry_node, execution_id) + } + + async fn insert_instance_with_graph(backend: &PostgresBackend) -> (Uuid, Uuid, Uuid) { + insert_instance_with_graph_with_workflow(backend, "tests.workflow").await + } + + async fn insert_action_result(backend: &PostgresBackend, execution_id: Uuid) { + let payload = rmp_serde::to_vec_named(&serde_json::json!({"ok": true})) + .expect("encode action result"); + sqlx::query( + "INSERT INTO runner_actions_done (execution_id, attempt, result) VALUES ($1, $2, $3)", + ) + .bind(execution_id) + .bind(1_i32) + .bind(payload) + .execute(backend.pool()) + .await + .expect("insert action result"); + } + + fn sample_program_proto() -> Vec { + let source = r#" +fn main(input: [x], output: [y]): + y = @tests.action(value=x) + 
return y +"#; + let program = parse_program(source.trim()).expect("parse program"); + program.encode_to_vec() + } + + fn loop_program_proto() -> Vec { + let source = r#" +fn main(input: [items], output: [total]): + total = 0 + for item in items: + total = total + item + return total +"#; + let program = parse_program(source.trim()).expect("parse loop program"); + program.encode_to_vec() + } + + async fn insert_workflow_version(backend: &PostgresBackend, workflow_name: &str) -> Uuid { + WorkflowRegistryBackend::upsert_workflow_version( + backend, + &WorkflowRegistration { + workflow_name: workflow_name.to_string(), + workflow_version: "v1".to_string(), + ir_hash: format!("hash-{workflow_name}"), + program_proto: sample_program_proto(), + concurrent: false, + }, + ) + .await + .expect("insert workflow version") + } + + async fn insert_loop_workflow_version(backend: &PostgresBackend, workflow_name: &str) -> Uuid { + WorkflowRegistryBackend::upsert_workflow_version( + backend, + &WorkflowRegistration { + workflow_name: workflow_name.to_string(), + workflow_version: "v1-loop".to_string(), + ir_hash: format!("hash-loop-{workflow_name}"), + program_proto: loop_program_proto(), + concurrent: false, + }, + ) + .await + .expect("insert loop workflow version") + } + + async fn insert_schedule(backend: &PostgresBackend, schedule_name: &str) -> Uuid { + SchedulerBackend::upsert_schedule( + backend, + &CreateScheduleParams { + workflow_name: "tests.workflow".to_string(), + schedule_name: schedule_name.to_string(), + schedule_type: ScheduleType::Interval, + cron_expression: None, + interval_seconds: Some(60), + jitter_seconds: 0, + input_payload: Some( + rmp_serde::to_vec_named(&serde_json::json!({"k": "v"})) + .expect("encode payload"), + ), + priority: 0, + allow_duplicate: false, + }, + ) + .await + .expect("upsert schedule") + .0 + } + + async fn insert_scheduled_instance( + backend: &PostgresBackend, + schedule_id: Uuid, + created_at: DateTime, + with_result: bool, + ) -> 
Uuid { + let instance_id = Uuid::new_v4(); + let entry_node = Uuid::new_v4(); + let execution_id = Uuid::new_v4(); + let workflow_version_id = insert_workflow_version(backend, "tests.workflow").await; + let graph = sample_graph(instance_id, execution_id); + let state_payload = rmp_serde::to_vec_named(&graph).expect("encode graph update"); + let result_payload = if with_result { + Some( + rmp_serde::to_vec_named(&serde_json::json!({"result": {"ok": true}})) + .expect("encode result"), + ) + } else { + None + }; + + sqlx::query( + "INSERT INTO runner_instances (instance_id, entry_node, workflow_version_id, schedule_id, created_at, state, result, error) VALUES ($1, $2, $3, $4, $5, $6, $7, $8)", + ) + .bind(instance_id) + .bind(entry_node) + .bind(workflow_version_id) + .bind(schedule_id) + .bind(created_at) + .bind(state_payload) + .bind(result_payload) + .bind(Option::>::None) + .execute(backend.pool()) + .await + .expect("insert scheduled instance"); + + instance_id + } + + async fn insert_worker_status(backend: &PostgresBackend, pool_id: Uuid) { + WorkerStatusBackend::upsert_worker_status( + backend, + &WorkerStatusUpdate { + pool_id, + throughput_per_min: 180.0, + total_completed: 20, + last_action_at: Some(Utc::now()), + median_dequeue_ms: Some(5), + median_handling_ms: Some(12), + dispatch_queue_size: 3, + total_in_flight: 2, + active_workers: 4, + actions_per_sec: 3.0, + median_instance_duration_secs: Some(0.2), + active_instance_count: 1, + total_instances_completed: 8, + instances_per_sec: 0.5, + instances_per_min: 30.0, + time_series: None, + }, + ) + .await + .expect("upsert worker status"); + } + + #[serial(postgres)] + #[tokio::test] + async fn webapp_count_instances_happy_path() { + let backend = setup_backend().await; + insert_instance_with_graph(&backend).await; + + let count = WebappBackend::count_instances(&backend, None) + .await + .expect("count instances"); + assert_eq!(count, 1); + } + + #[serial(postgres)] + #[tokio::test] + async fn 
webapp_count_instances_applies_search_expression() { + let backend = setup_backend().await; + let (alpha_id, _, _) = + insert_instance_with_graph_with_workflow(&backend, "tests.alpha").await; + let (beta_id, _, _) = + insert_instance_with_graph_with_workflow(&backend, "tests.beta").await; + assert_ne!(alpha_id, beta_id); + + let completed_payload = + rmp_serde::to_vec_named(&serde_json::json!({"result": {"ok": true}})) + .expect("encode completed payload"); + sqlx::query( + "UPDATE runner_instances SET result = $2, current_status = $3 WHERE instance_id = $1", + ) + .bind(beta_id) + .bind(completed_payload) + .bind("completed") + .execute(backend.pool()) + .await + .expect("mark beta completed"); + + let alpha_count = WebappBackend::count_instances(&backend, Some("alpha")) + .await + .expect("count alpha"); + assert_eq!(alpha_count, 1); + + let completed_count = WebappBackend::count_instances(&backend, Some("completed")) + .await + .expect("count completed"); + assert_eq!(completed_count, 1); + + let combined = WebappBackend::count_instances(&backend, Some("(alpha OR completed)")) + .await + .expect("count combined"); + assert_eq!(combined, 2); + } + + #[serial(postgres)] + #[tokio::test] + async fn webapp_list_instances_happy_path() { + let backend = setup_backend().await; + let (instance_id, _, _) = insert_instance_with_graph(&backend).await; + + let instances = WebappBackend::list_instances(&backend, None, 10, 0) + .await + .expect("list instances"); + + assert_eq!(instances.len(), 1); + assert_eq!(instances[0].id, instance_id); + assert_eq!(instances[0].status, InstanceStatus::Running); + assert_eq!( + instances[0].workflow_name, + Some("tests.workflow".to_string()) + ); + } + + #[serial(postgres)] + #[tokio::test] + async fn webapp_list_instances_applies_search_expression() { + let backend = setup_backend().await; + let (alpha_id, _, _) = + insert_instance_with_graph_with_workflow(&backend, "tests.alpha").await; + let _ = 
insert_instance_with_graph_with_workflow(&backend, "tests.beta").await; + + let alpha_instances = WebappBackend::list_instances(&backend, Some("alpha"), 10, 0) + .await + .expect("list alpha"); + assert_eq!(alpha_instances.len(), 1); + assert_eq!(alpha_instances[0].id, alpha_id); + + let running_instances = + WebappBackend::list_instances(&backend, Some("(alpha OR beta) AND running"), 10, 0) + .await + .expect("list running instances"); + assert_eq!(running_instances.len(), 2); + } + + #[serial(postgres)] + #[tokio::test] + async fn webapp_get_instance_happy_path() { + let backend = setup_backend().await; + let (instance_id, _, _) = insert_instance_with_graph(&backend).await; + + let instance = WebappBackend::get_instance(&backend, instance_id) + .await + .expect("get instance"); + + assert_eq!(instance.id, instance_id); + assert_eq!(instance.status, InstanceStatus::Running); + assert_eq!(instance.workflow_name, Some("tests.workflow".to_string())); + } + + #[serial(postgres)] + #[tokio::test] + async fn webapp_workflow_name_prefers_registered_workflow_name() { + let backend = setup_backend().await; + let (instance_id, entry_node, execution_id) = + insert_instance_with_graph_with_workflow(&backend, "tests.workflow_name").await; + + let list = WebappBackend::list_instances(&backend, None, 10, 0) + .await + .expect("list instances"); + assert_eq!(list.len(), 1); + assert_eq!(list[0].id, instance_id); + assert_eq!( + list[0].workflow_name, + Some("tests.workflow_name".to_string()) + ); + + let detail = WebappBackend::get_instance(&backend, instance_id) + .await + .expect("get instance"); + assert_eq!(detail.id, instance_id); + assert_eq!(detail.entry_node, entry_node); + assert_eq!( + detail.workflow_name, + Some("tests.workflow_name".to_string()) + ); + + let graph = WebappBackend::get_execution_graph(&backend, instance_id) + .await + .expect("get graph") + .expect("graph"); + assert!( + graph + .nodes + .iter() + .any(|node| node.id == execution_id.to_string()), + 
"expected action node to remain intact" + ); + } + + #[serial(postgres)] + #[tokio::test] + async fn webapp_get_execution_graph_happy_path() { + let backend = setup_backend().await; + let (instance_id, _, execution_id) = insert_instance_with_graph(&backend).await; + + let graph = WebappBackend::get_execution_graph(&backend, instance_id) + .await + .expect("get execution graph") + .expect("expected execution graph"); + + assert_eq!(graph.nodes.len(), 1); + assert_eq!(graph.edges.len(), 1); + assert_eq!(graph.nodes[0].id, execution_id.to_string()); + assert_eq!(graph.nodes[0].action_name, Some("tests.action".to_string())); + } + + #[serial(postgres)] + #[tokio::test] + async fn webapp_get_workflow_graph_uses_template_node_ids() { + let backend = setup_backend().await; + let (instance_id, _, execution_id) = insert_instance_with_graph(&backend).await; + + let graph = WebappBackend::get_workflow_graph(&backend, instance_id) + .await + .expect("get workflow graph") + .expect("expected workflow graph"); + + assert!(!graph.nodes.is_empty(), "workflow graph should have nodes"); + assert!( + graph + .nodes + .iter() + .all(|node| node.id != execution_id.to_string()), + "workflow graph should use template node ids, not runtime execution ids" + ); + assert!( + graph + .nodes + .iter() + .any(|node| node.node_type == "action_call"), + "workflow graph should include action_call template nodes" + ); + } + + #[serial(postgres)] + #[tokio::test] + async fn webapp_get_workflow_graph_marks_loop_back_edges() { + let backend = setup_backend().await; + let instance_id = Uuid::new_v4(); + let entry_node = Uuid::new_v4(); + let execution_id = Uuid::new_v4(); + let workflow_version_id = + insert_loop_workflow_version(&backend, "tests.loop_workflow").await; + let graph = sample_graph(instance_id, execution_id); + let state_payload = rmp_serde::to_vec_named(&graph).expect("encode graph update"); + + sqlx::query( + "INSERT INTO runner_instances (instance_id, entry_node, workflow_version_id, 
state) VALUES ($1, $2, $3, $4)", + ) + .bind(instance_id) + .bind(entry_node) + .bind(workflow_version_id) + .bind(state_payload) + .execute(backend.pool()) + .await + .expect("insert loop runner instance"); + + let workflow_graph = WebappBackend::get_workflow_graph(&backend, instance_id) + .await + .expect("get workflow graph") + .expect("expected workflow graph"); + + assert!( + workflow_graph + .edges + .iter() + .any(|edge| edge.edge_type == "state_machine_loop_back"), + "loop workflows should emit at least one loop_back edge" + ); + } + + #[serial(postgres)] + #[tokio::test] + async fn webapp_get_action_results_happy_path() { + let backend = setup_backend().await; + let (instance_id, _, execution_id) = insert_instance_with_graph(&backend).await; + insert_action_result(&backend, execution_id).await; + + let entries = WebappBackend::get_action_results(&backend, instance_id) + .await + .expect("get action results"); + + assert_eq!(entries.len(), 1); + assert_eq!(entries[0].action_id, execution_id.to_string()); + assert_eq!(entries[0].action_name, "tests.action"); + assert_eq!(entries[0].status, "completed"); + assert!(entries[0].request_preview.contains("\"value\": 7")); + } + + #[serial(postgres)] + #[tokio::test] + async fn webapp_get_distinct_workflows_happy_path() { + let backend = setup_backend().await; + insert_instance_with_graph_with_workflow(&backend, "tests.workflow_a").await; + insert_instance_with_graph_with_workflow(&backend, "tests.workflow_b").await; + + let workflows = WebappBackend::get_distinct_workflows(&backend) + .await + .expect("get distinct workflows"); + assert_eq!( + workflows, + vec![ + "tests.workflow_a".to_string(), + "tests.workflow_b".to_string() + ] + ); + } + + #[serial(postgres)] + #[tokio::test] + async fn webapp_get_distinct_statuses_happy_path() { + let backend = setup_backend().await; + + let statuses = WebappBackend::get_distinct_statuses(&backend) + .await + .expect("get distinct statuses"); + assert_eq!(statuses, 
vec!["queued", "running", "completed", "failed"]); + } + + #[serial(postgres)] + #[tokio::test] + async fn webapp_count_schedules_happy_path() { + let backend = setup_backend().await; + insert_schedule(&backend, "count").await; + + let count = WebappBackend::count_schedules(&backend) + .await + .expect("count schedules"); + assert_eq!(count, 1); + } + + #[serial(postgres)] + #[tokio::test] + async fn webapp_list_schedules_happy_path() { + let backend = setup_backend().await; + let schedule_id = insert_schedule(&backend, "list").await; + + let schedules = WebappBackend::list_schedules(&backend, 10, 0) + .await + .expect("list schedules"); + assert_eq!(schedules.len(), 1); + assert_eq!(schedules[0].id, schedule_id.to_string()); + assert_eq!(schedules[0].schedule_name, "list"); + } + + #[serial(postgres)] + #[tokio::test] + async fn webapp_get_schedule_happy_path() { + let backend = setup_backend().await; + let schedule_id = insert_schedule(&backend, "detail").await; + + let schedule = WebappBackend::get_schedule(&backend, schedule_id) + .await + .expect("get schedule"); + assert_eq!(schedule.id, schedule_id.to_string()); + assert_eq!(schedule.schedule_name, "detail"); + } + + #[serial(postgres)] + #[tokio::test] + async fn webapp_schedule_invocations_are_filtered_by_schedule_id() { + let backend = setup_backend().await; + let schedule_id = insert_schedule(&backend, "invocations-a").await; + let other_schedule_id = insert_schedule(&backend, "invocations-b").await; + + let running_instance_id = insert_scheduled_instance( + &backend, + schedule_id, + Utc::now() - ChronoDuration::minutes(2), + false, + ) + .await; + let completed_instance_id = insert_scheduled_instance( + &backend, + schedule_id, + Utc::now() - ChronoDuration::minutes(1), + true, + ) + .await; + let _other_instance_id = + insert_scheduled_instance(&backend, other_schedule_id, Utc::now(), true).await; + + let total = WebappBackend::count_schedule_invocations(&backend, schedule_id) + .await + 
.expect("count schedule invocations"); + assert_eq!(total, 2); + + let invocations = WebappBackend::list_schedule_invocations(&backend, schedule_id, 10, 0) + .await + .expect("list schedule invocations"); + assert_eq!(invocations.len(), 2); + assert_eq!(invocations[0].id, completed_instance_id); + assert_eq!(invocations[0].status, InstanceStatus::Completed); + assert_eq!(invocations[1].id, running_instance_id); + assert_eq!(invocations[1].status, InstanceStatus::Running); + } + + #[serial(postgres)] + #[tokio::test] + async fn webapp_update_schedule_status_happy_path() { + let backend = setup_backend().await; + let schedule_id = insert_schedule(&backend, "update").await; + + let updated = WebappBackend::update_schedule_status(&backend, schedule_id, "paused") + .await + .expect("update schedule status"); + assert!(updated); + + let schedule = WebappBackend::get_schedule(&backend, schedule_id) + .await + .expect("get schedule"); + assert_eq!(schedule.status, "paused"); + } + + #[serial(postgres)] + #[tokio::test] + async fn webapp_get_distinct_schedule_statuses_happy_path() { + let backend = setup_backend().await; + + let statuses = WebappBackend::get_distinct_schedule_statuses(&backend) + .await + .expect("get distinct schedule statuses"); + assert_eq!(statuses, vec!["active", "paused"]); + } + + #[serial(postgres)] + #[tokio::test] + async fn webapp_get_distinct_schedule_types_happy_path() { + let backend = setup_backend().await; + + let types = WebappBackend::get_distinct_schedule_types(&backend) + .await + .expect("get distinct schedule types"); + assert_eq!(types, vec!["cron", "interval"]); + } + + #[serial(postgres)] + #[tokio::test] + async fn webapp_get_worker_action_stats_happy_path() { + let backend = setup_backend().await; + let pool_id = Uuid::new_v4(); + insert_worker_status(&backend, pool_id).await; + + let rows = WebappBackend::get_worker_action_stats(&backend, 60) + .await + .expect("get worker action stats"); + assert_eq!(rows.len(), 1); + 
assert_eq!(rows[0].pool_id, pool_id.to_string()); + assert_eq!(rows[0].total_completed, 20); + } + + #[serial(postgres)] + #[tokio::test] + async fn webapp_get_worker_aggregate_stats_happy_path() { + let backend = setup_backend().await; + insert_worker_status(&backend, Uuid::new_v4()).await; + + let aggregate = WebappBackend::get_worker_aggregate_stats(&backend, 60) + .await + .expect("get worker aggregate stats"); + assert_eq!(aggregate.active_worker_count, 1); + assert_eq!(aggregate.total_in_flight, 2); + assert_eq!(aggregate.total_queue_depth, 3); + } + + #[serial(postgres)] + #[tokio::test] + async fn webapp_worker_status_table_exists_happy_path() { + let backend = setup_backend().await; + + assert!(WebappBackend::worker_status_table_exists(&backend).await); + } + + #[serial(postgres)] + #[tokio::test] + async fn webapp_schedules_table_exists_happy_path() { + let backend = setup_backend().await; + + assert!(WebappBackend::schedules_table_exists(&backend).await); + } + + #[serial(postgres)] + #[tokio::test] + async fn webapp_get_worker_statuses_happy_path() { + let backend = setup_backend().await; + let pool_id = Uuid::new_v4(); + insert_worker_status(&backend, pool_id).await; + let (completed_instance_id, _, _) = insert_instance_with_graph(&backend).await; + let completed_payload = + rmp_serde::to_vec_named(&serde_json::json!({"ok": true})).expect("encode result"); + sqlx::query("UPDATE runner_instances SET result = $2 WHERE instance_id = $1") + .bind(completed_instance_id) + .bind(completed_payload) + .execute(backend.pool()) + .await + .expect("mark instance completed"); + + let (failed_instance_id, _, _) = insert_instance_with_graph(&backend).await; + let error_payload = rmp_serde::to_vec_named(&serde_json::json!({ + "type": "Exception", + "message": "boom", + })) + .expect("encode error"); + sqlx::query("UPDATE runner_instances SET error = $2 WHERE instance_id = $1") + .bind(failed_instance_id) + .bind(error_payload) + .execute(backend.pool()) + .await + 
.expect("mark instance failed"); + + let statuses = WebappBackend::get_worker_statuses(&backend, 60) + .await + .expect("get worker statuses"); + assert_eq!(statuses.len(), 1); + assert_eq!(statuses[0].pool_id, pool_id); + assert_eq!(statuses[0].total_completed, 20); + assert_eq!(statuses[0].total_instances_completed, 1); + assert_eq!(statuses[0].total_in_flight, Some(2)); + assert_eq!(statuses[0].dispatch_queue_size, Some(3)); + } +} diff --git a/crates/backends-core/src/lib.rs b/crates/backends-core/src/lib.rs index 50f807b4..ee49d385 100644 --- a/crates/backends-core/src/lib.rs +++ b/crates/backends-core/src/lib.rs @@ -27,3 +27,10 @@ pub type InnerError = (); /// TODO: move away from the single-`Result` type aliases as we want to vary /// rrors per-call. pub type BackendResult = Result>; + +#[cfg(feature = "sqlx-error")] +impl From for BackendError { + fn from(value: sqlx::Error) -> Self { + Self::Inner(value) + } +} diff --git a/crates/core-backend/src/lib.rs b/crates/core-backend/src/lib.rs index e38f5cdd..5e876b29 100644 --- a/crates/core-backend/src/lib.rs +++ b/crates/core-backend/src/lib.rs @@ -4,7 +4,7 @@ mod data; use uuid::Uuid; -pub use waymark_backends_core::{BackendError, BackendResult}; +use waymark_backends_core::BackendResult; pub use self::data::*; diff --git a/crates/dag/Cargo.toml b/crates/dag/Cargo.toml index 54abfd13..a8bda894 100644 --- a/crates/dag/Cargo.toml +++ b/crates/dag/Cargo.toml @@ -11,4 +11,4 @@ uuid = { workspace = true, features = ["serde", "v4"] } waymark-proto = { workspace = true, features = ["serde"] } [dev-dependencies] -waymark = { workspace = true } +waymark-ir-parser = { workspace = true } diff --git a/crates/dag/src/builder/test_helpers.rs b/crates/dag/src/builder/test_helpers.rs index ed33ca00..0c9811c1 100644 --- a/crates/dag/src/builder/test_helpers.rs +++ b/crates/dag/src/builder/test_helpers.rs @@ -1,5 +1,5 @@ use crate::{DAG, DAGConverter, convert_to_dag}; -use waymark::waymark_core::ir_parser::parse_program; +use 
waymark_ir_parser::parse_program; use waymark_proto::ast as ir; pub(super) fn dedent(source: &str) -> String { diff --git a/crates/dag/src/validate.rs b/crates/dag/src/validate.rs index b6aa67e1..0e48504e 100644 --- a/crates/dag/src/validate.rs +++ b/crates/dag/src/validate.rs @@ -361,7 +361,7 @@ fn collect_expr_variables(expr: &ir::Expr, vars: &mut HashSet) { mod tests { use super::validate_dag; use crate::convert_to_dag; - use waymark::waymark_core::ir_parser::parse_program; + use waymark_ir_parser::parse_program; #[test] fn validate_dag_rejects_unresolved_variable_reference() { diff --git a/crates/garbage-collector-backend/src/lib.rs b/crates/garbage-collector-backend/src/lib.rs index d3f2e234..af8badff 100644 --- a/crates/garbage-collector-backend/src/lib.rs +++ b/crates/garbage-collector-backend/src/lib.rs @@ -1,6 +1,6 @@ use chrono::{DateTime, Utc}; -pub use waymark_backends_core::{BackendError, BackendResult}; +use waymark_backends_core::BackendResult; #[derive(Clone, Copy, Debug, Default)] /// Summary of a garbage collection sweep. diff --git a/crates/integration-support/Cargo.toml b/crates/integration-support/Cargo.toml new file mode 100644 index 00000000..1e24644c --- /dev/null +++ b/crates/integration-support/Cargo.toml @@ -0,0 +1,10 @@ +[package] +name = "waymark-integration-support" +version = "0.1.0" +edition = "2024" + +[dependencies] +anyhow = { workspace = true } +sqlx = { workspace = true } +tokio = { workspace = true, features = ["process", "time", "sync"] } +waymark-backend-postgres-migrations = { workspace = true } diff --git a/crates/integration-support/src/lib.rs b/crates/integration-support/src/lib.rs new file mode 100644 index 00000000..db198a86 --- /dev/null +++ b/crates/integration-support/src/lib.rs @@ -0,0 +1,5 @@ +//! Shared integration harness helpers used by test binaries and Rust tests. 
+ +mod postgres; + +pub use postgres::{LOCAL_POSTGRES_DSN, connect_pool, ensure_local_postgres}; diff --git a/crates/integration-support/src/postgres.rs b/crates/integration-support/src/postgres.rs new file mode 100644 index 00000000..59c9caec --- /dev/null +++ b/crates/integration-support/src/postgres.rs @@ -0,0 +1,103 @@ +//! Shared Postgres bootstrap for integration harnesses. + +use std::path::PathBuf; +use std::time::{Duration, Instant}; + +use anyhow::{Context, Result, anyhow, bail}; +use sqlx::{PgPool, postgres::PgPoolOptions}; +use tokio::process::Command; +use tokio::sync::OnceCell; + +pub const LOCAL_POSTGRES_DSN: &str = "postgresql://waymark:waymark@127.0.0.1:5433/waymark"; + +const READY_TIMEOUT: Duration = Duration::from_secs(45); +const RETRY_DELAY: Duration = Duration::from_millis(500); +const POOL_MAX_CONNECTIONS: u32 = 32; +const POOL_ACQUIRE_TIMEOUT: Duration = Duration::from_secs(15); + +static LOCAL_POSTGRES_BOOTSTRAPPED: OnceCell<()> = OnceCell::const_new(); + +/// Ensure the default local Postgres is available and migrated. +/// +/// This helper is intended for local integration workflows where the default +/// DSN maps to the repository docker-compose service. +pub async fn ensure_local_postgres() -> Result<()> { + LOCAL_POSTGRES_BOOTSTRAPPED + .get_or_try_init(|| async { ensure_local_postgres_impl().await }) + .await?; + Ok(()) +} + +/// Connect a PgPool using integration defaults. +pub async fn connect_pool(dsn: &str) -> Result { + Ok(PgPoolOptions::new() + .max_connections(POOL_MAX_CONNECTIONS) + .acquire_timeout(POOL_ACQUIRE_TIMEOUT) + .connect(dsn) + .await?) 
+} + +async fn ensure_local_postgres_impl() -> Result<()> { + if let Ok(pool) = connect_pool(LOCAL_POSTGRES_DSN).await { + waymark_backend_postgres_migrations::run(&pool) + .await + .context("run migrations for existing local postgres")?; + pool.close().await; + return Ok(()); + } + + run_compose_up().await?; + let pool = wait_for_postgres(LOCAL_POSTGRES_DSN).await?; + waymark_backend_postgres_migrations::run(&pool) + .await + .context("run migrations for local postgres")?; + pool.close().await; + Ok(()) +} + +async fn run_compose_up() -> Result<()> { + let root = project_root(); + let status = Command::new("docker") + .arg("compose") + .arg("-f") + .arg("../../docker-compose.yml") + .arg("up") + .arg("-d") + .arg("postgres") + .current_dir(&root) + .status() + .await + .with_context(|| format!("failed to run docker compose in {}", root.display()))?; + + if !status.success() { + bail!("docker compose up -d postgres exited with status {status}"); + } + + Ok(()) +} + +async fn wait_for_postgres(dsn: &str) -> Result { + let deadline = Instant::now() + READY_TIMEOUT; + let mut last_error = None; + + while Instant::now() < deadline { + match connect_pool(dsn).await { + Ok(pool) => return Ok(pool), + Err(err) => { + last_error = Some(err); + tokio::time::sleep(RETRY_DELAY).await; + } + } + } + + Err(anyhow!( + "timed out waiting for postgres at {dsn}; last error: {}", + last_error + .map(|err| err.to_string()) + .unwrap_or_else(|| "unknown".to_string()) + )) +} + +fn project_root() -> PathBuf { + PathBuf::from(env!("CARGO_MANIFEST_DIR")) +} diff --git a/crates/runner/src/lib.rs b/crates/runner/src/lib.rs index ed59081f..684a49a0 100644 --- a/crates/runner/src/lib.rs +++ b/crates/runner/src/lib.rs @@ -4,7 +4,9 @@ pub mod executor; pub mod expression_evaluator; pub mod replay; pub(crate) mod retry; -pub(crate) mod synthetic_exceptions; + +/// TODO: make `pub(crate)` +pub mod synthetic_exceptions; pub use executor::{ DurableUpdates, ExecutorStep, RunnerExecutor, 
RunnerExecutorError, SleepRequest, diff --git a/crates/runner/src/synthetic_exceptions.rs b/crates/runner/src/synthetic_exceptions.rs index df89b71f..5bd2be0d 100644 --- a/crates/runner/src/synthetic_exceptions.rs +++ b/crates/runner/src/synthetic_exceptions.rs @@ -3,13 +3,13 @@ use serde_json::Value; #[derive(Clone, Copy, Debug, PartialEq, Eq)] -pub(crate) enum SyntheticExceptionType { +pub enum SyntheticExceptionType { ExecutorResume, ActionTimeout, } impl SyntheticExceptionType { - pub(crate) fn as_type_str(self) -> &'static str { + pub fn as_type_str(self) -> &'static str { match self { Self::ExecutorResume => "ExecutorResume", Self::ActionTimeout => "ActionTimeout", @@ -24,7 +24,7 @@ impl SyntheticExceptionType { } } - pub(crate) fn from_value(value: &Value) -> Option { + pub fn from_value(value: &Value) -> Option { let Value::Object(map) = value else { return None; }; @@ -34,7 +34,7 @@ impl SyntheticExceptionType { } } -pub(crate) fn build_synthetic_exception_value( +pub fn build_synthetic_exception_value( exception_type: SyntheticExceptionType, message: impl Into, fields: Vec<(String, Value)>, diff --git a/crates/test-support/Cargo.toml b/crates/test-support/Cargo.toml new file mode 100644 index 00000000..829d2395 --- /dev/null +++ b/crates/test-support/Cargo.toml @@ -0,0 +1,8 @@ +[package] +name = "waymark-test-support" +version = "0.1.0" +edition = "2024" + +[dependencies] +sqlx = { workspace = true } +waymark-integration-support = { workspace = true } diff --git a/crates/test-support/src/lib.rs b/crates/test-support/src/lib.rs new file mode 100644 index 00000000..5e34abaa --- /dev/null +++ b/crates/test-support/src/lib.rs @@ -0,0 +1,5 @@ +//! Shared test fixtures for Rust tests. + +mod postgres; + +pub use postgres::postgres_setup; diff --git a/crates/test-support/src/postgres.rs b/crates/test-support/src/postgres.rs new file mode 100644 index 00000000..e76bf812 --- /dev/null +++ b/crates/test-support/src/postgres.rs @@ -0,0 +1,15 @@ +//! 
Shared Postgres fixture bootstrapped from root docker-compose. + +use sqlx::PgPool; + +use waymark_integration_support::{LOCAL_POSTGRES_DSN, connect_pool, ensure_local_postgres}; + +/// Ensure test Postgres is available and migrated, then return a pooled connection. +pub async fn postgres_setup() -> PgPool { + ensure_local_postgres() + .await + .unwrap_or_else(|err| panic!("postgres_setup bootstrap failed: {err:#}")); + connect_pool(LOCAL_POSTGRES_DSN) + .await + .unwrap_or_else(|err| panic!("postgres_setup connect failed: {err:#}")) +} diff --git a/crates/webapp-backend/src/lib.rs b/crates/webapp-backend/src/lib.rs index 354e0e67..bc8f365c 100644 --- a/crates/webapp-backend/src/lib.rs +++ b/crates/webapp-backend/src/lib.rs @@ -1,5 +1,5 @@ use uuid::Uuid; -pub use waymark_backends_core::{BackendError, BackendResult}; +use waymark_backends_core::BackendResult; use waymark_webapp_core::{ ExecutionGraphView, InstanceDetail, InstanceSummary, ScheduleDetail, ScheduleInvocationSummary, ScheduleSummary, TimelineEntry, WorkerActionRow, WorkerAggregateStats, WorkerStatus, diff --git a/crates/webapp-core/src/lib.rs b/crates/webapp-core/src/lib.rs index 7805c428..61a4a453 100644 --- a/crates/webapp-core/src/lib.rs +++ b/crates/webapp-core/src/lib.rs @@ -4,58 +4,6 @@ use chrono::{DateTime, Utc}; use serde::{Deserialize, Serialize}; use uuid::Uuid; -/// Configuration for the webapp server. -#[derive(Debug, Clone)] -pub struct WebappConfig { - pub enabled: bool, - pub host: String, - pub port: u16, -} - -impl Default for WebappConfig { - fn default() -> Self { - Self { - enabled: false, - host: "0.0.0.0".to_string(), - port: 24119, - } - } -} - -impl WebappConfig { - /// Create config from environment variables. 
- pub fn from_env() -> Self { - let enabled = std::env::var("WAYMARK_WEBAPP_ENABLED") - .map(|v| v == "true" || v == "1") - .unwrap_or(false); - - let (host, port) = std::env::var("WAYMARK_WEBAPP_ADDR") - .ok() - .and_then(|addr| { - let parts: Vec<&str> = addr.split(':').collect(); - if parts.len() == 2 { - let host = parts[0].to_string(); - let port = parts[1].parse().ok()?; - Some((host, port)) - } else { - None - } - }) - .unwrap_or_else(|| ("0.0.0.0".to_string(), 24119)); - - Self { - enabled, - host, - port, - } - } - - /// Get the bind address. - pub fn bind_addr(&self) -> String { - format!("{}:{}", self.host, self.port) - } -} - /// Instance status. #[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] #[serde(rename_all = "lowercase")] From f15dca096936ebfddc9ecdd3011a905d04b82107 Mon Sep 17 00:00:00 2001 From: MOZGIII Date: Wed, 25 Feb 2026 21:46:00 +0400 Subject: [PATCH 5/5] Integrate separate crates into waymark main crate --- Cargo.lock | 23 +- crates/fuzzer/Cargo.toml | 4 + crates/fuzzer/src/harness.rs | 8 +- crates/waymark/Cargo.toml | 23 +- crates/waymark/migrations/0001_init.sql | 115 - .../0002_runner_actions_done_execution_id.sql | 7 - .../migrations/0003_instance_locks.sql | 12 - .../migrations/0004_workflow_versions.sql | 21 - ...5_runner_instances_workflow_version_id.sql | 7 - .../0006_drop_unused_runner_tables.sql | 4 - .../0007_runner_instances_schedule_id.sql | 5 - .../0008_runner_actions_done_timing.sql | 14 - .../0009_instance_search_columns.sql | 63 - crates/waymark/src/backends/base.rs | 366 -- crates/waymark/src/backends/memory.rs | 814 ----- crates/waymark/src/backends/mod.rs | 15 - crates/waymark/src/backends/postgres/core.rs | 1992 ----------- crates/waymark/src/backends/postgres/mod.rs | 116 - .../waymark/src/backends/postgres/registry.rs | 146 - .../src/backends/postgres/scheduler.rs | 604 ---- .../src/backends/postgres/test_helpers.rs | 27 - .../waymark/src/backends/postgres/webapp.rs | 2324 ------------- 
crates/waymark/src/bin/integration_test.rs | 15 +- crates/waymark/src/bin/soak-harness.rs | 11 +- crates/waymark/src/bin/start-workers.rs | 5 +- crates/waymark/src/bin/waymark-bridge.rs | 22 +- crates/waymark/src/db.rs | 14 - crates/waymark/src/garbage_collector/task.rs | 5 +- crates/waymark/src/integration_support/mod.rs | 5 - .../src/integration_support/postgres.rs | 105 - crates/waymark/src/lib.rs | 10 +- crates/waymark/src/observability.rs | 2 +- crates/waymark/src/scheduler/mod.rs | 4 - crates/waymark/src/scheduler/task.rs | 32 +- crates/waymark/src/scheduler/types.rs | 139 - crates/waymark/src/scheduler/utils.rs | 181 - crates/waymark/src/test_support/mod.rs | 5 - crates/waymark/src/test_support/postgres.rs | 15 - .../waymark/src/waymark_core/cli/benchmark.rs | 15 +- crates/waymark/src/waymark_core/cli/smoke.rs | 8 +- crates/waymark/src/waymark_core/ir_format.rs | 2 +- crates/waymark/src/waymark_core/lock.rs | 5 +- crates/waymark/src/waymark_core/mod.rs | 3 - crates/waymark/src/waymark_core/runloop.rs | 32 +- .../waymark/src/waymark_core/runloop/tests.rs | 136 +- .../src/waymark_core/runner/executor.rs | 3031 ----------------- .../runner/expression_evaluator.rs | 1058 ------ crates/waymark/src/waymark_core/runner/mod.rs | 19 - .../waymark/src/waymark_core/runner/replay.rs | 658 ---- .../waymark/src/waymark_core/runner/retry.rs | 137 - .../waymark/src/waymark_core/runner/state.rs | 2201 ------------ .../runner/synthetic_exceptions.rs | 90 - .../src/waymark_core/runner/value_visitor.rs | 533 --- crates/waymark/src/webapp/server.rs | 45 +- crates/waymark/src/webapp/types.rs | 248 +- crates/waymark/src/workers/status.rs | 2 +- 56 files changed, 162 insertions(+), 15341 deletions(-) delete mode 100644 crates/waymark/migrations/0001_init.sql delete mode 100644 crates/waymark/migrations/0002_runner_actions_done_execution_id.sql delete mode 100644 crates/waymark/migrations/0003_instance_locks.sql delete mode 100644 crates/waymark/migrations/0004_workflow_versions.sql 
delete mode 100644 crates/waymark/migrations/0005_runner_instances_workflow_version_id.sql delete mode 100644 crates/waymark/migrations/0006_drop_unused_runner_tables.sql delete mode 100644 crates/waymark/migrations/0007_runner_instances_schedule_id.sql delete mode 100644 crates/waymark/migrations/0008_runner_actions_done_timing.sql delete mode 100644 crates/waymark/migrations/0009_instance_search_columns.sql delete mode 100644 crates/waymark/src/backends/base.rs delete mode 100644 crates/waymark/src/backends/memory.rs delete mode 100644 crates/waymark/src/backends/mod.rs delete mode 100644 crates/waymark/src/backends/postgres/core.rs delete mode 100644 crates/waymark/src/backends/postgres/mod.rs delete mode 100644 crates/waymark/src/backends/postgres/registry.rs delete mode 100644 crates/waymark/src/backends/postgres/scheduler.rs delete mode 100644 crates/waymark/src/backends/postgres/test_helpers.rs delete mode 100644 crates/waymark/src/backends/postgres/webapp.rs delete mode 100644 crates/waymark/src/db.rs delete mode 100644 crates/waymark/src/integration_support/mod.rs delete mode 100644 crates/waymark/src/integration_support/postgres.rs delete mode 100644 crates/waymark/src/scheduler/types.rs delete mode 100644 crates/waymark/src/scheduler/utils.rs delete mode 100644 crates/waymark/src/test_support/mod.rs delete mode 100644 crates/waymark/src/test_support/postgres.rs delete mode 100644 crates/waymark/src/waymark_core/runner/executor.rs delete mode 100644 crates/waymark/src/waymark_core/runner/expression_evaluator.rs delete mode 100644 crates/waymark/src/waymark_core/runner/mod.rs delete mode 100644 crates/waymark/src/waymark_core/runner/replay.rs delete mode 100644 crates/waymark/src/waymark_core/runner/retry.rs delete mode 100644 crates/waymark/src/waymark_core/runner/state.rs delete mode 100644 crates/waymark/src/waymark_core/runner/synthetic_exceptions.rs delete mode 100644 crates/waymark/src/waymark_core/runner/value_visitor.rs diff --git a/Cargo.lock 
b/Cargo.lock index 9737bb86..08e29a10 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3366,10 +3366,27 @@ dependencies = [ "tracing-chrome", "tracing-subscriber", "uuid", + "waymark-backend-fault-injection", + "waymark-backend-memory", + "waymark-backend-postgres", + "waymark-backend-postgres-migrations", + "waymark-backends-core", + "waymark-core-backend", "waymark-dag", + "waymark-garbage-collector-backend", + "waymark-integration-support", "waymark-ir-parser", - "waymark-observability-macros", + "waymark-observability", "waymark-proto", + "waymark-runner", + "waymark-runner-state", + "waymark-scheduler-backend", + "waymark-scheduler-core", + "waymark-test-support", + "waymark-webapp-backend", + "waymark-webapp-core", + "waymark-worker-status-backend", + "waymark-workflow-registry-backend", ] [[package]] @@ -3493,8 +3510,12 @@ dependencies = [ "tokio", "uuid", "waymark", + "waymark-backend-memory", + "waymark-core-backend", "waymark-dag", "waymark-ir-parser", + "waymark-runner-state", + "waymark-workflow-registry-backend", ] [[package]] diff --git a/crates/fuzzer/Cargo.toml b/crates/fuzzer/Cargo.toml index 4e0fd1e0..8c7c039d 100644 --- a/crates/fuzzer/Cargo.toml +++ b/crates/fuzzer/Cargo.toml @@ -15,3 +15,7 @@ tokio = { workspace = true } waymark = { workspace = true } waymark-dag = { workspace = true } waymark-ir-parser = { workspace = true } +waymark-runner-state = { workspace = true } +waymark-backend-memory = { workspace = true } +waymark-core-backend = { workspace = true } +waymark-workflow-registry-backend = { workspace = true } diff --git a/crates/fuzzer/src/harness.rs b/crates/fuzzer/src/harness.rs index 59dc38c2..2bec4043 100644 --- a/crates/fuzzer/src/harness.rs +++ b/crates/fuzzer/src/harness.rs @@ -9,17 +9,17 @@ use prost::Message; use serde_json::Value; use sha2::{Digest, Sha256}; use uuid::Uuid; +use waymark_backend_memory::MemoryBackend; +use waymark_core_backend::QueuedInstance; +use waymark_workflow_registry_backend::{WorkflowRegistration, 
WorkflowRegistryBackend as _}; use super::generator::GeneratedCase; -use waymark::backends::{ - MemoryBackend, QueuedInstance, WorkflowRegistration, WorkflowRegistryBackend, -}; use waymark::messages::ast as ir; use waymark::waymark_core::runloop::{RunLoop, RunLoopSupervisorConfig}; -use waymark::waymark_core::runner::RunnerState; use waymark::workers::{ActionCallable, InlineWorkerPool, WorkerPoolError}; use waymark_dag::convert_to_dag; use waymark_ir_parser::parse_program; +use waymark_runner_state::RunnerState; pub async fn run_case(case_index: usize, case: &GeneratedCase) -> Result<()> { let program = parse_program(case.source.trim()).map_err(|err| { diff --git a/crates/waymark/Cargo.toml b/crates/waymark/Cargo.toml index 83c06cd3..91b817bf 100644 --- a/crates/waymark/Cargo.toml +++ b/crates/waymark/Cargo.toml @@ -17,9 +17,25 @@ name = "smoke" path = "src/bin/smoke.rs" [dependencies] -waymark-proto = { workspace = true, features = ["serde", "client", "server"] } +waymark-core-backend = { workspace = true } waymark-dag = { workspace = true } waymark-ir-parser = { workspace = true } +waymark-observability = { workspace = true } +waymark-proto = { workspace = true, features = ["serde", "client", "server"] } +waymark-runner = { workspace = true } +waymark-runner-state = { workspace = true } +waymark-webapp-backend = { workspace = true } +waymark-webapp-core = { workspace = true } +waymark-garbage-collector-backend = { workspace = true } +waymark-scheduler-backend = { workspace = true } +waymark-scheduler-core = { workspace = true } +waymark-backends-core = { workspace = true } +waymark-integration-support = { workspace = true } +waymark-backend-postgres = { workspace = true } +waymark-backend-postgres-migrations = { workspace = true } +waymark-workflow-registry-backend = { workspace = true } +waymark-worker-status-backend = { workspace = true } +waymark-backend-memory = { workspace = true } anyhow = "1" axum = "0.8" @@ -51,7 +67,6 @@ tracing-subscriber = { version = 
"0.3", features = ["env-filter"] } tracing-chrome = "0.7" metrics = "0.24" regex = "1" -waymark-observability-macros = { path = "../observability-macros" } console-subscriber = { version = "0.5", optional = true } [features] @@ -59,6 +74,10 @@ trace = [] observability = ["trace", "dep:console-subscriber"] [dev-dependencies] +waymark-backend-fault-injection = { workspace = true } +waymark-backend-memory = { workspace = true } +waymark-test-support = { workspace = true } + serial_test = "2" tower = { version = "0.5", features = ["util"] } http-body-util = "0.1" diff --git a/crates/waymark/migrations/0001_init.sql b/crates/waymark/migrations/0001_init.sql deleted file mode 100644 index dbb6b7da..00000000 --- a/crates/waymark/migrations/0001_init.sql +++ /dev/null @@ -1,115 +0,0 @@ --- Waymark core schema (baseline) - -CREATE EXTENSION IF NOT EXISTS pgcrypto; - --- --------------------------------------------------------------------------- --- Workflow definitions --- --------------------------------------------------------------------------- - -CREATE TABLE workflow_versions ( - id UUID PRIMARY KEY DEFAULT gen_random_uuid(), - workflow_name TEXT NOT NULL, - dag_hash TEXT NOT NULL, - program_proto BYTEA NOT NULL, - concurrent BOOLEAN NOT NULL DEFAULT false, - created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), - UNIQUE(workflow_name, dag_hash) -); - -CREATE INDEX idx_workflow_versions_name ON workflow_versions(workflow_name); - --- --------------------------------------------------------------------------- --- Runner persistence tables --- --------------------------------------------------------------------------- - -CREATE TABLE runner_graph_updates ( - id BIGSERIAL PRIMARY KEY, - created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), - state BYTEA NOT NULL -); - -CREATE TABLE runner_actions_done ( - id BIGSERIAL PRIMARY KEY, - created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), - node_id UUID NOT NULL, - action_name TEXT NOT NULL, - attempt INTEGER NOT NULL, - result BYTEA -); - 
-CREATE TABLE runner_instances ( - instance_id UUID PRIMARY KEY, - created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), - entry_node UUID NOT NULL, - state BYTEA, - result BYTEA, - error BYTEA -); - -CREATE TABLE runner_instances_done ( - id BIGSERIAL PRIMARY KEY, - created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), - executor_id UUID NOT NULL, - entry_node UUID NOT NULL, - result BYTEA, - error BYTEA -); - -CREATE TABLE queued_instances ( - instance_id UUID PRIMARY KEY, - created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), - payload BYTEA NOT NULL -); - --- --------------------------------------------------------------------------- --- Scheduler --- --------------------------------------------------------------------------- - -CREATE TABLE workflow_schedules ( - id UUID PRIMARY KEY DEFAULT gen_random_uuid(), - workflow_name TEXT NOT NULL, - schedule_name TEXT NOT NULL, - schedule_type TEXT NOT NULL, - cron_expression TEXT, - interval_seconds BIGINT, - jitter_seconds BIGINT NOT NULL DEFAULT 0, - input_payload BYTEA, - status TEXT NOT NULL DEFAULT 'active', - next_run_at TIMESTAMPTZ, - last_run_at TIMESTAMPTZ, - last_instance_id UUID, - created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), - updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), - priority INT NOT NULL DEFAULT 0, - allow_duplicate BOOLEAN NOT NULL DEFAULT false, - UNIQUE(workflow_name, schedule_name) -); - -CREATE INDEX idx_schedules_due ON workflow_schedules(next_run_at) - WHERE status = 'active' AND next_run_at IS NOT NULL; - --- --------------------------------------------------------------------------- --- Worker status metrics --- --------------------------------------------------------------------------- - -CREATE TABLE worker_status ( - pool_id UUID NOT NULL, - worker_id BIGINT NOT NULL, - throughput_per_min DOUBLE PRECISION NOT NULL DEFAULT 0, - total_completed BIGINT NOT NULL DEFAULT 0, - last_action_at TIMESTAMPTZ, - updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), - median_dequeue_ms BIGINT, - median_handling_ms BIGINT, 
- dispatch_queue_size BIGINT, - total_in_flight BIGINT, - active_workers INT NOT NULL DEFAULT 0, - actions_per_sec DOUBLE PRECISION NOT NULL DEFAULT 0, - median_instance_duration_secs DOUBLE PRECISION, - active_instance_count INT NOT NULL DEFAULT 0, - total_instances_completed BIGINT NOT NULL DEFAULT 0, - instances_per_sec DOUBLE PRECISION NOT NULL DEFAULT 0, - instances_per_min DOUBLE PRECISION NOT NULL DEFAULT 0, - time_series BYTEA, - PRIMARY KEY (pool_id, worker_id) -); diff --git a/crates/waymark/migrations/0002_runner_actions_done_execution_id.sql b/crates/waymark/migrations/0002_runner_actions_done_execution_id.sql deleted file mode 100644 index b4bce178..00000000 --- a/crates/waymark/migrations/0002_runner_actions_done_execution_id.sql +++ /dev/null @@ -1,7 +0,0 @@ --- Rename runner action identifier to execution_id and drop stored action name. - -ALTER TABLE runner_actions_done - RENAME COLUMN node_id TO execution_id; - -ALTER TABLE runner_actions_done - DROP COLUMN action_name; diff --git a/crates/waymark/migrations/0003_instance_locks.sql b/crates/waymark/migrations/0003_instance_locks.sql deleted file mode 100644 index 6b826d18..00000000 --- a/crates/waymark/migrations/0003_instance_locks.sql +++ /dev/null @@ -1,12 +0,0 @@ --- Add scheduling and locking for queued instances. 
- -ALTER TABLE queued_instances - ADD COLUMN scheduled_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), - ADD COLUMN lock_uuid UUID, - ADD COLUMN lock_expires_at TIMESTAMPTZ; - -CREATE INDEX IF NOT EXISTS idx_queued_instances_scheduled_at - ON queued_instances(scheduled_at); - -CREATE INDEX IF NOT EXISTS idx_queued_instances_lock_expires_at - ON queued_instances(lock_expires_at); diff --git a/crates/waymark/migrations/0004_workflow_versions.sql b/crates/waymark/migrations/0004_workflow_versions.sql deleted file mode 100644 index daf3b54d..00000000 --- a/crates/waymark/migrations/0004_workflow_versions.sql +++ /dev/null @@ -1,21 +0,0 @@ --- Workflow versions: replace dag_hash with workflow_version + ir_hash - -ALTER TABLE workflow_versions - RENAME COLUMN dag_hash TO workflow_version; - -ALTER TABLE workflow_versions - ADD COLUMN ir_hash TEXT; - -UPDATE workflow_versions -SET ir_hash = workflow_version -WHERE ir_hash IS NULL; - -ALTER TABLE workflow_versions - ALTER COLUMN ir_hash SET NOT NULL; - -ALTER TABLE workflow_versions - DROP CONSTRAINT IF EXISTS workflow_versions_workflow_name_dag_hash_key; - -ALTER TABLE workflow_versions - ADD CONSTRAINT workflow_versions_workflow_name_version_key - UNIQUE (workflow_name, workflow_version); diff --git a/crates/waymark/migrations/0005_runner_instances_workflow_version_id.sql b/crates/waymark/migrations/0005_runner_instances_workflow_version_id.sql deleted file mode 100644 index 6d09937b..00000000 --- a/crates/waymark/migrations/0005_runner_instances_workflow_version_id.sql +++ /dev/null @@ -1,7 +0,0 @@ --- Persist workflow version on instances so webapp can show workflow names. 
- -ALTER TABLE runner_instances - ADD COLUMN workflow_version_id UUID; - -CREATE INDEX IF NOT EXISTS idx_runner_instances_workflow_version_id - ON runner_instances(workflow_version_id); diff --git a/crates/waymark/migrations/0006_drop_unused_runner_tables.sql b/crates/waymark/migrations/0006_drop_unused_runner_tables.sql deleted file mode 100644 index d3b1f272..00000000 --- a/crates/waymark/migrations/0006_drop_unused_runner_tables.sql +++ /dev/null @@ -1,4 +0,0 @@ --- Remove legacy tables no longer used by runtime or webapp. - -DROP TABLE IF EXISTS runner_graph_updates; -DROP TABLE IF EXISTS runner_instances_done; diff --git a/crates/waymark/migrations/0007_runner_instances_schedule_id.sql b/crates/waymark/migrations/0007_runner_instances_schedule_id.sql deleted file mode 100644 index 06cb1385..00000000 --- a/crates/waymark/migrations/0007_runner_instances_schedule_id.sql +++ /dev/null @@ -1,5 +0,0 @@ -ALTER TABLE runner_instances -ADD COLUMN IF NOT EXISTS schedule_id UUID; - -CREATE INDEX IF NOT EXISTS idx_runner_instances_schedule_id_created_at - ON runner_instances(schedule_id, created_at DESC); diff --git a/crates/waymark/migrations/0008_runner_actions_done_timing.sql b/crates/waymark/migrations/0008_runner_actions_done_timing.sql deleted file mode 100644 index b1b5551d..00000000 --- a/crates/waymark/migrations/0008_runner_actions_done_timing.sql +++ /dev/null @@ -1,14 +0,0 @@ --- Persist per-attempt lifecycle metadata for action history and timeline rendering. 
- -ALTER TABLE runner_actions_done - ADD COLUMN status TEXT, - ADD COLUMN started_at TIMESTAMPTZ, - ADD COLUMN completed_at TIMESTAMPTZ, - ADD COLUMN duration_ms BIGINT; - -ALTER TABLE runner_actions_done - ADD CONSTRAINT runner_actions_done_status_check - CHECK (status IS NULL OR status IN ('completed', 'failed', 'timed_out')); - -CREATE INDEX idx_runner_actions_done_execution_attempt - ON runner_actions_done (execution_id, attempt); diff --git a/crates/waymark/migrations/0009_instance_search_columns.sql b/crates/waymark/migrations/0009_instance_search_columns.sql deleted file mode 100644 index 948c6aca..00000000 --- a/crates/waymark/migrations/0009_instance_search_columns.sql +++ /dev/null @@ -1,63 +0,0 @@ --- Persist workflow/status instance metadata for indexed search in webapp queries. - -ALTER TABLE runner_instances - ADD COLUMN IF NOT EXISTS workflow_name TEXT, - ADD COLUMN IF NOT EXISTS current_status TEXT; - -ALTER TABLE queued_instances - ADD COLUMN IF NOT EXISTS workflow_name TEXT, - ADD COLUMN IF NOT EXISTS current_status TEXT; - -UPDATE runner_instances AS ri -SET workflow_name = wv.workflow_name -FROM workflow_versions wv -WHERE ri.workflow_name IS NULL - AND ri.workflow_version_id = wv.id; - -UPDATE runner_instances -SET current_status = CASE - WHEN error IS NOT NULL THEN 'failed' - WHEN result IS NOT NULL THEN 'completed' - WHEN state IS NOT NULL THEN 'running' - ELSE 'queued' -END -WHERE current_status IS NULL; - -UPDATE queued_instances AS qi -SET workflow_name = ri.workflow_name -FROM runner_instances ri -WHERE qi.workflow_name IS NULL - AND qi.instance_id = ri.instance_id; - -UPDATE queued_instances -SET current_status = CASE - WHEN lock_uuid IS NULL THEN 'queued' - ELSE 'running' -END -WHERE current_status IS NULL; - -ALTER TABLE runner_instances - ADD CONSTRAINT runner_instances_current_status_check - CHECK ( - current_status IS NULL - OR current_status IN ('queued', 'running', 'completed', 'failed') - ); - -ALTER TABLE queued_instances - ADD 
CONSTRAINT queued_instances_current_status_check - CHECK ( - current_status IS NULL - OR current_status IN ('queued', 'running') - ); - -CREATE INDEX IF NOT EXISTS idx_runner_instances_workflow_name - ON runner_instances(workflow_name); - -CREATE INDEX IF NOT EXISTS idx_runner_instances_current_status - ON runner_instances(current_status); - -CREATE INDEX IF NOT EXISTS idx_queued_instances_workflow_name - ON queued_instances(workflow_name); - -CREATE INDEX IF NOT EXISTS idx_queued_instances_current_status - ON queued_instances(current_status); diff --git a/crates/waymark/src/backends/base.rs b/crates/waymark/src/backends/base.rs deleted file mode 100644 index 92c17a3f..00000000 --- a/crates/waymark/src/backends/base.rs +++ /dev/null @@ -1,366 +0,0 @@ -//! Backend interfaces for persisting runner state and action results. - -use std::collections::{HashMap, HashSet}; -use std::sync::Arc; - -use chrono::{DateTime, Utc}; -use serde::{Deserialize, Deserializer, Serialize}; -use serde_json::Value; -use tonic::async_trait; -use uuid::Uuid; - -use crate::scheduler::{CreateScheduleParams, ScheduleId, WorkflowSchedule}; -use crate::waymark_core::runner::state::{ExecutionEdge, ExecutionNode, NodeStatus, RunnerState}; -use crate::webapp::{ - ExecutionGraphView, InstanceDetail, InstanceSummary, ScheduleDetail, ScheduleInvocationSummary, - ScheduleSummary, TimelineEntry, WorkerActionRow, WorkerAggregateStats, WorkerStatus, -}; -use waymark_dag::DAG; - -#[derive(Debug, thiserror::Error)] -pub enum BackendError { - #[error("{0}")] - Message(String), - #[error(transparent)] - Sqlx(#[from] sqlx::Error), - #[error(transparent)] - Serialization(#[from] serde_json::Error), -} - -pub type BackendResult = Result; - -fn default_instance_id() -> Uuid { - Uuid::new_v4() -} - -fn default_action_results() -> HashMap { - HashMap::new() -} - -fn deserialize_action_results<'de, D>(deserializer: D) -> Result, D::Error> -where - D: Deserializer<'de>, -{ - let value = 
Option::>::deserialize(deserializer)?; - Ok(value.unwrap_or_default()) -} - -// The models that we use for our backends are similar to the ones that we -// have specified in our database/Postgres backend, but not 1:1. It's better for -// us to internally convert within the given backend - -#[derive(Clone, Debug, Serialize, Deserialize)] -/// Queued instance payload for the run loop. -pub struct QueuedInstance { - pub workflow_version_id: Uuid, - #[serde(default)] - pub schedule_id: Option, - #[serde(skip, default)] - pub dag: Option>, - pub entry_node: Uuid, - pub state: Option, - #[serde( - default = "default_action_results", - deserialize_with = "deserialize_action_results" - )] - pub action_results: HashMap, - #[serde(default = "default_instance_id")] - pub instance_id: Uuid, - #[serde(default)] - pub scheduled_at: Option>, -} - -#[derive(Clone, Debug)] -/// Result payload for queued instance polling. -pub struct QueuedInstanceBatch { - pub instances: Vec, -} - -#[derive(Clone, Debug)] -/// Lock claim settings for owned instances. -pub struct LockClaim { - pub lock_uuid: Uuid, - pub lock_expires_at: DateTime, -} - -#[derive(Clone, Debug)] -/// Current lock status for an instance. -pub struct InstanceLockStatus { - pub instance_id: Uuid, - pub lock_uuid: Option, - pub lock_expires_at: Option>, -} - -#[derive(Clone, Debug, Serialize, Deserialize)] -/// Completed instance payload with result or exception. -pub struct InstanceDone { - pub executor_id: Uuid, - pub entry_node: Uuid, - pub result: Option, - pub error: Option, -} - -#[derive(Clone, Debug, Serialize, Deserialize)] -/// Batch payload representing an updated execution graph snapshot. -/// -/// This intentionally stores only runtime nodes and edges (no DAG template or -/// derived caches) so persistence stays lightweight. 
-pub struct GraphUpdate { - pub instance_id: Uuid, - pub nodes: HashMap, - pub edges: HashSet, -} - -impl GraphUpdate { - pub fn from_state(instance_id: Uuid, state: &RunnerState) -> Self { - Self { - instance_id, - nodes: state.nodes.clone(), - edges: state.edges.clone(), - } - } - - pub fn next_scheduled_at(&self) -> DateTime { - let mut next: Option> = None; - for node in self.nodes.values() { - if matches!(node.status, NodeStatus::Completed | NodeStatus::Failed) { - continue; - } - if let Some(scheduled_at) = node.scheduled_at { - next = Some(match next { - Some(existing) => existing.min(scheduled_at), - None => scheduled_at, - }); - } - } - next.unwrap_or_else(Utc::now) - } -} - -#[derive(Clone, Debug, Serialize, Deserialize)] -/// Batch payload representing a finished action attempt (success or failure). -pub struct ActionDone { - pub execution_id: Uuid, - pub attempt: i32, - pub status: ActionAttemptStatus, - pub started_at: Option>, - pub completed_at: Option>, - pub duration_ms: Option, - pub result: Value, -} - -#[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)] -#[serde(rename_all = "snake_case")] -pub enum ActionAttemptStatus { - Completed, - Failed, - TimedOut, -} - -impl std::fmt::Display for ActionAttemptStatus { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - Self::Completed => write!(f, "completed"), - Self::Failed => write!(f, "failed"), - Self::TimedOut => write!(f, "timed_out"), - } - } -} - -/// Worker status update for persistence. 
-#[derive(Clone, Debug)] -pub struct WorkerStatusUpdate { - pub pool_id: Uuid, - pub throughput_per_min: f64, - pub total_completed: i64, - pub last_action_at: Option>, - pub median_dequeue_ms: Option, - pub median_handling_ms: Option, - pub dispatch_queue_size: i64, - pub total_in_flight: i64, - pub active_workers: i32, - pub actions_per_sec: f64, - pub median_instance_duration_secs: Option, - pub active_instance_count: i32, - pub total_instances_completed: i64, - pub instances_per_sec: f64, - pub instances_per_min: f64, - pub time_series: Option>, -} - -/// Backend capability for recording worker status metrics. -#[async_trait] -pub trait WorkerStatusBackend: Send + Sync { - async fn upsert_worker_status(&self, status: &WorkerStatusUpdate) -> BackendResult<()>; -} - -/// Abstract persistence backend for runner state. -#[async_trait] -pub trait CoreBackend: Send + Sync { - fn clone_box(&self) -> Box; - - /// Persist updated execution graphs. - async fn save_graphs( - &self, - claim: LockClaim, - graphs: &[GraphUpdate], - ) -> BackendResult>; - - /// Persist finished action attempts (success or failure). - async fn save_actions_done(&self, actions: &[ActionDone]) -> BackendResult<()>; - - /// Return up to size queued instances without blocking. - async fn get_queued_instances( - &self, - size: usize, - claim: LockClaim, - ) -> BackendResult; - - /// Refresh lock expiry for owned instances. - async fn refresh_instance_locks( - &self, - claim: LockClaim, - instance_ids: &[Uuid], - ) -> BackendResult>; - - /// Release instance locks when evicting from memory. - async fn release_instance_locks( - &self, - lock_uuid: Uuid, - instance_ids: &[Uuid], - ) -> BackendResult<()>; - - /// Persist completed workflow instances. - async fn save_instances_done(&self, instances: &[InstanceDone]) -> BackendResult<()>; - - /// Insert queued instances for run-loop consumption. 
- async fn queue_instances(&self, instances: &[QueuedInstance]) -> BackendResult<()>; -} - -/// Registration payload for storing workflow DAG metadata. -#[derive(Clone, Debug)] -pub struct WorkflowRegistration { - pub workflow_name: String, - pub workflow_version: String, - pub ir_hash: String, - pub program_proto: Vec, - pub concurrent: bool, -} - -#[derive(Clone, Debug)] -/// Stored workflow version metadata and IR payload. -pub struct WorkflowVersion { - pub id: Uuid, - pub workflow_name: String, - pub workflow_version: String, - pub ir_hash: String, - pub program_proto: Vec, - pub concurrent: bool, -} - -/// Backend capability for registering workflow DAGs. -#[async_trait] -pub trait WorkflowRegistryBackend: Send + Sync { - async fn upsert_workflow_version( - &self, - registration: &WorkflowRegistration, - ) -> BackendResult; - - async fn get_workflow_versions(&self, ids: &[Uuid]) -> BackendResult>; -} - -/// Backend capability for workflow schedule persistence. -#[async_trait] -pub trait SchedulerBackend: Send + Sync { - async fn upsert_schedule(&self, params: &CreateScheduleParams) -> BackendResult; - async fn get_schedule(&self, id: ScheduleId) -> BackendResult; - async fn get_schedule_by_name( - &self, - workflow_name: &str, - schedule_name: &str, - ) -> BackendResult>; - async fn list_schedules(&self, limit: i64, offset: i64) - -> BackendResult>; - async fn count_schedules(&self) -> BackendResult; - async fn update_schedule_status(&self, id: ScheduleId, status: &str) -> BackendResult; - async fn delete_schedule(&self, id: ScheduleId) -> BackendResult; - async fn find_due_schedules(&self, limit: i32) -> BackendResult>; - async fn has_running_instance(&self, schedule_id: ScheduleId) -> BackendResult; - async fn mark_schedule_executed( - &self, - schedule_id: ScheduleId, - instance_id: Uuid, - ) -> BackendResult<()>; - async fn skip_schedule_run(&self, schedule_id: ScheduleId) -> BackendResult<()>; -} - -#[derive(Clone, Copy, Debug, Default)] -/// Summary of 
a garbage collection sweep. -pub struct GarbageCollectionResult { - pub deleted_instances: usize, - pub deleted_actions: usize, -} - -/// Backend capability for deleting old finished workflow data. -#[async_trait] -pub trait GarbageCollectorBackend: Send + Sync { - async fn collect_done_instances( - &self, - older_than: DateTime, - limit: usize, - ) -> BackendResult; -} - -/// Backend capability for webapp-specific queries. -#[async_trait] -pub trait WebappBackend: Send + Sync { - async fn count_instances(&self, search: Option<&str>) -> BackendResult; - async fn list_instances( - &self, - search: Option<&str>, - limit: i64, - offset: i64, - ) -> BackendResult>; - async fn get_instance(&self, instance_id: Uuid) -> BackendResult; - async fn get_execution_graph( - &self, - instance_id: Uuid, - ) -> BackendResult>; - async fn get_workflow_graph( - &self, - instance_id: Uuid, - ) -> BackendResult>; - async fn get_action_results(&self, instance_id: Uuid) -> BackendResult>; - async fn get_distinct_workflows(&self) -> BackendResult>; - async fn get_distinct_statuses(&self) -> BackendResult>; - async fn count_schedules(&self) -> BackendResult; - async fn list_schedules(&self, limit: i64, offset: i64) -> BackendResult>; - async fn get_schedule(&self, schedule_id: Uuid) -> BackendResult; - async fn count_schedule_invocations(&self, schedule_id: Uuid) -> BackendResult; - async fn list_schedule_invocations( - &self, - schedule_id: Uuid, - limit: i64, - offset: i64, - ) -> BackendResult>; - async fn update_schedule_status(&self, schedule_id: Uuid, status: &str) -> BackendResult; - async fn get_distinct_schedule_statuses(&self) -> BackendResult>; - async fn get_distinct_schedule_types(&self) -> BackendResult>; - async fn get_worker_action_stats( - &self, - window_minutes: i64, - ) -> BackendResult>; - async fn get_worker_aggregate_stats( - &self, - window_minutes: i64, - ) -> BackendResult; - async fn worker_status_table_exists(&self) -> bool; - async fn 
schedules_table_exists(&self) -> bool; - async fn get_worker_statuses(&self, window_minutes: i64) -> BackendResult>; -} - -impl Clone for Box { - fn clone(&self) -> Self { - self.clone_box() - } -} diff --git a/crates/waymark/src/backends/memory.rs b/crates/waymark/src/backends/memory.rs deleted file mode 100644 index c49bc6e0..00000000 --- a/crates/waymark/src/backends/memory.rs +++ /dev/null @@ -1,814 +0,0 @@ -//! In-memory backend that prints persistence operations. - -use std::collections::{HashMap, VecDeque}; -use std::sync::{Arc, Mutex}; - -use chrono::{DateTime, Utc}; -use uuid::Uuid; - -use super::base::{ - ActionDone, BackendError, BackendResult, CoreBackend, GarbageCollectionResult, - GarbageCollectorBackend, GraphUpdate, InstanceDone, InstanceLockStatus, LockClaim, - QueuedInstance, QueuedInstanceBatch, SchedulerBackend, WebappBackend, WorkerStatusBackend, - WorkerStatusUpdate, WorkflowRegistration, WorkflowRegistryBackend, WorkflowVersion, -}; -use crate::scheduler::compute_next_run; -use crate::scheduler::{CreateScheduleParams, ScheduleId, ScheduleType, WorkflowSchedule}; -use crate::webapp::{ - ExecutionGraphView, InstanceDetail, InstanceStatus, InstanceSummary, ScheduleDetail, - ScheduleInvocationSummary, ScheduleSummary, TimelineEntry, WorkerActionRow, - WorkerAggregateStats, WorkerStatus, -}; -use tonic::async_trait; - -type WorkflowVersionKey = (String, String); -type WorkflowVersionValue = (Uuid, WorkflowRegistration); -type WorkflowVersionStore = HashMap; -type InstanceLockStore = HashMap, Option>)>; - -/// Backend that stores updates in memory for tests or local runs. 
-#[derive(Clone)] -pub struct MemoryBackend { - instance_queue: Option>>>, - graph_updates: Arc>>, - actions_done: Arc>>, - instances_done: Arc>>, - worker_status_updates: Arc>>, - workflow_versions: Arc>, - schedules: Arc>>, - instance_locks: Arc>, -} - -impl Default for MemoryBackend { - fn default() -> Self { - Self { - instance_queue: None, - graph_updates: Arc::new(Mutex::new(Vec::new())), - actions_done: Arc::new(Mutex::new(Vec::new())), - instances_done: Arc::new(Mutex::new(Vec::new())), - worker_status_updates: Arc::new(Mutex::new(Vec::new())), - workflow_versions: Arc::new(Mutex::new(HashMap::new())), - schedules: Arc::new(Mutex::new(HashMap::new())), - instance_locks: Arc::new(Mutex::new(HashMap::new())), - } - } -} - -impl MemoryBackend { - pub fn new() -> Self { - Self::default() - } - - pub fn with_queue(queue: Arc>>) -> Self { - Self { - instance_queue: Some(queue), - ..Self::default() - } - } - - pub fn instance_queue(&self) -> Option>>> { - self.instance_queue.clone() - } - - pub fn graph_updates(&self) -> Vec { - self.graph_updates - .lock() - .expect("graph updates poisoned") - .clone() - } - - pub fn actions_done(&self) -> Vec { - self.actions_done - .lock() - .expect("actions done poisoned") - .clone() - } - - pub fn instances_done(&self) -> Vec { - self.instances_done - .lock() - .expect("instances done poisoned") - .clone() - } - - pub fn worker_status_updates(&self) -> Vec { - self.worker_status_updates - .lock() - .expect("worker status updates poisoned") - .clone() - } -} - -#[async_trait] -impl CoreBackend for MemoryBackend { - fn clone_box(&self) -> Box { - Box::new(self.clone()) - } - - async fn save_graphs( - &self, - claim: LockClaim, - graphs: &[GraphUpdate], - ) -> BackendResult> { - let mut stored = self.graph_updates.lock().expect("graph updates poisoned"); - stored.extend(graphs.iter().cloned()); - let mut guard = self.instance_locks.lock().expect("instance locks poisoned"); - let mut locks = Vec::with_capacity(graphs.len()); - 
for graph in graphs { - if let Some((Some(lock_uuid), lock_expires_at)) = guard.get_mut(&graph.instance_id) - && *lock_uuid == claim.lock_uuid - && lock_expires_at.is_none_or(|expires_at| expires_at < claim.lock_expires_at) - { - *lock_expires_at = Some(claim.lock_expires_at); - } - let (lock_uuid, lock_expires_at) = guard - .get(&graph.instance_id) - .cloned() - .unwrap_or((None, None)); - locks.push(InstanceLockStatus { - instance_id: graph.instance_id, - lock_uuid, - lock_expires_at, - }); - } - Ok(locks) - } - - async fn save_actions_done(&self, actions: &[ActionDone]) -> BackendResult<()> { - let mut stored = self.actions_done.lock().expect("actions done poisoned"); - stored.extend(actions.iter().cloned()); - Ok(()) - } - - async fn save_instances_done(&self, instances: &[InstanceDone]) -> BackendResult<()> { - let mut stored = self.instances_done.lock().expect("instances done poisoned"); - stored.extend(instances.iter().cloned()); - if !instances.is_empty() { - let mut locks = self.instance_locks.lock().expect("instance locks poisoned"); - for instance in instances { - locks.remove(&instance.executor_id); - } - } - Ok(()) - } - - async fn get_queued_instances( - &self, - size: usize, - claim: LockClaim, - ) -> BackendResult { - if size == 0 { - return Ok(QueuedInstanceBatch { - instances: Vec::new(), - }); - } - let queue = match &self.instance_queue { - Some(queue) => queue, - None => { - return Ok(QueuedInstanceBatch { - instances: Vec::new(), - }); - } - }; - let mut guard = queue.lock().expect("instance queue poisoned"); - let now = Utc::now(); - let mut instances = Vec::new(); - while instances.len() < size { - let Some(instance) = guard.front() else { - break; - }; - if let Some(scheduled_at) = instance.scheduled_at - && scheduled_at > now - { - break; - } - let instance = guard.pop_front().expect("instance queue empty"); - instances.push(instance); - } - if !instances.is_empty() { - let mut locks = self.instance_locks.lock().expect("instance locks 
poisoned"); - for instance in &instances { - locks.insert( - instance.instance_id, - (Some(claim.lock_uuid), Some(claim.lock_expires_at)), - ); - } - } - Ok(QueuedInstanceBatch { instances }) - } - - async fn queue_instances(&self, instances: &[QueuedInstance]) -> BackendResult<()> { - if instances.is_empty() { - return Ok(()); - } - let queue = self.instance_queue.as_ref().ok_or_else(|| { - BackendError::Message("memory backend missing instance queue".to_string()) - })?; - let mut guard = queue.lock().expect("instance queue poisoned"); - for instance in instances { - guard.push_back(instance.clone()); - } - Ok(()) - } - - async fn refresh_instance_locks( - &self, - claim: LockClaim, - instance_ids: &[Uuid], - ) -> BackendResult> { - let mut guard = self.instance_locks.lock().expect("instance locks poisoned"); - let mut locks = Vec::new(); - for instance_id in instance_ids { - let entry = guard - .entry(*instance_id) - .or_insert((Some(claim.lock_uuid), Some(claim.lock_expires_at))); - if entry.0 == Some(claim.lock_uuid) { - entry.1 = Some(claim.lock_expires_at); - } - locks.push(InstanceLockStatus { - instance_id: *instance_id, - lock_uuid: entry.0, - lock_expires_at: entry.1, - }); - } - Ok(locks) - } - - async fn release_instance_locks( - &self, - lock_uuid: Uuid, - instance_ids: &[Uuid], - ) -> BackendResult<()> { - let mut guard = self.instance_locks.lock().expect("instance locks poisoned"); - for instance_id in instance_ids { - if let Some((current_lock, _)) = guard.get(instance_id) - && *current_lock == Some(lock_uuid) - { - guard.remove(instance_id); - } - } - Ok(()) - } -} - -#[async_trait] -impl WorkerStatusBackend for MemoryBackend { - async fn upsert_worker_status(&self, status: &WorkerStatusUpdate) -> BackendResult<()> { - let mut stored = self - .worker_status_updates - .lock() - .expect("worker status updates poisoned"); - stored.push(status.clone()); - Ok(()) - } -} - -#[async_trait] -impl WorkflowRegistryBackend for MemoryBackend { - async fn 
upsert_workflow_version( - &self, - registration: &WorkflowRegistration, - ) -> BackendResult { - let mut guard = self - .workflow_versions - .lock() - .expect("workflow versions poisoned"); - let key = ( - registration.workflow_name.clone(), - registration.workflow_version.clone(), - ); - if let Some((id, existing)) = guard.get(&key) { - if existing.ir_hash != registration.ir_hash { - return Err(BackendError::Message(format!( - "workflow version already exists with different IR hash: {}@{}", - registration.workflow_name, registration.workflow_version - ))); - } - return Ok(*id); - } - - let id = Uuid::new_v4(); - guard.insert(key, (id, registration.clone())); - Ok(id) - } - - async fn get_workflow_versions(&self, ids: &[Uuid]) -> BackendResult> { - if ids.is_empty() { - return Ok(Vec::new()); - } - let guard = self - .workflow_versions - .lock() - .expect("workflow versions poisoned"); - let mut versions = Vec::new(); - for (id, registration) in guard.values() { - if ids.contains(id) { - versions.push(WorkflowVersion { - id: *id, - workflow_name: registration.workflow_name.clone(), - workflow_version: registration.workflow_version.clone(), - ir_hash: registration.ir_hash.clone(), - program_proto: registration.program_proto.clone(), - concurrent: registration.concurrent, - }); - } - } - Ok(versions) - } -} - -#[async_trait] -impl SchedulerBackend for MemoryBackend { - async fn upsert_schedule(&self, params: &CreateScheduleParams) -> BackendResult { - let mut guard = self.schedules.lock().expect("schedules poisoned"); - let existing_schedule = guard.iter().find_map(|(id, schedule)| { - if schedule.workflow_name == params.workflow_name - && schedule.schedule_name == params.schedule_name - { - Some((*id, schedule.clone())) - } else { - None - } - }); - let schedule_id = existing_schedule - .as_ref() - .map(|(id, _)| *id) - .unwrap_or_else(ScheduleId::new); - let now = Utc::now(); - let next_run_at = match existing_schedule - .as_ref() - .and_then(|(_, schedule)| 
schedule.next_run_at) - { - Some(next_run_at) => Some(next_run_at), - None => Some( - compute_next_run( - params.schedule_type, - params.cron_expression.as_deref(), - params.interval_seconds, - params.jitter_seconds, - None, - ) - .map_err(BackendError::Message)?, - ), - }; - let schedule = WorkflowSchedule { - id: schedule_id.0, - workflow_name: params.workflow_name.clone(), - schedule_name: params.schedule_name.clone(), - schedule_type: params.schedule_type.as_str().to_string(), - cron_expression: params.cron_expression.clone(), - interval_seconds: params.interval_seconds, - jitter_seconds: params.jitter_seconds, - input_payload: params.input_payload.clone(), - status: "active".to_string(), - next_run_at, - last_run_at: existing_schedule - .as_ref() - .and_then(|(_, schedule)| schedule.last_run_at), - last_instance_id: existing_schedule - .as_ref() - .and_then(|(_, schedule)| schedule.last_instance_id), - created_at: existing_schedule - .as_ref() - .map(|(_, schedule)| schedule.created_at) - .unwrap_or(now), - updated_at: now, - priority: params.priority, - allow_duplicate: params.allow_duplicate, - }; - guard.insert(schedule_id, schedule); - Ok(schedule_id) - } - - async fn get_schedule(&self, id: ScheduleId) -> BackendResult { - let guard = self.schedules.lock().expect("schedules poisoned"); - guard - .get(&id) - .cloned() - .ok_or_else(|| BackendError::Message(format!("schedule not found: {id}"))) - } - - async fn get_schedule_by_name( - &self, - workflow_name: &str, - schedule_name: &str, - ) -> BackendResult> { - let guard = self.schedules.lock().expect("schedules poisoned"); - Ok(guard - .values() - .find(|schedule| { - schedule.workflow_name == workflow_name - && schedule.schedule_name == schedule_name - && schedule.status != "deleted" - }) - .cloned()) - } - - async fn list_schedules( - &self, - limit: i64, - offset: i64, - ) -> BackendResult> { - let guard = self.schedules.lock().expect("schedules poisoned"); - let mut schedules: Vec<_> = guard - 
.values() - .filter(|schedule| schedule.status != "deleted") - .cloned() - .collect(); - schedules.sort_by(|a, b| { - (&a.workflow_name, &a.schedule_name).cmp(&(&b.workflow_name, &b.schedule_name)) - }); - let start = offset.max(0) as usize; - let end = start.saturating_add(limit.max(0) as usize); - Ok(schedules - .into_iter() - .skip(start) - .take(end - start) - .collect()) - } - - async fn count_schedules(&self) -> BackendResult { - let guard = self.schedules.lock().expect("schedules poisoned"); - Ok(guard - .values() - .filter(|schedule| schedule.status != "deleted") - .count() as i64) - } - - async fn update_schedule_status(&self, id: ScheduleId, status: &str) -> BackendResult { - let mut guard = self.schedules.lock().expect("schedules poisoned"); - if let Some(schedule) = guard.get_mut(&id) { - schedule.status = status.to_string(); - schedule.updated_at = Utc::now(); - Ok(true) - } else { - Ok(false) - } - } - - async fn delete_schedule(&self, id: ScheduleId) -> BackendResult { - SchedulerBackend::update_schedule_status(self, id, "deleted").await - } - - async fn find_due_schedules(&self, limit: i32) -> BackendResult> { - let guard = self.schedules.lock().expect("schedules poisoned"); - let now = Utc::now(); - let mut schedules: Vec<_> = guard - .values() - .filter(|schedule| { - schedule.status == "active" - && schedule - .next_run_at - .map(|next| next <= now) - .unwrap_or(false) - }) - .cloned() - .collect(); - schedules.sort_by_key(|schedule| schedule.next_run_at); - Ok(schedules.into_iter().take(limit as usize).collect()) - } - - async fn has_running_instance(&self, _schedule_id: ScheduleId) -> BackendResult { - Ok(false) - } - - async fn mark_schedule_executed( - &self, - schedule_id: ScheduleId, - instance_id: Uuid, - ) -> BackendResult<()> { - let mut guard = self.schedules.lock().expect("schedules poisoned"); - let schedule = guard - .get_mut(&schedule_id) - .ok_or_else(|| BackendError::Message(format!("schedule not found: {schedule_id}")))?; - let 
schedule_type = ScheduleType::parse(&schedule.schedule_type) - .ok_or_else(|| BackendError::Message("invalid schedule type".to_string()))?; - let next_run_at = compute_next_run( - schedule_type, - schedule.cron_expression.as_deref(), - schedule.interval_seconds, - schedule.jitter_seconds, - Some(Utc::now()), - ) - .map_err(BackendError::Message)?; - schedule.last_run_at = Some(Utc::now()); - schedule.last_instance_id = Some(instance_id); - schedule.next_run_at = Some(next_run_at); - schedule.updated_at = Utc::now(); - Ok(()) - } - - async fn skip_schedule_run(&self, schedule_id: ScheduleId) -> BackendResult<()> { - let mut guard = self.schedules.lock().expect("schedules poisoned"); - let schedule = guard - .get_mut(&schedule_id) - .ok_or_else(|| BackendError::Message(format!("schedule not found: {schedule_id}")))?; - let schedule_type = ScheduleType::parse(&schedule.schedule_type) - .ok_or_else(|| BackendError::Message("invalid schedule type".to_string()))?; - let next_run_at = compute_next_run( - schedule_type, - schedule.cron_expression.as_deref(), - schedule.interval_seconds, - schedule.jitter_seconds, - Some(Utc::now()), - ) - .map_err(BackendError::Message)?; - schedule.next_run_at = Some(next_run_at); - schedule.updated_at = Utc::now(); - Ok(()) - } -} - -#[async_trait] -impl GarbageCollectorBackend for MemoryBackend { - async fn collect_done_instances( - &self, - _older_than: DateTime, - _limit: usize, - ) -> BackendResult { - Ok(GarbageCollectionResult::default()) - } -} - -#[async_trait] -impl WebappBackend for MemoryBackend { - async fn count_instances(&self, _search: Option<&str>) -> BackendResult { - Ok(0) - } - - async fn list_instances( - &self, - _search: Option<&str>, - _limit: i64, - _offset: i64, - ) -> BackendResult> { - Ok(Vec::new()) - } - - async fn get_instance(&self, instance_id: Uuid) -> BackendResult { - Err(BackendError::Message(format!( - "instance not found: {instance_id}" - ))) - } - - async fn get_execution_graph( - &self, - 
_instance_id: Uuid, - ) -> BackendResult> { - Ok(None) - } - - async fn get_workflow_graph( - &self, - _instance_id: Uuid, - ) -> BackendResult> { - Ok(None) - } - - async fn get_action_results(&self, _instance_id: Uuid) -> BackendResult> { - Ok(Vec::new()) - } - - async fn get_distinct_workflows(&self) -> BackendResult> { - Ok(Vec::new()) - } - - async fn get_distinct_statuses(&self) -> BackendResult> { - Ok(vec![ - InstanceStatus::Queued.to_string(), - InstanceStatus::Running.to_string(), - InstanceStatus::Completed.to_string(), - InstanceStatus::Failed.to_string(), - ]) - } - - async fn count_schedules(&self) -> BackendResult { - let guard = self.schedules.lock().expect("schedules poisoned"); - Ok(guard - .values() - .filter(|schedule| schedule.status != "deleted") - .count() as i64) - } - - async fn list_schedules(&self, limit: i64, offset: i64) -> BackendResult> { - let guard = self.schedules.lock().expect("schedules poisoned"); - let mut schedules: Vec<_> = guard - .values() - .filter(|schedule| schedule.status != "deleted") - .cloned() - .collect(); - schedules.sort_by(|a, b| { - (&a.workflow_name, &a.schedule_name).cmp(&(&b.workflow_name, &b.schedule_name)) - }); - - let start = offset.max(0) as usize; - let page_limit = limit.max(0) as usize; - Ok(schedules - .into_iter() - .skip(start) - .take(page_limit) - .map(|schedule| ScheduleSummary { - id: schedule.id.to_string(), - workflow_name: schedule.workflow_name, - schedule_name: schedule.schedule_name, - schedule_type: schedule.schedule_type, - cron_expression: schedule.cron_expression, - interval_seconds: schedule.interval_seconds, - status: schedule.status, - next_run_at: schedule.next_run_at.map(|dt| dt.to_rfc3339()), - last_run_at: schedule.last_run_at.map(|dt| dt.to_rfc3339()), - created_at: schedule.created_at.to_rfc3339(), - }) - .collect()) - } - - async fn get_schedule(&self, schedule_id: Uuid) -> BackendResult { - let guard = self.schedules.lock().expect("schedules poisoned"); - let schedule = 
guard - .values() - .find(|schedule| schedule.id == schedule_id) - .cloned() - .ok_or_else(|| BackendError::Message(format!("schedule not found: {schedule_id}")))?; - - let input_payload = schedule.input_payload.as_ref().and_then(|bytes| { - rmp_serde::from_slice::(bytes) - .ok() - .and_then(|value| serde_json::to_string_pretty(&value).ok()) - }); - - Ok(ScheduleDetail { - id: schedule.id.to_string(), - workflow_name: schedule.workflow_name, - schedule_name: schedule.schedule_name, - schedule_type: schedule.schedule_type, - cron_expression: schedule.cron_expression, - interval_seconds: schedule.interval_seconds, - jitter_seconds: schedule.jitter_seconds, - status: schedule.status, - next_run_at: schedule.next_run_at.map(|dt| dt.to_rfc3339()), - last_run_at: schedule.last_run_at.map(|dt| dt.to_rfc3339()), - last_instance_id: schedule.last_instance_id.map(|id| id.to_string()), - created_at: schedule.created_at.to_rfc3339(), - updated_at: schedule.updated_at.to_rfc3339(), - priority: schedule.priority, - allow_duplicate: schedule.allow_duplicate, - input_payload, - }) - } - - async fn count_schedule_invocations(&self, _schedule_id: Uuid) -> BackendResult { - Ok(0) - } - - async fn list_schedule_invocations( - &self, - _schedule_id: Uuid, - _limit: i64, - _offset: i64, - ) -> BackendResult> { - Ok(Vec::new()) - } - - async fn update_schedule_status(&self, schedule_id: Uuid, status: &str) -> BackendResult { - let mut guard = self.schedules.lock().expect("schedules poisoned"); - let Some(schedule) = guard - .values_mut() - .find(|schedule| schedule.id == schedule_id) - else { - return Ok(false); - }; - schedule.status = status.to_string(); - schedule.updated_at = Utc::now(); - Ok(true) - } - - async fn get_distinct_schedule_statuses(&self) -> BackendResult> { - Ok(vec!["active".to_string(), "paused".to_string()]) - } - - async fn get_distinct_schedule_types(&self) -> BackendResult> { - Ok(vec!["cron".to_string(), "interval".to_string()]) - } - - async fn 
get_worker_action_stats( - &self, - _window_minutes: i64, - ) -> BackendResult> { - let statuses = latest_worker_statuses( - &self - .worker_status_updates - .lock() - .expect("worker status updates poisoned"), - ); - - Ok(statuses - .into_iter() - .map(|status| WorkerActionRow { - pool_id: status.pool_id.to_string(), - active_workers: status.active_workers as i64, - actions_per_sec: format!("{:.1}", status.actions_per_sec), - throughput_per_min: status.throughput_per_min as i64, - total_completed: status.total_completed, - median_dequeue_ms: status.median_dequeue_ms, - median_handling_ms: status.median_handling_ms, - last_action_at: status.last_action_at.map(|dt| dt.to_rfc3339()), - updated_at: status.updated_at.to_rfc3339(), - }) - .collect()) - } - - async fn get_worker_aggregate_stats( - &self, - _window_minutes: i64, - ) -> BackendResult { - let statuses = latest_worker_statuses( - &self - .worker_status_updates - .lock() - .expect("worker status updates poisoned"), - ); - - let active_worker_count = statuses - .iter() - .map(|status| status.active_workers as i64) - .sum(); - let total_in_flight = statuses - .iter() - .filter_map(|status| status.total_in_flight) - .sum(); - let total_queue_depth = statuses - .iter() - .filter_map(|status| status.dispatch_queue_size) - .sum(); - let actions_per_sec = statuses - .iter() - .map(|status| status.actions_per_sec) - .sum::(); - - Ok(WorkerAggregateStats { - active_worker_count, - actions_per_sec: format!("{:.1}", actions_per_sec), - total_in_flight, - total_queue_depth, - }) - } - - async fn worker_status_table_exists(&self) -> bool { - !self - .worker_status_updates - .lock() - .expect("worker status updates poisoned") - .is_empty() - } - - async fn schedules_table_exists(&self) -> bool { - !self - .schedules - .lock() - .expect("schedules poisoned") - .is_empty() - } - - async fn get_worker_statuses(&self, _window_minutes: i64) -> BackendResult> { - Ok(latest_worker_statuses( - &self - .worker_status_updates - 
.lock() - .expect("worker status updates poisoned"), - )) - } -} - -fn latest_worker_statuses(updates: &[WorkerStatusUpdate]) -> Vec { - let mut by_pool: HashMap = HashMap::new(); - for update in updates { - by_pool.insert(update.pool_id, update.clone()); - } - - let now = Utc::now(); - let mut statuses: Vec<_> = by_pool - .into_values() - .map(|status| WorkerStatus { - pool_id: status.pool_id, - active_workers: status.active_workers, - throughput_per_min: status.throughput_per_min, - actions_per_sec: status.actions_per_sec, - total_completed: status.total_completed, - last_action_at: status.last_action_at, - updated_at: now, - median_dequeue_ms: status.median_dequeue_ms, - median_handling_ms: status.median_handling_ms, - dispatch_queue_size: Some(status.dispatch_queue_size), - total_in_flight: Some(status.total_in_flight), - median_instance_duration_secs: status.median_instance_duration_secs, - active_instance_count: status.active_instance_count, - total_instances_completed: status.total_instances_completed, - instances_per_sec: status.instances_per_sec, - instances_per_min: status.instances_per_min, - time_series: status.time_series, - }) - .collect(); - - statuses.sort_by(|left, right| right.actions_per_sec.total_cmp(&left.actions_per_sec)); - statuses -} diff --git a/crates/waymark/src/backends/mod.rs b/crates/waymark/src/backends/mod.rs deleted file mode 100644 index 7fbd84ad..00000000 --- a/crates/waymark/src/backends/mod.rs +++ /dev/null @@ -1,15 +0,0 @@ -//! Backend implementations for runner persistence. 
- -mod base; -mod memory; -mod postgres; - -pub use base::{ - ActionAttemptStatus, ActionDone, BackendError, BackendResult, CoreBackend, - GarbageCollectionResult, GarbageCollectorBackend, GraphUpdate, InstanceDone, - InstanceLockStatus, LockClaim, QueuedInstance, QueuedInstanceBatch, SchedulerBackend, - WebappBackend, WorkerStatusBackend, WorkerStatusUpdate, WorkflowRegistration, - WorkflowRegistryBackend, WorkflowVersion, -}; -pub use memory::MemoryBackend; -pub use postgres::PostgresBackend; diff --git a/crates/waymark/src/backends/postgres/core.rs b/crates/waymark/src/backends/postgres/core.rs deleted file mode 100644 index c827b4c0..00000000 --- a/crates/waymark/src/backends/postgres/core.rs +++ /dev/null @@ -1,1992 +0,0 @@ -use std::collections::HashMap; -use std::future::Future; -use std::time::Duration as StdDuration; - -use chrono::{DateTime, Utc}; -use sqlx::{Postgres, QueryBuilder, Row}; -use tonic::async_trait; -use tracing::warn; -use uuid::Uuid; - -use super::PostgresBackend; -use crate::backends::base::{ - ActionDone, BackendError, BackendResult, CoreBackend, GarbageCollectionResult, - GarbageCollectorBackend, GraphUpdate, InstanceDone, InstanceLockStatus, LockClaim, - QueuedInstance, QueuedInstanceBatch, WorkerStatusBackend, WorkerStatusUpdate, -}; -use crate::observability::obs; -use crate::waymark_core::runner::state::RunnerState; - -const INSTANCE_STATUS_QUEUED: &str = "queued"; -const INSTANCE_STATUS_RUNNING: &str = "running"; -const INSTANCE_STATUS_COMPLETED: &str = "completed"; -const INSTANCE_STATUS_FAILED: &str = "failed"; -const TRANSIENT_DEADLOCK_SQLSTATE: &str = "40P01"; -const TRANSIENT_SERIALIZATION_SQLSTATE: &str = "40001"; -const TRANSIENT_RETRY_MAX_ATTEMPTS: usize = 3; -const TRANSIENT_RETRY_INITIAL_BACKOFF_MS: u64 = 25; -const TRANSIENT_RETRY_MAX_BACKOFF_MS: u64 = 250; - -fn instance_result_is_error_wrapper(result: &serde_json::Value) -> bool { - let serde_json::Value::Object(map) = result else { - return false; - }; - map.len() == 
1 - && (map.contains_key("error") - || map.contains_key("__exception__") - || map.contains_key("exception")) -} - -fn instance_done_status(instance: &InstanceDone) -> &'static str { - if instance.error.is_some() - || instance - .result - .as_ref() - .is_some_and(instance_result_is_error_wrapper) - { - INSTANCE_STATUS_FAILED - } else { - INSTANCE_STATUS_COMPLETED - } -} - -fn is_transient_sqlstate(code: &str) -> bool { - matches!( - code, - TRANSIENT_DEADLOCK_SQLSTATE | TRANSIENT_SERIALIZATION_SQLSTATE - ) -} - -fn is_transient_backend_error(err: &BackendError) -> bool { - match err { - BackendError::Sqlx(sqlx::Error::Database(db_err)) => { - db_err.code().as_deref().is_some_and(is_transient_sqlstate) - } - // Fallback for cases where sqlstate is not preserved in wrapping. - BackendError::Message(message) => { - message.contains("deadlock detected") - || message.contains("could not serialize access due to") - } - _ => false, - } -} - -async fn retry_transient_backend( - operation: &'static str, - mut op: Op, -) -> BackendResult -where - Op: FnMut() -> Fut, - Fut: Future>, -{ - let mut attempt = 0usize; - let mut backoff_ms = TRANSIENT_RETRY_INITIAL_BACKOFF_MS; - loop { - match op().await { - Ok(value) => return Ok(value), - Err(err) - if attempt < TRANSIENT_RETRY_MAX_ATTEMPTS && is_transient_backend_error(&err) => - { - attempt += 1; - warn!( - operation, - attempt, - error = %err, - "transient database error; retrying" - ); - tokio::time::sleep(StdDuration::from_millis(backoff_ms)).await; - backoff_ms = - std::cmp::min(backoff_ms.saturating_mul(2), TRANSIENT_RETRY_MAX_BACKOFF_MS); - } - Err(err) => return Err(err), - } - } -} - -impl PostgresBackend { - /// Insert queued instances for run-loop consumption. 
- #[obs] - pub async fn queue_instances(&self, instances: &[QueuedInstance]) -> BackendResult<()> { - if instances.is_empty() { - return Ok(()); - } - let workflow_version_ids: Vec = instances - .iter() - .map(|instance| instance.workflow_version_id) - .collect(); - let workflow_rows = - sqlx::query("SELECT id, workflow_name FROM workflow_versions WHERE id = ANY($1)") - .bind(&workflow_version_ids) - .fetch_all(&self.pool) - .await?; - let mut workflow_names_by_version_id: HashMap = - HashMap::with_capacity(workflow_rows.len()); - for row in workflow_rows { - workflow_names_by_version_id.insert(row.get("id"), row.get("workflow_name")); - } - - let mut queued_payloads = Vec::new(); - let mut runner_payloads = Vec::new(); - for instance in instances { - let state = instance.state.as_ref().ok_or_else(|| { - BackendError::Message("queued instance missing runner state".to_string()) - })?; - let scheduled_at = instance.scheduled_at.unwrap_or_else(Utc::now); - let workflow_name = workflow_names_by_version_id - .get(&instance.workflow_version_id) - .cloned(); - let mut payload_instance = instance.clone(); - payload_instance.scheduled_at = Some(scheduled_at); - queued_payloads.push(( - payload_instance.instance_id, - scheduled_at, - workflow_name.clone(), - INSTANCE_STATUS_QUEUED, - Self::serialize(&payload_instance)?, - )); - let graph = GraphUpdate::from_state(instance.instance_id, state); - runner_payloads.push(( - instance.instance_id, - instance.entry_node, - instance.workflow_version_id, - instance.schedule_id, - workflow_name, - INSTANCE_STATUS_QUEUED, - Self::serialize(&graph)?, - )); - } - - let mut queued_builder: QueryBuilder = QueryBuilder::new( - "INSERT INTO queued_instances (instance_id, scheduled_at, workflow_name, current_status, payload) ", - ); - queued_builder.push_values( - queued_payloads.iter(), - |mut builder, (id, scheduled_at, workflow_name, current_status, payload)| { - builder - .push_bind(*id) - .push_bind(*scheduled_at) - 
.push_bind(workflow_name.as_deref()) - .push_bind(*current_status) - .push_bind(payload.as_slice()); - }, - ); - - let mut runner_builder: QueryBuilder = QueryBuilder::new( - "INSERT INTO runner_instances (instance_id, entry_node, workflow_version_id, schedule_id, workflow_name, current_status, state) ", - ); - runner_builder.push_values( - runner_payloads.iter(), - |mut builder, - ( - id, - entry, - workflow_version_id, - schedule_id, - workflow_name, - current_status, - payload, - )| { - builder - .push_bind(*id) - .push_bind(*entry) - .push_bind(*workflow_version_id) - .push_bind(*schedule_id) - .push_bind(workflow_name.as_deref()) - .push_bind(*current_status) - .push_bind(payload.as_slice()); - }, - ); - - let mut tx = self.pool.begin().await?; - Self::count_query(&self.query_counts, "insert:queued_instances"); - Self::count_batch_size( - &self.batch_size_counts, - "insert:queued_instances", - instances.len(), - ); - queued_builder.build().execute(&mut *tx).await?; - Self::count_query(&self.query_counts, "insert:runner_instances"); - Self::count_batch_size( - &self.batch_size_counts, - "insert:runner_instances", - instances.len(), - ); - runner_builder.build().execute(&mut *tx).await?; - tx.commit().await?; - Ok(()) - } - - /// Upsert worker status for monitoring and activity graphs. 
- #[obs] - pub async fn upsert_worker_status(&self, status: &WorkerStatusUpdate) -> BackendResult<()> { - Self::count_query(&self.query_counts, "upsert:worker_status"); - sqlx::query( - r#" - INSERT INTO worker_status ( - pool_id, - worker_id, - throughput_per_min, - total_completed, - last_action_at, - updated_at, - median_dequeue_ms, - median_handling_ms, - dispatch_queue_size, - total_in_flight, - active_workers, - actions_per_sec, - median_instance_duration_secs, - active_instance_count, - total_instances_completed, - instances_per_sec, - instances_per_min, - time_series - ) - VALUES ($1, 0, $2, $3, $4, NOW(), $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16) - ON CONFLICT (pool_id, worker_id) - DO UPDATE SET - throughput_per_min = EXCLUDED.throughput_per_min, - total_completed = EXCLUDED.total_completed, - last_action_at = EXCLUDED.last_action_at, - updated_at = EXCLUDED.updated_at, - median_dequeue_ms = EXCLUDED.median_dequeue_ms, - median_handling_ms = EXCLUDED.median_handling_ms, - dispatch_queue_size = EXCLUDED.dispatch_queue_size, - total_in_flight = EXCLUDED.total_in_flight, - active_workers = EXCLUDED.active_workers, - actions_per_sec = EXCLUDED.actions_per_sec, - median_instance_duration_secs = EXCLUDED.median_instance_duration_secs, - active_instance_count = EXCLUDED.active_instance_count, - total_instances_completed = EXCLUDED.total_instances_completed, - instances_per_sec = EXCLUDED.instances_per_sec, - instances_per_min = EXCLUDED.instances_per_min, - time_series = EXCLUDED.time_series - "#, - ) - .bind(status.pool_id) - .bind(status.throughput_per_min) - .bind(status.total_completed) - .bind(status.last_action_at) - .bind(status.median_dequeue_ms) - .bind(status.median_handling_ms) - .bind(status.dispatch_queue_size) - .bind(status.total_in_flight) - .bind(status.active_workers) - .bind(status.actions_per_sec) - .bind(status.median_instance_duration_secs) - .bind(status.active_instance_count) - .bind(status.total_instances_completed) - 
.bind(status.instances_per_sec) - .bind(status.instances_per_min) - .bind(&status.time_series) - .execute(&self.pool) - .await?; - - Ok(()) - } - - /// Clear expired queue locks so they can be claimed again by the runloop. - /// - /// This uses the same `FOR UPDATE SKIP LOCKED` claim pattern as dequeue to - /// avoid blocking under concurrent sweepers. - #[obs] - pub async fn reclaim_expired_instance_locks(&self, size: usize) -> BackendResult { - if size == 0 { - return Ok(0); - } - - let now = Utc::now(); - let mut tx = self.pool.begin().await?; - Self::count_query(&self.query_counts, "update:queued_instances_expired_unlock"); - let rows = sqlx::query( - r#" - WITH expired AS ( - SELECT instance_id - FROM queued_instances - WHERE lock_uuid IS NOT NULL - AND lock_expires_at <= $1 - ORDER BY lock_expires_at, scheduled_at, created_at - LIMIT $2 - FOR UPDATE SKIP LOCKED - ) - UPDATE queued_instances AS qi - SET lock_uuid = NULL, - lock_expires_at = NULL - FROM expired - WHERE qi.instance_id = expired.instance_id - RETURNING qi.instance_id - "#, - ) - .bind(now) - .bind(size as i64) - .fetch_all(&mut *tx) - .await?; - - if !rows.is_empty() { - let instance_ids: Vec = rows.iter().map(|row| row.get("instance_id")).collect(); - sqlx::query( - "UPDATE runner_instances SET current_status = $2 WHERE instance_id = ANY($1) AND result IS NULL AND error IS NULL", - ) - .bind(&instance_ids) - .bind(INSTANCE_STATUS_QUEUED) - .execute(&mut *tx) - .await?; - } - - tx.commit().await?; - - if !rows.is_empty() { - Self::count_batch_size( - &self.batch_size_counts, - "update:queued_instances_expired_unlock", - rows.len(), - ); - } - - Ok(rows.len()) - } - - /// Delete old finished instances and their action attempt rows. 
- #[obs] - pub async fn collect_done_instances_impl( - &self, - older_than: DateTime, - limit: usize, - ) -> BackendResult { - if limit == 0 { - return Ok(GarbageCollectionResult::default()); - } - - let mut tx = self.pool.begin().await?; - Self::count_query(&self.query_counts, "select:runner_instances_gc_candidates"); - let candidate_rows = sqlx::query( - r#" - SELECT instance_id, state - FROM runner_instances - WHERE created_at < $1 - AND (result IS NOT NULL OR error IS NOT NULL) - ORDER BY created_at, instance_id - LIMIT $2 - FOR UPDATE SKIP LOCKED - "#, - ) - .bind(older_than) - .bind(limit as i64) - .fetch_all(&mut *tx) - .await?; - - if candidate_rows.is_empty() { - tx.commit().await?; - return Ok(GarbageCollectionResult::default()); - } - - let mut instance_ids = Vec::with_capacity(candidate_rows.len()); - let mut action_execution_ids = Vec::new(); - for row in candidate_rows { - let instance_id: Uuid = row.get("instance_id"); - let state_payload: Option> = row.get("state"); - instance_ids.push(instance_id); - - let Some(state_payload) = state_payload else { - continue; - }; - match Self::deserialize::(&state_payload) { - Ok(graph) => { - for (execution_id, node) in graph.nodes { - if node.is_action_call() { - action_execution_ids.push(execution_id); - } - } - } - Err(err) => { - warn!( - %instance_id, - error = %err, - "failed to decode runner state while collecting garbage" - ); - } - } - } - - action_execution_ids.sort_unstable(); - action_execution_ids.dedup(); - let deleted_actions = if action_execution_ids.is_empty() { - 0 - } else { - Self::count_query(&self.query_counts, "delete:runner_actions_done_gc"); - let result = - sqlx::query("DELETE FROM runner_actions_done WHERE execution_id = ANY($1)") - .bind(&action_execution_ids) - .execute(&mut *tx) - .await?; - let rows = result.rows_affected() as usize; - Self::count_batch_size( - &self.batch_size_counts, - "delete:runner_actions_done_gc", - rows, - ); - rows - }; - - 
Self::count_query(&self.query_counts, "delete:queued_instances_gc"); - let _ = sqlx::query("DELETE FROM queued_instances WHERE instance_id = ANY($1)") - .bind(&instance_ids) - .execute(&mut *tx) - .await?; - - Self::count_query(&self.query_counts, "delete:runner_instances_gc"); - let deleted_instances_result = - sqlx::query("DELETE FROM runner_instances WHERE instance_id = ANY($1)") - .bind(&instance_ids) - .execute(&mut *tx) - .await?; - let deleted_instances = deleted_instances_result.rows_affected() as usize; - Self::count_batch_size( - &self.batch_size_counts, - "delete:runner_instances_gc", - deleted_instances, - ); - tx.commit().await?; - - Ok(GarbageCollectionResult { - deleted_instances, - deleted_actions, - }) - } - - #[obs] - async fn save_graphs_impl( - &self, - claim: LockClaim, - graphs: &[GraphUpdate], - ) -> BackendResult> { - retry_transient_backend("save_graphs_impl", || { - let claim = claim.clone(); - async move { self.save_graphs_once(claim, graphs).await } - }) - .await - } - - async fn save_graphs_once( - &self, - claim: LockClaim, - graphs: &[GraphUpdate], - ) -> BackendResult> { - if graphs.is_empty() { - return Ok(Vec::new()); - } - let mut payloads = Vec::with_capacity(graphs.len()); - for graph in graphs { - payloads.push(( - graph.instance_id, - graph.next_scheduled_at(), - claim.lock_expires_at, - Self::serialize(graph)?, - )); - } - - Self::count_query(&self.query_counts, "update:queued_instances_scheduled_at"); - Self::count_batch_size( - &self.batch_size_counts, - "update:queued_instances_scheduled_at", - payloads.len(), - ); - let now = Utc::now(); - let mut schedule_builder: QueryBuilder = QueryBuilder::new( - "UPDATE queued_instances AS qi SET scheduled_at = v.scheduled_at, lock_expires_at = CASE WHEN qi.lock_expires_at IS NULL OR qi.lock_expires_at < v.lock_expires_at THEN v.lock_expires_at ELSE qi.lock_expires_at END FROM (", - ); - schedule_builder.push_values( - payloads.iter(), - |mut b, (instance_id, scheduled_at, 
lock_expires_at, _payload)| { - b.push_bind(*instance_id) - .push_bind(*scheduled_at) - .push_bind(*lock_expires_at); - }, - ); - schedule_builder.push( - ") AS v(instance_id, scheduled_at, lock_expires_at) - WHERE qi.instance_id = v.instance_id - AND qi.lock_uuid = ", - ); - schedule_builder.push_bind(claim.lock_uuid); - schedule_builder.push(" AND (qi.lock_expires_at IS NULL OR qi.lock_expires_at > "); - schedule_builder.push_bind(now); - schedule_builder.push(")"); - schedule_builder.build().execute(&self.pool).await?; - - Self::count_query(&self.query_counts, "update:runner_instances_state"); - Self::count_batch_size( - &self.batch_size_counts, - "update:runner_instances_state", - payloads.len(), - ); - let mut runner_builder: QueryBuilder = - QueryBuilder::new("UPDATE runner_instances AS ri SET state = v.state FROM ("); - runner_builder.push_values( - payloads.iter(), - |mut b, (instance_id, _scheduled_at, _lock_expires_at, payload)| { - b.push_bind(*instance_id).push_bind(payload.as_slice()); - }, - ); - runner_builder.push( - ") AS v(instance_id, state) - JOIN queued_instances qi ON qi.instance_id = v.instance_id - WHERE ri.instance_id = v.instance_id - AND qi.lock_uuid = ", - ); - runner_builder.push_bind(claim.lock_uuid); - runner_builder.push(" AND (qi.lock_expires_at IS NULL OR qi.lock_expires_at > "); - runner_builder.push_bind(now); - runner_builder.push(")"); - runner_builder.build().execute(&self.pool).await?; - - let ids: Vec = graphs.iter().map(|graph| graph.instance_id).collect(); - let lock_rows = sqlx::query( - "SELECT instance_id, lock_uuid, lock_expires_at FROM queued_instances WHERE instance_id = ANY($1)", - ) - .bind(&ids) - .fetch_all(&self.pool) - .await?; - - let mut lock_map: HashMap = HashMap::new(); - for row in lock_rows { - let instance_id: Uuid = row.get(0); - lock_map.insert( - instance_id, - InstanceLockStatus { - instance_id, - lock_uuid: row.get(1), - lock_expires_at: row.get(2), - }, - ); - } - - let mut locks = 
Vec::with_capacity(ids.len()); - for instance_id in ids { - locks.push( - lock_map - .get(&instance_id) - .cloned() - .unwrap_or(InstanceLockStatus { - instance_id, - lock_uuid: None, - lock_expires_at: None, - }), - ); - } - Ok(locks) - } - - #[obs] - async fn save_actions_done_impl(&self, actions: &[ActionDone]) -> BackendResult<()> { - if actions.is_empty() { - return Ok(()); - } - Self::count_query(&self.query_counts, "insert:runner_actions_done"); - Self::count_batch_size( - &self.batch_size_counts, - "insert:runner_actions_done", - actions.len(), - ); - let mut payloads = Vec::new(); - for action in actions { - payloads.push(( - action.execution_id, - action.attempt, - action.status.to_string(), - action.started_at, - action.completed_at, - action.duration_ms, - Self::serialize(&action.result)?, - )); - } - let mut builder: QueryBuilder = QueryBuilder::new( - "INSERT INTO runner_actions_done (execution_id, attempt, status, started_at, completed_at, duration_ms, result) ", - ); - builder.push_values( - payloads.iter(), - |mut b, (execution_id, attempt, status, started_at, completed_at, duration_ms, payload)| { - b.push_bind(*execution_id) - .push_bind(*attempt) - .push_bind(status.as_str()) - .push_bind(*started_at) - .push_bind(*completed_at) - .push_bind(*duration_ms) - .push_bind(payload.as_slice()); - }, - ); - builder.build().execute(&self.pool).await?; - Ok(()) - } - - #[obs] - async fn get_queued_instances_impl( - &self, - size: usize, - claim: LockClaim, - ) -> BackendResult { - retry_transient_backend("get_queued_instances_impl", || { - let claim = claim.clone(); - async move { self.get_queued_instances_once(size, claim).await } - }) - .await - } - - async fn get_queued_instances_once( - &self, - size: usize, - claim: LockClaim, - ) -> BackendResult { - if size == 0 { - return Ok(QueuedInstanceBatch { - instances: Vec::new(), - }); - } - let now = Utc::now(); - let mut tx = self.pool.begin().await?; - Self::count_query(&self.query_counts, 
"select:queued_instances"); - let rows = sqlx::query( - r#" - WITH claimed AS ( - SELECT instance_id, payload - FROM queued_instances - WHERE scheduled_at <= $1 - AND (lock_uuid IS NULL OR lock_expires_at <= $1) - ORDER BY scheduled_at, created_at - LIMIT $2 - FOR UPDATE SKIP LOCKED - ), - updated AS ( - UPDATE queued_instances AS qi - SET lock_uuid = $3, - lock_expires_at = $4 - FROM claimed - WHERE qi.instance_id = claimed.instance_id - RETURNING qi.instance_id, claimed.payload - ) - SELECT updated.instance_id, updated.payload, ri.state - FROM updated - JOIN runner_instances ri ON ri.instance_id = updated.instance_id - "#, - ) - .bind(now) - .bind(size as i64) - .bind(claim.lock_uuid) - .bind(claim.lock_expires_at) - .fetch_all(&mut *tx) - .await?; - - if rows.is_empty() { - tx.commit().await?; - return Ok(QueuedInstanceBatch { - instances: Vec::new(), - }); - } - - let claimed_instance_ids: Vec = - rows.iter().map(|row| row.get("instance_id")).collect(); - sqlx::query("UPDATE runner_instances SET current_status = $2 WHERE instance_id = ANY($1)") - .bind(&claimed_instance_ids) - .bind(INSTANCE_STATUS_RUNNING) - .execute(&mut *tx) - .await?; - - Self::count_batch_size( - &self.batch_size_counts, - "select:queued_instances", - rows.len(), - ); - tx.commit().await?; - - let mut instances = Vec::new(); - let mut action_node_ids_by_instance: HashMap> = HashMap::new(); - let mut all_action_node_ids: Vec = Vec::new(); - for row in rows { - let instance_id: Uuid = row.get(0); - let payload: Vec = row.get(1); - let state_payload: Option> = row.get(2); - let mut instance: QueuedInstance = Self::deserialize(&payload)?; - instance.instance_id = instance_id; - if let Some(state_payload) = state_payload { - let graph: GraphUpdate = Self::deserialize(&state_payload)?; - let action_node_ids: Vec = graph - .nodes - .iter() - .filter_map(|(node_id, node)| node.is_action_call().then_some(*node_id)) - .collect(); - if !action_node_ids.is_empty() { - 
all_action_node_ids.extend(action_node_ids.iter().copied()); - action_node_ids_by_instance.insert(instance_id, action_node_ids); - } - instance.state = Some(RunnerState::new( - None, - Some(graph.nodes), - Some(graph.edges), - false, - )); - } - instances.push(instance); - } - - if !all_action_node_ids.is_empty() { - all_action_node_ids.sort_unstable(); - all_action_node_ids.dedup(); - - Self::count_query( - &self.query_counts, - "select:runner_actions_done_by_execution_id", - ); - let rows = sqlx::query( - r#" - SELECT DISTINCT ON (execution_id) - execution_id, - result - FROM runner_actions_done - WHERE execution_id = ANY($1) - ORDER BY execution_id, attempt DESC, id DESC - "#, - ) - .bind(&all_action_node_ids) - .fetch_all(&self.pool) - .await?; - - let mut action_results_by_execution_id: HashMap = - HashMap::new(); - for row in rows { - let execution_id: Uuid = row.get("execution_id"); - let result_payload: Option> = row.get("result"); - let Some(result_payload) = result_payload else { - continue; - }; - let result: serde_json::Value = Self::deserialize(&result_payload)?; - action_results_by_execution_id.insert(execution_id, result); - } - - for instance in &mut instances { - let Some(action_node_ids) = action_node_ids_by_instance.get(&instance.instance_id) - else { - continue; - }; - for node_id in action_node_ids { - if let Some(result) = action_results_by_execution_id.get(node_id) { - instance.action_results.insert(*node_id, result.clone()); - } - } - } - } - - Ok(QueuedInstanceBatch { instances }) - } - - #[obs] - async fn save_instances_done_impl(&self, instances: &[InstanceDone]) -> BackendResult<()> { - retry_transient_backend("save_instances_done_impl", || async move { - self.save_instances_done_once(instances).await - }) - .await - } - - async fn save_instances_done_once(&self, instances: &[InstanceDone]) -> BackendResult<()> { - if instances.is_empty() { - return Ok(()); - } - let ids: Vec = instances - .iter() - .map(|instance| instance.executor_id) 
- .collect(); - - let mut tx = self.pool.begin().await?; - Self::count_query(&self.query_counts, "delete:queued_instances_by_id"); - sqlx::query("DELETE FROM queued_instances WHERE instance_id = ANY($1)") - .bind(&ids) - .execute(&mut *tx) - .await?; - - Self::count_query(&self.query_counts, "update:runner_instances_result"); - Self::count_batch_size( - &self.batch_size_counts, - "update:runner_instances_result", - instances.len(), - ); - let mut payloads = Vec::with_capacity(instances.len()); - for instance in instances { - let current_status = instance_done_status(instance); - let result = match &instance.result { - Some(value) => Some(Self::serialize(value)?), - None => None, - }; - let error = match &instance.error { - Some(value) => Some(Self::serialize(value)?), - None => None, - }; - payloads.push((instance.executor_id, current_status, result, error)); - } - let mut builder: QueryBuilder = QueryBuilder::new( - "UPDATE runner_instances AS ri SET result = v.result, error = v.error, current_status = v.current_status FROM (", - ); - builder.push_values( - payloads.iter(), - |mut b, (instance_id, current_status, result, error)| { - b.push_bind(*instance_id) - .push_bind(*current_status) - .push_bind(result.as_deref()) - .push_bind(error.as_deref()); - }, - ); - builder.push( - ") AS v(instance_id, current_status, result, error) WHERE ri.instance_id = v.instance_id", - ); - builder.build().execute(&mut *tx).await?; - tx.commit().await?; - Ok(()) - } -} - -#[async_trait] -impl CoreBackend for PostgresBackend { - fn clone_box(&self) -> Box { - Box::new(self.clone()) - } - - async fn save_graphs( - &self, - claim: LockClaim, - graphs: &[GraphUpdate], - ) -> BackendResult> { - self.save_graphs_impl(claim, graphs).await - } - - async fn save_actions_done(&self, actions: &[ActionDone]) -> BackendResult<()> { - self.save_actions_done_impl(actions).await - } - - async fn get_queued_instances( - &self, - size: usize, - claim: LockClaim, - ) -> BackendResult { - 
self.get_queued_instances_impl(size, claim).await - } - - async fn save_instances_done(&self, instances: &[InstanceDone]) -> BackendResult<()> { - self.save_instances_done_impl(instances).await - } - - async fn refresh_instance_locks( - &self, - claim: LockClaim, - instance_ids: &[Uuid], - ) -> BackendResult> { - retry_transient_backend("refresh_instance_locks", || { - let claim = claim.clone(); - async move { self.refresh_instance_locks_once(claim, instance_ids).await } - }) - .await - } - - async fn release_instance_locks( - &self, - lock_uuid: Uuid, - instance_ids: &[Uuid], - ) -> BackendResult<()> { - if instance_ids.is_empty() { - return Ok(()); - } - Self::count_query(&self.query_counts, "update:queued_instances_release"); - let released_rows = sqlx::query( - r#" - WITH releasable AS ( - SELECT instance_id - FROM queued_instances - WHERE instance_id = ANY($1) - AND lock_uuid = $2 - FOR UPDATE SKIP LOCKED - ), - released AS ( - UPDATE queued_instances AS qi - SET lock_uuid = NULL, - lock_expires_at = NULL - FROM releasable - WHERE qi.instance_id = releasable.instance_id - RETURNING qi.instance_id - ) - SELECT instance_id FROM released - "#, - ) - .bind(instance_ids) - .bind(lock_uuid) - .fetch_all(&self.pool) - .await?; - - if !released_rows.is_empty() { - let released_instance_ids: Vec = released_rows - .iter() - .map(|row| row.get("instance_id")) - .collect(); - sqlx::query( - "UPDATE runner_instances SET current_status = $2 WHERE instance_id = ANY($1) AND result IS NULL AND error IS NULL", - ) - .bind(&released_instance_ids) - .bind(INSTANCE_STATUS_QUEUED) - .execute(&self.pool) - .await?; - } - - Ok(()) - } - - async fn queue_instances(&self, instances: &[QueuedInstance]) -> BackendResult<()> { - PostgresBackend::queue_instances(self, instances).await - } -} - -impl PostgresBackend { - async fn refresh_instance_locks_once( - &self, - claim: LockClaim, - instance_ids: &[Uuid], - ) -> BackendResult> { - if instance_ids.is_empty() { - return Ok(Vec::new()); - 
} - Self::count_query(&self.query_counts, "update:queued_instances_lock"); - sqlx::query( - r#" - WITH claimable AS ( - SELECT instance_id - FROM queued_instances - WHERE instance_id = ANY($2) - AND lock_uuid = $3 - FOR UPDATE SKIP LOCKED - ) - UPDATE queued_instances AS qi - SET lock_expires_at = $1 - FROM claimable - WHERE qi.instance_id = claimable.instance_id - "#, - ) - .bind(claim.lock_expires_at) - .bind(instance_ids) - .bind(claim.lock_uuid) - .execute(&self.pool) - .await?; - let rows = sqlx::query( - "SELECT instance_id, lock_uuid, lock_expires_at FROM queued_instances WHERE instance_id = ANY($1)", - ) - .bind(instance_ids) - .fetch_all(&self.pool) - .await?; - let mut locks = Vec::with_capacity(rows.len()); - for row in rows { - locks.push(InstanceLockStatus { - instance_id: row.get(0), - lock_uuid: row.get(1), - lock_expires_at: row.get(2), - }); - } - Ok(locks) - } -} - -#[async_trait] -impl GarbageCollectorBackend for PostgresBackend { - async fn collect_done_instances( - &self, - older_than: DateTime, - limit: usize, - ) -> BackendResult { - self.collect_done_instances_impl(older_than, limit).await - } -} - -#[async_trait] -impl WorkerStatusBackend for PostgresBackend { - async fn upsert_worker_status(&self, status: &WorkerStatusUpdate) -> BackendResult<()> { - PostgresBackend::upsert_worker_status(self, status).await - } -} - -#[cfg(test)] -mod tests { - use std::collections::{HashMap, HashSet}; - use std::sync::Arc; - use std::sync::atomic::{AtomicUsize, Ordering}; - use std::time::Duration as StdDuration; - - use chrono::{DateTime, Duration, Utc}; - use serial_test::serial; - use sqlx::Row; - use uuid::Uuid; - - use super::super::test_helpers::setup_backend; - use super::*; - use crate::backends::{ - ActionAttemptStatus, CoreBackend, GarbageCollectorBackend, WorkerStatusBackend, - }; - use crate::waymark_core::runner::state::{ActionCallSpec, ExecutionNode, NodeStatus}; - use waymark_dag::EdgeType; - - fn sample_runner_state() -> RunnerState { - 
RunnerState::new(None, None, None, false) - } - - fn sample_queued_instance(instance_id: Uuid, entry_node: Uuid) -> QueuedInstance { - QueuedInstance { - workflow_version_id: Uuid::new_v4(), - schedule_id: None, - dag: None, - entry_node, - state: Some(sample_runner_state()), - action_results: HashMap::new(), - instance_id, - scheduled_at: Some(Utc::now() - Duration::seconds(1)), - } - } - - fn sample_execution_node(node_id: Uuid) -> ExecutionNode { - ExecutionNode { - node_id, - node_type: "action_call".to_string(), - label: "@tests.action()".to_string(), - status: NodeStatus::Queued, - template_id: Some("n0".to_string()), - targets: Vec::new(), - action: Some(ActionCallSpec { - action_name: "tests.action".to_string(), - module_name: Some("tests".to_string()), - kwargs: HashMap::new(), - }), - value_expr: None, - assignments: HashMap::new(), - action_attempt: 1, - started_at: None, - completed_at: None, - scheduled_at: Some(Utc::now() + Duration::seconds(15)), - } - } - - fn sample_lock_claim() -> LockClaim { - LockClaim { - lock_uuid: Uuid::new_v4(), - lock_expires_at: Utc::now() + Duration::seconds(30), - } - } - - async fn insert_workflow_version_row( - backend: &PostgresBackend, - workflow_version_id: Uuid, - workflow_name: &str, - ) { - sqlx::query( - "INSERT INTO workflow_versions (id, workflow_name, workflow_version, ir_hash, program_proto, concurrent) VALUES ($1, $2, $3, $4, $5, $6)", - ) - .bind(workflow_version_id) - .bind(workflow_name) - .bind("v1") - .bind(format!("hash-{workflow_name}")) - .bind(vec![0_u8]) - .bind(false) - .execute(backend.pool()) - .await - .expect("insert workflow version row"); - } - - async fn claim_instance(backend: &PostgresBackend, instance_id: Uuid) -> LockClaim { - let claim = sample_lock_claim(); - let batch = CoreBackend::get_queued_instances(backend, 10, claim.clone()) - .await - .expect("claim queued instance"); - assert_eq!(batch.instances.len(), 1); - assert_eq!(batch.instances[0].instance_id, instance_id); - claim - 
} - - #[serial(postgres)] - #[tokio::test] - async fn core_queue_instances_happy_path() { - let backend = setup_backend().await; - let instance_id = Uuid::new_v4(); - let entry_node = Uuid::new_v4(); - let queued = sample_queued_instance(instance_id, entry_node); - let expected_workflow_version_id = queued.workflow_version_id; - - CoreBackend::queue_instances(&backend, &[queued]) - .await - .expect("queue instances"); - - let queued_count: i64 = - sqlx::query_scalar("SELECT COUNT(*) FROM queued_instances WHERE instance_id = $1") - .bind(instance_id) - .fetch_one(backend.pool()) - .await - .expect("queued count"); - assert_eq!(queued_count, 1); - - let runner_count: i64 = - sqlx::query_scalar("SELECT COUNT(*) FROM runner_instances WHERE instance_id = $1") - .bind(instance_id) - .fetch_one(backend.pool()) - .await - .expect("runner count"); - assert_eq!(runner_count, 1); - - let workflow_version_id: Option = sqlx::query_scalar( - "SELECT workflow_version_id FROM runner_instances WHERE instance_id = $1", - ) - .bind(instance_id) - .fetch_one(backend.pool()) - .await - .expect("runner workflow version"); - assert_eq!(workflow_version_id, Some(expected_workflow_version_id)); - - let runner_status: Option = sqlx::query_scalar( - "SELECT current_status FROM runner_instances WHERE instance_id = $1", - ) - .bind(instance_id) - .fetch_one(backend.pool()) - .await - .expect("runner current status"); - assert_eq!(runner_status.as_deref(), Some(INSTANCE_STATUS_QUEUED)); - - let queued_status: Option = sqlx::query_scalar( - "SELECT current_status FROM queued_instances WHERE instance_id = $1", - ) - .bind(instance_id) - .fetch_one(backend.pool()) - .await - .expect("queued current status"); - assert_eq!(queued_status.as_deref(), Some(INSTANCE_STATUS_QUEUED)); - } - - #[serial(postgres)] - #[tokio::test] - async fn core_queue_instances_persists_workflow_name_when_registered() { - let backend = setup_backend().await; - let instance_id = Uuid::new_v4(); - let entry_node = 
Uuid::new_v4(); - let workflow_version_id = Uuid::new_v4(); - insert_workflow_version_row(&backend, workflow_version_id, "tests.searchable").await; - - let queued = QueuedInstance { - workflow_version_id, - schedule_id: None, - dag: None, - entry_node, - state: Some(sample_runner_state()), - action_results: HashMap::new(), - instance_id, - scheduled_at: Some(Utc::now()), - }; - - CoreBackend::queue_instances(&backend, &[queued]) - .await - .expect("queue instances"); - - let runner_workflow_name: Option = - sqlx::query_scalar("SELECT workflow_name FROM runner_instances WHERE instance_id = $1") - .bind(instance_id) - .fetch_one(backend.pool()) - .await - .expect("runner workflow_name"); - assert_eq!(runner_workflow_name.as_deref(), Some("tests.searchable")); - - let queued_workflow_name: Option = - sqlx::query_scalar("SELECT workflow_name FROM queued_instances WHERE instance_id = $1") - .bind(instance_id) - .fetch_one(backend.pool()) - .await - .expect("queued workflow_name"); - assert_eq!(queued_workflow_name.as_deref(), Some("tests.searchable")); - } - - #[serial(postgres)] - #[tokio::test] - async fn core_get_queued_instances_updates_runner_status_without_mutating_queue_status() { - let backend = setup_backend().await; - let instance_id = Uuid::new_v4(); - let entry_node = Uuid::new_v4(); - let queued = sample_queued_instance(instance_id, entry_node); - CoreBackend::queue_instances(&backend, &[queued]) - .await - .expect("queue instances"); - - let claim = sample_lock_claim(); - let batch = CoreBackend::get_queued_instances(&backend, 1, claim.clone()) - .await - .expect("get queued instances"); - assert_eq!(batch.instances.len(), 1); - assert_eq!(batch.instances[0].instance_id, instance_id); - - let row = sqlx::query("SELECT lock_uuid FROM queued_instances WHERE instance_id = $1") - .bind(instance_id) - .fetch_one(backend.pool()) - .await - .expect("queued lock row"); - let lock_uuid: Option = row.get("lock_uuid"); - assert_eq!(lock_uuid, Some(claim.lock_uuid)); 
- - let queued_status: Option = sqlx::query_scalar( - "SELECT current_status FROM queued_instances WHERE instance_id = $1", - ) - .bind(instance_id) - .fetch_one(backend.pool()) - .await - .expect("queued current status"); - assert_eq!(queued_status.as_deref(), Some(INSTANCE_STATUS_QUEUED)); - - let runner_status: Option = sqlx::query_scalar( - "SELECT current_status FROM runner_instances WHERE instance_id = $1", - ) - .bind(instance_id) - .fetch_one(backend.pool()) - .await - .expect("runner current status"); - assert_eq!(runner_status.as_deref(), Some(INSTANCE_STATUS_RUNNING)); - } - - #[serial(postgres)] - #[tokio::test] - async fn core_get_queued_instances_restores_action_results_from_actions_done() { - let backend = setup_backend().await; - let instance_id = Uuid::new_v4(); - let entry_node = Uuid::new_v4(); - CoreBackend::queue_instances(&backend, &[sample_queued_instance(instance_id, entry_node)]) - .await - .expect("queue instances"); - - let initial_claim = sample_lock_claim(); - let initial_batch = CoreBackend::get_queued_instances(&backend, 1, initial_claim.clone()) - .await - .expect("initial claim"); - assert_eq!(initial_batch.instances.len(), 1); - - let execution_id = Uuid::new_v4(); - let mut completed_action_node = sample_execution_node(execution_id); - completed_action_node.status = NodeStatus::Completed; - completed_action_node.scheduled_at = None; - - let graph = GraphUpdate { - instance_id, - nodes: HashMap::from([(execution_id, completed_action_node)]), - edges: std::collections::HashSet::new(), - }; - CoreBackend::save_graphs( - &backend, - initial_claim.clone(), - std::slice::from_ref(&graph), - ) - .await - .expect("persist graph"); - - CoreBackend::save_actions_done( - &backend, - &[ActionDone { - execution_id, - attempt: 1, - status: ActionAttemptStatus::Completed, - started_at: None, - completed_at: Some(Utc::now()), - duration_ms: None, - result: serde_json::json!({"ok": true}), - }], - ) - .await - .expect("persist action result"); - - 
CoreBackend::release_instance_locks(&backend, initial_claim.lock_uuid, &[instance_id]) - .await - .expect("release initial lock"); - - let queued_status: Option = sqlx::query_scalar( - "SELECT current_status FROM queued_instances WHERE instance_id = $1", - ) - .bind(instance_id) - .fetch_one(backend.pool()) - .await - .expect("queued current status after release"); - assert_eq!(queued_status.as_deref(), Some(INSTANCE_STATUS_QUEUED)); - - let runner_status: Option = sqlx::query_scalar( - "SELECT current_status FROM runner_instances WHERE instance_id = $1", - ) - .bind(instance_id) - .fetch_one(backend.pool()) - .await - .expect("runner current status after release"); - assert_eq!(runner_status.as_deref(), Some(INSTANCE_STATUS_QUEUED)); - - let second_claim = sample_lock_claim(); - let batch = CoreBackend::get_queued_instances(&backend, 1, second_claim) - .await - .expect("rehydrate instance"); - assert_eq!(batch.instances.len(), 1); - assert_eq!( - batch.instances[0].action_results.get(&execution_id), - Some(&serde_json::json!({"ok": true})) - ); - } - - #[serial(postgres)] - #[tokio::test] - async fn core_save_graphs_happy_path() { - let backend = setup_backend().await; - let instance_id = Uuid::new_v4(); - let entry_node = Uuid::new_v4(); - CoreBackend::queue_instances(&backend, &[sample_queued_instance(instance_id, entry_node)]) - .await - .expect("queue instances"); - let claim = claim_instance(&backend, instance_id).await; - - let execution_id = Uuid::new_v4(); - let mut nodes = HashMap::new(); - nodes.insert(execution_id, sample_execution_node(execution_id)); - let graph = GraphUpdate { - instance_id, - nodes, - edges: std::collections::HashSet::from([ - crate::waymark_core::runner::state::ExecutionEdge { - source: execution_id, - target: execution_id, - edge_type: EdgeType::StateMachine, - }, - ]), - }; - let extended_claim = LockClaim { - lock_uuid: claim.lock_uuid, - lock_expires_at: claim.lock_expires_at + Duration::seconds(120), - }; - - let locks = 
CoreBackend::save_graphs( - &backend, - extended_claim.clone(), - std::slice::from_ref(&graph), - ) - .await - .expect("save graphs"); - assert_eq!(locks.len(), 1); - assert_eq!(locks[0].instance_id, instance_id); - assert_eq!(locks[0].lock_uuid, Some(claim.lock_uuid)); - assert_eq!( - locks[0] - .lock_expires_at - .map(|value| value.timestamp_micros()), - Some(extended_claim.lock_expires_at.timestamp_micros()), - ); - - let state_payload: Option> = - sqlx::query_scalar("SELECT state FROM runner_instances WHERE instance_id = $1") - .bind(instance_id) - .fetch_one(backend.pool()) - .await - .expect("runner state payload"); - let decoded: GraphUpdate = rmp_serde::from_slice(&state_payload.expect("state payload")) - .expect("decode graph update"); - assert_eq!(decoded.nodes.len(), 1); - assert_eq!(decoded.edges.len(), 1); - } - - #[serial(postgres)] - #[tokio::test] - async fn core_save_graphs_returns_lock_status_for_duplicate_instance_updates() { - let backend = setup_backend().await; - let instance_id = Uuid::new_v4(); - let entry_node = Uuid::new_v4(); - CoreBackend::queue_instances(&backend, &[sample_queued_instance(instance_id, entry_node)]) - .await - .expect("queue instances"); - let claim = claim_instance(&backend, instance_id).await; - - let first_node_id = Uuid::new_v4(); - let second_node_id = Uuid::new_v4(); - let first_graph = GraphUpdate { - instance_id, - nodes: HashMap::from([(first_node_id, sample_execution_node(first_node_id))]), - edges: HashSet::new(), - }; - let second_graph = GraphUpdate { - instance_id, - nodes: HashMap::from([(second_node_id, sample_execution_node(second_node_id))]), - edges: HashSet::new(), - }; - - let locks = CoreBackend::save_graphs( - &backend, - claim.clone(), - &[first_graph.clone(), second_graph.clone()], - ) - .await - .expect("save duplicate instance graphs"); - assert_eq!(locks.len(), 2); - assert_eq!(locks[0].instance_id, instance_id); - assert_eq!(locks[1].instance_id, instance_id); - assert_eq!(locks[0].lock_uuid, 
Some(claim.lock_uuid)); - assert_eq!(locks[1].lock_uuid, Some(claim.lock_uuid)); - } - - #[serial(postgres)] - #[tokio::test] - async fn core_save_actions_done_happy_path() { - let backend = setup_backend().await; - let execution_id = Uuid::new_v4(); - CoreBackend::save_actions_done( - &backend, - &[ActionDone { - execution_id, - attempt: 1, - status: ActionAttemptStatus::Completed, - started_at: None, - completed_at: Some(Utc::now()), - duration_ms: None, - result: serde_json::json!({"ok": true}), - }], - ) - .await - .expect("save actions done"); - - let row = sqlx::query( - "SELECT execution_id, attempt, status, started_at, completed_at, duration_ms, result FROM runner_actions_done WHERE execution_id = $1", - ) - .bind(execution_id) - .fetch_one(backend.pool()) - .await - .expect("action row"); - - assert_eq!(row.get::("execution_id"), execution_id); - assert_eq!(row.get::("attempt"), 1); - assert_eq!(row.get::("status"), "completed"); - assert!( - row.get::>, _>("completed_at") - .is_some() - ); - let payload: Option> = row.get("result"); - let decoded: serde_json::Value = - rmp_serde::from_slice(&payload.expect("action payload")).expect("decode action"); - assert_eq!(decoded, serde_json::json!({"ok": true})); - } - - #[serial(postgres)] - #[tokio::test] - async fn core_refresh_instance_locks_happy_path() { - let backend = setup_backend().await; - let instance_id = Uuid::new_v4(); - let entry_node = Uuid::new_v4(); - CoreBackend::queue_instances(&backend, &[sample_queued_instance(instance_id, entry_node)]) - .await - .expect("queue instances"); - let claim = claim_instance(&backend, instance_id).await; - - let refreshed_expiry = Utc::now() + Duration::seconds(120); - let refreshed = CoreBackend::refresh_instance_locks( - &backend, - LockClaim { - lock_uuid: claim.lock_uuid, - lock_expires_at: refreshed_expiry, - }, - &[instance_id], - ) - .await - .expect("refresh locks"); - - assert_eq!(refreshed.len(), 1); - assert_eq!(refreshed[0].instance_id, instance_id); 
- assert_eq!(refreshed[0].lock_uuid, Some(claim.lock_uuid)); - assert_eq!( - refreshed[0] - .lock_expires_at - .map(|value| value.timestamp_micros()), - Some(refreshed_expiry.timestamp_micros()), - ); - } - - #[serial(postgres)] - #[tokio::test] - async fn core_refresh_instance_locks_skip_locked_does_not_block_or_override() { - let backend = setup_backend().await; - let instance_id = Uuid::new_v4(); - let entry_node = Uuid::new_v4(); - CoreBackend::queue_instances(&backend, &[sample_queued_instance(instance_id, entry_node)]) - .await - .expect("queue instances"); - let claim = claim_instance(&backend, instance_id).await; - - let mut tx = backend.pool().begin().await.expect("begin lock tx"); - sqlx::query("SELECT instance_id FROM queued_instances WHERE instance_id = $1 FOR UPDATE") - .bind(instance_id) - .fetch_one(&mut *tx) - .await - .expect("lock queued row"); - - let refreshed_expiry = Utc::now() + Duration::seconds(120); - let refreshed = tokio::time::timeout( - StdDuration::from_millis(300), - CoreBackend::refresh_instance_locks( - &backend, - LockClaim { - lock_uuid: claim.lock_uuid, - lock_expires_at: refreshed_expiry, - }, - &[instance_id], - ), - ) - .await - .expect("refresh should not block") - .expect("refresh locks"); - - assert_eq!(refreshed.len(), 1); - assert_eq!(refreshed[0].instance_id, instance_id); - assert_eq!(refreshed[0].lock_uuid, Some(claim.lock_uuid)); - assert_eq!( - refreshed[0] - .lock_expires_at - .map(|value| value.timestamp_micros()), - Some(claim.lock_expires_at.timestamp_micros()), - ); - } - - #[serial(postgres)] - #[tokio::test] - async fn core_release_instance_locks_happy_path() { - let backend = setup_backend().await; - let instance_id = Uuid::new_v4(); - let entry_node = Uuid::new_v4(); - CoreBackend::queue_instances(&backend, &[sample_queued_instance(instance_id, entry_node)]) - .await - .expect("queue instances"); - let claim = claim_instance(&backend, instance_id).await; - - CoreBackend::release_instance_locks(&backend, 
claim.lock_uuid, &[instance_id]) - .await - .expect("release locks"); - - let row = sqlx::query( - "SELECT lock_uuid, lock_expires_at FROM queued_instances WHERE instance_id = $1", - ) - .bind(instance_id) - .fetch_one(backend.pool()) - .await - .expect("lock row"); - let lock_uuid: Option = row.get("lock_uuid"); - let lock_expires_at: Option> = row.get("lock_expires_at"); - assert!(lock_uuid.is_none()); - assert!(lock_expires_at.is_none()); - - let queued_status: Option = sqlx::query_scalar( - "SELECT current_status FROM queued_instances WHERE instance_id = $1", - ) - .bind(instance_id) - .fetch_one(backend.pool()) - .await - .expect("queued current status after release"); - assert_eq!(queued_status.as_deref(), Some(INSTANCE_STATUS_QUEUED)); - - let runner_status: Option = sqlx::query_scalar( - "SELECT current_status FROM runner_instances WHERE instance_id = $1", - ) - .bind(instance_id) - .fetch_one(backend.pool()) - .await - .expect("runner current status after release"); - assert_eq!(runner_status.as_deref(), Some(INSTANCE_STATUS_QUEUED)); - } - - #[serial(postgres)] - #[tokio::test] - async fn core_reclaim_expired_instance_locks_happy_path() { - let backend = setup_backend().await; - let expired_id = Uuid::new_v4(); - let live_id = Uuid::new_v4(); - let entry_node = Uuid::new_v4(); - CoreBackend::queue_instances( - &backend, - &[ - sample_queued_instance(expired_id, entry_node), - sample_queued_instance(live_id, entry_node), - ], - ) - .await - .expect("queue instances"); - - let claim = sample_lock_claim(); - let claimed = CoreBackend::get_queued_instances(&backend, 10, claim.clone()) - .await - .expect("claim queued instances"); - assert_eq!(claimed.instances.len(), 2); - - let expired_at = Utc::now() - Duration::seconds(1); - let live_at = Utc::now() + Duration::seconds(60); - sqlx::query( - r#" - UPDATE queued_instances - SET lock_expires_at = CASE - WHEN instance_id = $1 THEN $3 - ELSE $4 - END - WHERE instance_id IN ($1, $2) - "#, - ) - .bind(expired_id) 
- .bind(live_id) - .bind(expired_at) - .bind(live_at) - .execute(backend.pool()) - .await - .expect("set lock expiries"); - - let reclaimed = backend - .reclaim_expired_instance_locks(10) - .await - .expect("reclaim expired locks"); - assert_eq!(reclaimed, 1); - - let rows = sqlx::query( - "SELECT instance_id, lock_uuid, lock_expires_at FROM queued_instances WHERE instance_id IN ($1, $2)", - ) - .bind(expired_id) - .bind(live_id) - .fetch_all(backend.pool()) - .await - .expect("fetch lock rows"); - let mut lock_rows: HashMap, Option>)> = - HashMap::new(); - for row in rows { - let instance_id: Uuid = row.get("instance_id"); - let lock_uuid: Option = row.get("lock_uuid"); - let lock_expires_at: Option> = row.get("lock_expires_at"); - lock_rows.insert(instance_id, (lock_uuid, lock_expires_at)); - } - - let expired_lock = lock_rows.get(&expired_id).expect("expired lock row"); - assert_eq!(*expired_lock, (None, None)); - - let expired_runner_status: Option = sqlx::query_scalar( - "SELECT current_status FROM runner_instances WHERE instance_id = $1", - ) - .bind(expired_id) - .fetch_one(backend.pool()) - .await - .expect("expired runner status"); - assert_eq!( - expired_runner_status.as_deref(), - Some(INSTANCE_STATUS_QUEUED) - ); - - let live_lock = lock_rows.get(&live_id).expect("live lock row"); - assert_eq!(live_lock.0, Some(claim.lock_uuid)); - assert_eq!( - live_lock.1.map(|value| value.timestamp_micros()), - Some(live_at.timestamp_micros()), - ); - - let live_runner_status: Option = sqlx::query_scalar( - "SELECT current_status FROM runner_instances WHERE instance_id = $1", - ) - .bind(live_id) - .fetch_one(backend.pool()) - .await - .expect("live runner status"); - assert_eq!(live_runner_status.as_deref(), Some(INSTANCE_STATUS_RUNNING)); - } - - #[serial(postgres)] - #[tokio::test] - async fn core_save_instances_done_happy_path() { - let backend = setup_backend().await; - let instance_id = Uuid::new_v4(); - let entry_node = Uuid::new_v4(); - 
CoreBackend::queue_instances(&backend, &[sample_queued_instance(instance_id, entry_node)]) - .await - .expect("queue instances"); - - CoreBackend::save_instances_done( - &backend, - &[InstanceDone { - executor_id: instance_id, - entry_node, - result: Some(serde_json::json!({"value": 3})), - error: None, - }], - ) - .await - .expect("save instances done"); - - let result_payload: Option> = - sqlx::query_scalar("SELECT result FROM runner_instances WHERE instance_id = $1") - .bind(instance_id) - .fetch_one(backend.pool()) - .await - .expect("result payload"); - let decoded: serde_json::Value = - rmp_serde::from_slice(&result_payload.expect("stored result")).expect("decode result"); - assert_eq!(decoded, serde_json::json!({"value": 3})); - - let queued_count: i64 = - sqlx::query_scalar("SELECT COUNT(*) FROM queued_instances WHERE instance_id = $1") - .bind(instance_id) - .fetch_one(backend.pool()) - .await - .expect("queued count"); - assert_eq!(queued_count, 0); - - let runner_status: Option = sqlx::query_scalar( - "SELECT current_status FROM runner_instances WHERE instance_id = $1", - ) - .bind(instance_id) - .fetch_one(backend.pool()) - .await - .expect("runner status"); - assert_eq!(runner_status.as_deref(), Some(INSTANCE_STATUS_COMPLETED)); - } - - #[serial(postgres)] - #[tokio::test] - async fn core_save_instances_done_updates_runner_even_if_queue_row_missing() { - let backend = setup_backend().await; - let instance_id = Uuid::new_v4(); - let entry_node = Uuid::new_v4(); - CoreBackend::queue_instances(&backend, &[sample_queued_instance(instance_id, entry_node)]) - .await - .expect("queue instances"); - - sqlx::query("DELETE FROM queued_instances WHERE instance_id = $1") - .bind(instance_id) - .execute(backend.pool()) - .await - .expect("delete queued row"); - - CoreBackend::save_instances_done( - &backend, - &[InstanceDone { - executor_id: instance_id, - entry_node, - result: Some(serde_json::json!({"value": 11})), - error: None, - }], - ) - .await - 
.expect("save instances done without queue row"); - - let runner_status: Option = sqlx::query_scalar( - "SELECT current_status FROM runner_instances WHERE instance_id = $1", - ) - .bind(instance_id) - .fetch_one(backend.pool()) - .await - .expect("runner status"); - assert_eq!(runner_status.as_deref(), Some(INSTANCE_STATUS_COMPLETED)); - } - - #[serial(postgres)] - #[tokio::test] - async fn core_retry_transient_deadlock_sqlstate_happy_path() { - let backend = setup_backend().await; - let pool = backend.pool().clone(); - let attempts = Arc::new(AtomicUsize::new(0)); - let result = retry_transient_backend("core_retry_test", || { - let pool = pool.clone(); - let attempts = Arc::clone(&attempts); - async move { - let attempt = attempts.fetch_add(1, Ordering::SeqCst); - if attempt < 2 { - sqlx::query( - "DO $$ BEGIN RAISE EXCEPTION 'simulated deadlock' USING ERRCODE='40P01'; END $$;", - ) - .execute(&pool) - .await?; - } - Ok(()) - } - }) - .await; - - assert!(result.is_ok()); - assert_eq!(attempts.load(Ordering::SeqCst), 3); - } - - #[serial(postgres)] - #[tokio::test] - async fn core_retry_non_transient_sqlstate_fails_without_retry() { - let backend = setup_backend().await; - let pool = backend.pool().clone(); - let attempts = Arc::new(AtomicUsize::new(0)); - let result = retry_transient_backend("core_retry_non_transient_test", || { - let pool = pool.clone(); - let attempts = Arc::clone(&attempts); - async move { - attempts.fetch_add(1, Ordering::SeqCst); - sqlx::query( - "DO $$ BEGIN RAISE EXCEPTION 'simulated unique violation' USING ERRCODE='23505'; END $$;", - ) - .execute(&pool) - .await?; - Ok::<(), BackendError>(()) - } - }) - .await; - - assert!(result.is_err()); - assert_eq!(attempts.load(Ordering::SeqCst), 1); - } - - #[serial(postgres)] - #[tokio::test] - async fn garbage_collector_deletes_old_done_instances_and_actions() { - let backend = setup_backend().await; - let instance_id = Uuid::new_v4(); - let execution_id = Uuid::new_v4(); - let entry_node = 
Uuid::new_v4(); - let workflow_version_id = Uuid::new_v4(); - - let state = GraphUpdate { - instance_id, - nodes: HashMap::from([(execution_id, sample_execution_node(execution_id))]), - edges: HashSet::new(), - }; - let state_payload = PostgresBackend::serialize(&state).expect("serialize state"); - let result_payload = - PostgresBackend::serialize(&serde_json::json!({"ok": true})).expect("serialize done"); - let action_payload = - PostgresBackend::serialize(&serde_json::json!({"value": 1})).expect("serialize action"); - - sqlx::query( - "INSERT INTO runner_instances (instance_id, entry_node, workflow_version_id, created_at, state, result) VALUES ($1, $2, $3, $4, $5, $6)", - ) - .bind(instance_id) - .bind(entry_node) - .bind(workflow_version_id) - .bind(Utc::now() - Duration::hours(30)) - .bind(state_payload) - .bind(result_payload) - .execute(backend.pool()) - .await - .expect("insert old done instance"); - - sqlx::query( - "INSERT INTO runner_actions_done (execution_id, attempt, status, result) VALUES ($1, $2, $3, $4)", - ) - .bind(execution_id) - .bind(1_i32) - .bind("completed") - .bind(action_payload) - .execute(backend.pool()) - .await - .expect("insert action row"); - - let result = GarbageCollectorBackend::collect_done_instances( - &backend, - Utc::now() - Duration::hours(24), - 100, - ) - .await - .expect("collect done instances"); - - assert_eq!(result.deleted_instances, 1); - assert_eq!(result.deleted_actions, 1); - - let remaining_instances: i64 = - sqlx::query_scalar("SELECT COUNT(*) FROM runner_instances WHERE instance_id = $1") - .bind(instance_id) - .fetch_one(backend.pool()) - .await - .expect("count instances"); - assert_eq!(remaining_instances, 0); - - let remaining_actions: i64 = - sqlx::query_scalar("SELECT COUNT(*) FROM runner_actions_done WHERE execution_id = $1") - .bind(execution_id) - .fetch_one(backend.pool()) - .await - .expect("count actions"); - assert_eq!(remaining_actions, 0); - } - - #[serial(postgres)] - #[tokio::test] - async fn 
garbage_collector_keeps_recent_done_instances() { - let backend = setup_backend().await; - let instance_id = Uuid::new_v4(); - let entry_node = Uuid::new_v4(); - let workflow_version_id = Uuid::new_v4(); - let state_payload = PostgresBackend::serialize(&GraphUpdate { - instance_id, - nodes: HashMap::new(), - edges: HashSet::new(), - }) - .expect("serialize state"); - let result_payload = - PostgresBackend::serialize(&serde_json::json!({"ok": true})).expect("serialize done"); - - sqlx::query( - "INSERT INTO runner_instances (instance_id, entry_node, workflow_version_id, created_at, state, result) VALUES ($1, $2, $3, $4, $5, $6)", - ) - .bind(instance_id) - .bind(entry_node) - .bind(workflow_version_id) - .bind(Utc::now() - Duration::hours(1)) - .bind(state_payload) - .bind(result_payload) - .execute(backend.pool()) - .await - .expect("insert recent done instance"); - - let result = GarbageCollectorBackend::collect_done_instances( - &backend, - Utc::now() - Duration::hours(24), - 100, - ) - .await - .expect("collect done instances"); - - assert_eq!(result.deleted_instances, 0); - assert_eq!(result.deleted_actions, 0); - - let remaining_instances: i64 = - sqlx::query_scalar("SELECT COUNT(*) FROM runner_instances WHERE instance_id = $1") - .bind(instance_id) - .fetch_one(backend.pool()) - .await - .expect("count instances"); - assert_eq!(remaining_instances, 1); - } - - #[serial(postgres)] - #[tokio::test] - async fn worker_status_backend_upsert_worker_status_happy_path() { - let backend = setup_backend().await; - let pool_id = Uuid::new_v4(); - - WorkerStatusBackend::upsert_worker_status( - &backend, - &WorkerStatusUpdate { - pool_id, - throughput_per_min: 180.0, - total_completed: 20, - last_action_at: Some(Utc::now()), - median_dequeue_ms: Some(5), - median_handling_ms: Some(12), - dispatch_queue_size: 3, - total_in_flight: 2, - active_workers: 4, - actions_per_sec: 3.0, - median_instance_duration_secs: Some(0.2), - active_instance_count: 1, - 
total_instances_completed: 8, - instances_per_sec: 0.5, - instances_per_min: 30.0, - time_series: None, - }, - ) - .await - .expect("upsert worker status"); - - let row = sqlx::query( - "SELECT total_completed, active_workers, actions_per_sec FROM worker_status WHERE pool_id = $1", - ) - .bind(pool_id) - .fetch_one(backend.pool()) - .await - .expect("worker status row"); - assert_eq!(row.get::("total_completed"), 20); - assert_eq!(row.get::("active_workers"), 4); - assert_eq!(row.get::("actions_per_sec"), 3.0); - } -} diff --git a/crates/waymark/src/backends/postgres/mod.rs b/crates/waymark/src/backends/postgres/mod.rs deleted file mode 100644 index 4bec275d..00000000 --- a/crates/waymark/src/backends/postgres/mod.rs +++ /dev/null @@ -1,116 +0,0 @@ -//! Postgres backend for persisting runner state and action results. - -mod core; -mod registry; -mod scheduler; -#[cfg(test)] -mod test_helpers; -mod webapp; - -use std::collections::HashMap; -use std::sync::{Arc, Mutex}; - -use sqlx::PgPool; - -use crate::db; -use crate::observability::obs; - -use super::base::{BackendError, BackendResult}; - -/// Persist runner state and action results in Postgres. -#[derive(Clone)] -pub struct PostgresBackend { - pool: PgPool, - query_counts: Arc>>, - batch_size_counts: Arc>>>, -} - -impl PostgresBackend { - pub fn new(pool: PgPool) -> Self { - Self { - pool, - query_counts: Arc::new(Mutex::new(HashMap::new())), - batch_size_counts: Arc::new(Mutex::new(HashMap::new())), - } - } - - #[obs] - pub async fn connect(dsn: &str) -> BackendResult { - let pool = PgPool::connect(dsn).await?; - db::run_migrations(&pool).await?; - Ok(Self::new(pool)) - } - - pub fn pool(&self) -> &PgPool { - &self.pool - } - - /// Delete all queued instances from the backing table. 
- #[obs] - pub async fn clear_queue(&self) -> BackendResult<()> { - Self::count_query(&self.query_counts, "delete:queued_instances_all"); - sqlx::query("DELETE FROM queued_instances") - .execute(&self.pool) - .await?; - Ok(()) - } - - /// Delete all persisted runner data for a clean benchmark run. - #[obs] - pub async fn clear_all(&self) -> BackendResult<()> { - Self::count_query(&self.query_counts, "truncate:runner_tables"); - sqlx::query( - r#" - TRUNCATE runner_actions_done, - runner_instances, - queued_instances - RESTART IDENTITY - "#, - ) - .execute(&self.pool) - .await?; - Ok(()) - } - - pub fn query_counts(&self) -> HashMap { - self.query_counts - .lock() - .expect("query counts poisoned") - .clone() - } - - pub fn batch_size_counts(&self) -> HashMap> { - self.batch_size_counts - .lock() - .expect("batch size counts poisoned") - .clone() - } - - pub(super) fn count_query(counts: &Arc>>, label: &str) { - let mut guard = counts.lock().expect("query counts poisoned"); - *guard.entry(label.to_string()).or_insert(0) += 1; - } - - pub(super) fn count_batch_size( - counts: &Arc>>>, - label: &str, - size: usize, - ) { - if size == 0 { - return; - } - let mut guard = counts.lock().expect("batch size counts poisoned"); - let entry = guard.entry(label.to_string()).or_default(); - *entry.entry(size).or_insert(0) += 1; - } - - pub(super) fn serialize(value: &T) -> Result, BackendError> { - rmp_serde::to_vec_named(value).map_err(|e| BackendError::Message(e.to_string())) - } - - pub(super) fn deserialize( - payload: &[u8], - ) -> Result { - rmp_serde::from_slice(payload).map_err(|e| BackendError::Message(e.to_string())) - } -} diff --git a/crates/waymark/src/backends/postgres/registry.rs b/crates/waymark/src/backends/postgres/registry.rs deleted file mode 100644 index c8fb5a68..00000000 --- a/crates/waymark/src/backends/postgres/registry.rs +++ /dev/null @@ -1,146 +0,0 @@ -use sqlx::Row; -use tonic::async_trait; -use uuid::Uuid; - -use super::PostgresBackend; -use 
crate::backends::base::{ - BackendError, BackendResult, WorkflowRegistration, WorkflowRegistryBackend, WorkflowVersion, -}; - -#[async_trait] -impl WorkflowRegistryBackend for PostgresBackend { - async fn upsert_workflow_version( - &self, - registration: &WorkflowRegistration, - ) -> BackendResult { - let inserted = sqlx::query( - r#" - INSERT INTO workflow_versions - (workflow_name, workflow_version, ir_hash, program_proto, concurrent) - VALUES ($1, $2, $3, $4, $5) - ON CONFLICT (workflow_name, workflow_version) - DO NOTHING - RETURNING id - "#, - ) - .bind(®istration.workflow_name) - .bind(®istration.workflow_version) - .bind(®istration.ir_hash) - .bind(®istration.program_proto) - .bind(registration.concurrent) - .fetch_optional(&self.pool) - .await?; - - if let Some(row) = inserted { - let id: Uuid = row.get("id"); - return Ok(id); - } - - let row = sqlx::query( - r#" - SELECT id, ir_hash - FROM workflow_versions - WHERE workflow_name = $1 AND workflow_version = $2 - "#, - ) - .bind(®istration.workflow_name) - .bind(®istration.workflow_version) - .fetch_one(&self.pool) - .await?; - - let id: Uuid = row.get("id"); - let existing_hash: String = row.get("ir_hash"); - if existing_hash != registration.ir_hash { - return Err(BackendError::Message(format!( - "workflow version already exists with different IR hash: {}@{}", - registration.workflow_name, registration.workflow_version - ))); - } - - Ok(id) - } - - async fn get_workflow_versions(&self, ids: &[Uuid]) -> BackendResult> { - if ids.is_empty() { - return Ok(Vec::new()); - } - let rows = sqlx::query( - r#" - SELECT id, workflow_name, workflow_version, ir_hash, program_proto, concurrent - FROM workflow_versions - WHERE id = ANY($1) - "#, - ) - .bind(ids) - .fetch_all(&self.pool) - .await?; - - let mut versions = Vec::with_capacity(rows.len()); - for row in rows { - versions.push(WorkflowVersion { - id: row.get("id"), - workflow_name: row.get("workflow_name"), - workflow_version: row.get("workflow_version"), - 
ir_hash: row.get("ir_hash"), - program_proto: row.get("program_proto"), - concurrent: row.get("concurrent"), - }); - } - Ok(versions) - } -} - -#[cfg(test)] -mod tests { - use serial_test::serial; - - use super::super::test_helpers::setup_backend; - use crate::backends::{WorkflowRegistration, WorkflowRegistryBackend}; - - fn sample_registration(version: &str) -> WorkflowRegistration { - WorkflowRegistration { - workflow_name: "tests.workflow".to_string(), - workflow_version: version.to_string(), - ir_hash: format!("hash-{version}"), - program_proto: vec![1, 2, 3, 4], - concurrent: true, - } - } - - #[serial(postgres)] - #[tokio::test] - async fn workflow_registry_upsert_workflow_version_happy_path() { - let backend = setup_backend().await; - let registration = sample_registration("v1"); - - let id = WorkflowRegistryBackend::upsert_workflow_version(&backend, ®istration) - .await - .expect("insert workflow version"); - let repeat_id = WorkflowRegistryBackend::upsert_workflow_version(&backend, ®istration) - .await - .expect("idempotent workflow upsert"); - - assert_eq!(id, repeat_id); - } - - #[serial(postgres)] - #[tokio::test] - async fn workflow_registry_get_workflow_versions_happy_path() { - let backend = setup_backend().await; - let registration = sample_registration("v2"); - let id = WorkflowRegistryBackend::upsert_workflow_version(&backend, ®istration) - .await - .expect("insert workflow version"); - - let versions = WorkflowRegistryBackend::get_workflow_versions(&backend, &[id]) - .await - .expect("get workflow versions"); - assert_eq!(versions.len(), 1); - assert_eq!(versions[0].id, id); - assert_eq!(versions[0].workflow_name, registration.workflow_name); - assert_eq!(versions[0].workflow_version, registration.workflow_version); - assert_eq!(versions[0].ir_hash, registration.ir_hash); - assert_eq!(versions[0].program_proto, registration.program_proto); - assert_eq!(versions[0].concurrent, registration.concurrent); - } -} diff --git 
a/crates/waymark/src/backends/postgres/scheduler.rs b/crates/waymark/src/backends/postgres/scheduler.rs deleted file mode 100644 index 5eb00735..00000000 --- a/crates/waymark/src/backends/postgres/scheduler.rs +++ /dev/null @@ -1,604 +0,0 @@ -use chrono::{DateTime, Utc}; -use sqlx::Row; -use tonic::async_trait; -use uuid::Uuid; - -use super::PostgresBackend; -use crate::backends::base::{BackendError, BackendResult, SchedulerBackend}; -use crate::scheduler::compute_next_run; -use crate::scheduler::{CreateScheduleParams, ScheduleId, ScheduleType, WorkflowSchedule}; - -#[async_trait] -impl SchedulerBackend for PostgresBackend { - async fn upsert_schedule(&self, params: &CreateScheduleParams) -> BackendResult { - let next_run_at = compute_next_run( - params.schedule_type, - params.cron_expression.as_deref(), - params.interval_seconds, - params.jitter_seconds, - None, - ) - .map_err(BackendError::Message)?; - - let row = sqlx::query( - r#" - INSERT INTO workflow_schedules - (workflow_name, schedule_name, schedule_type, cron_expression, interval_seconds, - jitter_seconds, input_payload, next_run_at, priority, allow_duplicate) - VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10) - ON CONFLICT (workflow_name, schedule_name) - DO UPDATE SET - schedule_type = EXCLUDED.schedule_type, - cron_expression = EXCLUDED.cron_expression, - interval_seconds = EXCLUDED.interval_seconds, - jitter_seconds = EXCLUDED.jitter_seconds, - input_payload = EXCLUDED.input_payload, - next_run_at = COALESCE(workflow_schedules.next_run_at, EXCLUDED.next_run_at), - priority = EXCLUDED.priority, - allow_duplicate = EXCLUDED.allow_duplicate, - status = 'active', - updated_at = NOW() - RETURNING id - "#, - ) - .bind(¶ms.workflow_name) - .bind(¶ms.schedule_name) - .bind(params.schedule_type.as_str()) - .bind(¶ms.cron_expression) - .bind(params.interval_seconds) - .bind(params.jitter_seconds) - .bind(¶ms.input_payload) - .bind(next_run_at) - .bind(params.priority) - .bind(params.allow_duplicate) - 
.fetch_one(&self.pool) - .await?; - - let id: Uuid = row.get("id"); - Ok(ScheduleId(id)) - } - - async fn get_schedule(&self, id: ScheduleId) -> BackendResult { - let schedule = sqlx::query_as::<_, ScheduleRow>( - r#" - SELECT id, workflow_name, schedule_name, schedule_type, cron_expression, interval_seconds, - jitter_seconds, input_payload, status, next_run_at, last_run_at, last_instance_id, - created_at, updated_at, priority, allow_duplicate - FROM workflow_schedules - WHERE id = $1 - "#, - ) - .bind(id.0) - .fetch_optional(&self.pool) - .await? - .ok_or_else(|| BackendError::Message(format!("schedule not found: {}", id)))?; - - Ok(schedule.into()) - } - - async fn get_schedule_by_name( - &self, - workflow_name: &str, - schedule_name: &str, - ) -> BackendResult> { - let schedule = sqlx::query_as::<_, ScheduleRow>( - r#" - SELECT id, workflow_name, schedule_name, schedule_type, cron_expression, interval_seconds, - jitter_seconds, input_payload, status, next_run_at, last_run_at, last_instance_id, - created_at, updated_at, priority, allow_duplicate - FROM workflow_schedules - WHERE workflow_name = $1 AND schedule_name = $2 AND status != 'deleted' - "#, - ) - .bind(workflow_name) - .bind(schedule_name) - .fetch_optional(&self.pool) - .await?; - - Ok(schedule.map(Into::into)) - } - - async fn list_schedules( - &self, - limit: i64, - offset: i64, - ) -> BackendResult> { - let rows = sqlx::query_as::<_, ScheduleRow>( - r#" - SELECT id, workflow_name, schedule_name, schedule_type, cron_expression, interval_seconds, - jitter_seconds, input_payload, status, next_run_at, last_run_at, last_instance_id, - created_at, updated_at, priority, allow_duplicate - FROM workflow_schedules - WHERE status != 'deleted' - ORDER BY workflow_name, schedule_name - LIMIT $1 OFFSET $2 - "#, - ) - .bind(limit) - .bind(offset) - .fetch_all(&self.pool) - .await?; - - Ok(rows.into_iter().map(Into::into).collect()) - } - - async fn count_schedules(&self) -> BackendResult { - let count = 
sqlx::query_scalar::<_, i64>( - "SELECT COUNT(*) FROM workflow_schedules WHERE status != 'deleted'", - ) - .fetch_one(&self.pool) - .await?; - - Ok(count) - } - - async fn update_schedule_status(&self, id: ScheduleId, status: &str) -> BackendResult { - let result = sqlx::query( - r#" - UPDATE workflow_schedules - SET status = $2, updated_at = NOW() - WHERE id = $1 - "#, - ) - .bind(id.0) - .bind(status) - .execute(&self.pool) - .await?; - - Ok(result.rows_affected() > 0) - } - - async fn delete_schedule(&self, id: ScheduleId) -> BackendResult { - SchedulerBackend::update_schedule_status(self, id, "deleted").await - } - - async fn find_due_schedules(&self, limit: i32) -> BackendResult> { - let rows = sqlx::query_as::<_, ScheduleRow>( - r#" - SELECT id, workflow_name, schedule_name, schedule_type, cron_expression, interval_seconds, - jitter_seconds, input_payload, status, next_run_at, last_run_at, last_instance_id, - created_at, updated_at, priority, allow_duplicate - FROM workflow_schedules - WHERE status = 'active' - AND next_run_at IS NOT NULL - AND next_run_at <= NOW() - ORDER BY next_run_at - FOR UPDATE SKIP LOCKED - LIMIT $1 - "#, - ) - .bind(limit) - .fetch_all(&self.pool) - .await?; - - Ok(rows.into_iter().map(Into::into).collect()) - } - - async fn has_running_instance(&self, schedule_id: ScheduleId) -> BackendResult { - let has_running = sqlx::query_scalar::<_, bool>( - r#" - SELECT EXISTS( - SELECT 1 - FROM runner_instances ri - JOIN queued_instances qi ON qi.instance_id = ri.instance_id - WHERE ri.schedule_id = $1 - ) - "#, - ) - .bind(schedule_id.0) - .fetch_one(&self.pool) - .await?; - - Ok(has_running) - } - - async fn mark_schedule_executed( - &self, - schedule_id: ScheduleId, - instance_id: Uuid, - ) -> BackendResult<()> { - let schedule = SchedulerBackend::get_schedule(self, schedule_id).await?; - let schedule_type = ScheduleType::parse(&schedule.schedule_type) - .ok_or_else(|| BackendError::Message("invalid schedule type".to_string()))?; - let 
next_run_at = compute_next_run( - schedule_type, - schedule.cron_expression.as_deref(), - schedule.interval_seconds, - schedule.jitter_seconds, - Some(Utc::now()), - ) - .map_err(BackendError::Message)?; - - sqlx::query( - r#" - UPDATE workflow_schedules - SET last_run_at = NOW(), - last_instance_id = $2, - next_run_at = $3, - updated_at = NOW() - WHERE id = $1 - "#, - ) - .bind(schedule_id.0) - .bind(instance_id) - .bind(next_run_at) - .execute(&self.pool) - .await?; - - Ok(()) - } - - async fn skip_schedule_run(&self, schedule_id: ScheduleId) -> BackendResult<()> { - let schedule = SchedulerBackend::get_schedule(self, schedule_id).await?; - let schedule_type = ScheduleType::parse(&schedule.schedule_type) - .ok_or_else(|| BackendError::Message("invalid schedule type".to_string()))?; - let next_run_at = compute_next_run( - schedule_type, - schedule.cron_expression.as_deref(), - schedule.interval_seconds, - schedule.jitter_seconds, - Some(Utc::now()), - ) - .map_err(BackendError::Message)?; - - sqlx::query( - r#" - UPDATE workflow_schedules - SET next_run_at = $2, updated_at = NOW() - WHERE id = $1 - "#, - ) - .bind(schedule_id.0) - .bind(next_run_at) - .execute(&self.pool) - .await?; - - Ok(()) - } -} - -#[derive(sqlx::FromRow)] -struct ScheduleRow { - id: Uuid, - workflow_name: String, - schedule_name: String, - schedule_type: String, - cron_expression: Option, - interval_seconds: Option, - jitter_seconds: i64, - input_payload: Option>, - status: String, - next_run_at: Option>, - last_run_at: Option>, - last_instance_id: Option, - created_at: DateTime, - updated_at: DateTime, - priority: i32, - allow_duplicate: bool, -} - -impl From for WorkflowSchedule { - fn from(row: ScheduleRow) -> Self { - Self { - id: row.id, - workflow_name: row.workflow_name, - schedule_name: row.schedule_name, - schedule_type: row.schedule_type, - cron_expression: row.cron_expression, - interval_seconds: row.interval_seconds, - jitter_seconds: row.jitter_seconds, - input_payload: 
row.input_payload, - status: row.status, - next_run_at: row.next_run_at, - last_run_at: row.last_run_at, - last_instance_id: row.last_instance_id, - created_at: row.created_at, - updated_at: row.updated_at, - priority: row.priority, - allow_duplicate: row.allow_duplicate, - } - } -} - -#[cfg(test)] -mod tests { - use chrono::Utc; - use serial_test::serial; - use sqlx::Row; - use uuid::Uuid; - - use super::super::test_helpers::setup_backend; - use super::*; - use crate::backends::SchedulerBackend; - use crate::scheduler::CreateScheduleParams; - - fn sample_params(schedule_name: &str) -> CreateScheduleParams { - CreateScheduleParams { - workflow_name: "tests.workflow".to_string(), - schedule_name: schedule_name.to_string(), - schedule_type: ScheduleType::Interval, - cron_expression: None, - interval_seconds: Some(60), - jitter_seconds: 0, - input_payload: Some(vec![1, 2, 3]), - priority: 3, - allow_duplicate: true, - } - } - - async fn insert_schedule(backend: &PostgresBackend, schedule_name: &str) -> ScheduleId { - SchedulerBackend::upsert_schedule(backend, &sample_params(schedule_name)) - .await - .expect("upsert schedule") - } - - #[serial(postgres)] - #[tokio::test] - async fn scheduler_upsert_schedule_happy_path() { - let backend = setup_backend().await; - - let id = insert_schedule(&backend, "upsert").await; - let row = sqlx::query("SELECT id FROM workflow_schedules WHERE id = $1") - .bind(id.0) - .fetch_one(backend.pool()) - .await - .expect("select schedule"); - - assert_eq!(row.get::("id"), id.0); - } - - #[serial(postgres)] - #[tokio::test] - async fn scheduler_upsert_schedule_preserves_existing_next_run_at() { - let backend = setup_backend().await; - - let id = insert_schedule(&backend, "preserve-next-run").await; - sqlx::query( - "UPDATE workflow_schedules SET next_run_at = NOW() + INTERVAL '2 days' WHERE id = $1", - ) - .bind(id.0) - .execute(backend.pool()) - .await - .expect("force next_run_at"); - - let before: Option> = - sqlx::query_scalar("SELECT 
next_run_at FROM workflow_schedules WHERE id = $1") - .bind(id.0) - .fetch_one(backend.pool()) - .await - .expect("select next_run_at before"); - - let upserted_id = - SchedulerBackend::upsert_schedule(&backend, &sample_params("preserve-next-run")) - .await - .expect("upsert existing schedule"); - assert_eq!(upserted_id.0, id.0); - - let after: Option> = - sqlx::query_scalar("SELECT next_run_at FROM workflow_schedules WHERE id = $1") - .bind(id.0) - .fetch_one(backend.pool()) - .await - .expect("select next_run_at after"); - - assert_eq!(after, before); - } - - #[serial(postgres)] - #[tokio::test] - async fn scheduler_get_schedule_happy_path() { - let backend = setup_backend().await; - - let id = insert_schedule(&backend, "get").await; - let schedule = SchedulerBackend::get_schedule(&backend, id) - .await - .expect("get schedule"); - - assert_eq!(schedule.id, id.0); - assert_eq!(schedule.schedule_name, "get"); - assert_eq!(schedule.workflow_name, "tests.workflow"); - } - - #[serial(postgres)] - #[tokio::test] - async fn scheduler_get_schedule_by_name_happy_path() { - let backend = setup_backend().await; - - let id = insert_schedule(&backend, "by-name").await; - let schedule = - SchedulerBackend::get_schedule_by_name(&backend, "tests.workflow", "by-name") - .await - .expect("get schedule by name") - .expect("expected schedule"); - - assert_eq!(schedule.id, id.0); - assert_eq!(schedule.schedule_name, "by-name"); - } - - #[serial(postgres)] - #[tokio::test] - async fn scheduler_list_schedules_happy_path() { - let backend = setup_backend().await; - - insert_schedule(&backend, "a-list").await; - insert_schedule(&backend, "b-list").await; - - let schedules = SchedulerBackend::list_schedules(&backend, 10, 0) - .await - .expect("list schedules"); - - assert_eq!(schedules.len(), 2); - assert_eq!(schedules[0].schedule_name, "a-list"); - assert_eq!(schedules[1].schedule_name, "b-list"); - } - - #[serial(postgres)] - #[tokio::test] - async fn 
scheduler_count_schedules_happy_path() { - let backend = setup_backend().await; - - insert_schedule(&backend, "count-a").await; - insert_schedule(&backend, "count-b").await; - - let count = SchedulerBackend::count_schedules(&backend) - .await - .expect("count schedules"); - assert_eq!(count, 2); - } - - #[serial(postgres)] - #[tokio::test] - async fn scheduler_update_schedule_status_happy_path() { - let backend = setup_backend().await; - - let id = insert_schedule(&backend, "status").await; - let updated = SchedulerBackend::update_schedule_status(&backend, id, "paused") - .await - .expect("update schedule status"); - assert!(updated); - - let status: String = - sqlx::query_scalar("SELECT status FROM workflow_schedules WHERE id = $1") - .bind(id.0) - .fetch_one(backend.pool()) - .await - .expect("select status"); - assert_eq!(status, "paused"); - } - - #[serial(postgres)] - #[tokio::test] - async fn scheduler_delete_schedule_happy_path() { - let backend = setup_backend().await; - - let id = insert_schedule(&backend, "delete").await; - let deleted = SchedulerBackend::delete_schedule(&backend, id) - .await - .expect("delete schedule"); - assert!(deleted); - - let status: String = - sqlx::query_scalar("SELECT status FROM workflow_schedules WHERE id = $1") - .bind(id.0) - .fetch_one(backend.pool()) - .await - .expect("select status"); - assert_eq!(status, "deleted"); - } - - #[serial(postgres)] - #[tokio::test] - async fn scheduler_find_due_schedules_happy_path() { - let backend = setup_backend().await; - - let id = insert_schedule(&backend, "due").await; - sqlx::query( - "UPDATE workflow_schedules SET next_run_at = NOW() - INTERVAL '1 minute' WHERE id = $1", - ) - .bind(id.0) - .execute(backend.pool()) - .await - .expect("force schedule due"); - - let due = SchedulerBackend::find_due_schedules(&backend, 10) - .await - .expect("find due schedules"); - assert_eq!(due.len(), 1); - assert_eq!(due[0].id, id.0); - } - - #[serial(postgres)] - #[tokio::test] - async fn 
scheduler_has_running_instance_happy_path() { - let backend = setup_backend().await; - - let has_running = SchedulerBackend::has_running_instance(&backend, ScheduleId::new()) - .await - .expect("has running instance"); - assert!(!has_running); - } - - #[serial(postgres)] - #[tokio::test] - async fn scheduler_has_running_instance_true_with_queued_instance() { - let backend = setup_backend().await; - - let schedule_id = insert_schedule(&backend, "running-instance").await; - let instance_id = Uuid::new_v4(); - sqlx::query( - "INSERT INTO runner_instances (instance_id, entry_node, schedule_id) VALUES ($1, $2, $3)", - ) - .bind(instance_id) - .bind(Uuid::new_v4()) - .bind(schedule_id.0) - .execute(backend.pool()) - .await - .expect("insert runner instance"); - sqlx::query("INSERT INTO queued_instances (instance_id, payload) VALUES ($1, $2)") - .bind(instance_id) - .bind(vec![0_u8]) - .execute(backend.pool()) - .await - .expect("insert queued instance"); - - let has_running = SchedulerBackend::has_running_instance(&backend, schedule_id) - .await - .expect("has running instance"); - assert!(has_running); - } - - #[serial(postgres)] - #[tokio::test] - async fn scheduler_mark_schedule_executed_happy_path() { - let backend = setup_backend().await; - - let id = insert_schedule(&backend, "mark-executed").await; - let instance_id = Uuid::new_v4(); - SchedulerBackend::mark_schedule_executed(&backend, id, instance_id) - .await - .expect("mark schedule executed"); - - let row = sqlx::query( - "SELECT last_instance_id, last_run_at, next_run_at FROM workflow_schedules WHERE id = $1", - ) - .bind(id.0) - .fetch_one(backend.pool()) - .await - .expect("select schedule"); - - let last_instance_id: Option = row.get("last_instance_id"); - let last_run_at: Option> = row.get("last_run_at"); - let next_run_at: Option> = row.get("next_run_at"); - - assert_eq!(last_instance_id, Some(instance_id)); - assert!(last_run_at.is_some()); - assert!(next_run_at.is_some()); - } - - #[serial(postgres)] - 
#[tokio::test] - async fn scheduler_skip_schedule_run_happy_path() { - let backend = setup_backend().await; - - let id = insert_schedule(&backend, "skip").await; - sqlx::query( - "UPDATE workflow_schedules SET next_run_at = NOW() - INTERVAL '1 minute' WHERE id = $1", - ) - .bind(id.0) - .execute(backend.pool()) - .await - .expect("force schedule due"); - - SchedulerBackend::skip_schedule_run(&backend, id) - .await - .expect("skip schedule run"); - - let next_run_at: Option> = - sqlx::query_scalar("SELECT next_run_at FROM workflow_schedules WHERE id = $1") - .bind(id.0) - .fetch_one(backend.pool()) - .await - .expect("select next_run_at"); - assert!(next_run_at.expect("next_run_at").gt(&Utc::now())); - } -} diff --git a/crates/waymark/src/backends/postgres/test_helpers.rs b/crates/waymark/src/backends/postgres/test_helpers.rs deleted file mode 100644 index dd03cd7f..00000000 --- a/crates/waymark/src/backends/postgres/test_helpers.rs +++ /dev/null @@ -1,27 +0,0 @@ -use sqlx::PgPool; - -use super::PostgresBackend; -use crate::test_support::postgres_setup; - -pub(super) async fn setup_backend() -> PostgresBackend { - let pool = postgres_setup().await; - reset_database(&pool).await; - PostgresBackend::new(pool) -} - -pub(super) async fn reset_database(pool: &PgPool) { - sqlx::query( - r#" - TRUNCATE runner_actions_done, - queued_instances, - runner_instances, - workflow_versions, - workflow_schedules, - worker_status - RESTART IDENTITY CASCADE - "#, - ) - .execute(pool) - .await - .expect("truncate postgres tables"); -} diff --git a/crates/waymark/src/backends/postgres/webapp.rs b/crates/waymark/src/backends/postgres/webapp.rs deleted file mode 100644 index 8f6b932f..00000000 --- a/crates/waymark/src/backends/postgres/webapp.rs +++ /dev/null @@ -1,2324 +0,0 @@ -use std::collections::HashMap; - -use chrono::{DateTime, Utc}; -use prost::Message; -use serde_json::Value; -use sqlx::{Postgres, QueryBuilder, Row}; -use tonic::async_trait; -use uuid::Uuid; - -use 
super::PostgresBackend; -use crate::backends::base::{BackendError, BackendResult, GraphUpdate, WebappBackend}; -use crate::messages::ast as ir; -use crate::waymark_core::runner::state::{ActionCallSpec, ExecutionNode, NodeStatus}; -use crate::waymark_core::runner::{RunnerState, ValueExpr, format_value, replay_action_kwargs}; -use crate::webapp::{ - ExecutionEdgeView, ExecutionGraphView, ExecutionNodeView, InstanceDetail, InstanceStatus, - InstanceSummary, ScheduleDetail, ScheduleInvocationSummary, ScheduleSummary, TimelineEntry, - WorkerActionRow, WorkerAggregateStats, WorkerStatus, -}; -use waymark_dag::{DAGNode, EdgeType, convert_to_dag}; - -const INSTANCE_STATUS_FALLBACK_SQL: &str = r#" -CASE - WHEN ri.error IS NOT NULL THEN 'failed' - WHEN ri.result IS NOT NULL THEN 'completed' - WHEN ri.state IS NOT NULL THEN 'running' - ELSE 'queued' -END -"#; - -#[derive(Debug, Clone, PartialEq, Eq)] -enum InstanceSearchToken { - Term(String), - And, - Or, - LParen, - RParen, -} - -#[derive(Debug, Clone, PartialEq, Eq)] -enum InstanceSearchExpr { - Term(String), - And(Box, Box), - Or(Box, Box), -} - -struct InstanceSearchParser { - tokens: Vec, - position: usize, -} - -impl InstanceSearchParser { - fn new(tokens: Vec) -> Self { - Self { - tokens, - position: 0, - } - } - - fn parse(mut self) -> Option { - let expr = self.parse_or()?; - if self.position == self.tokens.len() { - Some(expr) - } else { - None - } - } - - fn parse_or(&mut self) -> Option { - let mut expr = self.parse_and()?; - while self.consume_or() { - let rhs = self.parse_and()?; - expr = InstanceSearchExpr::Or(Box::new(expr), Box::new(rhs)); - } - Some(expr) - } - - fn parse_and(&mut self) -> Option { - let mut expr = self.parse_primary()?; - loop { - if self.consume_and() || self.peek_is_primary_start() { - let rhs = self.parse_primary()?; - expr = InstanceSearchExpr::And(Box::new(expr), Box::new(rhs)); - continue; - } - break; - } - Some(expr) - } - - fn parse_primary(&mut self) -> Option { - match 
self.peek()? { - InstanceSearchToken::Term(term) => { - let term = term.clone(); - self.position += 1; - Some(InstanceSearchExpr::Term(term)) - } - InstanceSearchToken::LParen => { - self.position += 1; - let expr = self.parse_or()?; - if !self.consume_rparen() { - return None; - } - Some(expr) - } - InstanceSearchToken::And | InstanceSearchToken::Or | InstanceSearchToken::RParen => { - None - } - } - } - - fn consume_and(&mut self) -> bool { - if matches!(self.peek(), Some(InstanceSearchToken::And)) { - self.position += 1; - true - } else { - false - } - } - - fn consume_or(&mut self) -> bool { - if matches!(self.peek(), Some(InstanceSearchToken::Or)) { - self.position += 1; - true - } else { - false - } - } - - fn consume_rparen(&mut self) -> bool { - if matches!(self.peek(), Some(InstanceSearchToken::RParen)) { - self.position += 1; - true - } else { - false - } - } - - fn peek_is_primary_start(&self) -> bool { - matches!( - self.peek(), - Some(InstanceSearchToken::Term(_)) | Some(InstanceSearchToken::LParen) - ) - } - - fn peek(&self) -> Option<&InstanceSearchToken> { - self.tokens.get(self.position) - } -} - -fn tokenize_instance_search(search: &str) -> Vec { - let mut chars = search.chars().peekable(); - let mut tokens = Vec::new(); - - while let Some(ch) = chars.peek().copied() { - if ch.is_whitespace() { - chars.next(); - continue; - } - if ch == '(' { - chars.next(); - tokens.push(InstanceSearchToken::LParen); - continue; - } - if ch == ')' { - chars.next(); - tokens.push(InstanceSearchToken::RParen); - continue; - } - if ch == '"' { - chars.next(); - let mut quoted = String::new(); - for next in chars.by_ref() { - if next == '"' { - break; - } - quoted.push(next); - } - if !quoted.is_empty() { - tokens.push(InstanceSearchToken::Term(quoted)); - } - continue; - } - - let mut term = String::new(); - while let Some(next) = chars.peek().copied() { - if next.is_whitespace() || next == '(' || next == ')' { - break; - } - term.push(next); - chars.next(); - } - 
if term.is_empty() { - continue; - } - - match term.to_ascii_uppercase().as_str() { - "AND" => tokens.push(InstanceSearchToken::And), - "OR" => tokens.push(InstanceSearchToken::Or), - _ => tokens.push(InstanceSearchToken::Term(term)), - } - } - - tokens -} - -fn parse_instance_search_expr(search: &str) -> Option { - let trimmed = search.trim(); - if trimmed.is_empty() { - return None; - } - - let tokens = tokenize_instance_search(trimmed); - if tokens.is_empty() { - return None; - } - - InstanceSearchParser::new(tokens) - .parse() - .or_else(|| Some(InstanceSearchExpr::Term(trimmed.to_string()))) -} - -fn push_instance_search_expr_sql( - builder: &mut QueryBuilder<'_, Postgres>, - expr: &InstanceSearchExpr, -) { - match expr { - InstanceSearchExpr::Term(term) => { - let pattern = format!("%{term}%"); - builder.push("("); - builder.push("COALESCE(ri.workflow_name, wv.workflow_name, '') ILIKE "); - builder.push_bind(pattern.clone()); - builder.push(" OR COALESCE(ri.current_status, "); - builder.push(INSTANCE_STATUS_FALLBACK_SQL); - builder.push(", '') ILIKE "); - builder.push_bind(pattern); - builder.push(")"); - } - InstanceSearchExpr::And(left, right) => { - builder.push("("); - push_instance_search_expr_sql(builder, left); - builder.push(" AND "); - push_instance_search_expr_sql(builder, right); - builder.push(")"); - } - InstanceSearchExpr::Or(left, right) => { - builder.push("("); - push_instance_search_expr_sql(builder, left); - builder.push(" OR "); - push_instance_search_expr_sql(builder, right); - builder.push(")"); - } - } -} - -fn parse_instance_status(status: &str) -> Option { - match status { - "queued" => Some(InstanceStatus::Queued), - "running" => Some(InstanceStatus::Running), - "completed" => Some(InstanceStatus::Completed), - "failed" => Some(InstanceStatus::Failed), - _ => None, - } -} - -#[async_trait] -impl WebappBackend for PostgresBackend { - async fn count_instances(&self, search: Option<&str>) -> BackendResult { - let mut builder: 
QueryBuilder = QueryBuilder::new( - r#" - SELECT COUNT(*)::BIGINT - FROM runner_instances ri - LEFT JOIN workflow_versions wv ON wv.id = ri.workflow_version_id - "#, - ); - - if let Some(search_expr) = search.and_then(parse_instance_search_expr) { - builder.push(" WHERE "); - push_instance_search_expr_sql(&mut builder, &search_expr); - } - - let count: i64 = builder.build_query_scalar().fetch_one(&self.pool).await?; - Ok(count) - } - - async fn list_instances( - &self, - search: Option<&str>, - limit: i64, - offset: i64, - ) -> BackendResult> { - let mut builder: QueryBuilder = QueryBuilder::new( - r#" - SELECT - ri.instance_id, - ri.entry_node, - ri.created_at, - ri.state, - ri.result, - ri.error, - COALESCE(ri.workflow_name, wv.workflow_name) AS workflow_name, - COALESCE(ri.current_status, - CASE - WHEN ri.error IS NOT NULL THEN 'failed' - WHEN ri.result IS NOT NULL THEN 'completed' - WHEN ri.state IS NOT NULL THEN 'running' - ELSE 'queued' - END - ) AS current_status - FROM runner_instances ri - LEFT JOIN workflow_versions wv ON wv.id = ri.workflow_version_id - "#, - ); - if let Some(search_expr) = search.and_then(parse_instance_search_expr) { - builder.push(" WHERE "); - push_instance_search_expr_sql(&mut builder, &search_expr); - } - builder.push(" ORDER BY ri.created_at DESC, ri.instance_id DESC LIMIT "); - builder.push_bind(limit); - builder.push(" OFFSET "); - builder.push_bind(offset); - let rows = builder.build().fetch_all(&self.pool).await?; - - let mut instances = Vec::new(); - for row in rows { - let instance_id: Uuid = row.get("instance_id"); - let entry_node: Uuid = row.get("entry_node"); - let created_at: DateTime = row.get("created_at"); - let state_bytes: Option> = row.get("state"); - let result_bytes: Option> = row.get("result"); - let error_bytes: Option> = row.get("error"); - let workflow_name: Option = row.get("workflow_name"); - let current_status: Option = row.get("current_status"); - - let status = current_status - .as_deref() - 
.and_then(parse_instance_status) - .unwrap_or_else(|| determine_status(&state_bytes, &result_bytes, &error_bytes)); - let input_preview = extract_input_preview(&state_bytes); - - instances.push(InstanceSummary { - id: instance_id, - entry_node, - created_at, - status, - workflow_name, - input_preview, - }); - } - - Ok(instances) - } - - async fn get_instance(&self, instance_id: Uuid) -> BackendResult { - let row = sqlx::query( - r#" - SELECT - ri.instance_id, - ri.entry_node, - ri.created_at, - ri.state, - ri.result, - ri.error, - COALESCE(ri.workflow_name, wv.workflow_name) AS workflow_name, - COALESCE(ri.current_status, - CASE - WHEN ri.error IS NOT NULL THEN 'failed' - WHEN ri.result IS NOT NULL THEN 'completed' - WHEN ri.state IS NOT NULL THEN 'running' - ELSE 'queued' - END - ) AS current_status - FROM runner_instances ri - LEFT JOIN workflow_versions wv ON wv.id = ri.workflow_version_id - WHERE ri.instance_id = $1 - "#, - ) - .bind(instance_id) - .fetch_optional(&self.pool) - .await? 
- .ok_or_else(|| BackendError::Message(format!("instance not found: {}", instance_id)))?; - - let instance_id: Uuid = row.get("instance_id"); - let entry_node: Uuid = row.get("entry_node"); - let created_at: DateTime = row.get("created_at"); - let state_bytes: Option> = row.get("state"); - let result_bytes: Option> = row.get("result"); - let error_bytes: Option> = row.get("error"); - let workflow_name: Option = row.get("workflow_name"); - let current_status: Option = row.get("current_status"); - - let status = current_status - .as_deref() - .and_then(parse_instance_status) - .unwrap_or_else(|| determine_status(&state_bytes, &result_bytes, &error_bytes)); - let input_payload = format_input_payload(&state_bytes); - let result_payload = format_instance_result_payload(status, &result_bytes, &error_bytes); - let error_payload = format_error(&error_bytes); - - Ok(InstanceDetail { - id: instance_id, - entry_node, - created_at, - status, - workflow_name, - input_payload, - result_payload, - error_payload, - }) - } - - async fn get_execution_graph( - &self, - instance_id: Uuid, - ) -> BackendResult> { - let row = sqlx::query( - r#" - SELECT state FROM runner_instances WHERE instance_id = $1 - "#, - ) - .bind(instance_id) - .fetch_optional(&self.pool) - .await?; - - let Some(row) = row else { - return Ok(None); - }; - - let state_bytes: Option> = row.get("state"); - let Some(state_bytes) = state_bytes else { - return Ok(None); - }; - - let graph_update: GraphUpdate = rmp_serde::from_slice(&state_bytes) - .map_err(|e| BackendError::Message(format!("failed to decode state: {}", e)))?; - - let nodes: Vec = graph_update - .nodes - .values() - .map(|node| ExecutionNodeView { - id: node.node_id.to_string(), - node_type: node.node_type.clone(), - label: node.label.clone(), - status: format_node_status(&node.status), - action_name: node.action.as_ref().map(|a| a.action_name.clone()), - module_name: node.action.as_ref().and_then(|a| a.module_name.clone()), - }) - .collect(); - - let 
edges: Vec = graph_update - .edges - .iter() - .map(|edge| ExecutionEdgeView { - source: edge.source.to_string(), - target: edge.target.to_string(), - edge_type: format!("{:?}", edge.edge_type), - }) - .collect(); - - Ok(Some(ExecutionGraphView { nodes, edges })) - } - - async fn get_workflow_graph( - &self, - instance_id: Uuid, - ) -> BackendResult> { - let row = sqlx::query( - r#" - SELECT ri.state, wv.program_proto - FROM runner_instances ri - JOIN workflow_versions wv ON wv.id = ri.workflow_version_id - WHERE ri.instance_id = $1 - "#, - ) - .bind(instance_id) - .fetch_optional(&self.pool) - .await?; - - let Some(row) = row else { - return Ok(None); - }; - - let program_proto: Vec = row.get("program_proto"); - let program = ir::Program::decode(&program_proto[..]) - .map_err(|err| BackendError::Message(format!("failed to decode workflow IR: {err}")))?; - let dag = convert_to_dag(&program).map_err(|err| { - BackendError::Message(format!("failed to convert workflow DAG: {err}")) - })?; - - let mut template_statuses: HashMap = HashMap::new(); - let state_bytes: Option> = row.get("state"); - if let Some(state_bytes) = state_bytes { - let graph_update: GraphUpdate = rmp_serde::from_slice(&state_bytes) - .map_err(|err| BackendError::Message(format!("failed to decode state: {err}")))?; - - for node in graph_update.nodes.values() { - let Some(template_id) = node.template_id.as_ref() else { - continue; - }; - template_statuses - .entry(template_id.clone()) - .and_modify(|existing| { - *existing = merge_template_status(existing, &node.status); - }) - .or_insert_with(|| node.status.clone()); - } - } - - let mut node_ids: Vec = dag.nodes.keys().cloned().collect(); - node_ids.sort(); - let nodes: Vec = node_ids - .into_iter() - .filter_map(|node_id| { - let node = dag.nodes.get(&node_id)?; - let status = template_statuses - .get(&node_id) - .map(format_node_status) - .unwrap_or_else(|| "pending".to_string()); - let (action_name, module_name) = match node { - 
DAGNode::ActionCall(action) => { - (Some(action.action_name.clone()), action.module_name.clone()) - } - _ => (None, None), - }; - - Some(ExecutionNodeView { - id: node_id, - node_type: node.node_type().to_string(), - label: node.label(), - status, - action_name, - module_name, - }) - }) - .collect(); - - let edges: Vec = dag - .edges - .iter() - .filter(|edge| edge.edge_type == EdgeType::StateMachine) - .map(|edge| ExecutionEdgeView { - source: edge.source.clone(), - target: edge.target.clone(), - edge_type: if edge.is_loop_back { - "state_machine_loop_back".to_string() - } else { - "state_machine".to_string() - }, - }) - .collect(); - - Ok(Some(ExecutionGraphView { nodes, edges })) - } - - async fn get_action_results(&self, instance_id: Uuid) -> BackendResult> { - let row = sqlx::query( - r#" - SELECT state - FROM runner_instances - WHERE instance_id = $1 - "#, - ) - .bind(instance_id) - .fetch_optional(&self.pool) - .await?; - - let Some(row) = row else { - return Ok(Vec::new()); - }; - let state_bytes: Option> = row.get("state"); - let Some(state_bytes) = state_bytes else { - return Ok(Vec::new()); - }; - let graph_update: GraphUpdate = rmp_serde::from_slice(&state_bytes) - .map_err(|e| BackendError::Message(format!("failed to decode state: {}", e)))?; - - let runner_state = RunnerState::new( - None, - Some(graph_update.nodes.clone()), - Some(graph_update.edges), - false, - ); - let action_nodes: HashMap = graph_update - .nodes - .into_iter() - .filter(|(_, node)| node.is_action_call()) - .collect(); - if action_nodes.is_empty() { - return Ok(Vec::new()); - } - let execution_ids: Vec = action_nodes.keys().copied().collect(); - - let rows = sqlx::query( - r#" - SELECT created_at, execution_id, attempt, status, started_at, completed_at, duration_ms, result - FROM runner_actions_done - WHERE execution_id = ANY($1) - ORDER BY created_at ASC, attempt ASC - "#, - ) - .bind(&execution_ids) - .fetch_all(&self.pool) - .await?; - - let mut decoded_rows = 
Vec::with_capacity(rows.len()); - for row in rows { - let created_at: DateTime = row.get("created_at"); - let execution_id: Uuid = row.get("execution_id"); - let attempt: i32 = row.get("attempt"); - let status: Option = row.get("status"); - let started_at: Option> = row.get("started_at"); - let completed_at: Option> = row.get("completed_at"); - let duration_ms: Option = row.get("duration_ms"); - let result_bytes: Option> = row.get("result"); - let result = result_bytes - .as_deref() - .map(decode_msgpack_json) - .transpose()?; - decoded_rows.push(DecodedActionResultRow { - created_at, - execution_id, - attempt, - status, - started_at, - completed_at, - duration_ms, - result, - }); - } - - // Replay needs the current known action outputs by execution id. - let mut action_results = HashMap::new(); - for row in &decoded_rows { - if let Some(result) = &row.result { - action_results.insert(row.execution_id, result.clone()); - } - } - - let mut request_preview_cache: HashMap = HashMap::new(); - let mut entries = Vec::with_capacity(decoded_rows.len()); - for row in decoded_rows { - let node = action_nodes.get(&row.execution_id); - let action_name = node - .and_then(|n| n.action.as_ref().map(|a| a.action_name.clone())) - .unwrap_or_default(); - let module_name = - node.and_then(|n| n.action.as_ref().and_then(|a| a.module_name.clone())); - - let request_preview = - if let Some(existing) = request_preview_cache.get(&row.execution_id) { - existing.clone() - } else { - let rendered = render_action_request_preview( - node.and_then(|n| n.action.as_ref()), - &runner_state, - &action_results, - row.execution_id, - ); - request_preview_cache.insert(row.execution_id, rendered.clone()); - rendered - }; - - let (response_preview, error) = match &row.result { - Some(value) => format_action_result(value), - None => ("(no result)".to_string(), None), - }; - let status = row.status.clone().unwrap_or_else(|| { - if error.is_some() { - "failed".to_string() - } else { - 
"completed".to_string() - } - }); - let (dispatched_at, completed_at, duration_ms) = if row.started_at.is_some() - || row.completed_at.is_some() - || row.duration_ms.is_some() - { - ( - Some(row.started_at.unwrap_or(row.created_at).to_rfc3339()), - Some(row.completed_at.unwrap_or(row.created_at).to_rfc3339()), - row.duration_ms, - ) - } else { - action_timing_from_state(node, row.attempt, row.created_at) - }; - - entries.push(TimelineEntry { - action_id: row.execution_id.to_string(), - action_name, - module_name, - status, - attempt_number: row.attempt, - dispatched_at, - completed_at, - duration_ms, - request_preview, - response_preview, - error, - }); - } - - Ok(entries) - } - - async fn get_distinct_workflows(&self) -> BackendResult> { - let rows = sqlx::query( - r#" - SELECT DISTINCT COALESCE(ri.workflow_name, wv.workflow_name) AS workflow_name - FROM runner_instances ri - LEFT JOIN workflow_versions wv ON wv.id = ri.workflow_version_id - WHERE COALESCE(ri.workflow_name, wv.workflow_name) IS NOT NULL - ORDER BY workflow_name - "#, - ) - .fetch_all(&self.pool) - .await?; - - let mut workflows = Vec::with_capacity(rows.len()); - for row in rows { - let workflow_name: String = row.get("workflow_name"); - workflows.push(workflow_name); - } - Ok(workflows) - } - - async fn get_distinct_statuses(&self) -> BackendResult> { - Ok(vec![ - "queued".to_string(), - "running".to_string(), - "completed".to_string(), - "failed".to_string(), - ]) - } - - async fn count_schedules(&self) -> BackendResult { - let count = sqlx::query_scalar::<_, i64>( - "SELECT COUNT(*) FROM workflow_schedules WHERE status != 'deleted'", - ) - .fetch_one(&self.pool) - .await?; - - Ok(count) - } - - async fn list_schedules(&self, limit: i64, offset: i64) -> BackendResult> { - let rows = sqlx::query( - r#" - SELECT id, workflow_name, schedule_name, schedule_type, cron_expression, interval_seconds, - status, next_run_at, last_run_at, created_at - FROM workflow_schedules - WHERE status != 'deleted' - 
ORDER BY workflow_name, schedule_name - LIMIT $1 OFFSET $2 - "#, - ) - .bind(limit) - .bind(offset) - .fetch_all(&self.pool) - .await?; - - let mut schedules = Vec::new(); - for row in rows { - schedules.push(ScheduleSummary { - id: row.get::("id").to_string(), - workflow_name: row.get("workflow_name"), - schedule_name: row.get("schedule_name"), - schedule_type: row.get("schedule_type"), - cron_expression: row.get("cron_expression"), - interval_seconds: row.get("interval_seconds"), - status: row.get("status"), - next_run_at: row - .get::>, _>("next_run_at") - .map(|dt| dt.to_rfc3339()), - last_run_at: row - .get::>, _>("last_run_at") - .map(|dt| dt.to_rfc3339()), - created_at: row.get::, _>("created_at").to_rfc3339(), - }); - } - - Ok(schedules) - } - - async fn get_schedule(&self, schedule_id: Uuid) -> BackendResult { - let row = sqlx::query( - r#" - SELECT id, workflow_name, schedule_name, schedule_type, cron_expression, interval_seconds, - jitter_seconds, input_payload, status, next_run_at, last_run_at, last_instance_id, - created_at, updated_at, priority, allow_duplicate - FROM workflow_schedules - WHERE id = $1 - "#, - ) - .bind(schedule_id) - .fetch_optional(&self.pool) - .await? 
- .ok_or_else(|| BackendError::Message(format!("schedule not found: {}", schedule_id)))?; - - let input_payload: Option = row - .get::>, _>("input_payload") - .and_then(|bytes| { - rmp_serde::from_slice::(&bytes) - .ok() - .map(|v| serde_json::to_string_pretty(&v).unwrap_or_default()) - }); - - Ok(ScheduleDetail { - id: row.get::("id").to_string(), - workflow_name: row.get("workflow_name"), - schedule_name: row.get("schedule_name"), - schedule_type: row.get("schedule_type"), - cron_expression: row.get("cron_expression"), - interval_seconds: row.get("interval_seconds"), - jitter_seconds: row.get("jitter_seconds"), - status: row.get("status"), - next_run_at: row - .get::>, _>("next_run_at") - .map(|dt| dt.to_rfc3339()), - last_run_at: row - .get::>, _>("last_run_at") - .map(|dt| dt.to_rfc3339()), - last_instance_id: row - .get::, _>("last_instance_id") - .map(|id| id.to_string()), - created_at: row.get::, _>("created_at").to_rfc3339(), - updated_at: row.get::, _>("updated_at").to_rfc3339(), - priority: row.get("priority"), - allow_duplicate: row.get("allow_duplicate"), - input_payload, - }) - } - - async fn count_schedule_invocations(&self, schedule_id: Uuid) -> BackendResult { - let count = sqlx::query_scalar::<_, i64>( - r#" - SELECT COUNT(*) - FROM runner_instances - WHERE schedule_id = $1 - "#, - ) - .bind(schedule_id) - .fetch_one(&self.pool) - .await?; - Ok(count) - } - - async fn list_schedule_invocations( - &self, - schedule_id: Uuid, - limit: i64, - offset: i64, - ) -> BackendResult> { - let rows = sqlx::query( - r#" - SELECT instance_id, created_at, state, result, error - FROM runner_instances - WHERE schedule_id = $1 - ORDER BY created_at DESC, instance_id DESC - LIMIT $2 OFFSET $3 - "#, - ) - .bind(schedule_id) - .bind(limit) - .bind(offset) - .fetch_all(&self.pool) - .await?; - - let mut invocations = Vec::with_capacity(rows.len()); - for row in rows { - let state_bytes: Option> = row.get("state"); - let result_bytes: Option> = row.get("result"); - let 
error_bytes: Option> = row.get("error"); - - invocations.push(ScheduleInvocationSummary { - id: row.get("instance_id"), - created_at: row.get("created_at"), - status: determine_status(&state_bytes, &result_bytes, &error_bytes), - }); - } - - Ok(invocations) - } - - async fn update_schedule_status(&self, schedule_id: Uuid, status: &str) -> BackendResult { - let result = sqlx::query( - r#" - UPDATE workflow_schedules - SET status = $2, updated_at = NOW() - WHERE id = $1 - "#, - ) - .bind(schedule_id) - .bind(status) - .execute(&self.pool) - .await?; - - Ok(result.rows_affected() > 0) - } - - async fn get_distinct_schedule_statuses(&self) -> BackendResult> { - Ok(vec!["active".to_string(), "paused".to_string()]) - } - - async fn get_distinct_schedule_types(&self) -> BackendResult> { - Ok(vec!["cron".to_string(), "interval".to_string()]) - } - - async fn get_worker_action_stats( - &self, - window_minutes: i64, - ) -> BackendResult> { - let rows = sqlx::query( - r#" - SELECT - pool_id, - COUNT(DISTINCT worker_id) as active_workers, - SUM(throughput_per_min) / 60.0 as actions_per_sec, - SUM(throughput_per_min) as throughput_per_min, - COALESCE(SUM(total_completed), 0)::BIGINT as total_completed, - PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY median_dequeue_ms) as median_dequeue_ms, - PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY median_handling_ms) as median_handling_ms, - MAX(last_action_at) as last_action_at, - MAX(updated_at) as updated_at - FROM worker_status - WHERE updated_at > NOW() - INTERVAL '1 minute' * $1 - GROUP BY pool_id - ORDER BY actions_per_sec DESC - "#, - ) - .bind(window_minutes) - .fetch_all(&self.pool) - .await?; - - let mut stats = Vec::new(); - for row in rows { - stats.push(WorkerActionRow { - pool_id: row.get::("pool_id").to_string(), - active_workers: row.get::("active_workers"), - actions_per_sec: format!("{:.1}", row.get::("actions_per_sec")), - throughput_per_min: row.get::("throughput_per_min") as i64, - total_completed: 
row.get::("total_completed"), - median_dequeue_ms: row - .get::, _>("median_dequeue_ms") - .map(|v| v as i64), - median_handling_ms: row - .get::, _>("median_handling_ms") - .map(|v| v as i64), - last_action_at: row - .get::>, _>("last_action_at") - .map(|dt| dt.to_rfc3339()), - updated_at: row.get::, _>("updated_at").to_rfc3339(), - }); - } - - Ok(stats) - } - - async fn get_worker_aggregate_stats( - &self, - window_minutes: i64, - ) -> BackendResult { - let row = sqlx::query( - r#" - SELECT - COUNT(DISTINCT worker_id) as active_worker_count, - COALESCE(SUM(throughput_per_min) / 60.0, 0) as actions_per_sec, - COALESCE(SUM(total_in_flight), 0)::BIGINT as total_in_flight, - COALESCE(SUM(dispatch_queue_size), 0)::BIGINT as total_queue_depth - FROM worker_status - WHERE updated_at > NOW() - INTERVAL '1 minute' * $1 - "#, - ) - .bind(window_minutes) - .fetch_one(&self.pool) - .await?; - - Ok(WorkerAggregateStats { - active_worker_count: row.get::("active_worker_count"), - actions_per_sec: format!("{:.1}", row.get::("actions_per_sec")), - total_in_flight: row.get::("total_in_flight"), - total_queue_depth: row.get::("total_queue_depth"), - }) - } - - async fn worker_status_table_exists(&self) -> bool { - sqlx::query_scalar::<_, bool>( - r#" - SELECT EXISTS ( - SELECT FROM information_schema.tables - WHERE table_name = 'worker_status' - ) - "#, - ) - .fetch_one(&self.pool) - .await - .unwrap_or(false) - } - - async fn schedules_table_exists(&self) -> bool { - sqlx::query_scalar::<_, bool>( - r#" - SELECT EXISTS ( - SELECT FROM information_schema.tables - WHERE table_name = 'workflow_schedules' - ) - "#, - ) - .fetch_one(&self.pool) - .await - .unwrap_or(false) - } - - async fn get_worker_statuses(&self, window_minutes: i64) -> BackendResult> { - let rows = sqlx::query( - r#" - SELECT - pool_id, - MAX(active_workers) as active_workers, - COALESCE(SUM(throughput_per_min), 0) as throughput_per_min, - COALESCE(SUM(throughput_per_min) / 60.0, 0) as actions_per_sec, - 
COALESCE(SUM(total_completed), 0)::BIGINT as total_completed, - MAX(last_action_at) as last_action_at, - MAX(updated_at) as updated_at, - PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY median_dequeue_ms) as median_dequeue_ms, - PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY median_handling_ms) as median_handling_ms, - MAX(dispatch_queue_size) as dispatch_queue_size, - MAX(total_in_flight) as total_in_flight, - MAX(median_instance_duration_secs) as median_instance_duration_secs, - MAX(active_instance_count) as active_instance_count, - ( - SELECT COUNT(*)::BIGINT - FROM runner_instances ri - WHERE ri.result IS NOT NULL - AND ri.error IS NULL - ) as total_instances_completed, - MAX(instances_per_sec) as instances_per_sec, - MAX(instances_per_min) as instances_per_min, - ( - SELECT time_series FROM worker_status ws2 - WHERE ws2.pool_id = worker_status.pool_id - AND ws2.time_series IS NOT NULL - ORDER BY ws2.updated_at DESC LIMIT 1 - ) as time_series - FROM worker_status - WHERE updated_at > NOW() - INTERVAL '1 minute' * $1 - GROUP BY pool_id - ORDER BY actions_per_sec DESC - "#, - ) - .bind(window_minutes) - .fetch_all(&self.pool) - .await?; - - let mut statuses = Vec::new(); - for row in rows { - statuses.push(WorkerStatus { - pool_id: row.get::("pool_id"), - active_workers: row.get::, _>("active_workers").unwrap_or(0), - throughput_per_min: row.get::("throughput_per_min"), - actions_per_sec: row.get::("actions_per_sec"), - total_completed: row.get::("total_completed"), - last_action_at: row.get::>, _>("last_action_at"), - updated_at: row.get::, _>("updated_at"), - median_dequeue_ms: row - .get::, _>("median_dequeue_ms") - .map(|v| v as i64), - median_handling_ms: row - .get::, _>("median_handling_ms") - .map(|v| v as i64), - dispatch_queue_size: row.get::, _>("dispatch_queue_size"), - total_in_flight: row.get::, _>("total_in_flight"), - median_instance_duration_secs: row - .get::, _>("median_instance_duration_secs"), - active_instance_count: row - .get::, 
_>("active_instance_count") - .unwrap_or(0), - total_instances_completed: row - .get::, _>("total_instances_completed") - .unwrap_or(0), - instances_per_sec: row - .get::, _>("instances_per_sec") - .unwrap_or(0.0), - instances_per_min: row - .get::, _>("instances_per_min") - .unwrap_or(0.0), - time_series: row.get::>, _>("time_series"), - }); - } - - Ok(statuses) - } -} - -struct DecodedActionResultRow { - created_at: DateTime, - execution_id: Uuid, - attempt: i32, - status: Option, - started_at: Option>, - completed_at: Option>, - duration_ms: Option, - result: Option, -} - -fn decode_msgpack_json(bytes: &[u8]) -> BackendResult { - rmp_serde::from_slice::(bytes) - .map_err(|err| BackendError::Message(format!("failed to decode action result: {err}"))) -} - -fn render_action_request_preview( - action: Option<&ActionCallSpec>, - state: &RunnerState, - action_results: &HashMap, - node_id: Uuid, -) -> String { - let Some(action) = action else { - return "{}".to_string(); - }; - - match replay_action_kwargs(state, action_results, node_id) { - Ok(kwargs) => { - let rendered_map: serde_json::Map = kwargs.into_iter().collect(); - pretty_json(&Value::Object(rendered_map)) - } - Err(_) => format_symbolic_kwargs(action), - } -} - -fn format_symbolic_kwargs(action: &ActionCallSpec) -> String { - if action.kwargs.is_empty() { - return "{}".to_string(); - } - let rendered_map: serde_json::Map = action - .kwargs - .iter() - .map(|(name, expr)| (name.clone(), Value::String(format_value(expr)))) - .collect(); - pretty_json(&Value::Object(rendered_map)) -} - -fn action_timing_from_state( - node: Option<&ExecutionNode>, - attempt: i32, - fallback_completed_at: DateTime, -) -> (Option, Option, Option) { - // Node timing fields represent the latest attempt for this execution id. - // For historical retries, fall back to row timestamps from actions_done. 
- let Some(node) = node else { - let at = fallback_completed_at.to_rfc3339(); - return (Some(at.clone()), Some(at), None); - }; - if node.action_attempt != attempt { - let at = fallback_completed_at.to_rfc3339(); - return (Some(at.clone()), Some(at), None); - } - - let dispatched_at = node - .started_at - .map(|value| value.to_rfc3339()) - .unwrap_or_else(|| fallback_completed_at.to_rfc3339()); - let completed_dt = node.completed_at.unwrap_or(fallback_completed_at); - let completed_at = completed_dt.to_rfc3339(); - let duration_ms = node - .started_at - .map(|started_at| { - completed_dt - .signed_duration_since(started_at) - .num_milliseconds() - }) - .filter(|duration| *duration >= 0); - - (Some(dispatched_at), Some(completed_at), duration_ms) -} - -fn format_action_result(value: &Value) -> (String, Option) { - let preview = pretty_json(value); - let error = extract_action_error(value); - (preview, error) -} - -fn extract_action_error(value: &Value) -> Option { - let Value::Object(map) = value else { - return None; - }; - let message = map.get("message").and_then(Value::as_str); - let is_exception = map.contains_key("type") && map.contains_key("message"); - if is_exception { - return Some(message.unwrap_or("action failed").to_string()); - } - map.get("error") - .and_then(Value::as_str) - .map(|msg| msg.to_string()) -} - -fn pretty_json(value: &Value) -> String { - serde_json::to_string_pretty(value).unwrap_or_else(|_| "{}".to_string()) -} - -fn determine_status( - state_bytes: &Option>, - result_bytes: &Option>, - error_bytes: &Option>, -) -> InstanceStatus { - if error_bytes.is_some() { - return InstanceStatus::Failed; - } - if result_bytes - .as_deref() - .is_some_and(result_payload_is_error_wrapper) - { - return InstanceStatus::Failed; - } - if result_bytes.is_some() { - return InstanceStatus::Completed; - } - if state_bytes.is_some() { - return InstanceStatus::Running; - } - InstanceStatus::Queued -} - -fn extract_input_preview(state_bytes: &Option>) -> 
String { - let Some(bytes) = state_bytes else { - return "{}".to_string(); - }; - - match rmp_serde::from_slice::(bytes) { - Ok(graph) => { - let count = graph.nodes.len(); - format!("{{nodes: {count}}}") - } - Err(_) => "{}".to_string(), - } -} - -fn format_input_payload(state_bytes: &Option>) -> String { - let Some(bytes) = state_bytes else { - return "{}".to_string(); - }; - - match rmp_serde::from_slice::(bytes) { - Ok(graph) => format_extracted_inputs(&graph.nodes), - Err(_) => "{}".to_string(), - } -} - -fn format_extracted_inputs(nodes: &HashMap) -> String { - let mut input_pairs: Vec<(String, Value)> = nodes - .values() - .filter_map(extract_input_assignment) - .collect(); - if input_pairs.is_empty() { - return "{}".to_string(); - } - input_pairs.sort_by(|(left, _), (right, _)| left.cmp(right)); - let input_map: serde_json::Map = input_pairs.into_iter().collect(); - pretty_json(&Value::Object(input_map)) -} - -fn extract_input_assignment(node: &ExecutionNode) -> Option<(String, Value)> { - let (name, raw_value) = parse_input_assignment_label(&node.label)?; - - if let Ok(value) = serde_json::from_str::(raw_value) { - return Some((name.to_string(), value)); - } - - if let Some(value_expr) = node.assignments.get(name) { - return Some((name.to_string(), value_expr_to_json(value_expr))); - } - - Some((name.to_string(), Value::String(raw_value.to_string()))) -} - -fn parse_input_assignment_label(label: &str) -> Option<(&str, &str)> { - let payload = label.strip_prefix("input ")?; - payload.split_once(" = ") -} - -fn value_expr_to_json(value_expr: &ValueExpr) -> Value { - match value_expr { - ValueExpr::Literal(value) => value.value.clone(), - ValueExpr::List(value) => { - Value::Array(value.elements.iter().map(value_expr_to_json).collect()) - } - ValueExpr::Dict(value) => { - let mut map = serde_json::Map::new(); - for entry in &value.entries { - let key = match value_expr_to_json(&entry.key) { - Value::String(key) => key, - other => other.to_string(), - }; - 
map.insert(key, value_expr_to_json(&entry.value)); - } - Value::Object(map) - } - _ => Value::String(format_value(value_expr)), - } -} - -fn format_instance_result_payload( - status: InstanceStatus, - result_bytes: &Option>, - error_bytes: &Option>, -) -> String { - match status { - InstanceStatus::Failed => { - let payload = error_bytes.as_deref().or(result_bytes.as_deref()); - let Some(bytes) = payload else { - return "(failed)".to_string(); - }; - match rmp_serde::from_slice::(bytes) { - Ok(value) => pretty_json(&normalize_error_payload(value)), - Err(_) => "(decode error)".to_string(), - } - } - InstanceStatus::Completed => { - let Some(bytes) = result_bytes else { - return "(pending)".to_string(); - }; - match rmp_serde::from_slice::(bytes) { - Ok(value) => pretty_json(&normalize_success_payload(value)), - Err(_) => "(decode error)".to_string(), - } - } - InstanceStatus::Running | InstanceStatus::Queued => "(pending)".to_string(), - } -} - -fn normalize_success_payload(value: Value) -> Value { - let Value::Object(mut map) = value else { - return value; - }; - map.remove("result").unwrap_or(Value::Object(map)) -} - -fn normalize_error_payload(value: Value) -> Value { - let Value::Object(mut map) = value else { - return value; - }; - - if let Some(error) = map.remove("error") { - return normalize_error_payload(error); - } - if let Some(exception) = map.remove("__exception__") { - return normalize_error_payload(exception); - } - if let Some(exception) = map.remove("exception") { - return normalize_error_payload(exception); - } - - Value::Object(map) -} - -fn result_payload_is_error_wrapper(bytes: &[u8]) -> bool { - let Ok(value) = rmp_serde::from_slice::(bytes) else { - return false; - }; - let Value::Object(map) = value else { - return false; - }; - map.len() == 1 - && (map.contains_key("error") - || map.contains_key("__exception__") - || map.contains_key("exception")) -} - -fn format_error(error_bytes: &Option>) -> Option { - let bytes = error_bytes.as_ref()?; 
- - match rmp_serde::from_slice::(bytes) { - Ok(value) => Some(pretty_json(&normalize_error_payload(value))), - Err(_) => Some("(decode error)".to_string()), - } -} - -fn format_node_status(status: &NodeStatus) -> String { - match status { - NodeStatus::Queued => "queued".to_string(), - NodeStatus::Running => "running".to_string(), - NodeStatus::Completed => "completed".to_string(), - NodeStatus::Failed => "failed".to_string(), - } -} - -fn merge_template_status(existing: &NodeStatus, new_status: &NodeStatus) -> NodeStatus { - if node_status_rank(new_status) > node_status_rank(existing) { - new_status.clone() - } else { - existing.clone() - } -} - -fn node_status_rank(status: &NodeStatus) -> u8 { - match status { - NodeStatus::Completed => 0, - NodeStatus::Queued => 1, - NodeStatus::Running => 2, - NodeStatus::Failed => 3, - } -} - -#[cfg(test)] -mod tests { - use std::collections::{HashMap, HashSet}; - - use chrono::{Duration as ChronoDuration, Utc}; - use prost::Message; - use serial_test::serial; - use uuid::Uuid; - - use super::super::test_helpers::setup_backend; - use super::*; - use crate::backends::{ - SchedulerBackend, WebappBackend, WorkerStatusBackend, WorkerStatusUpdate, - WorkflowRegistration, WorkflowRegistryBackend, - }; - use crate::scheduler::{CreateScheduleParams, ScheduleType}; - use crate::waymark_core::ir_parser::parse_program; - use crate::waymark_core::runner::ValueExpr; - use crate::waymark_core::runner::state::{ - ActionCallSpec, ExecutionEdge, ExecutionNode, LiteralValue, NodeStatus, - }; - use waymark_dag::EdgeType; - - #[test] - fn format_extracted_inputs_happy_path() { - let mut nodes = HashMap::new(); - let mut first_assignments = HashMap::new(); - first_assignments.insert( - "iterations".to_string(), - ValueExpr::Literal(LiteralValue { - value: serde_json::json!(3), - }), - ); - nodes.insert( - Uuid::new_v4(), - ExecutionNode { - node_id: Uuid::new_v4(), - node_type: "assignment".to_string(), - label: "input iterations = 
3".to_string(), - status: NodeStatus::Completed, - template_id: None, - targets: vec!["iterations".to_string()], - action: None, - value_expr: None, - assignments: first_assignments, - action_attempt: 0, - started_at: None, - completed_at: None, - scheduled_at: None, - }, - ); - - let mut second_assignments = HashMap::new(); - second_assignments.insert( - "sleep_seconds".to_string(), - ValueExpr::Literal(LiteralValue { - value: serde_json::json!(20), - }), - ); - nodes.insert( - Uuid::new_v4(), - ExecutionNode { - node_id: Uuid::new_v4(), - node_type: "assignment".to_string(), - label: "input sleep_seconds = 20".to_string(), - status: NodeStatus::Completed, - template_id: None, - targets: vec!["sleep_seconds".to_string()], - action: None, - value_expr: None, - assignments: second_assignments, - action_attempt: 0, - started_at: None, - completed_at: None, - scheduled_at: None, - }, - ); - - let rendered = format_extracted_inputs(&nodes); - let value: Value = serde_json::from_str(&rendered).expect("decode rendered input payload"); - assert_eq!( - value, - serde_json::json!({ - "iterations": 3, - "sleep_seconds": 20 - }) - ); - } - - #[test] - fn format_instance_result_payload_unwraps_success_result_wrapper() { - let result_bytes = - rmp_serde::to_vec_named(&serde_json::json!({"result": {"total_iterations": 3}})) - .expect("encode result"); - let rendered = - format_instance_result_payload(InstanceStatus::Completed, &Some(result_bytes), &None); - let value: Value = serde_json::from_str(&rendered).expect("decode result payload"); - assert_eq!(value, serde_json::json!({"total_iterations": 3})); - } - - #[test] - fn format_instance_result_payload_unwraps_error_wrapper() { - let error_bytes = rmp_serde::to_vec_named(&serde_json::json!({ - "error": { - "__exception__": { - "type": "ValueError", - "message": "boom" - } - } - })) - .expect("encode error"); - let rendered = - format_instance_result_payload(InstanceStatus::Failed, &None, &Some(error_bytes)); - let value: Value 
= serde_json::from_str(&rendered).expect("decode result payload"); - assert_eq!( - value, - serde_json::json!({ - "type": "ValueError", - "message": "boom" - }) - ); - } - - #[test] - fn determine_status_marks_wrapped_result_errors_as_failed() { - let result_bytes = - rmp_serde::to_vec_named(&serde_json::json!({"error": {"message": "boom"}})) - .expect("encode result error"); - let status = determine_status(&None, &Some(result_bytes), &None); - assert_eq!(status, InstanceStatus::Failed); - } - - #[test] - fn parse_instance_search_expr_handles_boolean_operators() { - let parsed = parse_instance_search_expr("(alpha OR beta) AND running"); - assert_eq!( - parsed, - Some(InstanceSearchExpr::And( - Box::new(InstanceSearchExpr::Or( - Box::new(InstanceSearchExpr::Term("alpha".to_string())), - Box::new(InstanceSearchExpr::Term("beta".to_string())), - )), - Box::new(InstanceSearchExpr::Term("running".to_string())), - )) - ); - } - - #[test] - fn parse_instance_search_expr_falls_back_for_unbalanced_parentheses() { - let parsed = parse_instance_search_expr("(alpha OR beta"); - assert_eq!( - parsed, - Some(InstanceSearchExpr::Term("(alpha OR beta".to_string())) - ); - } - - #[test] - fn action_timing_from_state_uses_state_timestamps_for_latest_attempt() { - let started_at = Utc::now() - ChronoDuration::milliseconds(1500); - let completed_at = started_at + ChronoDuration::milliseconds(450); - let fallback = Utc::now(); - let node = ExecutionNode { - node_id: Uuid::new_v4(), - node_type: "action_call".to_string(), - label: "@tests.action()".to_string(), - status: NodeStatus::Completed, - template_id: Some("n0".to_string()), - targets: Vec::new(), - action: Some(ActionCallSpec { - action_name: "tests.action".to_string(), - module_name: Some("tests".to_string()), - kwargs: HashMap::new(), - }), - value_expr: None, - assignments: HashMap::new(), - action_attempt: 2, - started_at: Some(started_at), - completed_at: Some(completed_at), - scheduled_at: None, - }; - - let 
(dispatched_at, finished_at, duration_ms) = - action_timing_from_state(Some(&node), 2, fallback); - assert_eq!(dispatched_at, Some(started_at.to_rfc3339())); - assert_eq!(finished_at, Some(completed_at.to_rfc3339())); - assert_eq!(duration_ms, Some(450)); - } - - #[test] - fn action_timing_from_state_falls_back_for_prior_attempt_rows() { - let started_at = Utc::now() - ChronoDuration::milliseconds(1200); - let completed_at = started_at + ChronoDuration::milliseconds(600); - let fallback = Utc::now(); - let node = ExecutionNode { - node_id: Uuid::new_v4(), - node_type: "action_call".to_string(), - label: "@tests.action()".to_string(), - status: NodeStatus::Completed, - template_id: Some("n0".to_string()), - targets: Vec::new(), - action: Some(ActionCallSpec { - action_name: "tests.action".to_string(), - module_name: Some("tests".to_string()), - kwargs: HashMap::new(), - }), - value_expr: None, - assignments: HashMap::new(), - action_attempt: 3, - started_at: Some(started_at), - completed_at: Some(completed_at), - scheduled_at: None, - }; - - let (dispatched_at, finished_at, duration_ms) = - action_timing_from_state(Some(&node), 2, fallback); - assert_eq!(dispatched_at, Some(fallback.to_rfc3339())); - assert_eq!(finished_at, Some(fallback.to_rfc3339())); - assert_eq!(duration_ms, None); - } - - fn sample_execution_node(execution_id: Uuid) -> ExecutionNode { - ExecutionNode { - node_id: execution_id, - node_type: "action_call".to_string(), - label: "@tests.action()".to_string(), - status: NodeStatus::Queued, - template_id: Some("n0".to_string()), - targets: Vec::new(), - action: Some(ActionCallSpec { - action_name: "tests.action".to_string(), - module_name: Some("tests".to_string()), - kwargs: HashMap::from([( - "value".to_string(), - ValueExpr::Literal(LiteralValue { - value: serde_json::json!(7), - }), - )]), - }), - value_expr: None, - assignments: HashMap::new(), - action_attempt: 1, - started_at: None, - completed_at: None, - scheduled_at: Some(Utc::now()), - } - 
} - - fn sample_graph(instance_id: Uuid, execution_id: Uuid) -> GraphUpdate { - let mut nodes = HashMap::new(); - nodes.insert(execution_id, sample_execution_node(execution_id)); - - GraphUpdate { - instance_id, - nodes, - edges: HashSet::from([ExecutionEdge { - source: execution_id, - target: execution_id, - edge_type: EdgeType::StateMachine, - }]), - } - } - - async fn insert_instance_with_graph_with_workflow( - backend: &PostgresBackend, - workflow_name: &str, - ) -> (Uuid, Uuid, Uuid) { - let instance_id = Uuid::new_v4(); - let entry_node = Uuid::new_v4(); - let execution_id = Uuid::new_v4(); - let workflow_version_id = insert_workflow_version(backend, workflow_name).await; - let graph = sample_graph(instance_id, execution_id); - let state_payload = rmp_serde::to_vec_named(&graph).expect("encode graph update"); - - sqlx::query( - "INSERT INTO runner_instances (instance_id, entry_node, workflow_version_id, state) VALUES ($1, $2, $3, $4)", - ) - .bind(instance_id) - .bind(entry_node) - .bind(workflow_version_id) - .bind(state_payload) - .execute(backend.pool()) - .await - .expect("insert runner instance"); - - (instance_id, entry_node, execution_id) - } - - async fn insert_instance_with_graph(backend: &PostgresBackend) -> (Uuid, Uuid, Uuid) { - insert_instance_with_graph_with_workflow(backend, "tests.workflow").await - } - - async fn insert_action_result(backend: &PostgresBackend, execution_id: Uuid) { - let payload = rmp_serde::to_vec_named(&serde_json::json!({"ok": true})) - .expect("encode action result"); - sqlx::query( - "INSERT INTO runner_actions_done (execution_id, attempt, result) VALUES ($1, $2, $3)", - ) - .bind(execution_id) - .bind(1_i32) - .bind(payload) - .execute(backend.pool()) - .await - .expect("insert action result"); - } - - fn sample_program_proto() -> Vec { - let source = r#" -fn main(input: [x], output: [y]): - y = @tests.action(value=x) - return y -"#; - let program = parse_program(source.trim()).expect("parse program"); - 
program.encode_to_vec() - } - - fn loop_program_proto() -> Vec { - let source = r#" -fn main(input: [items], output: [total]): - total = 0 - for item in items: - total = total + item - return total -"#; - let program = parse_program(source.trim()).expect("parse loop program"); - program.encode_to_vec() - } - - async fn insert_workflow_version(backend: &PostgresBackend, workflow_name: &str) -> Uuid { - WorkflowRegistryBackend::upsert_workflow_version( - backend, - &WorkflowRegistration { - workflow_name: workflow_name.to_string(), - workflow_version: "v1".to_string(), - ir_hash: format!("hash-{workflow_name}"), - program_proto: sample_program_proto(), - concurrent: false, - }, - ) - .await - .expect("insert workflow version") - } - - async fn insert_loop_workflow_version(backend: &PostgresBackend, workflow_name: &str) -> Uuid { - WorkflowRegistryBackend::upsert_workflow_version( - backend, - &WorkflowRegistration { - workflow_name: workflow_name.to_string(), - workflow_version: "v1-loop".to_string(), - ir_hash: format!("hash-loop-{workflow_name}"), - program_proto: loop_program_proto(), - concurrent: false, - }, - ) - .await - .expect("insert loop workflow version") - } - - async fn insert_schedule(backend: &PostgresBackend, schedule_name: &str) -> Uuid { - SchedulerBackend::upsert_schedule( - backend, - &CreateScheduleParams { - workflow_name: "tests.workflow".to_string(), - schedule_name: schedule_name.to_string(), - schedule_type: ScheduleType::Interval, - cron_expression: None, - interval_seconds: Some(60), - jitter_seconds: 0, - input_payload: Some( - rmp_serde::to_vec_named(&serde_json::json!({"k": "v"})) - .expect("encode payload"), - ), - priority: 0, - allow_duplicate: false, - }, - ) - .await - .expect("upsert schedule") - .0 - } - - async fn insert_scheduled_instance( - backend: &PostgresBackend, - schedule_id: Uuid, - created_at: DateTime, - with_result: bool, - ) -> Uuid { - let instance_id = Uuid::new_v4(); - let entry_node = Uuid::new_v4(); - let 
execution_id = Uuid::new_v4(); - let workflow_version_id = insert_workflow_version(backend, "tests.workflow").await; - let graph = sample_graph(instance_id, execution_id); - let state_payload = rmp_serde::to_vec_named(&graph).expect("encode graph update"); - let result_payload = if with_result { - Some( - rmp_serde::to_vec_named(&serde_json::json!({"result": {"ok": true}})) - .expect("encode result"), - ) - } else { - None - }; - - sqlx::query( - "INSERT INTO runner_instances (instance_id, entry_node, workflow_version_id, schedule_id, created_at, state, result, error) VALUES ($1, $2, $3, $4, $5, $6, $7, $8)", - ) - .bind(instance_id) - .bind(entry_node) - .bind(workflow_version_id) - .bind(schedule_id) - .bind(created_at) - .bind(state_payload) - .bind(result_payload) - .bind(Option::>::None) - .execute(backend.pool()) - .await - .expect("insert scheduled instance"); - - instance_id - } - - async fn insert_worker_status(backend: &PostgresBackend, pool_id: Uuid) { - WorkerStatusBackend::upsert_worker_status( - backend, - &WorkerStatusUpdate { - pool_id, - throughput_per_min: 180.0, - total_completed: 20, - last_action_at: Some(Utc::now()), - median_dequeue_ms: Some(5), - median_handling_ms: Some(12), - dispatch_queue_size: 3, - total_in_flight: 2, - active_workers: 4, - actions_per_sec: 3.0, - median_instance_duration_secs: Some(0.2), - active_instance_count: 1, - total_instances_completed: 8, - instances_per_sec: 0.5, - instances_per_min: 30.0, - time_series: None, - }, - ) - .await - .expect("upsert worker status"); - } - - #[serial(postgres)] - #[tokio::test] - async fn webapp_count_instances_happy_path() { - let backend = setup_backend().await; - insert_instance_with_graph(&backend).await; - - let count = WebappBackend::count_instances(&backend, None) - .await - .expect("count instances"); - assert_eq!(count, 1); - } - - #[serial(postgres)] - #[tokio::test] - async fn webapp_count_instances_applies_search_expression() { - let backend = setup_backend().await; - 
let (alpha_id, _, _) = - insert_instance_with_graph_with_workflow(&backend, "tests.alpha").await; - let (beta_id, _, _) = - insert_instance_with_graph_with_workflow(&backend, "tests.beta").await; - assert_ne!(alpha_id, beta_id); - - let completed_payload = - rmp_serde::to_vec_named(&serde_json::json!({"result": {"ok": true}})) - .expect("encode completed payload"); - sqlx::query( - "UPDATE runner_instances SET result = $2, current_status = $3 WHERE instance_id = $1", - ) - .bind(beta_id) - .bind(completed_payload) - .bind("completed") - .execute(backend.pool()) - .await - .expect("mark beta completed"); - - let alpha_count = WebappBackend::count_instances(&backend, Some("alpha")) - .await - .expect("count alpha"); - assert_eq!(alpha_count, 1); - - let completed_count = WebappBackend::count_instances(&backend, Some("completed")) - .await - .expect("count completed"); - assert_eq!(completed_count, 1); - - let combined = WebappBackend::count_instances(&backend, Some("(alpha OR completed)")) - .await - .expect("count combined"); - assert_eq!(combined, 2); - } - - #[serial(postgres)] - #[tokio::test] - async fn webapp_list_instances_happy_path() { - let backend = setup_backend().await; - let (instance_id, _, _) = insert_instance_with_graph(&backend).await; - - let instances = WebappBackend::list_instances(&backend, None, 10, 0) - .await - .expect("list instances"); - - assert_eq!(instances.len(), 1); - assert_eq!(instances[0].id, instance_id); - assert_eq!(instances[0].status, InstanceStatus::Running); - assert_eq!( - instances[0].workflow_name, - Some("tests.workflow".to_string()) - ); - } - - #[serial(postgres)] - #[tokio::test] - async fn webapp_list_instances_applies_search_expression() { - let backend = setup_backend().await; - let (alpha_id, _, _) = - insert_instance_with_graph_with_workflow(&backend, "tests.alpha").await; - let _ = insert_instance_with_graph_with_workflow(&backend, "tests.beta").await; - - let alpha_instances = 
WebappBackend::list_instances(&backend, Some("alpha"), 10, 0) - .await - .expect("list alpha"); - assert_eq!(alpha_instances.len(), 1); - assert_eq!(alpha_instances[0].id, alpha_id); - - let running_instances = - WebappBackend::list_instances(&backend, Some("(alpha OR beta) AND running"), 10, 0) - .await - .expect("list running instances"); - assert_eq!(running_instances.len(), 2); - } - - #[serial(postgres)] - #[tokio::test] - async fn webapp_get_instance_happy_path() { - let backend = setup_backend().await; - let (instance_id, _, _) = insert_instance_with_graph(&backend).await; - - let instance = WebappBackend::get_instance(&backend, instance_id) - .await - .expect("get instance"); - - assert_eq!(instance.id, instance_id); - assert_eq!(instance.status, InstanceStatus::Running); - assert_eq!(instance.workflow_name, Some("tests.workflow".to_string())); - } - - #[serial(postgres)] - #[tokio::test] - async fn webapp_workflow_name_prefers_registered_workflow_name() { - let backend = setup_backend().await; - let (instance_id, entry_node, execution_id) = - insert_instance_with_graph_with_workflow(&backend, "tests.workflow_name").await; - - let list = WebappBackend::list_instances(&backend, None, 10, 0) - .await - .expect("list instances"); - assert_eq!(list.len(), 1); - assert_eq!(list[0].id, instance_id); - assert_eq!( - list[0].workflow_name, - Some("tests.workflow_name".to_string()) - ); - - let detail = WebappBackend::get_instance(&backend, instance_id) - .await - .expect("get instance"); - assert_eq!(detail.id, instance_id); - assert_eq!(detail.entry_node, entry_node); - assert_eq!( - detail.workflow_name, - Some("tests.workflow_name".to_string()) - ); - - let graph = WebappBackend::get_execution_graph(&backend, instance_id) - .await - .expect("get graph") - .expect("graph"); - assert!( - graph - .nodes - .iter() - .any(|node| node.id == execution_id.to_string()), - "expected action node to remain intact" - ); - } - - #[serial(postgres)] - #[tokio::test] - async fn 
webapp_get_execution_graph_happy_path() { - let backend = setup_backend().await; - let (instance_id, _, execution_id) = insert_instance_with_graph(&backend).await; - - let graph = WebappBackend::get_execution_graph(&backend, instance_id) - .await - .expect("get execution graph") - .expect("expected execution graph"); - - assert_eq!(graph.nodes.len(), 1); - assert_eq!(graph.edges.len(), 1); - assert_eq!(graph.nodes[0].id, execution_id.to_string()); - assert_eq!(graph.nodes[0].action_name, Some("tests.action".to_string())); - } - - #[serial(postgres)] - #[tokio::test] - async fn webapp_get_workflow_graph_uses_template_node_ids() { - let backend = setup_backend().await; - let (instance_id, _, execution_id) = insert_instance_with_graph(&backend).await; - - let graph = WebappBackend::get_workflow_graph(&backend, instance_id) - .await - .expect("get workflow graph") - .expect("expected workflow graph"); - - assert!(!graph.nodes.is_empty(), "workflow graph should have nodes"); - assert!( - graph - .nodes - .iter() - .all(|node| node.id != execution_id.to_string()), - "workflow graph should use template node ids, not runtime execution ids" - ); - assert!( - graph - .nodes - .iter() - .any(|node| node.node_type == "action_call"), - "workflow graph should include action_call template nodes" - ); - } - - #[serial(postgres)] - #[tokio::test] - async fn webapp_get_workflow_graph_marks_loop_back_edges() { - let backend = setup_backend().await; - let instance_id = Uuid::new_v4(); - let entry_node = Uuid::new_v4(); - let execution_id = Uuid::new_v4(); - let workflow_version_id = - insert_loop_workflow_version(&backend, "tests.loop_workflow").await; - let graph = sample_graph(instance_id, execution_id); - let state_payload = rmp_serde::to_vec_named(&graph).expect("encode graph update"); - - sqlx::query( - "INSERT INTO runner_instances (instance_id, entry_node, workflow_version_id, state) VALUES ($1, $2, $3, $4)", - ) - .bind(instance_id) - .bind(entry_node) - 
.bind(workflow_version_id) - .bind(state_payload) - .execute(backend.pool()) - .await - .expect("insert loop runner instance"); - - let workflow_graph = WebappBackend::get_workflow_graph(&backend, instance_id) - .await - .expect("get workflow graph") - .expect("expected workflow graph"); - - assert!( - workflow_graph - .edges - .iter() - .any(|edge| edge.edge_type == "state_machine_loop_back"), - "loop workflows should emit at least one loop_back edge" - ); - } - - #[serial(postgres)] - #[tokio::test] - async fn webapp_get_action_results_happy_path() { - let backend = setup_backend().await; - let (instance_id, _, execution_id) = insert_instance_with_graph(&backend).await; - insert_action_result(&backend, execution_id).await; - - let entries = WebappBackend::get_action_results(&backend, instance_id) - .await - .expect("get action results"); - - assert_eq!(entries.len(), 1); - assert_eq!(entries[0].action_id, execution_id.to_string()); - assert_eq!(entries[0].action_name, "tests.action"); - assert_eq!(entries[0].status, "completed"); - assert!(entries[0].request_preview.contains("\"value\": 7")); - } - - #[serial(postgres)] - #[tokio::test] - async fn webapp_get_distinct_workflows_happy_path() { - let backend = setup_backend().await; - insert_instance_with_graph_with_workflow(&backend, "tests.workflow_a").await; - insert_instance_with_graph_with_workflow(&backend, "tests.workflow_b").await; - - let workflows = WebappBackend::get_distinct_workflows(&backend) - .await - .expect("get distinct workflows"); - assert_eq!( - workflows, - vec![ - "tests.workflow_a".to_string(), - "tests.workflow_b".to_string() - ] - ); - } - - #[serial(postgres)] - #[tokio::test] - async fn webapp_get_distinct_statuses_happy_path() { - let backend = setup_backend().await; - - let statuses = WebappBackend::get_distinct_statuses(&backend) - .await - .expect("get distinct statuses"); - assert_eq!(statuses, vec!["queued", "running", "completed", "failed"]); - } - - #[serial(postgres)] - 
#[tokio::test] - async fn webapp_count_schedules_happy_path() { - let backend = setup_backend().await; - insert_schedule(&backend, "count").await; - - let count = WebappBackend::count_schedules(&backend) - .await - .expect("count schedules"); - assert_eq!(count, 1); - } - - #[serial(postgres)] - #[tokio::test] - async fn webapp_list_schedules_happy_path() { - let backend = setup_backend().await; - let schedule_id = insert_schedule(&backend, "list").await; - - let schedules = WebappBackend::list_schedules(&backend, 10, 0) - .await - .expect("list schedules"); - assert_eq!(schedules.len(), 1); - assert_eq!(schedules[0].id, schedule_id.to_string()); - assert_eq!(schedules[0].schedule_name, "list"); - } - - #[serial(postgres)] - #[tokio::test] - async fn webapp_get_schedule_happy_path() { - let backend = setup_backend().await; - let schedule_id = insert_schedule(&backend, "detail").await; - - let schedule = WebappBackend::get_schedule(&backend, schedule_id) - .await - .expect("get schedule"); - assert_eq!(schedule.id, schedule_id.to_string()); - assert_eq!(schedule.schedule_name, "detail"); - } - - #[serial(postgres)] - #[tokio::test] - async fn webapp_schedule_invocations_are_filtered_by_schedule_id() { - let backend = setup_backend().await; - let schedule_id = insert_schedule(&backend, "invocations-a").await; - let other_schedule_id = insert_schedule(&backend, "invocations-b").await; - - let running_instance_id = insert_scheduled_instance( - &backend, - schedule_id, - Utc::now() - ChronoDuration::minutes(2), - false, - ) - .await; - let completed_instance_id = insert_scheduled_instance( - &backend, - schedule_id, - Utc::now() - ChronoDuration::minutes(1), - true, - ) - .await; - let _other_instance_id = - insert_scheduled_instance(&backend, other_schedule_id, Utc::now(), true).await; - - let total = WebappBackend::count_schedule_invocations(&backend, schedule_id) - .await - .expect("count schedule invocations"); - assert_eq!(total, 2); - - let invocations = 
WebappBackend::list_schedule_invocations(&backend, schedule_id, 10, 0) - .await - .expect("list schedule invocations"); - assert_eq!(invocations.len(), 2); - assert_eq!(invocations[0].id, completed_instance_id); - assert_eq!(invocations[0].status, InstanceStatus::Completed); - assert_eq!(invocations[1].id, running_instance_id); - assert_eq!(invocations[1].status, InstanceStatus::Running); - } - - #[serial(postgres)] - #[tokio::test] - async fn webapp_update_schedule_status_happy_path() { - let backend = setup_backend().await; - let schedule_id = insert_schedule(&backend, "update").await; - - let updated = WebappBackend::update_schedule_status(&backend, schedule_id, "paused") - .await - .expect("update schedule status"); - assert!(updated); - - let schedule = WebappBackend::get_schedule(&backend, schedule_id) - .await - .expect("get schedule"); - assert_eq!(schedule.status, "paused"); - } - - #[serial(postgres)] - #[tokio::test] - async fn webapp_get_distinct_schedule_statuses_happy_path() { - let backend = setup_backend().await; - - let statuses = WebappBackend::get_distinct_schedule_statuses(&backend) - .await - .expect("get distinct schedule statuses"); - assert_eq!(statuses, vec!["active", "paused"]); - } - - #[serial(postgres)] - #[tokio::test] - async fn webapp_get_distinct_schedule_types_happy_path() { - let backend = setup_backend().await; - - let types = WebappBackend::get_distinct_schedule_types(&backend) - .await - .expect("get distinct schedule types"); - assert_eq!(types, vec!["cron", "interval"]); - } - - #[serial(postgres)] - #[tokio::test] - async fn webapp_get_worker_action_stats_happy_path() { - let backend = setup_backend().await; - let pool_id = Uuid::new_v4(); - insert_worker_status(&backend, pool_id).await; - - let rows = WebappBackend::get_worker_action_stats(&backend, 60) - .await - .expect("get worker action stats"); - assert_eq!(rows.len(), 1); - assert_eq!(rows[0].pool_id, pool_id.to_string()); - assert_eq!(rows[0].total_completed, 20); - 
} - - #[serial(postgres)] - #[tokio::test] - async fn webapp_get_worker_aggregate_stats_happy_path() { - let backend = setup_backend().await; - insert_worker_status(&backend, Uuid::new_v4()).await; - - let aggregate = WebappBackend::get_worker_aggregate_stats(&backend, 60) - .await - .expect("get worker aggregate stats"); - assert_eq!(aggregate.active_worker_count, 1); - assert_eq!(aggregate.total_in_flight, 2); - assert_eq!(aggregate.total_queue_depth, 3); - } - - #[serial(postgres)] - #[tokio::test] - async fn webapp_worker_status_table_exists_happy_path() { - let backend = setup_backend().await; - - assert!(WebappBackend::worker_status_table_exists(&backend).await); - } - - #[serial(postgres)] - #[tokio::test] - async fn webapp_schedules_table_exists_happy_path() { - let backend = setup_backend().await; - - assert!(WebappBackend::schedules_table_exists(&backend).await); - } - - #[serial(postgres)] - #[tokio::test] - async fn webapp_get_worker_statuses_happy_path() { - let backend = setup_backend().await; - let pool_id = Uuid::new_v4(); - insert_worker_status(&backend, pool_id).await; - let (completed_instance_id, _, _) = insert_instance_with_graph(&backend).await; - let completed_payload = - rmp_serde::to_vec_named(&serde_json::json!({"ok": true})).expect("encode result"); - sqlx::query("UPDATE runner_instances SET result = $2 WHERE instance_id = $1") - .bind(completed_instance_id) - .bind(completed_payload) - .execute(backend.pool()) - .await - .expect("mark instance completed"); - - let (failed_instance_id, _, _) = insert_instance_with_graph(&backend).await; - let error_payload = rmp_serde::to_vec_named(&serde_json::json!({ - "type": "Exception", - "message": "boom", - })) - .expect("encode error"); - sqlx::query("UPDATE runner_instances SET error = $2 WHERE instance_id = $1") - .bind(failed_instance_id) - .bind(error_payload) - .execute(backend.pool()) - .await - .expect("mark instance failed"); - - let statuses = WebappBackend::get_worker_statuses(&backend, 
60) - .await - .expect("get worker statuses"); - assert_eq!(statuses.len(), 1); - assert_eq!(statuses[0].pool_id, pool_id); - assert_eq!(statuses[0].total_completed, 20); - assert_eq!(statuses[0].total_instances_completed, 1); - assert_eq!(statuses[0].total_in_flight, Some(2)); - assert_eq!(statuses[0].dispatch_queue_size, Some(3)); - } -} diff --git a/crates/waymark/src/bin/integration_test.rs b/crates/waymark/src/bin/integration_test.rs index 35f86fdf..e7faa3ae 100644 --- a/crates/waymark/src/bin/integration_test.rs +++ b/crates/waymark/src/bin/integration_test.rs @@ -19,17 +19,16 @@ use serde_json::Value; use sqlx::Row; use uuid::Uuid; -use waymark::backends::{ - CoreBackend, MemoryBackend, PostgresBackend, QueuedInstance, WorkflowRegistration, - WorkflowRegistryBackend, -}; -use waymark::db; -use waymark::integration_support::{LOCAL_POSTGRES_DSN, connect_pool, ensure_local_postgres}; use waymark::messages::ast as ir; use waymark::waymark_core::runloop::{RunLoop, RunLoopSupervisorConfig}; -use waymark::waymark_core::runner::RunnerState; use waymark::workers::{PythonWorkerConfig, RemoteWorkerPool}; +use waymark_backend_memory::MemoryBackend; +use waymark_backend_postgres::PostgresBackend; +use waymark_core_backend::{CoreBackend, QueuedInstance}; use waymark_dag::{DAG, convert_to_dag}; +use waymark_integration_support::{LOCAL_POSTGRES_DSN, connect_pool, ensure_local_postgres}; +use waymark_runner_state::RunnerState; +use waymark_workflow_registry_backend::{WorkflowRegistration, WorkflowRegistryBackend}; #[derive(Parser, Debug)] #[command(name = "integration_test")] @@ -452,7 +451,7 @@ async fn connect_postgres_backend() -> Result { let pool = connect_pool(&dsn) .await .with_context(|| format!("connect postgres backend: {dsn}"))?; - db::run_migrations(&pool) + waymark_backend_postgres_migrations::run(&pool) .await .context("run postgres migrations for integration runner")?; Ok(PostgresBackend::new(pool)) diff --git a/crates/waymark/src/bin/soak-harness.rs 
b/crates/waymark/src/bin/soak-harness.rs index d4e77388..2bccbb4e 100644 --- a/crates/waymark/src/bin/soak-harness.rs +++ b/crates/waymark/src/bin/soak-harness.rs @@ -29,14 +29,13 @@ use tokio::process::{Child, Command}; use tracing::{error, info, warn}; use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt}; use uuid::Uuid; -use waymark::backends::{ - PostgresBackend, QueuedInstance, WorkflowRegistration, WorkflowRegistryBackend, -}; -use waymark::db; use waymark::messages::ast as ir; -use waymark::waymark_core::runner::RunnerState; +use waymark_backend_postgres::PostgresBackend; +use waymark_core_backend::QueuedInstance; use waymark_dag::{DAG, convert_to_dag}; use waymark_ir_parser::parse_program; +use waymark_runner_state::RunnerState; +use waymark_workflow_registry_backend::{WorkflowRegistration, WorkflowRegistryBackend as _}; const DEFAULT_DSN: &str = "postgresql://waymark:waymark@127.0.0.1:5433/waymark"; const DEFAULT_WORKFLOW_NAME: &str = "waymark_soak_timeout_mix_v1"; @@ -287,7 +286,7 @@ async fn main() -> Result<()> { } let pool = wait_for_database(&args.dsn, DB_READY_TIMEOUT).await?; - db::run_migrations(&pool) + waymark_backend_postgres_migrations::run(&pool) .await .context("run migrations before soak")?; diff --git a/crates/waymark/src/bin/start-workers.rs b/crates/waymark/src/bin/start-workers.rs index 2bf9733d..8aa1c493 100644 --- a/crates/waymark/src/bin/start-workers.rs +++ b/crates/waymark/src/bin/start-workers.rs @@ -43,13 +43,12 @@ use tracing::{error, info, warn}; use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt}; use uuid::Uuid; -use waymark::backends::PostgresBackend; use waymark::config::WorkerConfig; -use waymark::db; use waymark::messages::ast as ir; use waymark::scheduler::{DagResolver, WorkflowDag}; use waymark::waymark_core::runloop::{RunLoopSupervisorConfig, runloop_supervisor}; use waymark::{PythonWorkerConfig, RemoteWorkerPool, WebappServer, spawn_status_reporter}; +use 
waymark_backend_postgres::PostgresBackend; use waymark_dag::convert_to_dag; #[tokio::main] @@ -87,7 +86,7 @@ async fn main() -> Result<()> { // Initialize the database and backend. let pool = PgPool::connect(&config.database_url).await?; - db::run_migrations(&pool).await?; + waymark_backend_postgres_migrations::run(&pool).await?; let backend = PostgresBackend::new(pool); // Start the worker pool (bridge + python workers). diff --git a/crates/waymark/src/bin/waymark-bridge.rs b/crates/waymark/src/bin/waymark-bridge.rs index 1bc6ac18..878e6a6e 100644 --- a/crates/waymark/src/bin/waymark-bridge.rs +++ b/crates/waymark/src/bin/waymark-bridge.rs @@ -29,18 +29,22 @@ use tracing::{debug, info}; use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt}; use uuid::Uuid; -use waymark::backends::{ - ActionDone, BackendError, BackendResult, CoreBackend, GraphUpdate, InstanceDone, - InstanceLockStatus, LockClaim, PostgresBackend, QueuedInstance, QueuedInstanceBatch, - SchedulerBackend, WorkflowRegistration, WorkflowRegistryBackend, WorkflowVersion, -}; -use waymark::db; use waymark::messages::{self, ast as ir, proto}; -use waymark::scheduler::{CreateScheduleParams, ScheduleId, ScheduleStatus, ScheduleType}; use waymark::waymark_core::runloop::{RunLoop, RunLoopSupervisorConfig}; -use waymark::waymark_core::runner::RunnerState; use waymark::workers::{ActionCompletion, ActionRequest, BaseWorkerPool, WorkerPoolError}; +use waymark_backend_postgres::PostgresBackend; +use waymark_backends_core::{BackendError, BackendResult}; +use waymark_core_backend::{ + ActionDone, CoreBackend, GraphUpdate, InstanceDone, InstanceLockStatus, LockClaim, + QueuedInstance, QueuedInstanceBatch, +}; use waymark_dag::convert_to_dag; +use waymark_runner_state::RunnerState; +use waymark_scheduler_backend::SchedulerBackend as _; +use waymark_scheduler_core::{CreateScheduleParams, ScheduleId, ScheduleStatus, ScheduleType}; +use waymark_workflow_registry_backend::{ + WorkflowRegistration, 
WorkflowRegistryBackend, WorkflowVersion, +}; const DEFAULT_GRPC_ADDR: &str = "127.0.0.1:24117"; @@ -52,7 +56,7 @@ struct WorkflowStore { impl WorkflowStore { async fn connect(dsn: &str) -> Result { let pool = PgPool::connect(dsn).await?; - db::run_migrations(&pool).await?; + waymark_backend_postgres_migrations::run(&pool).await?; let backend = PostgresBackend::new(pool); Ok(Self { backend }) } diff --git a/crates/waymark/src/db.rs b/crates/waymark/src/db.rs deleted file mode 100644 index f89f0e04..00000000 --- a/crates/waymark/src/db.rs +++ /dev/null @@ -1,14 +0,0 @@ -//! Database helpers shared across services. - -use sqlx::PgPool; - -use crate::backends::{BackendError, BackendResult}; - -/// Run the embedded SQLx migrations. -pub async fn run_migrations(pool: &PgPool) -> BackendResult<()> { - sqlx::migrate!() - .run(pool) - .await - .map_err(|err| BackendError::Message(err.to_string()))?; - Ok(()) -} diff --git a/crates/waymark/src/garbage_collector/task.rs b/crates/waymark/src/garbage_collector/task.rs index a96a280d..e1f673ef 100644 --- a/crates/waymark/src/garbage_collector/task.rs +++ b/crates/waymark/src/garbage_collector/task.rs @@ -6,8 +6,7 @@ use std::time::Duration; use chrono::Utc; use tracing::{debug, error, info}; - -use crate::backends::{GarbageCollectionResult, GarbageCollectorBackend}; +use waymark_garbage_collector_backend::{GarbageCollectionResult, GarbageCollectorBackend}; /// Configuration for the garbage collector task. 
#[derive(Debug, Clone)] @@ -120,9 +119,9 @@ mod tests { use chrono::{Duration as ChronoDuration, Utc}; use tonic::async_trait; + use waymark_backends_core::BackendResult; use super::*; - use crate::backends::{BackendResult, GarbageCollectorBackend}; #[derive(Clone)] struct StubGarbageCollectorBackend { diff --git a/crates/waymark/src/integration_support/mod.rs b/crates/waymark/src/integration_support/mod.rs deleted file mode 100644 index db198a86..00000000 --- a/crates/waymark/src/integration_support/mod.rs +++ /dev/null @@ -1,5 +0,0 @@ -//! Shared integration harness helpers used by test binaries and Rust tests. - -mod postgres; - -pub use postgres::{LOCAL_POSTGRES_DSN, connect_pool, ensure_local_postgres}; diff --git a/crates/waymark/src/integration_support/postgres.rs b/crates/waymark/src/integration_support/postgres.rs deleted file mode 100644 index 5a876f2d..00000000 --- a/crates/waymark/src/integration_support/postgres.rs +++ /dev/null @@ -1,105 +0,0 @@ -//! Shared Postgres bootstrap for integration harnesses. - -use std::path::PathBuf; -use std::time::{Duration, Instant}; - -use anyhow::{Context, Result, anyhow, bail}; -use sqlx::{PgPool, postgres::PgPoolOptions}; -use tokio::process::Command; -use tokio::sync::OnceCell; - -use crate::db; - -pub const LOCAL_POSTGRES_DSN: &str = "postgresql://waymark:waymark@127.0.0.1:5433/waymark"; - -const READY_TIMEOUT: Duration = Duration::from_secs(45); -const RETRY_DELAY: Duration = Duration::from_millis(500); -const POOL_MAX_CONNECTIONS: u32 = 32; -const POOL_ACQUIRE_TIMEOUT: Duration = Duration::from_secs(15); - -static LOCAL_POSTGRES_BOOTSTRAPPED: OnceCell<()> = OnceCell::const_new(); - -/// Ensure the default local Postgres is available and migrated. -/// -/// This helper is intended for local integration workflows where the default -/// DSN maps to the repository docker-compose service. 
-pub async fn ensure_local_postgres() -> Result<()> { - LOCAL_POSTGRES_BOOTSTRAPPED - .get_or_try_init(|| async { ensure_local_postgres_impl().await }) - .await?; - Ok(()) -} - -/// Connect a PgPool using integration defaults. -pub async fn connect_pool(dsn: &str) -> Result { - Ok(PgPoolOptions::new() - .max_connections(POOL_MAX_CONNECTIONS) - .acquire_timeout(POOL_ACQUIRE_TIMEOUT) - .connect(dsn) - .await?) -} - -async fn ensure_local_postgres_impl() -> Result<()> { - if let Ok(pool) = connect_pool(LOCAL_POSTGRES_DSN).await { - db::run_migrations(&pool) - .await - .context("run migrations for existing local postgres")?; - pool.close().await; - return Ok(()); - } - - run_compose_up().await?; - let pool = wait_for_postgres(LOCAL_POSTGRES_DSN).await?; - db::run_migrations(&pool) - .await - .context("run migrations for local postgres")?; - pool.close().await; - Ok(()) -} - -async fn run_compose_up() -> Result<()> { - let root = project_root(); - let status = Command::new("docker") - .arg("compose") - .arg("-f") - .arg("../../docker-compose.yml") - .arg("up") - .arg("-d") - .arg("postgres") - .current_dir(&root) - .status() - .await - .with_context(|| format!("failed to run docker compose in {}", root.display()))?; - - if !status.success() { - bail!("docker compose up -d postgres exited with status {status}"); - } - - Ok(()) -} - -async fn wait_for_postgres(dsn: &str) -> Result { - let deadline = Instant::now() + READY_TIMEOUT; - let mut last_error = None; - - while Instant::now() < deadline { - match connect_pool(dsn).await { - Ok(pool) => return Ok(pool), - Err(err) => { - last_error = Some(err); - tokio::time::sleep(RETRY_DELAY).await; - } - } - } - - Err(anyhow!( - "timed out waiting for postgres at {dsn}; last error: {}", - last_error - .map(|err| err.to_string()) - .unwrap_or_else(|| "unknown".to_string()) - )) -} - -fn project_root() -> PathBuf { - PathBuf::from(env!("CARGO_MANIFEST_DIR")) -} diff --git a/crates/waymark/src/lib.rs b/crates/waymark/src/lib.rs 
index 66945900..568c1ba0 100644 --- a/crates/waymark/src/lib.rs +++ b/crates/waymark/src/lib.rs @@ -1,17 +1,12 @@ //! Waymark - worker pool infrastructure plus the core IR/runtime port. -pub mod backends; pub mod config; -pub mod db; pub mod garbage_collector; -pub mod integration_support; pub mod messages; pub mod observability; pub mod pool_status; pub mod scheduler; pub mod server_worker; -#[cfg(test)] -pub mod test_support; pub mod waymark_core; pub mod webapp; pub mod workers; @@ -21,10 +16,7 @@ pub use garbage_collector::{GarbageCollectorConfig, GarbageCollectorTask}; pub use messages::{MessageError, ast as ir_ast, proto, workflow_argument_value_to_json}; pub use observability::obs; pub use pool_status::{PoolTimeSeries, TimeSeriesEntry, TimeSeriesJsonEntry}; -pub use scheduler::{ - CreateScheduleParams, ScheduleId, ScheduleType, SchedulerConfig, SchedulerTask, - WorkflowSchedule, -}; +pub use scheduler::{SchedulerConfig, SchedulerTask}; pub use server_worker::{WorkerBridgeChannels, WorkerBridgeServer}; pub use webapp::{WebappConfig, WebappServer}; pub use workers::{ diff --git a/crates/waymark/src/observability.rs b/crates/waymark/src/observability.rs index dbb8a7af..49c1700f 100644 --- a/crates/waymark/src/observability.rs +++ b/crates/waymark/src/observability.rs @@ -1,6 +1,6 @@ //! Observability helpers for optional tracing instrumentation. -pub use waymark_observability_macros::obs; +pub use waymark_observability::obs; #[cfg(feature = "trace")] use std::sync::OnceLock; diff --git a/crates/waymark/src/scheduler/mod.rs b/crates/waymark/src/scheduler/mod.rs index d1ba2abb..d14adfc4 100644 --- a/crates/waymark/src/scheduler/mod.rs +++ b/crates/waymark/src/scheduler/mod.rs @@ -6,9 +6,5 @@ //! 
- Cron and interval utilities mod task; -mod types; -mod utils; pub use task::{DagResolver, SchedulerConfig, SchedulerTask, WorkflowDag}; -pub use types::{CreateScheduleParams, ScheduleId, ScheduleStatus, ScheduleType, WorkflowSchedule}; -pub use utils::{apply_jitter, compute_next_run, next_cron_run, next_interval_run, validate_cron}; diff --git a/crates/waymark/src/scheduler/task.rs b/crates/waymark/src/scheduler/task.rs index 9f9550cd..283745cc 100644 --- a/crates/waymark/src/scheduler/task.rs +++ b/crates/waymark/src/scheduler/task.rs @@ -9,9 +9,9 @@ use std::time::Duration; use serde_json::Value; use tracing::{debug, error, info}; use uuid::Uuid; +use waymark_core_backend::QueuedInstance; +use waymark_scheduler_core::{ScheduleId, WorkflowSchedule}; -use super::types::{ScheduleId, WorkflowSchedule}; -use crate::backends::{CoreBackend, QueuedInstance, SchedulerBackend}; use crate::messages; use crate::messages::ast as ir; use waymark_dag::DAG; @@ -53,7 +53,8 @@ pub struct SchedulerTask { impl SchedulerTask where - B: CoreBackend + SchedulerBackend + Clone + Send + Sync + 'static, + B: waymark_core_backend::CoreBackend + waymark_scheduler_backend::SchedulerBackend, + B: Clone + Send + Sync + 'static, { /// Run the scheduler loop. 
pub async fn run(self, shutdown: tokio_util::sync::WaitForCancellationFutureOwned) { @@ -153,12 +154,8 @@ where .as_ref() .ok_or_else(|| "DAG has no entry node".to_string())?; - let mut state = crate::waymark_core::runner::RunnerState::new( - Some(Arc::clone(&dag)), - None, - None, - false, - ); + let mut state = + waymark_runner_state::RunnerState::new(Some(Arc::clone(&dag)), None, None, false); if let Some(input_payload) = schedule.input_payload.as_deref() { let inputs = messages::workflow_arguments_to_json(input_payload) .ok_or_else(|| "failed to decode schedule input payload".to_string())?; @@ -278,14 +275,16 @@ mod tests { use chrono::{Duration as ChronoDuration, Utc}; use prost::Message; use serde_json::Value; + use waymark_backend_memory::MemoryBackend; + use waymark_core_backend::{CoreBackend, LockClaim}; + use waymark_scheduler_backend::SchedulerBackend; + use waymark_scheduler_core::{CreateScheduleParams, ScheduleType}; use super::*; - use crate::backends::{CoreBackend, LockClaim, MemoryBackend, SchedulerBackend}; use crate::messages::proto; - use crate::scheduler::{CreateScheduleParams, ScheduleType}; - use crate::waymark_core::ir_parser::parse_program; - use crate::waymark_core::runner::RunnerExecutor; use waymark_dag::convert_to_dag; + use waymark_ir_parser::parse_program; + use waymark_runner::RunnerExecutor; fn workflow_args_payload(key: &str, value: i64) -> Vec { proto::WorkflowArguments { @@ -374,11 +373,8 @@ fn main(input: [number], output: [result]): let state = queued.state.clone().expect("queued state"); let mut executor = RunnerExecutor::new(Arc::clone(&dag), state, queued.action_results.clone(), None); - let replay = crate::waymark_core::runner::replay_variables( - executor.state(), - executor.action_results(), - ) - .expect("replay inputs"); + let replay = waymark_runner::replay_variables(executor.state(), executor.action_results()) + .expect("replay inputs"); assert_eq!( replay.variables.get("number"), Some(&Value::Number(7.into())) diff 
--git a/crates/waymark/src/scheduler/types.rs b/crates/waymark/src/scheduler/types.rs deleted file mode 100644 index 4f8c9104..00000000 --- a/crates/waymark/src/scheduler/types.rs +++ /dev/null @@ -1,139 +0,0 @@ -//! Schedule types. - -use chrono::{DateTime, Utc}; -use serde::{Deserialize, Serialize}; -use uuid::Uuid; - -/// Unique identifier for a schedule. -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)] -pub struct ScheduleId(pub Uuid); - -impl ScheduleId { - pub fn new() -> Self { - Self(Uuid::new_v4()) - } -} - -impl Default for ScheduleId { - fn default() -> Self { - Self::new() - } -} - -impl std::fmt::Display for ScheduleId { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{}", self.0) - } -} - -/// Type of schedule. -#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] -#[serde(rename_all = "lowercase")] -pub enum ScheduleType { - Cron, - Interval, -} - -impl ScheduleType { - pub fn as_str(&self) -> &'static str { - match self { - Self::Cron => "cron", - Self::Interval => "interval", - } - } - - pub fn parse(s: &str) -> Option { - match s { - "cron" => Some(Self::Cron), - "interval" => Some(Self::Interval), - _ => None, - } - } -} - -impl std::fmt::Display for ScheduleType { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{}", self.as_str()) - } -} - -/// Status of a workflow schedule. 
-#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] -#[serde(rename_all = "lowercase")] -pub enum ScheduleStatus { - Active, - Paused, - Deleted, -} - -impl ScheduleStatus { - pub fn as_str(&self) -> &'static str { - match self { - Self::Active => "active", - Self::Paused => "paused", - Self::Deleted => "deleted", - } - } - - pub fn parse(s: &str) -> Option { - match s { - "active" => Some(Self::Active), - "paused" => Some(Self::Paused), - "deleted" => Some(Self::Deleted), - _ => None, - } - } -} - -impl std::fmt::Display for ScheduleStatus { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{}", self.as_str()) - } -} - -/// A workflow schedule (recurring execution). -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct WorkflowSchedule { - pub id: Uuid, - pub workflow_name: String, - pub schedule_name: String, - pub schedule_type: String, - pub cron_expression: Option, - pub interval_seconds: Option, - pub jitter_seconds: i64, - pub input_payload: Option>, - pub status: String, - pub next_run_at: Option>, - pub last_run_at: Option>, - pub last_instance_id: Option, - pub created_at: DateTime, - pub updated_at: DateTime, - pub priority: i32, - pub allow_duplicate: bool, -} - -impl WorkflowSchedule { - /// Get the schedule type as an enum. - pub fn schedule_type_enum(&self) -> Option { - ScheduleType::parse(&self.schedule_type) - } - - /// Get the status as an enum. - pub fn status_enum(&self) -> Option { - ScheduleStatus::parse(&self.status) - } -} - -/// Parameters for creating a schedule. 
-#[derive(Debug, Clone)] -pub struct CreateScheduleParams { - pub workflow_name: String, - pub schedule_name: String, - pub schedule_type: ScheduleType, - pub cron_expression: Option, - pub interval_seconds: Option, - pub jitter_seconds: i64, - pub input_payload: Option>, - pub priority: i32, - pub allow_duplicate: bool, -} diff --git a/crates/waymark/src/scheduler/utils.rs b/crates/waymark/src/scheduler/utils.rs deleted file mode 100644 index 4530329f..00000000 --- a/crates/waymark/src/scheduler/utils.rs +++ /dev/null @@ -1,181 +0,0 @@ -//! Cron and interval schedule utilities. -//! -//! This module provides utilities for computing the next run time for -//! cron expressions and fixed intervals. -//! -//! Note: This module accepts standard 5-field Unix cron expressions -//! (minute, hour, day-of-month, month, day-of-week) and converts them -//! to 6-field format (with seconds) for the `cron` crate. - -use chrono::{DateTime, Utc}; -use cron::Schedule; -use rand::Rng; -use std::str::FromStr; - -use super::ScheduleType; - -/// Convert a 5-field Unix cron expression to 6-field format. -/// -/// The `cron` crate requires 6 fields (sec min hour dom month dow), -/// but standard Unix cron uses 5 fields (min hour dom month dow). -/// This function prepends "0 " to run at second 0 of each match. -fn normalize_cron_expr(cron_expr: &str) -> String { - let fields: Vec<&str> = cron_expr.split_whitespace().collect(); - if fields.len() == 5 { - // Standard 5-field cron: prepend "0" for seconds - format!("0 {}", cron_expr) - } else { - // Already 6+ fields, use as-is - cron_expr.to_string() - } -} - -/// Compute the next run time for a cron expression. -/// -/// Accepts standard 5-field Unix cron expressions (e.g., "0 * * * *" for hourly) -/// or 6-field expressions with seconds. -/// -/// Returns the next occurrence after the current time (UTC). 
-pub fn next_cron_run(cron_expr: &str) -> Result, String> { - let normalized = normalize_cron_expr(cron_expr); - let schedule = Schedule::from_str(&normalized) - .map_err(|e| format!("Invalid cron expression '{}': {}", cron_expr, e))?; - schedule - .upcoming(Utc) - .next() - .ok_or_else(|| "No upcoming schedule found".to_string()) -} - -/// Compute the next run time for an interval-based schedule. -/// -/// If `last_run_at` is provided, the next run is `last_run_at + interval_seconds`. -/// Otherwise, the next run is `now + interval_seconds`. -pub fn next_interval_run( - interval_seconds: i64, - last_run_at: Option>, -) -> DateTime { - let base = last_run_at.unwrap_or_else(Utc::now); - base + chrono::Duration::seconds(interval_seconds) -} - -/// Validate a cron expression without computing the next run. -/// -/// Accepts standard 5-field Unix cron expressions or 6-field expressions. -pub fn validate_cron(cron_expr: &str) -> Result<(), String> { - let normalized = normalize_cron_expr(cron_expr); - Schedule::from_str(&normalized) - .map(|_| ()) - .map_err(|e| format!("Invalid cron expression '{}': {}", cron_expr, e)) -} - -/// Apply a random jitter delay (in seconds) to a scheduled time. -/// -/// If `jitter_seconds` is 0, the base time is returned unchanged. -pub fn apply_jitter(base: DateTime, jitter_seconds: i64) -> Result, String> { - if jitter_seconds < 0 { - return Err("jitter_seconds must be non-negative".to_string()); - } - if jitter_seconds == 0 { - return Ok(base); - } - let jitter = rand::thread_rng().gen_range(0..=jitter_seconds); - Ok(base + chrono::Duration::seconds(jitter)) -} - -/// Compute the next run time for a schedule type with optional jitter. 
-pub fn compute_next_run( - schedule_type: ScheduleType, - cron_expression: Option<&str>, - interval_seconds: Option, - jitter_seconds: i64, - last_run_at: Option>, -) -> Result, String> { - let base = match schedule_type { - ScheduleType::Cron => { - let expr = cron_expression.ok_or_else(|| "cron expression required".to_string())?; - next_cron_run(expr)? - } - ScheduleType::Interval => { - let seconds = - interval_seconds.ok_or_else(|| "interval_seconds required".to_string())?; - next_interval_run(seconds, last_run_at) - } - }; - - apply_jitter(base, jitter_seconds) -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_normalize_cron_expr() { - // 5-field should get "0 " prepended - assert_eq!(normalize_cron_expr("* * * * *"), "0 * * * * *"); - assert_eq!(normalize_cron_expr("0 * * * *"), "0 0 * * * *"); - - // 6-field should remain unchanged - assert_eq!(normalize_cron_expr("0 0 * * * *"), "0 0 * * * *"); - } - - #[test] - fn test_valid_cron_expression() { - // Standard 5-field Unix cron expressions - assert!(validate_cron("0 * * * *").is_ok()); - assert!(validate_cron("0 0 * * *").is_ok()); - assert!(validate_cron("* * * * *").is_ok()); - - // 6-field expression with seconds - assert!(validate_cron("0 0 * * * *").is_ok()); - } - - #[test] - fn test_invalid_cron_expression() { - assert!(validate_cron("invalid").is_err()); - assert!(validate_cron("").is_err()); - } - - #[test] - fn test_next_cron_run() { - // Every minute should return a time in the future - let next = next_cron_run("* * * * *").unwrap(); - assert!(next > Utc::now()); - } - - #[test] - fn test_next_interval_run_from_now() { - let before = Utc::now(); - let next = next_interval_run(3600, None); - let after = Utc::now(); - - // Should be approximately 1 hour from now - assert!(next >= before + chrono::Duration::seconds(3600)); - assert!(next <= after + chrono::Duration::seconds(3600)); - } - - #[test] - fn test_next_interval_run_from_last() { - let last_run = Utc::now() - 
chrono::Duration::seconds(1800); - let next = next_interval_run(3600, Some(last_run)); - - // Should be 1 hour after last_run (30 minutes from now) - let expected = last_run + chrono::Duration::seconds(3600); - assert_eq!(next, expected); - } - - #[test] - fn test_apply_jitter_zero() { - let base = Utc::now(); - let jittered = apply_jitter(base, 0).unwrap(); - assert_eq!(jittered, base); - } - - #[test] - fn test_apply_jitter_range() { - let base = Utc::now(); - let jittered = apply_jitter(base, 5).unwrap(); - assert!(jittered >= base); - assert!(jittered <= base + chrono::Duration::seconds(5)); - } -} diff --git a/crates/waymark/src/test_support/mod.rs b/crates/waymark/src/test_support/mod.rs deleted file mode 100644 index 5e34abaa..00000000 --- a/crates/waymark/src/test_support/mod.rs +++ /dev/null @@ -1,5 +0,0 @@ -//! Shared test fixtures for Rust tests. - -mod postgres; - -pub use postgres::postgres_setup; diff --git a/crates/waymark/src/test_support/postgres.rs b/crates/waymark/src/test_support/postgres.rs deleted file mode 100644 index 1fb9e50f..00000000 --- a/crates/waymark/src/test_support/postgres.rs +++ /dev/null @@ -1,15 +0,0 @@ -//! Shared Postgres fixture bootstrapped from root docker-compose. - -use sqlx::PgPool; - -use crate::integration_support::{LOCAL_POSTGRES_DSN, connect_pool, ensure_local_postgres}; - -/// Ensure test Postgres is available and migrated, then return a pooled connection. 
-pub async fn postgres_setup() -> PgPool { - ensure_local_postgres() - .await - .unwrap_or_else(|err| panic!("postgres_setup bootstrap failed: {err:#}")); - connect_pool(LOCAL_POSTGRES_DSN) - .await - .unwrap_or_else(|err| panic!("postgres_setup connect failed: {err:#}")) -} diff --git a/crates/waymark/src/waymark_core/cli/benchmark.rs b/crates/waymark/src/waymark_core/cli/benchmark.rs index 99558eea..241afdd1 100644 --- a/crates/waymark/src/waymark_core/cli/benchmark.rs +++ b/crates/waymark/src/waymark_core/cli/benchmark.rs @@ -12,12 +12,11 @@ use serde_json::Value; use sha2::{Digest, Sha256}; use sqlx::PgPool; use uuid::Uuid; +use waymark_backend_postgres::PostgresBackend; +use waymark_core_backend::QueuedInstance; +use waymark_integration_support::{LOCAL_POSTGRES_DSN, ensure_local_postgres}; +use waymark_workflow_registry_backend::{WorkflowRegistration, WorkflowRegistryBackend as _}; -use crate::backends::{ - PostgresBackend, QueuedInstance, WorkflowRegistration, WorkflowRegistryBackend, -}; -use crate::db; -use crate::integration_support::{LOCAL_POSTGRES_DSN, ensure_local_postgres}; use crate::messages::ast as ir; use crate::observability::obs; use crate::waymark_core::cli::smoke::{ @@ -25,9 +24,9 @@ use crate::waymark_core::cli::smoke::{ build_try_except_program, build_while_loop_program, literal_from_value, }; use crate::waymark_core::runloop::{RunLoop, RunLoopSupervisorConfig}; -use crate::waymark_core::runner::RunnerState; use crate::workers::{ActionCallable, InlineWorkerPool, WorkerPoolError}; use waymark_dag::convert_to_dag; +use waymark_runner_state::RunnerState; const DEFAULT_DSN: &str = LOCAL_POSTGRES_DSN; const DEFAULT_MAX_CONCURRENT_INSTANCES: usize = 500; @@ -318,7 +317,9 @@ async fn run_benchmark( } let pool = PgPool::connect(dsn).await.expect("connect postgres"); drop_benchmark_tables(&pool).await; - db::run_migrations(&pool).await.expect("run migrations"); + waymark_backend_postgres_migrations::run(&pool) + .await + .expect("run migrations"); let 
backend = PostgresBackend::new(pool); backend.clear_all().await.expect("clear all"); let total = queue_benchmark_instances(&backend, &cases, count_per_case, batch_size).await; diff --git a/crates/waymark/src/waymark_core/cli/smoke.rs b/crates/waymark/src/waymark_core/cli/smoke.rs index abd34109..bb5a49c3 100644 --- a/crates/waymark/src/waymark_core/cli/smoke.rs +++ b/crates/waymark/src/waymark_core/cli/smoke.rs @@ -11,18 +11,18 @@ use prost::Message; use serde_json::Value; use sha2::{Digest, Sha256}; use uuid::Uuid; +use waymark_backend_memory::MemoryBackend; +use waymark_core_backend::QueuedInstance; +use waymark_workflow_registry_backend::{WorkflowRegistration, WorkflowRegistryBackend as _}; -use crate::backends::{ - MemoryBackend, QueuedInstance, WorkflowRegistration, WorkflowRegistryBackend, -}; use crate::messages::ast as ir; use crate::waymark_core::dag_viz::render_dag_image; use crate::waymark_core::ir_format::format_program; use crate::waymark_core::runloop::{RunLoop, RunLoopSupervisorConfig}; -use crate::waymark_core::runner::RunnerState; use crate::workers::{PythonWorkerConfig, RemoteWorkerPool}; use waymark_dag::convert_to_dag; use waymark_ir_parser::parse_program; +use waymark_runner_state::RunnerState; #[derive(Parser, Debug)] #[command(name = "waymark-smoke", about = "Smoke check core-python components.")] diff --git a/crates/waymark/src/waymark_core/ir_format.rs b/crates/waymark/src/waymark_core/ir_format.rs index 57a9460a..45795f0e 100644 --- a/crates/waymark/src/waymark_core/ir_format.rs +++ b/crates/waymark/src/waymark_core/ir_format.rs @@ -569,7 +569,7 @@ pub fn format_program(program: &ir::Program) -> String { #[cfg(test)] mod tests { use super::{DEFAULT_INDENT, format_program}; - use crate::waymark_core::ir_parser::IRParser; + use waymark_ir_parser::IRParser; #[test] fn test_format_program_happy_path() { diff --git a/crates/waymark/src/waymark_core/lock.rs b/crates/waymark/src/waymark_core/lock.rs index 6838407d..3f3acd5d 100644 --- 
a/crates/waymark/src/waymark_core/lock.rs +++ b/crates/waymark/src/waymark_core/lock.rs @@ -8,8 +8,7 @@ use chrono::{Duration as ChronoDuration, Utc}; use uuid::Uuid; use tracing::{debug, info, warn}; - -use crate::backends::{CoreBackend, LockClaim}; +use waymark_core_backend::LockClaim; #[derive(Clone)] pub struct InstanceLockTracker { @@ -60,7 +59,7 @@ impl InstanceLockTracker { } pub fn spawn_lock_heartbeat( - backend: Arc, + backend: Arc, tracker: InstanceLockTracker, heartbeat_interval: Duration, lock_ttl: Duration, diff --git a/crates/waymark/src/waymark_core/mod.rs b/crates/waymark/src/waymark_core/mod.rs index 5e3b9090..d5a4ec27 100644 --- a/crates/waymark/src/waymark_core/mod.rs +++ b/crates/waymark/src/waymark_core/mod.rs @@ -6,11 +6,8 @@ pub mod dag_viz; pub mod ir_format; pub mod lock; pub mod runloop; -pub mod runner; -pub use crate::backends::{InstanceDone, QueuedInstance}; pub use crate::workers::{ActionCompletion, ActionRequest, BaseWorkerPool, InlineWorkerPool}; pub use dag_viz::{build_dag_graph, render_dag_image}; pub use ir_format::format_program; pub use runloop::RunLoop; -pub use runner::RunnerState; diff --git a/crates/waymark/src/waymark_core/runloop.rs b/crates/waymark/src/waymark_core/runloop.rs index 6257bda0..407e4a3b 100644 --- a/crates/waymark/src/waymark_core/runloop.rs +++ b/crates/waymark/src/waymark_core/runloop.rs @@ -15,24 +15,26 @@ use serde_json::Value; use tokio::sync::mpsc; use tracing::{debug, error, info, warn}; use uuid::Uuid; - -use crate::backends::{ - ActionDone, BackendError, CoreBackend, GraphUpdate, InstanceDone, InstanceLockStatus, - LockClaim, QueuedInstance, QueuedInstanceBatch, WorkflowRegistryBackend, +use waymark_backends_core::BackendError; +use waymark_core_backend::{ + ActionDone, GraphUpdate, InstanceDone, InstanceLockStatus, LockClaim, QueuedInstance, + QueuedInstanceBatch, }; +use waymark_workflow_registry_backend::WorkflowRegistryBackend; + use crate::messages::ast as ir; use crate::observability::obs; 
use crate::waymark_core::commit_barrier::{CommitBarrier, DeferredInstanceEvent}; use crate::waymark_core::lock::{InstanceLockTracker, spawn_lock_heartbeat}; -use crate::waymark_core::runner::synthetic_exceptions::{ +use crate::workers::{ActionCompletion, ActionRequest, BaseWorkerPool, WorkerPoolError}; +use waymark_dag::{DAG, DAGNode, OutputNode, ReturnNode, convert_to_dag}; +use waymark_runner::synthetic_exceptions::{ SyntheticExceptionType, build_synthetic_exception_value, }; -use crate::waymark_core::runner::{ +use waymark_runner::{ DurableUpdates, ExecutorStep, RunnerExecutor, RunnerExecutorError, SleepRequest, replay_variables, }; -use crate::workers::{ActionCompletion, ActionRequest, BaseWorkerPool, WorkerPoolError}; -use waymark_dag::{DAG, DAGNode, OutputNode, ReturnNode, convert_to_dag}; /// Raised when the run loop cannot coordinate execution. #[derive(Debug, thiserror::Error)] @@ -351,7 +353,7 @@ impl ShardExecutor { fn run_executor_shard( shard_id: usize, - backend: Arc, + backend: Arc, receiver: std_mpsc::Receiver, sender: mpsc::UnboundedSender, ) { @@ -530,7 +532,7 @@ fn run_executor_shard( /// Run loop that fans out executor work across CPU-bound shard threads. 
pub struct RunLoop { worker_pool: Arc, - core_backend: Arc, + core_backend: Arc, registry_backend: Arc, workflow_cache: HashMap>, max_concurrent_instances: usize, @@ -566,7 +568,7 @@ pub struct RunLoopSupervisorConfig { impl RunLoop { pub fn new( worker_pool: impl BaseWorkerPool + 'static, - backend: impl CoreBackend + WorkflowRegistryBackend + 'static, + backend: impl waymark_core_backend::CoreBackend + WorkflowRegistryBackend + 'static, config: RunLoopSupervisorConfig, ) -> Self { Self::new_internal( @@ -580,7 +582,7 @@ impl RunLoop { pub fn new_with_shutdown( worker_pool: impl BaseWorkerPool + 'static, - backend: impl CoreBackend + WorkflowRegistryBackend + 'static, + backend: impl waymark_core_backend::CoreBackend + WorkflowRegistryBackend + 'static, config: RunLoopSupervisorConfig, shutdown_token: tokio_util::sync::CancellationToken, ) -> Self { @@ -589,14 +591,14 @@ impl RunLoop { fn new_internal( worker_pool: impl BaseWorkerPool + 'static, - backend: impl CoreBackend + WorkflowRegistryBackend + 'static, + backend: impl waymark_core_backend::CoreBackend + WorkflowRegistryBackend + 'static, config: RunLoopSupervisorConfig, shutdown_token: tokio_util::sync::CancellationToken, exit_on_idle: bool, ) -> Self { let max_concurrent_instances = std::cmp::max(1, config.max_concurrent_instances); let backend = Arc::new(backend); - let core_backend: Arc = backend.clone(); + let core_backend: Arc = backend.clone(); let registry_backend: Arc = backend; Self { worker_pool: Arc::new(worker_pool), @@ -1766,7 +1768,7 @@ pub async fn runloop_supervisor( config: RunLoopSupervisorConfig, shutdown_token: tokio_util::sync::CancellationToken, ) where - B: CoreBackend + WorkflowRegistryBackend + Clone + Send + Sync + 'static, + B: waymark_core_backend::CoreBackend + WorkflowRegistryBackend + Clone + Send + Sync + 'static, W: BaseWorkerPool + Clone + Send + Sync + 'static, { let mut backoff = Duration::from_millis(200); diff --git a/crates/waymark/src/waymark_core/runloop/tests.rs 
b/crates/waymark/src/waymark_core/runloop/tests.rs index 24b0ea34..53d6c634 100644 --- a/crates/waymark/src/waymark_core/runloop/tests.rs +++ b/crates/waymark/src/waymark_core/runloop/tests.rs @@ -1,141 +1,23 @@ use super::*; use std::collections::{HashMap, VecDeque}; -use std::sync::{ - Arc, Mutex, - atomic::{AtomicBool, AtomicUsize, Ordering as AtomicOrdering}, -}; +use std::sync::{Arc, Mutex}; use std::time::Duration; use chrono::Utc; use prost::Message; use sha2::{Digest, Sha256}; -use tonic::async_trait; +use waymark_backend_fault_injection::FaultInjectingBackend; +use waymark_backend_memory::MemoryBackend; +use waymark_core_backend::{ActionAttemptStatus, CoreBackend}; +use waymark_workflow_registry_backend::WorkflowRegistration; -use crate::backends::{ - ActionAttemptStatus, BackendError, BackendResult, CoreBackend, GraphUpdate, InstanceDone, - InstanceLockStatus, LockClaim, MemoryBackend, QueuedInstanceBatch, WorkflowRegistration, - WorkflowRegistryBackend, WorkflowVersion, -}; use crate::messages::ast as ir; -use crate::waymark_core::ir_parser::parse_program; -use crate::waymark_core::runner::RunnerState; -use crate::waymark_core::runner::state::NodeStatus; use crate::workers::ActionCallable; -use waymark_dag::convert_to_dag; - -#[derive(Clone)] -struct FaultInjectingBackend { - inner: MemoryBackend, - fail_get_queued_instances_with_depth_limit: Arc, - get_queued_instances_calls: Arc, -} - -impl FaultInjectingBackend { - fn with_depth_limit_poll_failures(inner: MemoryBackend) -> Self { - Self { - inner, - fail_get_queued_instances_with_depth_limit: Arc::new(AtomicBool::new(true)), - get_queued_instances_calls: Arc::new(AtomicUsize::new(0)), - } - } - - fn get_queued_instances_calls(&self) -> usize { - self.get_queued_instances_calls.load(AtomicOrdering::SeqCst) - } - - fn queue_len(&self) -> usize { - self.inner - .instance_queue() - .as_ref() - .map(|queue| queue.lock().expect("queue poisoned").len()) - .unwrap_or(0) - } - - fn instances_done_len(&self) -> 
usize { - self.inner.instances_done().len() - } -} - -#[async_trait] -impl CoreBackend for FaultInjectingBackend { - fn clone_box(&self) -> Box { - Box::new(self.clone()) - } - - async fn save_graphs( - &self, - claim: LockClaim, - graphs: &[GraphUpdate], - ) -> BackendResult> { - self.inner.save_graphs(claim, graphs).await - } - - async fn save_actions_done( - &self, - actions: &[crate::backends::ActionDone], - ) -> BackendResult<()> { - self.inner.save_actions_done(actions).await - } - async fn save_instances_done(&self, instances: &[InstanceDone]) -> BackendResult<()> { - self.inner.save_instances_done(instances).await - } - - async fn get_queued_instances( - &self, - size: usize, - claim: LockClaim, - ) -> BackendResult { - self.get_queued_instances_calls - .fetch_add(1, AtomicOrdering::SeqCst); - if self - .fail_get_queued_instances_with_depth_limit - .load(AtomicOrdering::SeqCst) - { - return Err(BackendError::Message("depth limit exceeded".to_string())); - } - self.inner.get_queued_instances(size, claim).await - } - - async fn queue_instances( - &self, - instances: &[crate::backends::QueuedInstance], - ) -> BackendResult<()> { - self.inner.queue_instances(instances).await - } - - async fn refresh_instance_locks( - &self, - claim: LockClaim, - instance_ids: &[Uuid], - ) -> BackendResult> { - self.inner.refresh_instance_locks(claim, instance_ids).await - } - - async fn release_instance_locks( - &self, - lock_uuid: Uuid, - instance_ids: &[Uuid], - ) -> BackendResult<()> { - self.inner - .release_instance_locks(lock_uuid, instance_ids) - .await - } -} - -#[async_trait] -impl WorkflowRegistryBackend for FaultInjectingBackend { - async fn upsert_workflow_version( - &self, - registration: &WorkflowRegistration, - ) -> BackendResult { - self.inner.upsert_workflow_version(registration).await - } - - async fn get_workflow_versions(&self, ids: &[Uuid]) -> BackendResult> { - self.inner.get_workflow_versions(ids).await - } -} +use waymark_dag::convert_to_dag; +use 
waymark_ir_parser::parse_program; +use waymark_runner_state::NodeStatus; +use waymark_runner_state::RunnerState; fn default_test_config(lock_uuid: Uuid) -> RunLoopSupervisorConfig { RunLoopSupervisorConfig { diff --git a/crates/waymark/src/waymark_core/runner/executor.rs b/crates/waymark/src/waymark_core/runner/executor.rs deleted file mode 100644 index e0572688..00000000 --- a/crates/waymark/src/waymark_core/runner/executor.rs +++ /dev/null @@ -1,3031 +0,0 @@ -//! Incremental DAG executor for runner state graphs. - -use std::cell::RefCell; -use std::collections::{HashMap, HashSet}; -use std::sync::Arc; -use std::time::Duration; - -use chrono::{DateTime, Utc}; -use rustc_hash::FxHashMap; -use serde_json::Value; -use uuid::Uuid; - -use crate::backends::{ActionAttemptStatus, ActionDone, CoreBackend, GraphUpdate}; -use crate::messages::ast as ir; -use crate::observability::obs; -use crate::waymark_core::runner::expression_evaluator::is_exception_value; -use crate::waymark_core::runner::retry::{ - RetryDecision, RetryPolicyEvaluator, timeout_seconds_from_policies, -}; -use crate::waymark_core::runner::state::{ - ActionCallSpec, ExecutionEdge, ExecutionNode, ExecutionNodeType, IndexValue, ListValue, - LiteralValue, NodeStatus, QueueNodeParams, RunnerState, RunnerStateError, -}; -use crate::waymark_core::runner::synthetic_exceptions::{ - SyntheticExceptionType, build_synthetic_exception_value, -}; -use crate::waymark_core::runner::value_visitor::ValueExpr; -use waymark_dag::{ - ActionCallNode, AggregatorNode, DAG, DAGEdge, DagEdgeIndex, EXCEPTION_SCOPE_VAR, EdgeType, -}; - -/// Raised when the runner executor cannot advance safely. -#[derive(Debug, thiserror::Error)] -#[error("{0}")] -pub struct RunnerExecutorError(pub String); - -#[derive(Clone, Debug)] -/// Persistence payloads required before dispatching new actions. 
-/// These need to be written to the backends in order to ensure that we can mark any -/// inflight actions as failed before queuing them up again -pub struct DurableUpdates { - pub actions_done: Vec, - pub graph_updates: Vec, -} - -#[derive(Clone, Debug)] -/// Return value for executor steps with newly queued action nodes. -pub struct ExecutorStep { - pub actions: Vec, - pub sleep_requests: Vec, - pub updates: Option, -} - -#[derive(Clone, Debug)] -/// Sleep requests emitted by the executor with wake-up times. -pub struct SleepRequest { - pub node_id: Uuid, - pub wake_at: DateTime, -} - -/// Action result payloads keyed by execution node id. -type ExecutionResultMap = HashMap; - -struct FinishedNodeOutcome { - /// Node to continue graph traversal from. - start: Option, - /// Exception payload forwarded to exception edges. - exception_value: Option, - /// Durable attempt metadata for this finished action (if applicable). - action_done: Option, - /// Retry action to dispatch immediately after state transition. - retry_action: Option, -} - -#[derive(Default)] -struct IncrementAccumulator { - actions_done: Vec, - pending_starts: Vec<(ExecutionNode, Option)>, - actions: Vec, - sleep_requests: Vec, - seen_actions: HashSet, - seen_sleep_nodes: HashSet, -} - -impl IncrementAccumulator { - fn absorb_finished_outcome(&mut self, outcome: FinishedNodeOutcome) { - if let Some(start) = outcome.start { - self.pending_starts.push((start, outcome.exception_value)); - } - if let Some(done) = outcome.action_done { - self.actions_done.push(done); - } - if let Some(retry_action) = outcome.retry_action { - self.record_action(retry_action); - } - } - - fn record_action(&mut self, action: ExecutionNode) { - // Multiple finished nodes can converge on the same queued action. 
- if self.seen_actions.insert(action.node_id) { - self.actions.push(action); - } - } - - fn record_sleep_request(&mut self, sleep_request: SleepRequest) { - if self.seen_sleep_nodes.insert(sleep_request.node_id) { - self.sleep_requests.push(sleep_request); - } - } -} - -struct WalkOutcome { - actions: Vec, - sleep_requests: Vec, -} - -struct FinishedActionMetadata { - attempt: i32, - started_at: Option>, - result: Value, -} - -enum ActionFailureTransition { - RetryQueued(Box), - Failed, -} - -enum TemplateKind { - SpreadAction(Box), - Aggregator(String), - Regular(String), -} - -enum SleepDecision { - Completed, - Blocked(DateTime), -} - -/// Advance a DAG template using the current runner state and action results. -/// -/// The executor treats the DAG as a control-flow template. It queues runtime -/// execution nodes into RunnerState, unrolling loops/spreads into explicit -/// iterations, and stops when it encounters action calls that must be executed -/// by an external worker. -/// -/// This serves as a runner supervisor for a single instance that's owned -/// in memory by our logic. -/// -/// Each call to increment() starts from finished execution nodes, walks -/// downstream through inline nodes (assignments, branches, joins, etc.), and -/// returns any newly queued action nodes that are now unblocked. -pub struct RunnerExecutor { - dag: Arc, - state: RunnerState, - action_results: ExecutionResultMap, - backend: Option>, - template_index: DagEdgeIndex, - incoming_exec_edges: FxHashMap>, - /// Index: template_id -> list of execution node IDs with that template - template_to_exec_nodes: FxHashMap>, - /// Cached assignment evaluations for the current increment pass. - /// Cleared at the start of each increment call. - eval_cache: RefCell>, - instance_id: Option, - terminal_error: Option, -} - -impl RunnerExecutor { - pub fn new( - dag: Arc, - state: RunnerState, - // Action results keyed by execution node id. 
- action_results: ExecutionResultMap, - backend: Option>, - ) -> Self { - let mut state = state; - state.dag = Some(dag.clone()); - state.set_link_queued_nodes(false); - - let template_index = dag.edge_index(); - let incoming_exec_edges = Self::build_incoming_exec_edges(&state); - let template_to_exec_nodes = Self::build_template_to_exec_nodes(&state); - - Self { - dag, - state, - action_results, - backend, - template_index, - incoming_exec_edges, - template_to_exec_nodes, - eval_cache: RefCell::new(FxHashMap::default()), - instance_id: None, - terminal_error: None, - } - } - - pub fn state(&self) -> &RunnerState { - &self.state - } - - pub fn state_mut(&mut self) -> &mut RunnerState { - &mut self.state - } - - pub fn dag(&self) -> &DAG { - &self.dag - } - - pub fn action_results(&self) -> &ExecutionResultMap { - &self.action_results - } - - pub fn instance_id(&self) -> Option { - self.instance_id - } - - pub fn set_instance_id(&mut self, instance_id: Uuid) { - self.instance_id = Some(instance_id); - } - - pub fn terminal_error(&self) -> Option<&Value> { - self.terminal_error.as_ref() - } - - pub(super) fn eval_cache_get(&self, key: &(Uuid, String)) -> Option { - self.eval_cache.borrow().get(key).cloned() - } - - pub(super) fn eval_cache_insert(&self, key: (Uuid, String), value: Value) { - self.eval_cache.borrow_mut().insert(key, value); - } - - /// Store an action result value for a specific execution node id. - pub fn set_action_result(&mut self, node_id: Uuid, result: Value) { - self.action_results.insert(node_id, result); - } - - /// Remove any cached action result for a specific execution node. - /// Used when re-queuing an action so we don't replay stale results. - pub fn clear_action_result(&mut self, node_id: Uuid) { - self.action_results.remove(&node_id); - } - - /// Resolve timeout policy seconds for an action node. 
- pub fn action_timeout_seconds(&self, node_id: Uuid) -> Result { - let node = self.execution_node(node_id)?; - if !node.is_action_call() { - return Ok(0); - } - let Some(action_template) = self.template_action_for_execution_node(node)? else { - return Ok(0); - }; - Ok(timeout_seconds_from_policies(&action_template.policies).unwrap_or(0)) - } - - /// Fail inflight actions and return any that should be retried. - /// - /// Use this after recovering from a crash: running actions are treated as - /// failed, their attempt counter is incremented if retry policies allow, - /// and retryable nodes are re-queued for execution. - pub fn resume(&mut self) -> Result { - let mut finished_nodes = Vec::new(); - for (node_id, node) in &self.state.nodes { - if node.is_action_call() && node.status == NodeStatus::Running { - finished_nodes.push(*node_id); - self.action_results.insert( - *node_id, - build_synthetic_exception_value( - SyntheticExceptionType::ExecutorResume, - format!( - "action {node_id} was running during resume and is treated as failed" - ), - Vec::new(), - ), - ); - } - } - if finished_nodes.is_empty() { - let updates = self.collect_updates(Vec::new())?; - return Ok(ExecutorStep { - actions: Vec::new(), - sleep_requests: Vec::new(), - updates, - }); - } - self.increment(&finished_nodes) - } - - /// Advance execution for finished nodes in a single batch. - /// - /// Use this when multiple actions complete in the same tick so the graph - /// update and action inserts are persisted together. - #[obs] - pub fn increment( - &mut self, - finished_nodes: &[Uuid], - ) -> Result { - self.eval_cache.borrow_mut().clear(); - let mut accum = IncrementAccumulator::default(); - self.collect_increment_results(finished_nodes, &mut accum)?; - self.walk_pending_starts(&mut accum)?; - - let IncrementAccumulator { - actions_done, - actions, - sleep_requests, - .. 
- } = accum; - let running_actions = self.mark_actions_running(&actions)?; - let updates = self.collect_updates(actions_done)?; - - // Note: Action timeouts and delayed retries require wall-clock tracking in the run loop. - // The executor only handles timeout failures once they surface as action results. - - Ok(ExecutorStep { - actions: running_actions, - sleep_requests, - updates, - }) - } - - fn collect_increment_results( - &mut self, - finished_nodes: &[Uuid], - accum: &mut IncrementAccumulator, - ) -> Result<(), RunnerExecutorError> { - for &node_id in finished_nodes { - accum.absorb_finished_outcome(self.apply_finished_node(node_id)?); - } - Ok(()) - } - - fn walk_pending_starts( - &mut self, - accum: &mut IncrementAccumulator, - ) -> Result<(), RunnerExecutorError> { - while let Some((start, exception_value)) = accum.pending_starts.pop() { - let outcome = self.walk_from(start, exception_value)?; - for action in outcome.actions { - accum.record_action(action); - } - for sleep_request in outcome.sleep_requests { - accum.record_sleep_request(sleep_request); - } - } - Ok(()) - } - - fn mark_actions_running( - &mut self, - actions: &[ExecutionNode], - ) -> Result, RunnerExecutorError> { - let mut running_actions = Vec::with_capacity(actions.len()); - for action in actions { - self.clear_action_result(action.node_id); - self.state - .mark_running(action.node_id) - .map_err(Self::state_error)?; - running_actions.push(self.execution_node_clone(action.node_id)?); - } - Ok(running_actions) - } - - /// Walk downstream from a node, executing inline nodes until blocked by an action node. 
- #[obs] - fn walk_from( - &mut self, - node: ExecutionNode, - exception_value: Option, - ) -> Result { - let mut pending = vec![(node, exception_value)]; - let mut actions = Vec::new(); - let mut sleep_requests = Vec::new(); - let mut forwarded_completed: HashSet = HashSet::new(); - - while let Some((current, current_exception)) = pending.pop() { - // template_id is the DAG node id, not the execution id. - let template_node_id = match ¤t.template_id { - Some(id) => id, - None => continue, - }; - let edges = if let Some(template_edges) = self.template_index.outgoing(template_node_id) - { - self.select_edges(template_edges, ¤t, current_exception)? - } else { - continue; - }; - for edge in edges { - let successors = self.queue_successor(¤t, &edge)?; - for successor in successors { - self.handle_walk_successor( - successor, - &mut pending, - &mut actions, - &mut sleep_requests, - &mut forwarded_completed, - )?; - } - } - } - Ok(WalkOutcome { - actions, - sleep_requests, - }) - } - - fn handle_walk_successor( - &mut self, - successor: ExecutionNode, - pending: &mut Vec<(ExecutionNode, Option)>, - actions: &mut Vec, - sleep_requests: &mut Vec, - forwarded_completed: &mut HashSet, - ) -> Result<(), RunnerExecutorError> { - if self.forward_completed_successor(&successor, pending, forwarded_completed) { - return Ok(()); - } - if successor.is_action_call() { - actions.push(successor); - return Ok(()); - } - if successor.is_sleep() { - self.handle_sleep_successor(successor, pending, sleep_requests)?; - return Ok(()); - } - self.handle_inline_successor(successor, pending) - } - - fn forward_completed_successor( - &self, - successor: &ExecutionNode, - pending: &mut Vec<(ExecutionNode, Option)>, - forwarded_completed: &mut HashSet, - ) -> bool { - if successor.status != NodeStatus::Completed { - return false; - } - if forwarded_completed.insert(successor.node_id) { - // Rehydrated runs can revisit completed paths to recover downstream - // sleep/action work without mutating 
already completed nodes. - pending.push((successor.clone(), None)); - } - true - } - - fn handle_sleep_successor( - &mut self, - successor: ExecutionNode, - pending: &mut Vec<(ExecutionNode, Option)>, - sleep_requests: &mut Vec, - ) -> Result<(), RunnerExecutorError> { - if !self.inline_ready(&successor) { - return Ok(()); - } - match self.handle_sleep_node(&successor)? { - SleepDecision::Completed => pending.push((successor, None)), - SleepDecision::Blocked(wake_at) => sleep_requests.push(SleepRequest { - node_id: successor.node_id, - wake_at, - }), - } - Ok(()) - } - - fn handle_inline_successor( - &mut self, - successor: ExecutionNode, - pending: &mut Vec<(ExecutionNode, Option)>, - ) -> Result<(), RunnerExecutorError> { - if !self.inline_ready(&successor) { - return Ok(()); - } - self.execute_inline_node(&successor)?; - pending.push((successor, None)); - Ok(()) - } - - /// Update state for a finished node and return replay metadata. - #[obs] - fn apply_finished_node( - &mut self, - node_id: Uuid, - ) -> Result { - if self.execution_node(node_id)?.is_action_call() { - return self.apply_finished_action_node(node_id); - } - // Non-action nodes are inline runtime steps; completion is a status flip. 
- self.state - .mark_completed(node_id) - .map_err(Self::state_error)?; - Ok(FinishedNodeOutcome { - start: Some(self.execution_node_clone(node_id)?), - exception_value: None, - action_done: None, - retry_action: None, - }) - } - - fn apply_finished_action_node( - &mut self, - node_id: Uuid, - ) -> Result { - let metadata = self.finished_action_metadata(node_id)?; - if is_exception_value(&metadata.result) { - return self.apply_exception_action_completion(node_id, metadata); - } - self.apply_successful_action_completion(node_id, metadata) - } - - fn finished_action_metadata( - &self, - node_id: Uuid, - ) -> Result { - let node = self.execution_node(node_id)?; - let result = - self.action_results.get(&node_id).cloned().ok_or_else(|| { - RunnerExecutorError(format!("missing action result for {node_id}")) - })?; - Ok(FinishedActionMetadata { - attempt: node.action_attempt, - started_at: node.started_at, - result, - }) - } - - fn apply_successful_action_completion( - &mut self, - node_id: Uuid, - metadata: FinishedActionMetadata, - ) -> Result { - self.state - .mark_completed(node_id) - .map_err(Self::state_error)?; - let assignments = self.execution_node(node_id)?.assignments.clone(); - if !assignments.is_empty() { - self.state.mark_latest_assignments(node_id, &assignments); - } - let completed_at = self - .execution_node(node_id)? 
- .completed_at - .unwrap_or_else(Utc::now); - let action_done = build_action_done( - node_id, - metadata.attempt, - ActionAttemptStatus::Completed, - metadata.started_at, - completed_at, - metadata.result, - ); - Ok(FinishedNodeOutcome { - start: Some(self.execution_node_clone(node_id)?), - exception_value: None, - action_done: Some(action_done), - retry_action: None, - }) - } - - fn apply_exception_action_completion( - &mut self, - node_id: Uuid, - metadata: FinishedActionMetadata, - ) -> Result { - let exception_value = metadata.result; - let status = action_done_status_for_exception(&exception_value); - let finished_at = Utc::now(); - - match self.apply_action_failure_transition(node_id, Some(&exception_value), finished_at)? { - ActionFailureTransition::RetryQueued(retry_action) => { - // Retries are re-queued and dispatched in this same increment pass. - let action_done = build_action_done( - node_id, - metadata.attempt, - status, - metadata.started_at, - finished_at, - exception_value, - ); - Ok(FinishedNodeOutcome { - start: None, - exception_value: None, - action_done: Some(action_done), - retry_action: Some(*retry_action), - }) - } - ActionFailureTransition::Failed => { - // Terminal failures keep exception payloads on the node so exception - // handler edges can bind $__exception in downstream inline nodes. - if !self.failure_has_exception_handler(node_id, &exception_value)? - && self.terminal_error.is_none() - { - self.terminal_error = Some(exception_value.clone()); - } - let completed_at = self - .execution_node(node_id)? 
- .completed_at - .unwrap_or(finished_at); - let action_done = build_action_done( - node_id, - metadata.attempt, - status, - metadata.started_at, - completed_at, - exception_value.clone(), - ); - Ok(FinishedNodeOutcome { - start: Some(self.execution_node_clone(node_id)?), - exception_value: Some(exception_value), - action_done: Some(action_done), - retry_action: None, - }) - } - } - } - - fn apply_action_failure_transition( - &mut self, - node_id: Uuid, - exception_value: Option<&Value>, - finished_at: DateTime, - ) -> Result { - let should_retry = { - let node = self.execution_node(node_id)?; - self.retry_decision(node, exception_value)?.should_retry - }; - if should_retry { - let retry_node = self.transition_action_to_retry(node_id, finished_at)?; - return Ok(ActionFailureTransition::RetryQueued(Box::new(retry_node))); - } - self.transition_action_to_failed(node_id, exception_value, finished_at)?; - Ok(ActionFailureTransition::Failed) - } - - fn transition_action_to_retry( - &mut self, - node_id: Uuid, - finished_at: DateTime, - ) -> Result { - // Retry transition invariants: - // 1) bump attempt counter before re-dispatch - // 2) return to queued status - // 3) keep completion timestamp for the failed attempt - self.state - .increment_action_attempt(node_id) - .map_err(Self::state_error)?; - let should_queue = !self.state.ready_queue.contains(&node_id); - { - let node = self.execution_node_mut(node_id)?; - node.status = NodeStatus::Queued; - node.started_at = None; - node.completed_at = Some(finished_at); - } - if should_queue { - self.state.ready_queue.push(node_id); - } - self.execution_node_clone(node_id) - } - - fn transition_action_to_failed( - &mut self, - node_id: Uuid, - exception_value: Option<&Value>, - finished_at: DateTime, - ) -> Result<(), RunnerExecutorError> { - self.state.mark_failed(node_id).map_err(Self::state_error)?; - self.execution_node_mut(node_id)?.completed_at = Some(finished_at); - if let Some(exception_value) = exception_value { - 
self.assign_exception_scope(node_id, exception_value.clone())?; - } - Ok(()) - } - - fn assign_exception_scope( - &mut self, - node_id: Uuid, - exception_value: Value, - ) -> Result<(), RunnerExecutorError> { - let exception_expr = ValueExpr::Literal(LiteralValue { - value: exception_value, - }); - let mut exception_assignment = HashMap::new(); - exception_assignment.insert(EXCEPTION_SCOPE_VAR.to_string(), exception_expr.clone()); - self.execution_node_mut(node_id)? - .assignments - .insert(EXCEPTION_SCOPE_VAR.to_string(), exception_expr); - self.state - .mark_latest_assignments(node_id, &exception_assignment); - Ok(()) - } - - fn failure_has_exception_handler( - &self, - node_id: Uuid, - exception_value: &Value, - ) -> Result { - let node = self.execution_node(node_id)?; - let template_id = match &node.template_id { - Some(id) => id, - None => return Ok(false), - }; - let template_edges = match self.template_index.outgoing(template_id) { - Some(edges) => edges, - None => return Ok(false), - }; - let selected = self.select_edges(template_edges, node, Some(exception_value.clone()))?; - Ok(selected - .iter() - .any(|edge| edge.edge_type == EdgeType::StateMachine)) - } - - fn retry_decision( - &self, - node: &ExecutionNode, - exception_value: Option<&Value>, - ) -> Result { - let Some(action) = self.template_action_for_execution_node(node)? else { - return Ok(RetryDecision { - should_retry: false, - }); - }; - let exception_name = exception_value.and_then(exception_type); - let evaluator = RetryPolicyEvaluator::new(&action.policies, exception_name); - Ok(evaluator.decision(node.action_attempt)) - } - - /// Select outgoing edges based on guards and exception state. 
- fn select_edges( - &self, - edges: &[DAGEdge], - _node: &ExecutionNode, - exception_value: Option, - ) -> Result, RunnerExecutorError> { - // Fast path: exception handling - if let Some(exception_value) = exception_value { - let mut result = Vec::new(); - for edge in edges { - if edge.exception_types.is_some() && self.exception_matches(edge, &exception_value) - { - result.push(edge.clone()); - } - } - return Ok(result); - } - - // Check if we have any conditional edges (guards or else) - let has_guards = edges.iter().any(|e| e.guard_expr.is_some()); - let has_else = edges.iter().any(|e| e.is_else); - - if has_guards || has_else { - // Evaluate guards first - let mut passed = Vec::new(); - for edge in edges { - if edge.guard_expr.is_some() && self.evaluate_guard(edge.guard_expr.as_ref())? { - passed.push(edge.clone()); - } - } - if !passed.is_empty() { - return Ok(passed); - } - // Fall through to else edges - let mut else_edges = Vec::new(); - for edge in edges { - if edge.is_else { - else_edges.push(edge.clone()); - } - } - return Ok(else_edges); - } - - // Fast path: regular edges (no exceptions, guards, or else) - let mut result = Vec::with_capacity(edges.len()); - for edge in edges { - if edge.exception_types.is_none() { - result.push(edge.clone()); - } - } - Ok(result) - } - - /// Queue successor nodes for a template edge, handling spreads/aggregators. 
- fn queue_successor( - &mut self, - source: &ExecutionNode, - edge: &DAGEdge, - ) -> Result, RunnerExecutorError> { - if edge.edge_type != EdgeType::StateMachine { - return Ok(Vec::new()); - } - - // Extract info from template without holding borrow across mutable calls - let kind = { - let template = self.dag.nodes.get(&edge.target).ok_or_else(|| { - RunnerExecutorError(format!("template node not found: {}", edge.target)) - })?; - - match template { - waymark_dag::DAGNode::ActionCall(action) if action.spread_loop_var.is_some() => { - TemplateKind::SpreadAction(Box::new(action.clone())) - } - waymark_dag::DAGNode::Aggregator(_) => { - TemplateKind::Aggregator(template.id().to_string()) - } - _ => TemplateKind::Regular(template.id().to_string()), - } - }; - - match kind { - TemplateKind::SpreadAction(action) => { - self.expand_spread_action(source, action.as_ref()) - } - TemplateKind::Aggregator(template_id) => { - if let Some(existing) = self.find_connected_successor(source.node_id, &template_id) - { - return Ok(vec![existing]); - } - let agg_node = self.get_or_create_aggregator(&template_id)?; - self.add_exec_edge(source.node_id, agg_node.node_id); - Ok(vec![agg_node]) - } - TemplateKind::Regular(template_id) => { - if let Some(existing) = self.find_connected_successor(source.node_id, &template_id) - { - return Ok(vec![existing]); - } - let exec_node = self.get_or_create_exec_node(&template_id)?; - self.add_exec_edge(source.node_id, exec_node.node_id); - Ok(vec![exec_node]) - } - } - } - - /// Unroll a spread action into per-item action nodes and a shared aggregator. - /// - /// Example IR: - /// - results = spread items:item -> @work(item=item) - /// Produces one action execution node per element in items and connects - /// them to a single aggregator node for results. 
- fn expand_spread_action( - &mut self, - source: &ExecutionNode, - template: &ActionCallNode, - ) -> Result, RunnerExecutorError> { - let collection_expr = template.spread_collection_expr.as_ref().ok_or_else(|| { - RunnerExecutorError("spread action missing collection expression".to_string()) - })?; - let loop_var = template.spread_loop_var.as_ref().ok_or_else(|| { - RunnerExecutorError("spread action missing loop variable".to_string()) - })?; - let elements = self.expand_collection(collection_expr)?; - let agg_id = template.aggregates_to.as_ref().ok_or_else(|| { - RunnerExecutorError("spread action missing aggregator link".to_string()) - })?; - - let agg_node = self - .state - .queue_template_node(agg_id, None) - .map_err(|err| RunnerExecutorError(err.0))?; - if elements.is_empty() { - return Ok(vec![agg_node]); - } - - let mut created = Vec::new(); - for (idx, element) in elements.into_iter().enumerate() { - let exec_node = self.queue_action_from_template( - template, - Some(HashMap::from([(loop_var.clone(), element)])), - Some(idx as i32), - )?; - self.add_exec_edge(source.node_id, exec_node.node_id); - self.add_exec_edge(exec_node.node_id, agg_node.node_id); - created.push(exec_node); - } - Ok(created) - } - - /// Create an action execution node from a template with optional bindings. - /// - /// Example IR: - /// - @work(value=item) with local_scope{"item": LiteralValue(3)} - /// Produces an action node whose kwargs include the literal 3. 
- fn queue_action_from_template( - &mut self, - template: &ActionCallNode, - local_scope: Option>, - iteration_index: Option, - ) -> Result { - let kwargs = template - .kwarg_exprs - .iter() - .map(|(name, expr)| { - let value = self - .state - .expr_to_value(expr, local_scope.as_ref()) - .map_err(|err| RunnerExecutorError(err.0))?; - Ok((name.clone(), value)) - }) - .collect::, RunnerExecutorError>>()?; - - let spec = ActionCallSpec { - action_name: template.action_name.clone(), - module_name: template.module_name.clone(), - kwargs, - }; - let targets = template - .targets - .clone() - .or_else(|| template.target.clone().map(|target| vec![target])) - .unwrap_or_default(); - let node = self - .state - .queue_node( - ExecutionNodeType::ActionCall.as_str(), - &template.label(), - QueueNodeParams { - template_id: Some(template.id.clone()), - targets: Some(targets.clone()), - action: Some(spec.clone()), - ..QueueNodeParams::default() - }, - ) - .map_err(|err| RunnerExecutorError(err.0))?; - for value in spec.kwargs.values() { - self.state.record_data_flow_from_value(node.node_id, value); - } - let result = self - .state - .assign_action_results( - &node, - &template.action_name, - Some(&targets), - iteration_index, - false, - ) - .map_err(|err| RunnerExecutorError(err.0))?; - if let Some(node_mut) = self.state.nodes.get_mut(&node.node_id) { - node_mut.value_expr = Some(ValueExpr::ActionResult(result)); - } - Ok(node) - } - - /// Execute a non-action node inline and update assignments/edges. 
- fn execute_inline_node(&mut self, node: &ExecutionNode) -> Result<(), RunnerExecutorError> { - let template_id = node - .template_id - .as_ref() - .ok_or_else(|| RunnerExecutorError("inline node missing template id".to_string()))?; - let template = self.dag.nodes.get(template_id).ok_or_else(|| { - RunnerExecutorError(format!("template node not found: {template_id}")) - })?; - - let aggregator = match template { - waymark_dag::DAGNode::Aggregator(aggregator) => Some(aggregator.clone()), - _ => None, - }; - if let Some(aggregator) = aggregator { - self.apply_aggregator_assignments(node, &aggregator)?; - } - - self.state - .mark_completed(node.node_id) - .map_err(|err| RunnerExecutorError(err.0)) - } - - fn handle_sleep_node( - &mut self, - node: &ExecutionNode, - ) -> Result { - let now = Utc::now(); - let scheduled_at = self - .state - .nodes - .get(&node.node_id) - .and_then(|node| node.scheduled_at); - if let Some(wake_at) = scheduled_at { - if wake_at <= now { - self.state - .mark_completed(node.node_id) - .map_err(|err| RunnerExecutorError(err.0))?; - return Ok(SleepDecision::Completed); - } - return Ok(SleepDecision::Blocked(wake_at)); - } - - let value_expr = self - .state - .nodes - .get(&node.node_id) - .and_then(|node| node.value_expr.clone()) - .unwrap_or(ValueExpr::Literal(LiteralValue { - value: Value::Number(0.into()), - })); - let materialized = self.state.materialize_value(value_expr); - let duration_value = self.evaluate_value_expr(&materialized)?; - - let duration_secs = match duration_value { - Value::Number(value) => value.as_f64().ok_or_else(|| { - RunnerExecutorError("sleep duration must be a number".to_string()) - })?, - Value::Null => 0.0, - _ => { - return Err(RunnerExecutorError( - "sleep duration must be a number".to_string(), - )); - } - }; - - if !duration_secs.is_finite() { - return Err(RunnerExecutorError( - "sleep duration must be finite".to_string(), - )); - } - - if duration_secs <= 0.0 { - self.state - 
.mark_completed(node.node_id) - .map_err(|err| RunnerExecutorError(err.0))?; - return Ok(SleepDecision::Completed); - } - - let duration = Duration::from_secs_f64(duration_secs); - let chrono_duration = chrono::Duration::from_std(duration) - .map_err(|_| RunnerExecutorError("sleep duration is out of range".to_string()))?; - let wake_at = now + chrono_duration; - self.state - .set_node_scheduled_at(node.node_id, Some(wake_at)) - .map_err(|err| RunnerExecutorError(err.0))?; - Ok(SleepDecision::Blocked(wake_at)) - } - - /// Check if an inline node is ready to run based on incoming edges. - fn inline_ready(&self, node: &ExecutionNode) -> bool { - if node.status == NodeStatus::Completed { - return false; - } - let incoming = match self.incoming_exec_edges.get(&node.node_id) { - Some(edges) if !edges.is_empty() => edges, - _ => return true, // No incoming edges means ready - }; - - let template = match node - .template_id - .as_ref() - .and_then(|id| self.dag.nodes.get(id)) - { - Some(template) => template, - None => return false, - }; - - if let waymark_dag::DAGNode::Aggregator(_) = template { - if let Some(required) = self.template_index.incoming(template.id()) { - let connected = self.connected_template_sources(node.node_id); - if !required.is_subset(&connected) { - return false; - } - } - for edge in incoming { - if let Some(source) = self.state.nodes.get(&edge.source) { - if !matches!(source.status, NodeStatus::Completed | NodeStatus::Failed) { - return false; - } - } else { - return false; - } - } - return true; - } - - for edge in incoming { - if let Some(source) = self.state.nodes.get(&edge.source) { - if !matches!(source.status, NodeStatus::Completed | NodeStatus::Failed) { - return false; - } - } else { - return false; - } - } - true - } - - /// Populate aggregated list assignments for a ready aggregator node. 
- /// - /// Example: - /// - results = spread items: @work(item) - /// When all action nodes complete, the aggregator assigns - /// results = [ActionResultValue(...), ...]. - fn apply_aggregator_assignments( - &mut self, - node: &ExecutionNode, - template: &AggregatorNode, - ) -> Result<(), RunnerExecutorError> { - let targets = template - .targets - .clone() - .or_else(|| template.target.clone().map(|target| vec![target])) - .unwrap_or_default(); - if targets.len() != 1 { - return Ok(()); - } - - let incoming_nodes: Vec = self - .incoming_exec_edges - .get(&node.node_id) - .cloned() - .unwrap_or_default() - .into_iter() - .filter(|edge| edge.edge_type == EdgeType::StateMachine) - .filter_map(|edge| self.state.nodes.get(&edge.source).cloned()) - .collect(); - - let mut values = Vec::new(); - for source in &incoming_nodes { - let value_expr = source.value_expr.clone().ok_or_else(|| { - RunnerExecutorError("aggregator missing source value".to_string()) - })?; - values.push(value_expr); - } - - let ordered = self.order_aggregated_values(&incoming_nodes, &values)?; - let list_value = ValueExpr::List(ListValue { elements: ordered }); - let assignment = HashMap::from([(targets[0].clone(), list_value.clone())]); - if let Some(node_mut) = self.state.nodes.get_mut(&node.node_id) { - node_mut.assignments.extend(assignment.clone()); - } - self.state - .mark_latest_assignments(node.node_id, &assignment); - self.state - .record_data_flow_from_value(node.node_id, &list_value); - Ok(()) - } - - /// Order aggregator values by spread iteration or parallel index. - fn order_aggregated_values( - &self, - sources: &[ExecutionNode], - values: &[ValueExpr], - ) -> Result, RunnerExecutorError> { - // Order by explicit iteration/parallel indices when available, then fall back to timeline. 
- if sources.len() != values.len() { - return Err(RunnerExecutorError( - "aggregator sources/value mismatch".to_string(), - )); - } - let timeline_index: HashMap = self - .state - .timeline - .iter() - .enumerate() - .map(|(idx, node_id)| (*node_id, idx)) - .collect(); - let mut pairs: Vec<((i32, i32), ValueExpr)> = Vec::with_capacity(values.len()); - for (source, value) in sources.iter().zip(values.iter()) { - let key = self.aggregated_sort_key(source, value, &timeline_index); - pairs.push((key, value.clone())); - } - pairs.sort_by_key(|item| item.0); - Ok(pairs.into_iter().map(|(_, value)| value).collect()) - } - - fn aggregated_sort_key( - &self, - source: &ExecutionNode, - value: &ValueExpr, - timeline_index: &HashMap, - ) -> (i32, i32) { - let mut primary = 2; - let mut secondary = *timeline_index.get(&source.node_id).unwrap_or(&0) as i32; - if let ValueExpr::ActionResult(action) = value { - if let Some(iter_idx) = action.iteration_index { - primary = 0; - secondary = iter_idx; - } - } else if let Some(template_id) = &source.template_id - && let Some(waymark_dag::DAGNode::ActionCall(action)) = self.dag.nodes.get(template_id) - && let Some(idx) = action.parallel_index - { - primary = 1; - secondary = idx; - } - (primary, secondary) - } - - /// Expand a collection expression into element ValueExprs. - /// - /// Example IR: - /// - spread range(3):i -> @work(i) - /// Produces [LiteralValue(0), LiteralValue(1), LiteralValue(2)]. 
- fn expand_collection( - &mut self, - expr: &ir::Expr, - ) -> Result, RunnerExecutorError> { - let value = Self::expr_to_value(expr)?; - let value = self.state.materialize_value(value); - if let ValueExpr::List(list) = value { - return Ok(list.elements); - } - - if let ValueExpr::ActionResult(action_value) = value.clone() { - let action_result = self.resolve_action_result(&action_value)?; - if let Value::Array(items) = action_result { - return Ok(items - .iter() - .enumerate() - .map(|(idx, _)| { - ValueExpr::Index(IndexValue { - object: Box::new(ValueExpr::ActionResult(action_value.clone())), - index: Box::new(ValueExpr::Literal(LiteralValue { - value: Value::Number((idx as i64).into()), - })), - }) - }) - .collect()); - } - return Err(RunnerExecutorError( - "spread collection is not iterable".to_string(), - )); - } - - let evaluated = self.evaluate_value_expr(&value)?; - if let Value::Array(items) = evaluated { - return Ok(items - .into_iter() - .map(|item| ValueExpr::Literal(LiteralValue { value: item })) - .collect()); - } - - Err(RunnerExecutorError( - "spread collection is not iterable".to_string(), - )) - } - - fn build_incoming_exec_edges(state: &RunnerState) -> FxHashMap> { - let mut incoming: FxHashMap> = FxHashMap::default(); - for edge in &state.edges { - if edge.edge_type != EdgeType::StateMachine { - continue; - } - incoming.entry(edge.target).or_default().push(edge.clone()); - } - incoming - } - - fn build_template_to_exec_nodes(state: &RunnerState) -> FxHashMap> { - let mut index: FxHashMap> = FxHashMap::default(); - for (node_id, node) in &state.nodes { - if let Some(template_id) = &node.template_id { - index.entry(template_id.clone()).or_default().push(*node_id); - } - } - index - } - - /// Register a new execution node in the template index - fn register_exec_node(&mut self, template_id: &str, node_id: Uuid) { - self.template_to_exec_nodes - .entry(template_id.to_string()) - .or_default() - .push(node_id); - } - - fn add_exec_edge(&mut self, 
source: Uuid, target: Uuid) { - let edge = ExecutionEdge { - source, - target, - edge_type: EdgeType::StateMachine, - }; - if self.state.edges.contains(&edge) { - return; - } - self.state.edges.insert(edge.clone()); - self.incoming_exec_edges - .entry(target) - .or_default() - .push(edge); - } - - fn connected_template_sources(&self, exec_node_id: Uuid) -> HashSet { - let mut connected = HashSet::new(); - for edge in self - .incoming_exec_edges - .get(&exec_node_id) - .cloned() - .unwrap_or_default() - { - if let Some(source) = self.state.nodes.get(&edge.source) - && let Some(template_id) = &source.template_id - { - connected.insert(template_id.clone()); - } - } - connected - } - - fn find_connected_successor( - &self, - source_id: Uuid, - template_id: &str, - ) -> Option { - for edge in &self.state.edges { - if edge.edge_type != EdgeType::StateMachine || edge.source != source_id { - continue; - } - let target = self.state.nodes.get(&edge.target)?; - if target.template_id.as_deref() == Some(template_id) { - return Some(target.clone()); - } - } - None - } - - fn get_or_create_aggregator( - &mut self, - template_id: &str, - ) -> Result { - let mut candidates: Vec = self - .state - .nodes - .values() - .filter(|node| { - node.template_id.as_deref() == Some(template_id) - && node.status != NodeStatus::Completed - }) - .cloned() - .collect(); - if !candidates.is_empty() { - let timeline_index: HashMap = self - .state - .timeline - .iter() - .enumerate() - .map(|(idx, node_id)| (*node_id, idx)) - .collect(); - candidates.sort_by_key(|node| { - std::cmp::Reverse(timeline_index.get(&node.node_id).copied().unwrap_or(0)) - }); - return Ok(candidates[0].clone()); - } - self.state - .queue_template_node(template_id, None) - .map_err(|err| RunnerExecutorError(err.0)) - } - - fn get_or_create_exec_node( - &mut self, - template_id: &str, - ) -> Result { - // Use the index to find candidate nodes - O(k) where k is nodes for this template - if let Some(node_ids) = 
self.template_to_exec_nodes.get(template_id) { - // Find the most recent non-completed node - let mut best_node_id: Option = None; - let mut best_timeline_pos: Option = None; - - for &node_id in node_ids { - if let Some(node) = self.state.nodes.get(&node_id) - && !matches!(node.status, NodeStatus::Completed | NodeStatus::Failed) - { - let timeline_pos = self.state.timeline.iter().position(|&id| id == node_id); - if let Some(pos) = timeline_pos { - if best_timeline_pos.is_none() || pos > best_timeline_pos.unwrap() { - best_timeline_pos = Some(pos); - best_node_id = Some(node_id); - } - } else if best_node_id.is_none() { - best_node_id = Some(node_id); - } - } - } - - if let Some(node_id) = best_node_id { - return self - .state - .nodes - .get(&node_id) - .cloned() - .ok_or_else(|| RunnerExecutorError(format!("node disappeared: {node_id}"))); - } - } - - // Create new node and register it in the index - let node = self - .state - .queue_template_node(template_id, None) - .map_err(|err| RunnerExecutorError(err.0))?; - self.register_exec_node(template_id, node.node_id); - Ok(node) - } - - fn execution_node(&self, node_id: Uuid) -> Result<&ExecutionNode, RunnerExecutorError> { - self.state - .nodes - .get(&node_id) - .ok_or_else(|| RunnerExecutorError(format!("execution node not found: {node_id}"))) - } - - fn execution_node_mut( - &mut self, - node_id: Uuid, - ) -> Result<&mut ExecutionNode, RunnerExecutorError> { - self.state - .nodes - .get_mut(&node_id) - .ok_or_else(|| RunnerExecutorError(format!("execution node not found: {node_id}"))) - } - - fn execution_node_clone(&self, node_id: Uuid) -> Result { - self.execution_node(node_id).cloned() - } - - fn template_action_for_execution_node( - &self, - node: &ExecutionNode, - ) -> Result, RunnerExecutorError> { - let Some(template_id) = node.template_id.as_ref() else { - return Ok(None); - }; - let template = self.dag.nodes.get(template_id).ok_or_else(|| { - RunnerExecutorError(format!("template node not found: 
{template_id}")) - })?; - match template { - waymark_dag::DAGNode::ActionCall(action) => Ok(Some(action)), - _ => Ok(None), - } - } - - fn state_error(err: RunnerStateError) -> RunnerExecutorError { - RunnerExecutorError(err.0) - } - - fn collect_updates( - &mut self, - actions_done: Vec, - ) -> Result, RunnerExecutorError> { - if self.backend.is_none() { - return Ok(None); - } - let graph_dirty = self.state.consume_graph_dirty_for_durable_execution(); - let mut graph_updates = Vec::new(); - if graph_dirty { - let instance_id = self.instance_id.ok_or_else(|| { - RunnerExecutorError("instance_id is required for graph persistence".to_string()) - })?; - graph_updates.push(GraphUpdate::from_state(instance_id, &self.state)); - } - let updates = DurableUpdates { - actions_done, - graph_updates, - }; - if updates.actions_done.is_empty() && updates.graph_updates.is_empty() { - Ok(None) - } else { - Ok(Some(updates)) - } - } -} - -fn exception_type(value: &Value) -> Option<&str> { - match value { - Value::Object(map) => map.get("type").and_then(|value| value.as_str()), - _ => None, - } -} - -fn action_done_status_for_exception(value: &Value) -> ActionAttemptStatus { - match SyntheticExceptionType::from_value(value) { - Some(SyntheticExceptionType::ExecutorResume) - | Some(SyntheticExceptionType::ActionTimeout) => ActionAttemptStatus::TimedOut, - None => ActionAttemptStatus::Failed, - } -} - -fn compute_action_duration_ms( - started_at: Option>, - completed_at: DateTime, -) -> Option { - started_at - .map(|started_at| { - completed_at - .signed_duration_since(started_at) - .num_milliseconds() - }) - .filter(|duration| *duration >= 0) -} - -fn build_action_done( - execution_id: Uuid, - attempt: i32, - status: ActionAttemptStatus, - started_at: Option>, - completed_at: DateTime, - result: Value, -) -> ActionDone { - ActionDone { - execution_id, - attempt, - status, - started_at, - completed_at: Some(completed_at), - duration_ms: compute_action_duration_ms(started_at, 
completed_at), - result, - } -} - -#[cfg(test)] -mod tests { - use super::*; - use std::collections::{HashMap, HashSet}; - use std::sync::Arc; - - use crate::backends::MemoryBackend; - use crate::messages::ast as ir; - use crate::waymark_core::ir_parser::parse_program; - use crate::waymark_core::runner::state::{ - ExecutionEdge, ExecutionNode, NodeStatus, RunnerState, - }; - use waymark_dag::{ - ActionCallNode, ActionCallParams, AggregatorNode, AssignmentNode, DAG, DAGEdge, - convert_to_dag, - }; - - fn variable(name: &str) -> ir::Expr { - ir::Expr { - kind: Some(ir::expr::Kind::Variable(ir::Variable { - name: name.to_string(), - })), - span: None, - } - } - - fn literal_int(value: i64) -> ir::Expr { - ir::Expr { - kind: Some(ir::expr::Kind::Literal(ir::Literal { - value: Some(ir::literal::Value::IntValue(value)), - })), - span: None, - } - } - - fn binary(left: ir::Expr, op: ir::BinaryOperator, right: ir::Expr) -> ir::Expr { - ir::Expr { - kind: Some(ir::expr::Kind::BinaryOp(Box::new(ir::BinaryOp { - left: Some(Box::new(left)), - op: op as i32, - right: Some(Box::new(right)), - }))), - span: None, - } - } - - #[test] - fn test_action_done_status_for_resume_exception_is_timed_out() { - let value = serde_json::json!({ - "type": "ExecutorResume", - "message": "resumed action timed out", - }); - assert_eq!( - action_done_status_for_exception(&value), - ActionAttemptStatus::TimedOut - ); - } - - #[test] - fn test_action_done_status_for_action_timeout_exception_is_timed_out() { - let value = serde_json::json!({ - "type": "ActionTimeout", - "message": "action timed out", - "timeout_seconds": 1, - "attempt": 1, - }); - assert_eq!( - action_done_status_for_exception(&value), - ActionAttemptStatus::TimedOut - ); - } - - #[test] - fn test_action_done_status_for_generic_exception_is_failed() { - let value = serde_json::json!({ - "type": "ValueError", - "message": "boom", - }); - assert_eq!( - action_done_status_for_exception(&value), - ActionAttemptStatus::Failed - ); - } - - 
#[test] - fn test_action_done_status_for_non_synthetic_timeout_error_is_failed() { - let value = serde_json::json!({ - "type": "TimeoutError", - "message": "user action raised timeout", - }); - assert_eq!( - action_done_status_for_exception(&value), - ActionAttemptStatus::Failed - ); - } - - #[test] - fn test_build_action_done_sets_duration_from_started_and_completed() { - let execution_id = Uuid::new_v4(); - let started_at = Utc::now(); - let completed_at = started_at + chrono::Duration::milliseconds(275); - let done = build_action_done( - execution_id, - 2, - ActionAttemptStatus::Completed, - Some(started_at), - completed_at, - serde_json::json!({"ok": true}), - ); - - assert_eq!(done.execution_id, execution_id); - assert_eq!(done.attempt, 2); - assert_eq!(done.status, ActionAttemptStatus::Completed); - assert_eq!(done.started_at, Some(started_at)); - assert_eq!(done.completed_at, Some(completed_at)); - assert_eq!(done.duration_ms, Some(275)); - } - - #[derive(Default)] - struct ActionNodeOptions { - policies: Vec, - spread_loop_var: Option, - spread_collection_expr: Option, - aggregates_to: Option, - } - - fn action_node( - node_id: &str, - action_name: &str, - kwarg_exprs: HashMap, - targets: Vec, - options: ActionNodeOptions, - ) -> ActionCallNode { - let ActionNodeOptions { - policies, - spread_loop_var, - spread_collection_expr, - aggregates_to, - } = options; - ActionCallNode::new( - node_id, - action_name, - ActionCallParams { - module_name: None, - kwargs: HashMap::new(), - kwarg_exprs, - policies, - targets: Some(targets), - target: None, - parallel_index: None, - aggregates_to, - spread_loop_var, - spread_collection_expr, - function_name: Some("main".to_string()), - }, - ) - } - - fn assignment_node( - node_id: &str, - targets: Vec, - assign_expr: ir::Expr, - ) -> AssignmentNode { - AssignmentNode::new( - node_id, - targets, - None, - Some(assign_expr), - None, - Some("main".to_string()), - ) - } - - fn aggregator_node( - node_id: &str, - 
aggregates_from: &str, - targets: Vec, - ) -> AggregatorNode { - AggregatorNode::new( - node_id, - aggregates_from, - Some(targets), - None, - "aggregate", - Some("main".to_string()), - ) - } - - fn snapshot_state( - state: &RunnerState, - action_results: &HashMap, - ) -> ( - HashMap, - HashSet, - HashMap, - ) { - ( - state.nodes.clone(), - state.edges.clone(), - action_results.clone(), - ) - } - - fn create_rehydrated_executor( - dag: &Arc, - nodes: HashMap, - edges: HashSet, - action_results: HashMap, - ) -> RunnerExecutor { - let state = RunnerState::new(Some(Arc::clone(dag)), Some(nodes), Some(edges), false); - RunnerExecutor::new(Arc::clone(dag), state, action_results, None) - } - - fn compare_executor_states(original: &RunnerExecutor, rehydrated: &RunnerExecutor) { - let orig_state = original.state(); - let rehy_state = rehydrated.state(); - assert_eq!( - orig_state.nodes.keys().collect::>(), - rehy_state.nodes.keys().collect::>(), - ); - for node_id in orig_state.nodes.keys() { - let orig_node = orig_state.nodes.get(node_id).unwrap(); - let rehy_node = rehy_state.nodes.get(node_id).unwrap(); - assert_eq!(orig_node.node_type, rehy_node.node_type); - assert_eq!(orig_node.status, rehy_node.status); - assert_eq!(orig_node.template_id, rehy_node.template_id); - assert_eq!(orig_node.targets, rehy_node.targets); - assert_eq!(orig_node.action_attempt, rehy_node.action_attempt); - } - assert_eq!(orig_state.edges, rehy_state.edges); - } - - fn completion_action_result(action: &ExecutionNode) -> Value { - Value::String(format!( - "{}:attempt{}", - action.template_id.as_deref().unwrap_or("unknown_action"), - action.action_attempt - )) - } - - fn dag_from_ir_source(source: &str) -> Arc { - let program = parse_program(source.trim()).expect("parse program"); - Arc::new(convert_to_dag(&program).expect("convert program to DAG")) - } - - fn build_executor_at_entry(dag: &Arc) -> (RunnerExecutor, Uuid) { - let mut state = RunnerState::new(Some(Arc::clone(dag)), None, None, 
false); - let entry_template = dag.entry_node.as_ref().expect("dag entry node"); - let entry_exec = state - .queue_template_node(entry_template, None) - .expect("queue entry node"); - ( - RunnerExecutor::new(Arc::clone(dag), state, HashMap::new(), None), - entry_exec.node_id, - ) - } - - type ActionResultFor = fn(&ExecutionNode) -> Value; - - struct RehydrateBranchHarness { - dag: Arc, - canonical: RunnerExecutor, - branches: Vec, - action_result_for: ActionResultFor, - } - - impl RehydrateBranchHarness { - const MAX_TICKS: usize = 256; - - fn new( - dag: Arc, - canonical: RunnerExecutor, - action_result_for: ActionResultFor, - ) -> Self { - let mut harness = Self { - dag, - canonical, - branches: Vec::new(), - action_result_for, - }; - harness.fork_from_canonical(); - harness - } - - fn run_and_assert(mut self) { - self.advance_canonical_with_forks(); - for (index, branch) in self.branches.iter_mut().enumerate() { - Self::advance_executor_to_completion(branch, self.action_result_for) - .unwrap_or_else(|err| panic!("branch {index} failed to complete: {err}")); - Self::assert_completed_executor_equivalent(&self.canonical, branch); - } - } - - fn fork_from_canonical(&mut self) { - let (nodes_snap, edges_snap, results_snap) = - snapshot_state(self.canonical.state(), self.canonical.action_results()); - self.branches.push(create_rehydrated_executor( - &self.dag, - nodes_snap, - edges_snap, - results_snap, - )); - } - - fn advance_canonical_with_forks(&mut self) { - let mut converged = false; - for _ in 0..Self::MAX_TICKS { - let progressed = Self::advance_executor_one_increment( - &mut self.canonical, - self.action_result_for, - ) - .expect("advance canonical executor"); - if !progressed { - converged = true; - break; - } - self.fork_from_canonical(); - } - assert!(converged, "canonical executor did not converge"); - assert!( - !self.branches.is_empty(), - "expected at least one rehydrated branch" - ); - } - - fn advance_executor_one_increment( - executor: &mut 
RunnerExecutor, - action_result_for: ActionResultFor, - ) -> Result { - let active_actions: Vec = executor - .state() - .nodes - .values() - .filter(|node| { - node.is_action_call() - && matches!(node.status, NodeStatus::Queued | NodeStatus::Running) - }) - .cloned() - .collect(); - for action in &active_actions { - if !executor.action_results().contains_key(&action.node_id) { - executor.set_action_result(action.node_id, action_result_for(action)); - } - } - - let mut finished_nodes: Vec = - active_actions.iter().map(|node| node.node_id).collect(); - finished_nodes.extend( - executor - .state() - .nodes - .values() - .filter(|node| { - node.status == NodeStatus::Queued - && node.is_sleep() - && node.scheduled_at.is_some() - }) - .map(|node| node.node_id), - ); - - if finished_nodes.is_empty() { - return Ok(false); - } - - let step = executor.increment(&finished_nodes)?; - for action in &step.actions { - if !executor.action_results().contains_key(&action.node_id) { - executor.set_action_result(action.node_id, action_result_for(action)); - } - } - for sleep_request in &step.sleep_requests { - executor - .state_mut() - .set_node_scheduled_at( - sleep_request.node_id, - Some(Utc::now() - chrono::Duration::seconds(1)), - ) - .map_err(|err| RunnerExecutorError(err.0))?; - } - Ok(true) - } - - fn advance_executor_to_completion( - executor: &mut RunnerExecutor, - action_result_for: ActionResultFor, - ) -> Result<(), RunnerExecutorError> { - for _ in 0..Self::MAX_TICKS { - if !Self::advance_executor_one_increment(executor, action_result_for)? 
{ - return Ok(()); - } - } - - Err(RunnerExecutorError( - "executor did not converge to completion".to_string(), - )) - } - - fn count_keyed(items: impl IntoIterator) -> HashMap { - let mut counts: HashMap = HashMap::new(); - for item in items { - *counts.entry(item).or_insert(0) += 1; - } - counts - } - - fn node_shape_counts(executor: &RunnerExecutor) -> HashMap { - Self::count_keyed(executor.state().nodes.values().map(|node| { - let mut targets = node.targets.clone(); - targets.sort(); - let mut assignment_keys: Vec = node.assignments.keys().cloned().collect(); - assignment_keys.sort(); - let mut action_kwarg_keys = node - .action - .as_ref() - .map(|action| action.kwargs.keys().cloned().collect::>()) - .unwrap_or_default(); - action_kwarg_keys.sort(); - format!( - "type={}|template={}|status={:?}|attempt={}|targets={targets:?}|assignments={assignment_keys:?}|action={}({action_kwarg_keys:?})|scheduled={}", - node.node_type, - node.template_id.clone().unwrap_or_default(), - node.status, - node.action_attempt, - node.action - .as_ref() - .map(|action| action.action_name.clone()) - .unwrap_or_default(), - node.scheduled_at.is_some(), - ) - })) - } - - fn edge_shape_counts(executor: &RunnerExecutor) -> HashMap { - Self::count_keyed(executor.state().edges.iter().map(|edge| { - let source = executor - .state() - .nodes - .get(&edge.source) - .expect("source node") - .template_id - .clone() - .unwrap_or_else(|| "__unknown_source".to_string()); - let target = executor - .state() - .nodes - .get(&edge.target) - .expect("target node") - .template_id - .clone() - .unwrap_or_else(|| "__unknown_target".to_string()); - format!("{source}-{:?}->{target}", edge.edge_type) - })) - } - - fn action_result_counts(executor: &RunnerExecutor) -> HashMap { - Self::count_keyed(executor.action_results().iter().map(|(node_id, value)| { - let template_id = executor - .state() - .nodes - .get(node_id) - .and_then(|node| node.template_id.clone()) - .unwrap_or_else(|| 
"__unknown_action".to_string()); - let rendered = - serde_json::to_string(value).expect("action result should serialize to JSON"); - format!("{template_id}:{rendered}") - })) - } - - fn assert_completed_executor_equivalent( - canonical: &RunnerExecutor, - rehydrated: &RunnerExecutor, - ) { - assert_eq!( - Self::node_shape_counts(canonical), - Self::node_shape_counts(rehydrated) - ); - assert_eq!( - Self::edge_shape_counts(canonical), - Self::edge_shape_counts(rehydrated) - ); - assert_eq!( - canonical.state().timeline.len(), - rehydrated.state().timeline.len() - ); - assert_eq!( - Self::action_result_counts(canonical), - Self::action_result_counts(rehydrated) - ); - assert_eq!( - canonical.state().ready_queue.is_empty(), - rehydrated.state().ready_queue.is_empty() - ); - - let replay_canonical = crate::waymark_core::runner::replay_variables( - canonical.state(), - canonical.action_results(), - ) - .expect("replay canonical"); - let replay_rehydrated = crate::waymark_core::runner::replay_variables( - rehydrated.state(), - rehydrated.action_results(), - ) - .expect("replay rehydrated"); - - let mut assignment_counts: HashMap = HashMap::new(); - for node in canonical.state().nodes.values() { - for target in node.assignments.keys() { - *assignment_counts.entry(target.clone()).or_insert(0) += 1; - } - } - let stable_canonical: HashMap = replay_canonical - .variables - .into_iter() - .filter(|(name, _)| assignment_counts.get(name).copied().unwrap_or(0) <= 1) - .collect(); - let stable_rehydrated: HashMap = replay_rehydrated - .variables - .into_iter() - .filter(|(name, _)| assignment_counts.get(name).copied().unwrap_or(0) <= 1) - .collect(); - assert_eq!(stable_canonical, stable_rehydrated); - } - } - - fn setup_linear_assignment_checkpoint() -> (Arc, RunnerExecutor) { - let dag = dag_from_ir_source( - r#" -fn main(input: [], output: [z]): - x = @fetch() - y = x + 1 - z = @process(value=y) - return z -"#, - ); - let (mut executor, entry_exec_id) = 
build_executor_at_entry(&dag); - - let first_step = executor - .increment(&[entry_exec_id]) - .expect("advance from entry"); - assert_eq!(first_step.actions.len(), 1); - let first_exec = first_step.actions[0].clone(); - executor.set_action_result(first_exec.node_id, Value::Number(10.into())); - - let step = executor.increment(&[first_exec.node_id]).expect("advance"); - assert_eq!(step.actions.len(), 1); - (dag, executor) - } - - fn setup_sleep_resume_checkpoint() -> (Arc, RunnerExecutor) { - let dag = dag_from_ir_source( - r#" -fn main(input: [], output: [resumed]): - seed = 1 - started = @get_timestamp() - sleep 60 - resumed = @get_timestamp() - return resumed -"#, - ); - let (mut executor, entry_exec_id) = build_executor_at_entry(&dag); - - let start_step = executor.increment(&[entry_exec_id]).expect("start"); - assert_eq!(start_step.actions.len(), 1); - let start_exec = start_step.actions[0].clone(); - executor.set_action_result(start_exec.node_id, Value::String("t0".to_string())); - - let sleep_step = executor - .increment(&[start_exec.node_id]) - .expect("advance to sleep"); - assert!(sleep_step.actions.is_empty()); - assert_eq!(sleep_step.sleep_requests.len(), 1); - (dag, executor) - } - - fn setup_spread_checkpoint() -> (Arc, RunnerExecutor) { - let dag = dag_from_ir_source( - r#" -fn main(input: [], output: [done]): - items = @get_items() - results = spread items:item -> @double(value=item) - done = @finalize(values=results) - return done -"#, - ); - let (mut executor, entry_exec_id) = build_executor_at_entry(&dag); - - let first_step = executor.increment(&[entry_exec_id]).expect("start"); - assert_eq!(first_step.actions.len(), 1); - let initial_exec = first_step.actions[0].clone(); - executor.set_action_result( - initial_exec.node_id, - Value::Array(vec![1.into(), 2.into(), 3.into()]), - ); - - let step1 = executor - .increment(&[initial_exec.node_id]) - .expect("expand spread"); - assert_eq!(step1.actions.len(), 3); - for (idx, node) in 
step1.actions.iter().enumerate() { - executor.set_action_result(node.node_id, Value::Number(((idx + 1) as i64).into())); - } - - let step2 = executor - .increment( - &step1 - .actions - .iter() - .map(|node| node.node_id) - .collect::>(), - ) - .expect("complete spread"); - assert_eq!(step2.actions.len(), 1); - (dag, executor) - } - - #[test] - fn test_executor_unblocks_downstream_action() { - let mut dag = DAG::default(); - - let action_start = action_node( - "action_start", - "fetch", - HashMap::new(), - vec!["x".to_string()], - ActionNodeOptions::default(), - ); - let assign_node = assignment_node( - "assign", - vec!["y".to_string()], - binary( - variable("x"), - ir::BinaryOperator::BinaryOpAdd, - literal_int(1), - ), - ); - let action_next = action_node( - "action_next", - "work", - HashMap::from([("value".to_string(), variable("y"))]), - vec!["z".to_string()], - ActionNodeOptions::default(), - ); - - dag.add_node(waymark_dag::DAGNode::ActionCall(action_start.clone())); - dag.add_node(waymark_dag::DAGNode::Assignment(assign_node.clone())); - dag.add_node(waymark_dag::DAGNode::ActionCall(action_next.clone())); - dag.add_edge(DAGEdge::state_machine( - action_start.id.clone(), - assign_node.id.clone(), - )); - dag.add_edge(DAGEdge::state_machine( - assign_node.id.clone(), - action_next.id.clone(), - )); - - let dag = Arc::new(dag); - let mut state = RunnerState::new(Some(dag.clone()), None, None, false); - let start_exec = state - .queue_template_node(&action_start.id, None) - .expect("queue"); - - let mut action_results = HashMap::new(); - action_results.insert(start_exec.node_id, Value::Number(10.into())); - let mut executor = RunnerExecutor::new(dag.clone(), state, action_results, None); - - let step = executor - .increment(&[start_exec.node_id]) - .expect("increment"); - assert_eq!(step.actions.len(), 1); - assert_eq!( - step.actions[0].template_id.as_deref(), - Some(action_next.id.as_str()) - ); - } - - #[test] - fn test_rehydrate_after_first_action_queued() 
{ - let mut dag = DAG::default(); - let action1 = action_node( - "action1", - "fetch", - HashMap::new(), - vec!["x".to_string()], - ActionNodeOptions::default(), - ); - let action2 = action_node( - "action2", - "process", - HashMap::from([("value".to_string(), variable("x"))]), - vec!["y".to_string()], - ActionNodeOptions::default(), - ); - - dag.add_node(waymark_dag::DAGNode::ActionCall(action1.clone())); - dag.add_node(waymark_dag::DAGNode::ActionCall(action2.clone())); - dag.add_edge(DAGEdge::state_machine( - action1.id.clone(), - action2.id.clone(), - )); - - let dag = Arc::new(dag); - let mut state = RunnerState::new(Some(dag.clone()), None, None, false); - let exec1 = state.queue_template_node(&action1.id, None).expect("queue"); - let executor = RunnerExecutor::new(dag.clone(), state, HashMap::new(), None); - - let (nodes_snap, edges_snap, results_snap) = - snapshot_state(executor.state(), executor.action_results()); - let rehydrated = create_rehydrated_executor(&dag, nodes_snap, edges_snap, results_snap); - - compare_executor_states(&executor, &rehydrated); - let node = rehydrated.state().nodes.get(&exec1.node_id).expect("node"); - assert_eq!(node.status, NodeStatus::Queued); - } - - #[test] - fn test_rehydrate_after_action_completed_and_increment() { - let mut dag = DAG::default(); - let action1 = action_node( - "action1", - "fetch", - HashMap::new(), - vec!["x".to_string()], - ActionNodeOptions::default(), - ); - let action2 = action_node( - "action2", - "process", - HashMap::from([("value".to_string(), variable("x"))]), - vec!["y".to_string()], - ActionNodeOptions::default(), - ); - - dag.add_node(waymark_dag::DAGNode::ActionCall(action1.clone())); - dag.add_node(waymark_dag::DAGNode::ActionCall(action2.clone())); - dag.add_edge(DAGEdge::state_machine( - action1.id.clone(), - action2.id.clone(), - )); - - let dag = Arc::new(dag); - let mut state = RunnerState::new(Some(dag.clone()), None, None, false); - let exec1 = state.queue_template_node(&action1.id, 
None).expect("queue"); - - let mut action_results = HashMap::new(); - action_results.insert(exec1.node_id, Value::Number(42.into())); - let mut executor = RunnerExecutor::new(dag.clone(), state, action_results, None); - - let step = executor.increment(&[exec1.node_id]).expect("increment"); - assert_eq!(step.actions.len(), 1); - let exec2 = &step.actions[0]; - assert_eq!(exec2.template_id.as_deref(), Some(action2.id.as_str())); - - let (nodes_snap, edges_snap, results_snap) = - snapshot_state(executor.state(), executor.action_results()); - let rehydrated = create_rehydrated_executor(&dag, nodes_snap, edges_snap, results_snap); - compare_executor_states(&executor, &rehydrated); - - let node1 = rehydrated.state().nodes.get(&exec1.node_id).unwrap(); - assert_eq!(node1.status, NodeStatus::Completed); - let node2 = rehydrated.state().nodes.get(&exec2.node_id).unwrap(); - assert_eq!(node2.status, NodeStatus::Running); - } - - #[test] - fn test_rehydrate_multi_step_chain() { - let mut dag = DAG::default(); - let action1 = action_node( - "action1", - "step1", - HashMap::new(), - vec!["a".to_string()], - ActionNodeOptions::default(), - ); - let action2 = action_node( - "action2", - "step2", - HashMap::from([("input".to_string(), variable("a"))]), - vec!["b".to_string()], - ActionNodeOptions::default(), - ); - let action3 = action_node( - "action3", - "step3", - HashMap::from([("input".to_string(), variable("b"))]), - vec!["c".to_string()], - ActionNodeOptions::default(), - ); - - dag.add_node(waymark_dag::DAGNode::ActionCall(action1.clone())); - dag.add_node(waymark_dag::DAGNode::ActionCall(action2.clone())); - dag.add_node(waymark_dag::DAGNode::ActionCall(action3.clone())); - dag.add_edge(DAGEdge::state_machine( - action1.id.clone(), - action2.id.clone(), - )); - dag.add_edge(DAGEdge::state_machine( - action2.id.clone(), - action3.id.clone(), - )); - - let dag = Arc::new(dag); - let mut state = RunnerState::new(Some(dag.clone()), None, None, false); - let exec1 = 
state.queue_template_node(&action1.id, None).expect("queue"); - let mut executor = RunnerExecutor::new(dag.clone(), state, HashMap::new(), None); - - let (nodes_snap, edges_snap, results_snap) = - snapshot_state(executor.state(), executor.action_results()); - let rehydrated = create_rehydrated_executor(&dag, nodes_snap, edges_snap, results_snap); - compare_executor_states(&executor, &rehydrated); - - executor.set_action_result(exec1.node_id, Value::Number(10.into())); - let step1 = executor.increment(&[exec1.node_id]).expect("increment"); - let exec2 = step1.actions[0].clone(); - - let (nodes_snap, edges_snap, results_snap) = - snapshot_state(executor.state(), executor.action_results()); - let rehydrated = create_rehydrated_executor(&dag, nodes_snap, edges_snap, results_snap); - compare_executor_states(&executor, &rehydrated); - - executor.set_action_result(exec2.node_id, Value::Number(20.into())); - let step2 = executor.increment(&[exec2.node_id]).expect("increment"); - let exec3 = step2.actions[0].clone(); - - let (nodes_snap, edges_snap, results_snap) = - snapshot_state(executor.state(), executor.action_results()); - let rehydrated = create_rehydrated_executor(&dag, nodes_snap, edges_snap, results_snap); - compare_executor_states(&executor, &rehydrated); - - executor.set_action_result(exec3.node_id, Value::Number(30.into())); - let step3 = executor.increment(&[exec3.node_id]).expect("increment"); - assert!(step3.actions.is_empty()); - - let (nodes_snap, edges_snap, results_snap) = - snapshot_state(executor.state(), executor.action_results()); - let rehydrated = create_rehydrated_executor(&dag, nodes_snap, edges_snap, results_snap); - compare_executor_states(&executor, &rehydrated); - - for node in rehydrated.state().nodes.values() { - if node.is_action_call() { - assert_eq!(node.status, NodeStatus::Completed); - } - } - } - - #[test] - fn test_rehydrate_with_assignment_node() { - let mut dag = DAG::default(); - let action1 = action_node( - "action1", - "fetch", 
- HashMap::new(), - vec!["x".to_string()], - ActionNodeOptions::default(), - ); - let assign = assignment_node( - "assign", - vec!["y".to_string()], - binary( - variable("x"), - ir::BinaryOperator::BinaryOpAdd, - literal_int(1), - ), - ); - let action2 = action_node( - "action2", - "process", - HashMap::from([("value".to_string(), variable("y"))]), - vec!["z".to_string()], - ActionNodeOptions::default(), - ); - - dag.add_node(waymark_dag::DAGNode::ActionCall(action1.clone())); - dag.add_node(waymark_dag::DAGNode::Assignment(assign.clone())); - dag.add_node(waymark_dag::DAGNode::ActionCall(action2.clone())); - dag.add_edge(DAGEdge::state_machine( - action1.id.clone(), - assign.id.clone(), - )); - dag.add_edge(DAGEdge::state_machine( - assign.id.clone(), - action2.id.clone(), - )); - - let dag = Arc::new(dag); - let mut state = RunnerState::new(Some(dag.clone()), None, None, false); - let exec1 = state.queue_template_node(&action1.id, None).expect("queue"); - - let mut action_results = HashMap::new(); - action_results.insert(exec1.node_id, Value::Number(10.into())); - let mut executor = RunnerExecutor::new(dag.clone(), state, action_results, None); - - let step = executor.increment(&[exec1.node_id]).expect("increment"); - assert_eq!(step.actions.len(), 1); - assert_eq!( - step.actions[0].template_id.as_deref(), - Some(action2.id.as_str()) - ); - - let (nodes_snap, edges_snap, results_snap) = - snapshot_state(executor.state(), executor.action_results()); - let rehydrated = create_rehydrated_executor(&dag, nodes_snap, edges_snap, results_snap); - compare_executor_states(&executor, &rehydrated); - - let assign_nodes: Vec<_> = rehydrated - .state() - .nodes - .values() - .filter(|node| node.template_id.as_deref() == Some(&assign.id)) - .collect(); - assert_eq!(assign_nodes.len(), 1); - assert_eq!(assign_nodes[0].status, NodeStatus::Completed); - assert!(assign_nodes[0].assignments.contains_key("y")); - } - - #[test] - fn test_rehydrate_preserves_action_kwargs() { - let 
mut dag = DAG::default(); - let action1 = action_node( - "action1", - "compute", - HashMap::from([ - ("a".to_string(), literal_int(5)), - ( - "b".to_string(), - ir::Expr { - kind: Some(ir::expr::Kind::Literal(ir::Literal { - value: Some(ir::literal::Value::StringValue("test".to_string())), - })), - span: None, - }, - ), - ]), - vec!["result".to_string()], - ActionNodeOptions::default(), - ); - - dag.add_node(waymark_dag::DAGNode::ActionCall(action1.clone())); - let dag = Arc::new(dag); - let mut state = RunnerState::new(Some(dag.clone()), None, None, false); - let exec1 = state.queue_template_node(&action1.id, None).expect("queue"); - let executor = RunnerExecutor::new(dag.clone(), state, HashMap::new(), None); - - let (nodes_snap, edges_snap, results_snap) = - snapshot_state(executor.state(), executor.action_results()); - let rehydrated = create_rehydrated_executor(&dag, nodes_snap, edges_snap, results_snap); - - let orig_node = executor.state().nodes.get(&exec1.node_id).unwrap(); - let rehy_node = rehydrated.state().nodes.get(&exec1.node_id).unwrap(); - assert!(orig_node.action.is_some()); - assert!(rehy_node.action.is_some()); - let orig_action = orig_node.action.as_ref().unwrap(); - let rehy_action = rehy_node.action.as_ref().unwrap(); - assert_eq!(orig_action.action_name, rehy_action.action_name); - let orig_keys: HashSet<_> = orig_action.kwargs.keys().cloned().collect(); - let rehy_keys: HashSet<_> = rehy_action.kwargs.keys().cloned().collect(); - assert_eq!(orig_keys, rehy_keys); - } - - #[test] - fn test_rehydrate_increments_from_same_position() { - let mut dag = DAG::default(); - let action1 = action_node( - "action1", - "first", - HashMap::new(), - vec!["x".to_string()], - ActionNodeOptions::default(), - ); - let action2 = action_node( - "action2", - "second", - HashMap::new(), - vec!["y".to_string()], - ActionNodeOptions::default(), - ); - dag.add_node(waymark_dag::DAGNode::ActionCall(action1.clone())); - 
dag.add_node(waymark_dag::DAGNode::ActionCall(action2.clone())); - dag.add_edge(DAGEdge::state_machine( - action1.id.clone(), - action2.id.clone(), - )); - - let dag = Arc::new(dag); - let mut state = RunnerState::new(Some(dag.clone()), None, None, false); - let exec1 = state.queue_template_node(&action1.id, None).expect("queue"); - - let mut action_results = HashMap::new(); - action_results.insert(exec1.node_id, Value::Number(100.into())); - let mut executor = RunnerExecutor::new(dag.clone(), state, action_results, None); - - let (nodes_snap, edges_snap, results_snap) = - snapshot_state(executor.state(), executor.action_results()); - let mut rehydrated = create_rehydrated_executor(&dag, nodes_snap, edges_snap, results_snap); - - let orig_step = executor.increment(&[exec1.node_id]).expect("increment"); - let rehy_step = rehydrated.increment(&[exec1.node_id]).expect("increment"); - assert_eq!(orig_step.actions.len(), rehy_step.actions.len()); - assert_eq!( - orig_step.actions[0].template_id, - rehy_step.actions[0].template_id - ); - } - - #[test] - fn test_rehydrate_resume_marks_running_as_retryable() { - let mut dag = DAG::default(); - let action1 = action_node( - "action1", - "work", - HashMap::new(), - vec!["x".to_string()], - ActionNodeOptions { - policies: vec![ir::PolicyBracket { - kind: Some(ir::policy_bracket::Kind::Retry(ir::RetryPolicy { - max_retries: 3, - backoff: None, - exception_types: vec!["ExecutorResume".to_string()], - })), - }], - ..ActionNodeOptions::default() - }, - ); - dag.add_node(waymark_dag::DAGNode::ActionCall(action1.clone())); - - let dag = Arc::new(dag); - let mut state = RunnerState::new(Some(dag.clone()), None, None, false); - let exec1 = state.queue_template_node(&action1.id, None).expect("queue"); - state.mark_running(exec1.node_id).expect("mark running"); - - let executor = RunnerExecutor::new(dag.clone(), state, HashMap::new(), None); - let (nodes_snap, edges_snap, results_snap) = - snapshot_state(executor.state(), 
executor.action_results()); - let mut rehydrated = create_rehydrated_executor(&dag, nodes_snap, edges_snap, results_snap); - - assert_eq!( - rehydrated.state().nodes.get(&exec1.node_id).unwrap().status, - NodeStatus::Running - ); - - let step = rehydrated.resume().expect("resume"); - assert_eq!(step.actions.len(), 1); - assert_eq!(step.actions[0].node_id, exec1.node_id); - let node = rehydrated.state().nodes.get(&exec1.node_id).unwrap(); - assert_eq!(node.status, NodeStatus::Running); - assert_eq!(node.action_attempt, 2); - assert!(node.started_at.is_some()); - } - - #[test] - fn test_increment_records_failed_action_attempt() { - let mut dag = DAG::default(); - let action = action_node( - "action1", - "work", - HashMap::new(), - vec!["x".to_string()], - ActionNodeOptions::default(), - ); - dag.add_node(waymark_dag::DAGNode::ActionCall(action.clone())); - - let dag = Arc::new(dag); - let mut state = RunnerState::new(Some(dag.clone()), None, None, false); - let exec = state.queue_template_node(&action.id, None).expect("queue"); - - let mut executor = RunnerExecutor::new( - dag, - state, - HashMap::new(), - Some(Arc::new(MemoryBackend::new())), - ); - executor.set_instance_id(Uuid::new_v4()); - executor.set_action_result( - exec.node_id, - serde_json::json!({"type": "ValueError", "message": "boom"}), - ); - - let step = executor.increment(&[exec.node_id]).expect("increment"); - let updates = step.updates.expect("durable updates"); - assert_eq!(updates.actions_done.len(), 1); - assert_eq!(updates.actions_done[0].execution_id, exec.node_id); - assert_eq!(updates.actions_done[0].attempt, 1); - assert_eq!( - updates.actions_done[0] - .result - .get("type") - .and_then(Value::as_str), - Some("ValueError") - ); - assert_eq!( - executor - .state() - .nodes - .get(&exec.node_id) - .map(|n| n.status.clone()), - Some(NodeStatus::Failed) - ); - } - - #[test] - fn test_increment_records_failed_attempt_before_retry() { - let mut dag = DAG::default(); - let action = action_node( - 
"action1", - "work", - HashMap::new(), - vec!["x".to_string()], - ActionNodeOptions { - policies: vec![ir::PolicyBracket { - kind: Some(ir::policy_bracket::Kind::Retry(ir::RetryPolicy { - max_retries: 2, - backoff: None, - exception_types: Vec::new(), - })), - }], - ..ActionNodeOptions::default() - }, - ); - dag.add_node(waymark_dag::DAGNode::ActionCall(action.clone())); - - let dag = Arc::new(dag); - let mut state = RunnerState::new(Some(dag.clone()), None, None, false); - let exec = state.queue_template_node(&action.id, None).expect("queue"); - - let mut executor = RunnerExecutor::new( - dag, - state, - HashMap::new(), - Some(Arc::new(MemoryBackend::new())), - ); - executor.set_instance_id(Uuid::new_v4()); - executor.set_action_result( - exec.node_id, - serde_json::json!({"type": "ValueError", "message": "retry me"}), - ); - - let first_step = executor - .increment(&[exec.node_id]) - .expect("first increment"); - assert_eq!(first_step.actions.len(), 1); - assert_eq!(first_step.actions[0].node_id, exec.node_id); - let first_updates = first_step.updates.expect("first durable updates"); - assert_eq!(first_updates.actions_done.len(), 1); - assert_eq!(first_updates.actions_done[0].attempt, 1); - assert_eq!( - executor - .state() - .nodes - .get(&exec.node_id) - .map(|n| n.status.clone()), - Some(NodeStatus::Running) - ); - assert_eq!( - executor - .state() - .nodes - .get(&exec.node_id) - .map(|n| n.action_attempt), - Some(2) - ); - - executor.set_action_result(exec.node_id, Value::String("ok".to_string())); - let second_step = executor - .increment(&[exec.node_id]) - .expect("second increment"); - let second_updates = second_step.updates.expect("second durable updates"); - assert_eq!(second_updates.actions_done.len(), 1); - assert_eq!(second_updates.actions_done[0].attempt, 2); - assert_eq!( - executor - .state() - .nodes - .get(&exec.node_id) - .map(|n| n.status.clone()), - Some(NodeStatus::Completed) - ); - } - - #[test] - fn 
test_rehydrate_replay_variables_consistent() { - let mut dag = DAG::default(); - let action1 = action_node( - "action1", - "fetch", - HashMap::new(), - vec!["x".to_string()], - ActionNodeOptions::default(), - ); - let assign = assignment_node( - "assign", - vec!["doubled".to_string()], - binary( - variable("x"), - ir::BinaryOperator::BinaryOpMul, - literal_int(2), - ), - ); - - dag.add_node(waymark_dag::DAGNode::ActionCall(action1.clone())); - dag.add_node(waymark_dag::DAGNode::Assignment(assign.clone())); - dag.add_edge(DAGEdge::state_machine( - action1.id.clone(), - assign.id.clone(), - )); - - let dag = Arc::new(dag); - let mut state = RunnerState::new(Some(dag.clone()), None, None, false); - let exec1 = state.queue_template_node(&action1.id, None).expect("queue"); - - let mut action_results = HashMap::new(); - action_results.insert(exec1.node_id, Value::Number(21.into())); - let mut executor = RunnerExecutor::new(dag.clone(), state, action_results, None); - executor.increment(&[exec1.node_id]).expect("increment"); - - let orig_replay = crate::waymark_core::runner::replay_variables( - executor.state(), - executor.action_results(), - ) - .expect("replay"); - - let (nodes_snap, edges_snap, results_snap) = - snapshot_state(executor.state(), executor.action_results()); - let rehydrated = create_rehydrated_executor(&dag, nodes_snap, edges_snap, results_snap); - - let rehy_replay = crate::waymark_core::runner::replay_variables( - rehydrated.state(), - rehydrated.action_results(), - ) - .expect("replay"); - assert_eq!(orig_replay.variables, rehy_replay.variables); - assert_eq!( - rehy_replay.variables.get("doubled"), - Some(&Value::Number(42.into())) - ); - } - - #[test] - fn test_rehydrate_completion_equivalent_across_ir_scenarios() { - let (linear_dag, linear_executor) = setup_linear_assignment_checkpoint(); - RehydrateBranchHarness::new(linear_dag, linear_executor, completion_action_result) - .run_and_assert(); - - let (sleep_dag, sleep_executor) = 
setup_sleep_resume_checkpoint(); - RehydrateBranchHarness::new(sleep_dag, sleep_executor, completion_action_result) - .run_and_assert(); - - let (spread_dag, spread_executor) = setup_spread_checkpoint(); - RehydrateBranchHarness::new(spread_dag, spread_executor, completion_action_result) - .run_and_assert(); - } - - #[test] - fn test_rehydrate_spread_action_with_aggregator() { - let mut dag = DAG::default(); - let initial_action = action_node( - "initial", - "get_items", - HashMap::new(), - vec!["items".to_string()], - ActionNodeOptions::default(), - ); - let spread_action = action_node( - "spread_action", - "process_item", - HashMap::from([("item".to_string(), variable("item"))]), - vec!["item_result".to_string()], - ActionNodeOptions { - spread_loop_var: Some("item".to_string()), - spread_collection_expr: Some(variable("items")), - aggregates_to: Some("aggregator".to_string()), - ..ActionNodeOptions::default() - }, - ); - let aggregator = - aggregator_node("aggregator", "spread_action", vec!["results".to_string()]); - - dag.add_node(waymark_dag::DAGNode::ActionCall(initial_action.clone())); - dag.add_node(waymark_dag::DAGNode::ActionCall(spread_action.clone())); - dag.add_node(waymark_dag::DAGNode::Aggregator(aggregator.clone())); - dag.add_edge(DAGEdge::state_machine( - initial_action.id.clone(), - spread_action.id.clone(), - )); - dag.add_edge(DAGEdge::state_machine( - spread_action.id.clone(), - aggregator.id.clone(), - )); - - let dag = Arc::new(dag); - let mut state = RunnerState::new(Some(dag.clone()), None, None, false); - let initial_exec = state - .queue_template_node(&initial_action.id, None) - .expect("queue"); - - let mut action_results = HashMap::new(); - action_results.insert( - initial_exec.node_id, - Value::Array(vec![1.into(), 2.into(), 3.into()]), - ); - let mut executor = RunnerExecutor::new(dag.clone(), state, action_results, None); - - let step1 = executor - .increment(&[initial_exec.node_id]) - .expect("increment"); - 
assert_eq!(step1.actions.len(), 3); - - let (nodes_snap, edges_snap, results_snap) = - snapshot_state(executor.state(), executor.action_results()); - let rehydrated = create_rehydrated_executor(&dag, nodes_snap, edges_snap, results_snap); - - compare_executor_states(&executor, &rehydrated); - let action_nodes: Vec<_> = executor - .state() - .nodes - .values() - .filter(|node| { - node.is_action_call() && node.template_id.as_deref() == Some(&spread_action.id) - }) - .collect(); - assert_eq!(action_nodes.len(), 3); - for action_node in action_nodes { - let rehy_node = rehydrated.state().nodes.get(&action_node.node_id).unwrap(); - assert_eq!(rehy_node.node_type, action_node.node_type); - assert_eq!(rehy_node.status, action_node.status); - } - } - - #[test] - fn test_rehydrate_full_spread_execution() { - let mut dag = DAG::default(); - let initial_action = action_node( - "initial", - "get_items", - HashMap::new(), - vec!["items".to_string()], - ActionNodeOptions::default(), - ); - let spread_action = action_node( - "spread_action", - "double", - HashMap::from([("value".to_string(), variable("item"))]), - vec!["item_result".to_string()], - ActionNodeOptions { - spread_loop_var: Some("item".to_string()), - spread_collection_expr: Some(variable("items")), - aggregates_to: Some("aggregator".to_string()), - ..ActionNodeOptions::default() - }, - ); - let aggregator = - aggregator_node("aggregator", "spread_action", vec!["results".to_string()]); - - dag.add_node(waymark_dag::DAGNode::ActionCall(initial_action.clone())); - dag.add_node(waymark_dag::DAGNode::ActionCall(spread_action.clone())); - dag.add_node(waymark_dag::DAGNode::Aggregator(aggregator.clone())); - dag.add_edge(DAGEdge::state_machine( - initial_action.id.clone(), - spread_action.id.clone(), - )); - dag.add_edge(DAGEdge::state_machine( - spread_action.id.clone(), - aggregator.id.clone(), - )); - - let dag = Arc::new(dag); - let mut state = RunnerState::new(Some(dag.clone()), None, None, false); - let initial_exec 
= state - .queue_template_node(&initial_action.id, None) - .expect("queue"); - - let mut action_results = HashMap::new(); - action_results.insert( - initial_exec.node_id, - Value::Array(vec![10.into(), 20.into()]), - ); - let mut executor = RunnerExecutor::new(dag.clone(), state, action_results.clone(), None); - - let step1 = executor - .increment(&[initial_exec.node_id]) - .expect("increment"); - let spread_nodes = step1.actions; - assert_eq!(spread_nodes.len(), 2); - - let (nodes_snap, edges_snap, results_snap) = - snapshot_state(executor.state(), executor.action_results()); - let rehydrated = create_rehydrated_executor(&dag, nodes_snap, edges_snap, results_snap); - compare_executor_states(&executor, &rehydrated); - - for (idx, node) in spread_nodes.iter().enumerate() { - executor.set_action_result(node.node_id, Value::Number(((idx + 1) * 100).into())); - } - - let _step2 = executor - .increment(&spread_nodes.iter().map(|n| n.node_id).collect::>()) - .expect("increment"); - - let (nodes_snap, edges_snap, results_snap) = - snapshot_state(executor.state(), executor.action_results()); - let rehydrated = create_rehydrated_executor(&dag, nodes_snap, edges_snap, results_snap); - compare_executor_states(&executor, &rehydrated); - - let agg_nodes: Vec<_> = rehydrated - .state() - .nodes - .values() - .filter(|node| node.template_id.as_deref() == Some(&aggregator.id)) - .collect(); - assert_eq!(agg_nodes.len(), 1); - assert_eq!(agg_nodes[0].status, NodeStatus::Completed); - assert!(agg_nodes[0].assignments.contains_key("results")); - } - - #[test] - fn test_rehydrate_timeline_ordering_preserved() { - let mut dag = DAG::default(); - let mut actions = Vec::new(); - for i in 0..4 { - actions.push(action_node( - &format!("action{i}"), - &format!("step{i}"), - HashMap::new(), - vec![format!("x{i}")], - ActionNodeOptions::default(), - )); - } - for action in &actions { - dag.add_node(waymark_dag::DAGNode::ActionCall(action.clone())); - } - for i in 0..actions.len() - 1 { - 
dag.add_edge(DAGEdge::state_machine( - actions[i].id.clone(), - actions[i + 1].id.clone(), - )); - } - - let dag = Arc::new(dag); - let mut state = RunnerState::new(Some(dag.clone()), None, None, false); - let mut exec_nodes: Vec = Vec::new(); - exec_nodes.push( - state - .queue_template_node(&actions[0].id, None) - .expect("queue"), - ); - let mut executor = RunnerExecutor::new(dag.clone(), state, HashMap::new(), None); - - for i in 0..3 { - executor.set_action_result( - exec_nodes.last().unwrap().node_id, - Value::Number((i * 10).into()), - ); - let step = executor - .increment(&[exec_nodes.last().unwrap().node_id]) - .expect("increment"); - if !step.actions.is_empty() { - exec_nodes.push(step.actions[0].clone()); - } - } - - let (nodes_snap, edges_snap, results_snap) = - snapshot_state(executor.state(), executor.action_results()); - let rehydrated = create_rehydrated_executor(&dag, nodes_snap, edges_snap, results_snap); - - let orig_timeline = executor.state().timeline.clone(); - let rehy_timeline = rehydrated.state().timeline.clone(); - assert_eq!(orig_timeline.len(), rehy_timeline.len()); - assert_eq!( - orig_timeline.iter().collect::>(), - rehy_timeline.iter().collect::>() - ); - } - - #[test] - fn test_rehydrate_ready_queue_rebuilt_for_running_actions() { - let mut dag = DAG::default(); - let action1 = action_node( - "action1", - "first", - HashMap::new(), - vec!["x".to_string()], - ActionNodeOptions::default(), - ); - let action2 = action_node( - "action2", - "second", - HashMap::new(), - vec!["y".to_string()], - ActionNodeOptions::default(), - ); - - dag.add_node(waymark_dag::DAGNode::ActionCall(action1.clone())); - dag.add_node(waymark_dag::DAGNode::ActionCall(action2.clone())); - dag.add_edge(DAGEdge::state_machine( - action1.id.clone(), - action2.id.clone(), - )); - - let dag = Arc::new(dag); - let mut state = RunnerState::new(Some(dag.clone()), None, None, false); - let exec1 = state.queue_template_node(&action1.id, None).expect("queue"); - - let mut 
action_results = HashMap::new(); - action_results.insert(exec1.node_id, Value::Number(50.into())); - let mut executor = RunnerExecutor::new(dag.clone(), state, action_results, None); - let step = executor.increment(&[exec1.node_id]).expect("increment"); - let exec2 = step.actions[0].clone(); - - let (nodes_snap, edges_snap, results_snap) = - snapshot_state(executor.state(), executor.action_results()); - let rehydrated = create_rehydrated_executor(&dag, nodes_snap, edges_snap, results_snap); - - let queued_nodes: Vec<_> = rehydrated - .state() - .nodes - .values() - .filter(|node| node.status == NodeStatus::Queued) - .collect(); - assert!(queued_nodes.is_empty()); - let running_nodes: Vec<_> = rehydrated - .state() - .nodes - .values() - .filter(|node| node.status == NodeStatus::Running) - .collect(); - assert_eq!(running_nodes.len(), 1); - assert_eq!(running_nodes[0].node_id, exec2.node_id); - assert!( - rehydrated.state().ready_queue.is_empty(), - "rehydration should not requeue running action nodes" - ); - } -} diff --git a/crates/waymark/src/waymark_core/runner/expression_evaluator.rs b/crates/waymark/src/waymark_core/runner/expression_evaluator.rs deleted file mode 100644 index 96d908e2..00000000 --- a/crates/waymark/src/waymark_core/runner/expression_evaluator.rs +++ /dev/null @@ -1,1058 +0,0 @@ -use std::cell::RefCell; -use std::collections::{HashMap, HashSet}; -use std::rc::Rc; - -use serde_json::Value; -use uuid::Uuid; - -use crate::messages::ast as ir; -use crate::observability::obs; -use crate::waymark_core::runner::state::{ - ActionCallSpec, ActionResultValue, BinaryOpValue, DictEntryValue, DictValue, DotValue, - FunctionCallValue, IndexValue, ListValue, LiteralValue, UnaryOpValue, VariableValue, - literal_value, -}; -use crate::waymark_core::runner::value_visitor::{ValueExpr, ValueExprEvaluator}; -use waymark_dag::{DAGEdge, EdgeType}; - -use super::{RunnerExecutor, RunnerExecutorError}; - -impl RunnerExecutor { - /// Convert a pure IR expression into a 
ValueExpr without side effects. - pub(super) fn expr_to_value(expr: &ir::Expr) -> Result { - match expr.kind.as_ref() { - Some(ir::expr::Kind::Literal(lit)) => Ok(ValueExpr::Literal(LiteralValue { - value: literal_value(lit), - })), - Some(ir::expr::Kind::Variable(var)) => Ok(ValueExpr::Variable(VariableValue { - name: var.name.clone(), - })), - Some(ir::expr::Kind::BinaryOp(op)) => { - let left = op - .left - .as_ref() - .ok_or_else(|| RunnerExecutorError("binary op missing left".to_string()))?; - let right = op - .right - .as_ref() - .ok_or_else(|| RunnerExecutorError("binary op missing right".to_string()))?; - Ok(ValueExpr::BinaryOp(BinaryOpValue { - left: Box::new(Self::expr_to_value(left)?), - op: op.op, - right: Box::new(Self::expr_to_value(right)?), - })) - } - Some(ir::expr::Kind::UnaryOp(op)) => { - let operand = op - .operand - .as_ref() - .ok_or_else(|| RunnerExecutorError("unary op missing operand".to_string()))?; - Ok(ValueExpr::UnaryOp(UnaryOpValue { - op: op.op, - operand: Box::new(Self::expr_to_value(operand)?), - })) - } - Some(ir::expr::Kind::List(list)) => { - let mut elements = Vec::new(); - for item in &list.elements { - elements.push(Self::expr_to_value(item)?); - } - Ok(ValueExpr::List(ListValue { elements })) - } - Some(ir::expr::Kind::Dict(dict_expr)) => { - let mut entries = Vec::new(); - for entry in &dict_expr.entries { - let key = entry - .key - .as_ref() - .ok_or_else(|| RunnerExecutorError("dict entry missing key".to_string()))?; - let value = entry.value.as_ref().ok_or_else(|| { - RunnerExecutorError("dict entry missing value".to_string()) - })?; - entries.push(DictEntryValue { - key: Self::expr_to_value(key)?, - value: Self::expr_to_value(value)?, - }); - } - Ok(ValueExpr::Dict(DictValue { entries })) - } - Some(ir::expr::Kind::Index(index)) => { - let object = index.object.as_ref().ok_or_else(|| { - RunnerExecutorError("index access missing object".to_string()) - })?; - let index_expr = index - .index - .as_ref() - .ok_or_else(|| 
RunnerExecutorError("index access missing index".to_string()))?; - Ok(ValueExpr::Index(IndexValue { - object: Box::new(Self::expr_to_value(object)?), - index: Box::new(Self::expr_to_value(index_expr)?), - })) - } - Some(ir::expr::Kind::Dot(dot)) => { - let object = dot - .object - .as_ref() - .ok_or_else(|| RunnerExecutorError("dot access missing object".to_string()))?; - Ok(ValueExpr::Dot(DotValue { - object: Box::new(Self::expr_to_value(object)?), - attribute: dot.attribute.clone(), - })) - } - Some(ir::expr::Kind::FunctionCall(call)) => { - let mut args = Vec::new(); - for arg in &call.args { - args.push(Self::expr_to_value(arg)?); - } - let mut kwargs = HashMap::new(); - for kw in &call.kwargs { - if let Some(value) = &kw.value { - kwargs.insert(kw.name.clone(), Self::expr_to_value(value)?); - } - } - let global_fn = if call.global_function != 0 { - Some(call.global_function) - } else { - None - }; - Ok(ValueExpr::FunctionCall(FunctionCallValue { - name: call.name.clone(), - args, - kwargs, - global_function: global_fn, - })) - } - Some( - ir::expr::Kind::ActionCall(_) - | ir::expr::Kind::ParallelExpr(_) - | ir::expr::Kind::SpreadExpr(_), - ) => Err(RunnerExecutorError( - "action/spread calls not allowed in guard expressions".to_string(), - )), - None => Ok(ValueExpr::Literal(LiteralValue { value: Value::Null })), - } - } - - /// Evaluate a guard expression using current symbolic assignments. - pub(super) fn evaluate_guard( - &self, - expr: Option<&ir::Expr>, - ) -> Result { - let expr = match expr { - Some(expr) => expr, - None => return Ok(false), - }; - let value_expr = self.state().materialize_value(Self::expr_to_value(expr)?); - let result = self.evaluate_value_expr(&value_expr)?; - Ok(is_truthy(&result)) - } - - /// Resolve an action's symbolic kwargs to concrete Python values. - /// - /// Example: - /// - spec.kwargs={"value": VariableValue("x")} - /// - with x assigned to LiteralValue(10), returns {"value": 10}. 
- #[obs] - pub fn resolve_action_kwargs( - &self, - node_id: Uuid, - action: &ActionCallSpec, - ) -> Result, RunnerExecutorError> { - let mut resolved = HashMap::new(); - for (name, expr) in &action.kwargs { - resolved.insert( - name.clone(), - self.evaluate_value_expr_for_node(expr, Some(node_id))?, - ); - } - Ok(resolved) - } - - /// Evaluate a ValueExpr into a concrete Python value. - #[obs] - pub(super) fn evaluate_value_expr( - &self, - expr: &ValueExpr, - ) -> Result { - self.evaluate_value_expr_for_node(expr, None) - } - - fn evaluate_value_expr_for_node( - &self, - expr: &ValueExpr, - current_node_id: Option, - ) -> Result { - let stack = Rc::new(RefCell::new(HashSet::new())); - let resolve_variable = { - let stack = stack.clone(); - let this = self; - move |name: &str| { - this.evaluate_variable_with_context(current_node_id, name, stack.clone()) - } - }; - let resolve_action_result = { - let this = self; - move |value: &ActionResultValue| this.resolve_action_result(value) - }; - let resolve_function_call = { - let this = self; - move |value: &FunctionCallValue, args, kwargs| { - this.evaluate_function_call(value, args, kwargs) - } - }; - let apply_binary = |op, left, right| Self::apply_binary(op, left, right); - let apply_unary = |op, operand| Self::apply_unary(op, operand); - let error_factory = |message: &str| RunnerExecutorError(message.to_string()); - let evaluator = ValueExprEvaluator::new( - &resolve_variable, - &resolve_action_result, - &resolve_function_call, - &apply_binary, - &apply_unary, - &error_factory, - ); - evaluator.visit(expr) - } - - fn find_variable_source_node(&self, current_node_id: Uuid, name: &str) -> Option { - let timeline_index: HashMap = self - .state() - .timeline - .iter() - .enumerate() - .map(|(idx, node_id)| (*node_id, idx)) - .collect(); - - self.state() - .edges - .iter() - .filter(|edge| edge.edge_type == EdgeType::DataFlow && edge.target == current_node_id) - .map(|edge| edge.source) - .filter(|source| { - self.state() 
- .nodes - .get(source) - .map(|node| node.assignments.contains_key(name)) - .unwrap_or(false) - }) - .max_by_key(|source| timeline_index.get(source).copied().unwrap_or(0)) - } - - fn evaluate_variable_with_context( - &self, - current_node_id: Option, - name: &str, - stack: Rc>>, - ) -> Result { - let node_id = current_node_id - .and_then(|node_id| self.find_variable_source_node(node_id, name)) - .or_else(|| self.state().latest_assignment(name)) - .ok_or_else(|| RunnerExecutorError(format!("variable not found: {name}")))?; - self.evaluate_assignment(node_id, name, stack) - } - - pub(super) fn evaluate_assignment( - &self, - node_id: Uuid, - target: &str, - stack: Rc>>, - ) -> Result { - let key = (node_id, target.to_string()); - if let Some(value) = self.eval_cache_get(&key) { - return Ok(value); - } - if stack.borrow().contains(&key) { - return Err(RunnerExecutorError(format!( - "recursive assignment detected for {target}" - ))); - } - - let node = self - .state() - .nodes - .get(&node_id) - .ok_or_else(|| RunnerExecutorError(format!("missing assignment for {target}")))?; - let expr = node - .assignments - .get(target) - .ok_or_else(|| RunnerExecutorError(format!("missing assignment for {target}")))?; - - stack.borrow_mut().insert(key.clone()); - let resolve_variable = { - let stack = stack.clone(); - let this = self; - move |name: &str| { - this.evaluate_variable_with_context(Some(node_id), name, stack.clone()) - } - }; - let resolve_action_result = { - let this = self; - move |value: &ActionResultValue| this.resolve_action_result(value) - }; - let resolve_function_call = { - let this = self; - move |value: &FunctionCallValue, args, kwargs| { - this.evaluate_function_call(value, args, kwargs) - } - }; - let apply_binary = |op, left, right| Self::apply_binary(op, left, right); - let apply_unary = |op, operand| Self::apply_unary(op, operand); - let error_factory = |message: &str| RunnerExecutorError(message.to_string()); - let evaluator = ValueExprEvaluator::new( - 
&resolve_variable, - &resolve_action_result, - &resolve_function_call, - &apply_binary, - &apply_unary, - &error_factory, - ); - let value = evaluator.visit(expr)?; - stack.borrow_mut().remove(&key); - self.eval_cache_insert(key, value.clone()); - Ok(value) - } - - pub(super) fn resolve_action_result( - &self, - expr: &ActionResultValue, - ) -> Result { - let value = self - .action_results() - .get(&expr.node_id) - .cloned() - .ok_or_else(|| { - RunnerExecutorError(format!("missing action result for {}", expr.node_id)) - })?; - if let Some(idx) = expr.result_index { - if let Value::Array(items) = value { - let idx = idx as usize; - return items.get(idx).cloned().ok_or_else(|| { - RunnerExecutorError(format!( - "action result for {} has no index {}", - expr.node_id, idx - )) - }); - } - return Err(RunnerExecutorError(format!( - "action result for {} has no index {}", - expr.node_id, idx - ))); - } - Ok(value) - } - - pub(super) fn evaluate_function_call( - &self, - expr: &FunctionCallValue, - args: Vec, - kwargs: HashMap, - ) -> Result { - if let Some(global_fn) = expr.global_function - && global_fn != ir::GlobalFunction::Unspecified as i32 - { - return self.evaluate_global_function(global_fn, args, kwargs); - } - Err(RunnerExecutorError(format!( - "cannot evaluate non-global function call: {}", - expr.name - ))) - } - - pub(super) fn evaluate_global_function( - &self, - global_function: i32, - args: Vec, - kwargs: HashMap, - ) -> Result { - let error = executor_error; - match ir::GlobalFunction::try_from(global_function).ok() { - Some(ir::GlobalFunction::Range) => Ok(range_from_args(&args).into()), - Some(ir::GlobalFunction::Len) => { - if let Some(first) = args.first() { - return Ok(Value::Number(len_of_value(first, error)?)); - } - if let Some(items) = kwargs.get("items") { - return Ok(Value::Number(len_of_value(items, error)?)); - } - Err(RunnerExecutorError("len() missing argument".to_string())) - } - Some(ir::GlobalFunction::Enumerate) => { - let items = if 
let Some(first) = args.first() { - first.clone() - } else if let Some(items) = kwargs.get("items") { - items.clone() - } else { - return Err(RunnerExecutorError( - "enumerate() missing argument".to_string(), - )); - }; - let list = match items { - Value::Array(items) => items, - _ => return Err(RunnerExecutorError("enumerate() expects list".to_string())), - }; - let pairs: Vec = list - .into_iter() - .enumerate() - .map(|(idx, item)| Value::Array(vec![Value::Number((idx as i64).into()), item])) - .collect(); - Ok(Value::Array(pairs)) - } - Some(ir::GlobalFunction::Isexception) => { - if let Some(first) = args.first() { - return Ok(Value::Bool(is_exception_value(first))); - } - if let Some(value) = kwargs.get("value") { - return Ok(Value::Bool(is_exception_value(value))); - } - Err(RunnerExecutorError( - "isexception() missing argument".to_string(), - )) - } - Some(ir::GlobalFunction::Unspecified) | None => Err(RunnerExecutorError( - "global function unspecified".to_string(), - )), - } - } - - pub(super) fn apply_binary( - op: i32, - left: Value, - right: Value, - ) -> Result { - let error = executor_error; - match ir::BinaryOperator::try_from(op).ok() { - Some(ir::BinaryOperator::BinaryOpOr) => { - if is_truthy(&left) { - Ok(left) - } else { - Ok(right) - } - } - Some(ir::BinaryOperator::BinaryOpAnd) => { - if is_truthy(&left) { - Ok(right) - } else { - Ok(left) - } - } - Some(ir::BinaryOperator::BinaryOpEq) => Ok(Value::Bool(left == right)), - Some(ir::BinaryOperator::BinaryOpNe) => Ok(Value::Bool(left != right)), - Some(ir::BinaryOperator::BinaryOpLt) => { - compare_values(left, right, |a, b| a < b, error) - } - Some(ir::BinaryOperator::BinaryOpLe) => { - compare_values(left, right, |a, b| a <= b, error) - } - Some(ir::BinaryOperator::BinaryOpGt) => { - compare_values(left, right, |a, b| a > b, error) - } - Some(ir::BinaryOperator::BinaryOpGe) => { - compare_values(left, right, |a, b| a >= b, error) - } - Some(ir::BinaryOperator::BinaryOpIn) => 
Ok(Value::Bool(value_in(&left, &right))), - Some(ir::BinaryOperator::BinaryOpNotIn) => Ok(Value::Bool(!value_in(&left, &right))), - Some(ir::BinaryOperator::BinaryOpAdd) => add_values(left, right, error), - Some(ir::BinaryOperator::BinaryOpSub) => { - numeric_op(left, right, |a, b| a - b, true, error) - } - Some(ir::BinaryOperator::BinaryOpMul) => { - numeric_op(left, right, |a, b| a * b, true, error) - } - Some(ir::BinaryOperator::BinaryOpDiv) => { - numeric_op(left, right, |a, b| a / b, false, error) - } - Some(ir::BinaryOperator::BinaryOpFloorDiv) => { - numeric_op(left, right, |a, b| (a / b).floor(), true, error) - } - Some(ir::BinaryOperator::BinaryOpMod) => { - numeric_op(left, right, |a, b| a % b, true, error) - } - Some(ir::BinaryOperator::BinaryOpUnspecified) | None => Err(RunnerExecutorError( - "binary operator unspecified".to_string(), - )), - } - } - - pub(super) fn apply_unary(op: i32, operand: Value) -> Result { - match ir::UnaryOperator::try_from(op).ok() { - Some(ir::UnaryOperator::UnaryOpNeg) => { - if let Some(value) = int_value(&operand) { - return Ok(Value::Number((-value).into())); - } - match operand.as_f64() { - Some(value) => Ok(Value::Number( - serde_json::Number::from_f64(-value) - .unwrap_or_else(|| serde_json::Number::from(0)), - )), - None => Err(RunnerExecutorError("unary neg expects number".to_string())), - } - } - Some(ir::UnaryOperator::UnaryOpNot) => Ok(Value::Bool(!is_truthy(&operand))), - Some(ir::UnaryOperator::UnaryOpUnspecified) | None => Err(RunnerExecutorError( - "unary operator unspecified".to_string(), - )), - } - } - - pub(super) fn exception_matches(&self, edge: &DAGEdge, exception_value: &Value) -> bool { - let exception_types = match &edge.exception_types { - Some(types) => types, - None => return false, - }; - if exception_types.is_empty() { - return true; - } - let exc_name = match exception_value { - Value::Object(map) => map - .get("type") - .and_then(|value| value.as_str()) - .map(|value| value.to_string()), - _ 
=> None, - }; - if let Some(name) = exc_name { - return exception_types.iter().any(|value| value == &name); - } - false - } -} - -fn executor_error(message: &'static str) -> RunnerExecutorError { - RunnerExecutorError(message.to_string()) -} - -pub(crate) fn int_value(value: &Value) -> Option { - value - .as_i64() - .or_else(|| value.as_u64().and_then(|value| i64::try_from(value).ok())) -} - -pub(crate) fn numeric_op( - left: Value, - right: Value, - op: impl Fn(f64, f64) -> f64, - prefer_int: bool, - error: fn(&'static str) -> E, -) -> Result { - let left_num = left - .as_f64() - .ok_or_else(|| error("numeric operation expects number"))?; - let right_num = right - .as_f64() - .ok_or_else(|| error("numeric operation expects number"))?; - let result = op(left_num, right_num); - if prefer_int && int_value(&left).is_some() && int_value(&right).is_some() && result.is_finite() - { - let rounded = result.round(); - if (result - rounded).abs() < 1e-9 - && rounded >= (i64::MIN as f64) - && rounded <= (i64::MAX as f64) - { - return Ok(Value::Number((rounded as i64).into())); - } - } - Ok(Value::Number( - serde_json::Number::from_f64(result).unwrap_or_else(|| serde_json::Number::from(0)), - )) -} - -pub(crate) fn add_values( - left: Value, - right: Value, - error: fn(&'static str) -> E, -) -> Result { - if let (Value::Array(mut left), Value::Array(right)) = (left.clone(), right.clone()) { - left.extend(right); - return Ok(Value::Array(left)); - } - if let (Some(left), Some(right)) = (left.as_str(), right.as_str()) { - return Ok(Value::String(format!("{left}{right}"))); - } - numeric_op(left, right, |a, b| a + b, true, error) -} - -pub(crate) fn compare_values( - left: Value, - right: Value, - op: impl Fn(f64, f64) -> bool, - error: fn(&'static str) -> E, -) -> Result { - let left = left - .as_f64() - .ok_or_else(|| error("comparison expects number"))?; - let right = right - .as_f64() - .ok_or_else(|| error("comparison expects number"))?; - Ok(Value::Bool(op(left, right))) -} 
- -pub(crate) fn value_in(value: &Value, container: &Value) -> bool { - match container { - Value::Array(items) => items.iter().any(|item| item == value), - Value::Object(map) => value - .as_str() - .map(|key| map.contains_key(key)) - .unwrap_or(false), - Value::String(text) => value - .as_str() - .map(|needle| text.contains(needle)) - .unwrap_or(false), - _ => false, - } -} - -pub(crate) fn is_truthy(value: &Value) -> bool { - match value { - Value::Null => false, - Value::Bool(value) => *value, - Value::Number(number) => number.as_f64().map(|value| value != 0.0).unwrap_or(false), - Value::String(value) => !value.is_empty(), - Value::Array(values) => !values.is_empty(), - Value::Object(map) => !map.is_empty(), - } -} - -pub(crate) fn is_exception_value(value: &Value) -> bool { - if let Value::Object(map) = value { - return map.contains_key("type") && map.contains_key("message"); - } - false -} - -pub(crate) fn len_of_value( - value: &Value, - error: fn(&'static str) -> E, -) -> Result { - let len = match value { - Value::Array(items) => items.len() as i64, - Value::String(text) => text.len() as i64, - Value::Object(map) => map.len() as i64, - _ => return Err(error("len() expects list, string, or dict")), - }; - Ok(len.into()) -} - -pub(crate) fn range_from_args(args: &[Value]) -> Vec { - let mut start = 0i64; - let mut end = 0i64; - let mut step = 1i64; - if args.len() == 1 { - end = args[0].as_i64().unwrap_or(0); - } else if args.len() >= 2 { - start = args[0].as_i64().unwrap_or(0); - end = args[1].as_i64().unwrap_or(0); - if args.len() >= 3 { - step = args[2].as_i64().unwrap_or(1); - } - } - if step == 0 { - return Vec::new(); - } - let mut values = Vec::new(); - if step > 0 { - let mut current = start; - while current < end { - values.push(Value::Number(current.into())); - current += step; - } - } else { - let mut current = start; - while current > end { - values.push(Value::Number(current.into())); - current += step; - } - } - values -} - -#[cfg(test)] -mod 
tests { - use std::cell::RefCell; - use std::collections::{HashMap, HashSet}; - use std::rc::Rc; - use std::sync::Arc; - - use uuid::Uuid; - - use super::*; - use crate::messages::ast as ir; - use crate::waymark_core::ir_parser::IRParser; - use crate::waymark_core::runner::RunnerState; - use crate::waymark_core::runner::state::{ - ActionCallSpec, ActionResultValue, BinaryOpValue, FunctionCallValue, LiteralValue, - VariableValue, - }; - use crate::waymark_core::runner::value_visitor::ValueExpr; - use waymark_dag::{DAG, DAGEdge}; - - fn parse_expr(source: &str) -> ir::Expr { - IRParser::new(" ") - .parse_expr(source) - .expect("parse expression") - } - - fn literal_int(value: i64) -> ValueExpr { - ValueExpr::Literal(LiteralValue { - value: Value::Number(value.into()), - }) - } - - fn empty_executor() -> RunnerExecutor { - let dag = Arc::new(DAG::default()); - let state = RunnerState::new(Some(Arc::clone(&dag)), None, None, false); - RunnerExecutor::new(dag, state, HashMap::new(), None) - } - - fn executor_with_assignment(name: &str, value: ValueExpr) -> RunnerExecutor { - let dag = Arc::new(DAG::default()); - let mut state = RunnerState::new(Some(Arc::clone(&dag)), None, None, false); - state - .record_assignment_value( - vec![name.to_string()], - value, - None, - Some("test assignment".to_string()), - ) - .expect("record assignment"); - RunnerExecutor::new(dag, state, HashMap::new(), None) - } - - #[test] - fn test_expr_to_value_happy_path() { - let expr = parse_expr("x + 2"); - let value = RunnerExecutor::expr_to_value(&expr).expect("convert expression"); - match value { - ValueExpr::BinaryOp(binary) => { - assert!(matches!(*binary.left, ValueExpr::Variable(_))); - assert!(matches!(*binary.right, ValueExpr::Literal(_))); - } - other => panic!("expected binary op, got {other:?}"), - } - } - - #[test] - fn test_evaluate_guard_happy_path() { - let executor = executor_with_assignment("x", literal_int(2)); - let guard = parse_expr("x > 1"); - let result = executor - 
.evaluate_guard(Some(&guard)) - .expect("evaluate guard"); - assert!(result); - } - - #[test] - fn test_resolve_action_kwargs_happy_path() { - let executor = executor_with_assignment("x", literal_int(10)); - let action = ActionCallSpec { - action_name: "double".to_string(), - module_name: Some("tests".to_string()), - kwargs: HashMap::from([( - "value".to_string(), - ValueExpr::Variable(VariableValue { - name: "x".to_string(), - }), - )]), - }; - let resolved = executor - .resolve_action_kwargs(Uuid::new_v4(), &action) - .expect("resolve kwargs"); - assert_eq!(resolved.get("value"), Some(&Value::Number(10.into()))); - } - - #[test] - fn test_resolve_action_kwargs_uses_data_flow_for_self_referential_targets() { - let dag = Arc::new(DAG::default()); - let mut state = RunnerState::new(Some(Arc::clone(&dag)), None, None, false); - state - .record_assignment_value( - vec!["current".to_string()], - literal_int(0), - None, - Some("current = 0".to_string()), - ) - .expect("record current"); - let action_result = state - .queue_action( - "increment", - Some(vec!["current".to_string()]), - Some(HashMap::from([( - "value".to_string(), - ValueExpr::Variable(VariableValue { - name: "current".to_string(), - }), - )])), - None, - None, - ) - .expect("queue increment"); - let action_node = state - .nodes - .get(&action_result.node_id) - .expect("action node") - .clone(); - let action_spec = action_node.action.expect("action spec"); - - let executor = RunnerExecutor::new(dag, state, HashMap::new(), None); - let resolved = executor - .resolve_action_kwargs(action_result.node_id, &action_spec) - .expect("resolve kwargs"); - assert_eq!(resolved.get("value"), Some(&Value::Number(0.into()))); - } - - #[test] - fn test_evaluate_value_expr_happy_path() { - let executor = executor_with_assignment("x", literal_int(3)); - let expr = ValueExpr::BinaryOp(crate::waymark_core::runner::state::BinaryOpValue { - left: Box::new(ValueExpr::Variable(VariableValue { - name: "x".to_string(), - })), - op: 
ir::BinaryOperator::BinaryOpAdd as i32, - right: Box::new(literal_int(1)), - }); - let value = executor - .evaluate_value_expr(&expr) - .expect("evaluate value expression"); - assert_eq!(value, Value::Number(4.into())); - } - - #[test] - fn test_evaluate_variable_happy_path() { - let executor = executor_with_assignment("value", literal_int(5)); - let stack = Rc::new(RefCell::new(HashSet::new())); - let value = executor - .evaluate_variable_with_context(None, "value", stack) - .expect("evaluate variable"); - assert_eq!(value, Value::Number(5.into())); - } - - #[test] - fn test_evaluate_assignment_happy_path() { - let executor = executor_with_assignment("value", literal_int(9)); - let node_id = executor - .state() - .latest_assignment("value") - .expect("latest assignment"); - let stack = Rc::new(RefCell::new(HashSet::new())); - let value = executor - .evaluate_assignment(node_id, "value", stack) - .expect("evaluate assignment"); - assert_eq!(value, Value::Number(9.into())); - } - - #[test] - fn test_evaluate_assignment_uses_data_flow_for_self_referential_updates() { - let dag = Arc::new(DAG::default()); - let mut state = RunnerState::new(Some(Arc::clone(&dag)), None, None, false); - state - .record_assignment_value( - vec!["count".to_string()], - literal_int(0), - None, - Some("count = 0".to_string()), - ) - .expect("record initial count"); - state - .record_assignment_value( - vec!["count".to_string()], - ValueExpr::BinaryOp(BinaryOpValue { - left: Box::new(ValueExpr::Variable(VariableValue { - name: "count".to_string(), - })), - op: ir::BinaryOperator::BinaryOpAdd as i32, - right: Box::new(literal_int(1)), - }), - None, - Some("count = count + 1".to_string()), - ) - .expect("record updated count"); - - let executor = RunnerExecutor::new(dag, state, HashMap::new(), None); - let node_id = executor - .state() - .latest_assignment("count") - .expect("latest assignment"); - let stack = Rc::new(RefCell::new(HashSet::new())); - let value = executor - 
.evaluate_assignment(node_id, "count", stack) - .expect("evaluate self-referential assignment"); - assert_eq!(value, Value::Number(1.into())); - } - - #[test] - fn test_resolve_action_result_happy_path() { - let mut executor = empty_executor(); - let action_id = Uuid::new_v4(); - executor.set_action_result( - action_id, - Value::Array(vec![Value::Number(7.into()), Value::Number(8.into())]), - ); - let result = executor - .resolve_action_result(&ActionResultValue { - node_id: action_id, - action_name: "fetch".to_string(), - iteration_index: None, - result_index: Some(1), - }) - .expect("resolve action result"); - assert_eq!(result, Value::Number(8.into())); - } - - #[test] - fn test_evaluate_function_call_happy_path() { - let executor = empty_executor(); - let value = executor - .evaluate_function_call( - &FunctionCallValue { - name: "len".to_string(), - args: Vec::new(), - kwargs: HashMap::new(), - global_function: Some(ir::GlobalFunction::Len as i32), - }, - vec![Value::Array(vec![Value::Null, Value::Null])], - HashMap::new(), - ) - .expect("evaluate function call"); - assert_eq!(value, Value::Number(2.into())); - } - - #[test] - fn test_evaluate_global_function_happy_path() { - let executor = empty_executor(); - let value = executor - .evaluate_global_function( - ir::GlobalFunction::Range as i32, - vec![Value::Number(1.into()), Value::Number(4.into())], - HashMap::new(), - ) - .expect("evaluate global function"); - assert_eq!( - value, - Value::Array(vec![ - Value::Number(1.into()), - Value::Number(2.into()), - Value::Number(3.into()) - ]) - ); - } - - #[test] - fn test_apply_binary_happy_path() { - let value = RunnerExecutor::apply_binary( - ir::BinaryOperator::BinaryOpAdd as i32, - Value::Number(2.into()), - Value::Number(3.into()), - ) - .expect("apply binary"); - assert_eq!(value, Value::Number(5.into())); - } - - #[test] - fn test_apply_unary_happy_path() { - let value = - RunnerExecutor::apply_unary(ir::UnaryOperator::UnaryOpNot as i32, Value::Bool(true)) - 
.expect("apply unary"); - assert_eq!(value, Value::Bool(false)); - } - - #[test] - fn test_exception_matches_happy_path() { - let executor = empty_executor(); - let edge = DAGEdge::state_machine_with_exception("a", "b", vec!["ValueError".to_string()]); - let exception = serde_json::json!({ - "type": "ValueError", - "message": "boom", - }); - assert!(executor.exception_matches(&edge, &exception)); - } - - #[test] - fn test_executor_error_happy_path() { - let error = executor_error("hello"); - assert_eq!(error.0, "hello"); - } - - #[test] - fn test_int_value_happy_path() { - let value = Value::Number(7_u64.into()); - assert_eq!(int_value(&value), Some(7)); - } - - #[test] - fn test_numeric_op_happy_path() { - let value = numeric_op( - Value::Number(10.into()), - Value::Number(3.into()), - |a, b| a + b, - true, - executor_error, - ) - .expect("numeric op"); - assert_eq!(value, Value::Number(13.into())); - } - - #[test] - fn test_add_values_happy_path() { - let value = add_values( - Value::String("hello ".to_string()), - Value::String("world".to_string()), - executor_error, - ) - .expect("add values"); - assert_eq!(value, Value::String("hello world".to_string())); - } - - #[test] - fn test_compare_values_happy_path() { - let value = compare_values( - Value::Number(3.into()), - Value::Number(5.into()), - |a, b| a < b, - executor_error, - ) - .expect("compare values"); - assert_eq!(value, Value::Bool(true)); - } - - #[test] - fn test_value_in_happy_path() { - let container = Value::Array(vec![Value::Number(1.into()), Value::Number(2.into())]); - assert!(value_in(&Value::Number(2.into()), &container)); - } - - #[test] - fn test_is_truthy_happy_path() { - assert!(is_truthy(&Value::String("non-empty".to_string()))); - } - - #[test] - fn test_is_exception_value_happy_path() { - let value = serde_json::json!({ - "type": "RuntimeError", - "message": "bad", - }); - assert!(is_exception_value(&value)); - } - - #[test] - fn test_len_of_value_happy_path() { - let value = 
Value::Array(vec![Value::Null, Value::Null, Value::Null]); - let len = len_of_value(&value, executor_error).expect("length"); - assert_eq!(len.as_i64(), Some(3)); - } - - #[test] - fn test_range_from_args_happy_path() { - let values = range_from_args(&[ - Value::Number(0.into()), - Value::Number(5.into()), - Value::Number(2.into()), - ]); - assert_eq!( - values, - vec![ - Value::Number(0.into()), - Value::Number(2.into()), - Value::Number(4.into()) - ] - ); - } -} diff --git a/crates/waymark/src/waymark_core/runner/mod.rs b/crates/waymark/src/waymark_core/runner/mod.rs deleted file mode 100644 index 4e7a491d..00000000 --- a/crates/waymark/src/waymark_core/runner/mod.rs +++ /dev/null @@ -1,19 +0,0 @@ -//! Runner utilities. - -pub mod executor; -pub mod expression_evaluator; -pub mod replay; -pub(crate) mod retry; -pub mod state; -pub(crate) mod synthetic_exceptions; -pub mod value_visitor; - -pub use executor::{ - DurableUpdates, ExecutorStep, RunnerExecutor, RunnerExecutorError, SleepRequest, -}; -pub use replay::{ReplayError, ReplayResult, replay_action_kwargs, replay_variables}; -pub use state::{ - ActionCallSpec, ActionResultValue, ExecutionEdge, ExecutionNode, NodeStatus, RunnerState, - RunnerStateError, format_value, -}; -pub use value_visitor::ValueExpr; diff --git a/crates/waymark/src/waymark_core/runner/replay.rs b/crates/waymark/src/waymark_core/runner/replay.rs deleted file mode 100644 index 246caace..00000000 --- a/crates/waymark/src/waymark_core/runner/replay.rs +++ /dev/null @@ -1,658 +0,0 @@ -//! Replay variable values from a runner state snapshot. 
- -use std::cell::RefCell; -use std::collections::{HashMap, HashSet}; -use std::rc::Rc; - -use serde_json::Value; -use uuid::Uuid; - -use crate::messages::ast as ir; -use crate::waymark_core::runner::expression_evaluator::{ - add_values, compare_values, int_value, is_exception_value, is_truthy, len_of_value, numeric_op, - range_from_args, value_in, -}; -use crate::waymark_core::runner::state::{ActionResultValue, FunctionCallValue, RunnerState}; -use crate::waymark_core::runner::value_visitor::{ValueExpr, ValueExprEvaluator}; -use waymark_dag::{EXCEPTION_SCOPE_VAR, EdgeType}; - -/// Raised when replay cannot reconstruct variable values. -#[derive(Debug, thiserror::Error)] -#[error("{0}")] -pub struct ReplayError(pub String); - -#[derive(Clone, Debug)] -pub struct ReplayResult { - pub variables: HashMap, -} - -/// Replay variable values from a runner state snapshot. -pub struct ReplayEngine<'a> { - state: &'a RunnerState, - action_results: &'a HashMap, - cache: RefCell>, - timeline: Vec, - index: HashMap, - incoming_data: HashMap>, -} - -impl<'a> ReplayEngine<'a> { - /// Prepare replay state derived from a runner snapshot. - /// - /// We precompute a timeline index and incoming data-flow map so lookups are - /// O(1) during evaluation. - /// - /// Example: - /// - timeline = [node_a, node_b] - /// - index[node_b] == 1 and incoming data edges are pre-sorted. - pub fn new(state: &'a RunnerState, action_results: &'a HashMap) -> Self { - let timeline = if state.timeline.is_empty() { - state.nodes.keys().cloned().collect() - } else { - state.timeline.clone() - }; - let index = timeline - .iter() - .enumerate() - .map(|(idx, node_id)| (*node_id, idx)) - .collect(); - let incoming_data = build_incoming_data_map(state, &index); - Self { - state, - action_results, - cache: RefCell::new(HashMap::new()), - timeline, - index, - incoming_data, - } - } - - /// Replay variable values by scanning assignments from newest to oldest. 
- /// - /// We walk the timeline in reverse to capture the latest assignment for each - /// variable and skip older definitions once a value is known. This mirrors - /// "last write wins" semantics while avoiding redundant evaluation work. - /// - /// Example: - /// - x = 1 - /// - x = 2 - /// Reverse traversal yields x=2 without evaluating the older assignment. - pub fn replay_variables(&self) -> Result { - let mut variables: HashMap = HashMap::new(); - for node_id in self.timeline.iter().rev() { - let node = match self.state.nodes.get(node_id) { - Some(node) => node, - None => continue, - }; - if node.assignments.is_empty() { - continue; - } - for target in node.assignments.keys() { - if variables.contains_key(target) { - continue; - } - let value = self.evaluate_assignment( - *node_id, - target, - Rc::new(RefCell::new(HashSet::new())), - )?; - variables.insert(target.clone(), value); - } - } - Ok(ReplayResult { variables }) - } - - /// Replay concrete kwargs for an action execution node. - /// - /// This resolves symbolic kwargs from the action node in the context of - /// the node's incoming data-flow edges. - pub fn replay_action_kwargs( - &self, - node_id: Uuid, - ) -> Result, ReplayError> { - let node = self - .state - .nodes - .get(&node_id) - .ok_or_else(|| ReplayError(format!("action node not found: {node_id}")))?; - let action = node - .action - .as_ref() - .ok_or_else(|| ReplayError(format!("node is not an action call: {node_id}")))?; - let mut resolved = HashMap::new(); - for (name, expr) in &action.kwargs { - let value = self.evaluate_value_expr_at_node(node_id, expr)?; - resolved.insert(name.clone(), value); - } - Ok(resolved) - } - - /// Evaluate a single assignment expression with cycle detection. - /// - /// We memoize evaluated (node, target) pairs and guard against recursive - /// references by tracking a stack of active evaluations. - /// - /// Example: - /// - x = y + 1 - /// - y = 2 - /// Evaluating x resolves y first, then computes x. 
- fn evaluate_assignment( - &self, - node_id: Uuid, - target: &str, - stack: Rc>>, - ) -> Result { - let key = (node_id, target.to_string()); - if let Some(value) = self.cache.borrow().get(&key) { - return Ok(value.clone()); - } - if stack.borrow().contains(&key) { - return Err(ReplayError(format!( - "recursive assignment detected for {target} in {node_id}" - ))); - } - - let node = - self.state.nodes.get(&node_id).ok_or_else(|| { - ReplayError(format!("missing assignment for {target} in {node_id}")) - })?; - let expr = node - .assignments - .get(target) - .ok_or_else(|| ReplayError(format!("missing assignment for {target} in {node_id}")))?; - - stack.borrow_mut().insert(key.clone()); - let resolve_variable = { - let stack = stack.clone(); - let this = self; - move |name: &str| this.resolve_variable(node_id, name, stack.clone()) - }; - let resolve_action_result = { - let this = self; - move |value: &ActionResultValue| this.resolve_action_result(value) - }; - let resolve_function_call = { - let this = self; - move |value: &FunctionCallValue, args, kwargs| { - this.evaluate_function_call(value, args, kwargs) - } - }; - let apply_binary = |op, left, right| apply_binary(op, left, right); - let apply_unary = |op, operand| apply_unary(op, operand); - let error_factory = |message: &str| ReplayError(message.to_string()); - let evaluator = ValueExprEvaluator::new( - &resolve_variable, - &resolve_action_result, - &resolve_function_call, - &apply_binary, - &apply_unary, - &error_factory, - ); - let value = evaluator.visit(expr)?; - stack.borrow_mut().remove(&key); - self.cache.borrow_mut().insert(key, value.clone()); - Ok(value) - } - - fn evaluate_value_expr_at_node( - &self, - node_id: Uuid, - expr: &ValueExpr, - ) -> Result { - let stack = Rc::new(RefCell::new(HashSet::new())); - let resolve_variable = { - let stack = stack.clone(); - let this = self; - move |name: &str| this.resolve_variable(node_id, name, stack.clone()) - }; - let resolve_action_result = { - let this = 
self; - move |value: &ActionResultValue| this.resolve_action_result(value) - }; - let resolve_function_call = { - let this = self; - move |value: &FunctionCallValue, args, kwargs| { - this.evaluate_function_call(value, args, kwargs) - } - }; - let apply_binary = |op, left, right| apply_binary(op, left, right); - let apply_unary = |op, operand| apply_unary(op, operand); - let error_factory = |message: &str| ReplayError(message.to_string()); - let evaluator = ValueExprEvaluator::new( - &resolve_variable, - &resolve_action_result, - &resolve_function_call, - &apply_binary, - &apply_unary, - &error_factory, - ); - evaluator.visit(expr) - } - - /// Resolve a variable reference via data-flow edges. - /// - /// This walks to the closest upstream definition and replays that - /// assignment for the requested variable. - /// - /// Example: - /// - action_1 defines x - /// - assign_2 uses x - /// Resolving x from assign_2 evaluates action_1's assignment. - fn resolve_variable( - &self, - current_node_id: Uuid, - name: &str, - stack: Rc>>, - ) -> Result { - let mut source_node_id = self.find_variable_source_node(current_node_id, name); - if source_node_id.is_none() && name == EXCEPTION_SCOPE_VAR { - source_node_id = self.state.latest_assignment(name); - } - let source_node_id = source_node_id.ok_or_else(|| { - ReplayError(format!("variable not found via data-flow edges: {name}")) - })?; - self.evaluate_assignment(source_node_id, name, stack) - } - - /// Find the nearest upstream node that defines the variable. - /// - /// We consult pre-sorted incoming data edges and ignore sources that are - /// later in the timeline than the current node. - /// - /// Example: - /// - if node_b comes after node_a, node_b cannot be a source for node_a. 
- fn find_variable_source_node(&self, current_node_id: Uuid, name: &str) -> Option { - let sources = self.incoming_data.get(¤t_node_id)?; - let current_idx = self - .index - .get(¤t_node_id) - .copied() - .unwrap_or(self.index.len()); - for source_id in sources { - if self.index.get(source_id).copied().unwrap_or(0) > current_idx { - continue; - } - if let Some(node) = self.state.nodes.get(source_id) - && node.assignments.contains_key(name) - { - return Some(*source_id); - } - } - None - } - - /// Fetch an action result by node id, handling indexed results. - /// - /// Example: - /// - result = @fetch() - /// - result[0] - /// The evaluator looks up the action result and returns index 0. - fn resolve_action_result(&self, expr: &ActionResultValue) -> Result { - let value = self - .action_results - .get(&expr.node_id) - .cloned() - .ok_or_else(|| ReplayError(format!("missing action result for {}", expr.node_id)))?; - if let Some(idx) = expr.result_index { - if let Value::Array(items) = value { - let idx = idx as usize; - return items.get(idx).cloned().ok_or_else(|| { - ReplayError(format!( - "action result for {} has no index {}", - expr.node_id, idx - )) - }); - } - return Err(ReplayError(format!( - "action result for {} has no index {}", - expr.node_id, idx - ))); - } - Ok(value) - } - - /// Evaluate a function call during replay. - /// - /// Only global functions are supported because user-defined functions are - /// not available in this replay context. 
- /// - /// Example: - /// - len(items=[1, 2]) -> 2 - fn evaluate_function_call( - &self, - expr: &FunctionCallValue, - args: Vec, - kwargs: HashMap, - ) -> Result { - if let Some(global_fn) = expr.global_function - && global_fn != ir::GlobalFunction::Unspecified as i32 - { - return evaluate_global_function(global_fn, args, kwargs); - } - Err(ReplayError(format!( - "cannot replay non-global function call: {}", - expr.name - ))) - } -} - -fn replay_error(message: &'static str) -> ReplayError { - ReplayError(message.to_string()) -} - -/// Apply a binary operator to replayed operands. -/// -/// Example: -/// - left=1, right=2, op=ADD -> 3 -fn apply_binary(op: i32, left: Value, right: Value) -> Result { - let error = replay_error; - match ir::BinaryOperator::try_from(op).ok() { - Some(ir::BinaryOperator::BinaryOpOr) => { - if is_truthy(&left) { - Ok(left) - } else { - Ok(right) - } - } - Some(ir::BinaryOperator::BinaryOpAnd) => { - if is_truthy(&left) { - Ok(right) - } else { - Ok(left) - } - } - Some(ir::BinaryOperator::BinaryOpEq) => Ok(Value::Bool(left == right)), - Some(ir::BinaryOperator::BinaryOpNe) => Ok(Value::Bool(left != right)), - Some(ir::BinaryOperator::BinaryOpLt) => compare_values(left, right, |a, b| a < b, error), - Some(ir::BinaryOperator::BinaryOpLe) => compare_values(left, right, |a, b| a <= b, error), - Some(ir::BinaryOperator::BinaryOpGt) => compare_values(left, right, |a, b| a > b, error), - Some(ir::BinaryOperator::BinaryOpGe) => compare_values(left, right, |a, b| a >= b, error), - Some(ir::BinaryOperator::BinaryOpIn) => Ok(Value::Bool(value_in(&left, &right))), - Some(ir::BinaryOperator::BinaryOpNotIn) => Ok(Value::Bool(!value_in(&left, &right))), - Some(ir::BinaryOperator::BinaryOpAdd) => add_values(left, right, error), - Some(ir::BinaryOperator::BinaryOpSub) => numeric_op(left, right, |a, b| a - b, true, error), - Some(ir::BinaryOperator::BinaryOpMul) => numeric_op(left, right, |a, b| a * b, true, error), - 
Some(ir::BinaryOperator::BinaryOpDiv) => { - numeric_op(left, right, |a, b| a / b, false, error) - } - Some(ir::BinaryOperator::BinaryOpFloorDiv) => { - numeric_op(left, right, |a, b| (a / b).floor(), true, error) - } - Some(ir::BinaryOperator::BinaryOpMod) => numeric_op(left, right, |a, b| a % b, true, error), - Some(ir::BinaryOperator::BinaryOpUnspecified) | None => { - Err(ReplayError("binary operator unspecified".to_string())) - } - } -} - -/// Apply a unary operator to a replayed operand. -/// -/// Example: -/// - op=NOT, operand=True -> False -fn apply_unary(op: i32, operand: Value) -> Result { - match ir::UnaryOperator::try_from(op).ok() { - Some(ir::UnaryOperator::UnaryOpNeg) => { - if let Some(value) = int_value(&operand) { - return Ok(Value::Number((-value).into())); - } - match operand.as_f64() { - Some(value) => Ok(Value::Number( - serde_json::Number::from_f64(-value) - .unwrap_or_else(|| serde_json::Number::from(0)), - )), - None => Err(ReplayError("unary neg expects number".to_string())), - } - } - Some(ir::UnaryOperator::UnaryOpNot) => Ok(Value::Bool(!is_truthy(&operand))), - Some(ir::UnaryOperator::UnaryOpUnspecified) | None => { - Err(ReplayError("unary operator unspecified".to_string())) - } - } -} - -/// Evaluate supported global helper functions. 
-/// -/// Example: -/// - range(0, 3) -> [0, 1, 2] -/// - isexception(value={"type": "...", "message": "..."}) -> True -fn evaluate_global_function( - global_function: i32, - args: Vec, - kwargs: HashMap, -) -> Result { - match ir::GlobalFunction::try_from(global_function).ok() { - Some(ir::GlobalFunction::Range) => Ok(range_from_args(&args).into()), - Some(ir::GlobalFunction::Len) => { - if let Some(first) = args.first() { - return Ok(Value::Number(len_of_value(first, replay_error)?)); - } - if let Some(items) = kwargs.get("items") { - return Ok(Value::Number(len_of_value(items, replay_error)?)); - } - Err(ReplayError("len() missing argument".to_string())) - } - Some(ir::GlobalFunction::Enumerate) => { - let items = if let Some(first) = args.first() { - first.clone() - } else if let Some(items) = kwargs.get("items") { - items.clone() - } else { - return Err(ReplayError("enumerate() missing argument".to_string())); - }; - let list = match items { - Value::Array(items) => items, - _ => return Err(ReplayError("enumerate() expects list".to_string())), - }; - let pairs: Vec = list - .into_iter() - .enumerate() - .map(|(idx, item)| Value::Array(vec![Value::Number((idx as i64).into()), item])) - .collect(); - Ok(Value::Array(pairs)) - } - Some(ir::GlobalFunction::Isexception) => { - if let Some(first) = args.first() { - return Ok(Value::Bool(is_exception_value(first))); - } - if let Some(value) = kwargs.get("value") { - return Ok(Value::Bool(is_exception_value(value))); - } - Err(ReplayError("isexception() missing argument".to_string())) - } - Some(ir::GlobalFunction::Unspecified) | None => { - Err(ReplayError("global function unspecified".to_string())) - } - } -} - -/// Build a reverse index of incoming data-flow edges. -/// -/// Sources are sorted from most-recent to oldest by timeline index so -/// lookups can short-circuit on the first viable definition. 
-fn build_incoming_data_map( - state: &RunnerState, - index: &HashMap, -) -> HashMap> { - let mut incoming: HashMap> = HashMap::new(); - for edge in &state.edges { - if edge.edge_type != EdgeType::DataFlow { - continue; - } - incoming.entry(edge.target).or_default().push(edge.source); - } - for (_target, sources) in incoming.iter_mut() { - sources.sort_by_key(|node_id| { - ( - index.get(node_id).copied().unwrap_or(0), - node_id.to_string(), - ) - }); - sources.reverse(); - } - incoming -} - -/// Replay variable values from a runner state snapshot. -/// -/// This is a convenience wrapper around ReplayEngine that prefers the latest -/// assignment for each variable and returns a fully materialized mapping. -pub fn replay_variables( - state: &RunnerState, - action_results: &HashMap, -) -> Result { - ReplayEngine::new(state, action_results).replay_variables() -} - -/// Replay concrete kwargs for a specific action node from a state snapshot. -pub fn replay_action_kwargs( - state: &RunnerState, - action_results: &HashMap, - node_id: Uuid, -) -> Result, ReplayError> { - ReplayEngine::new(state, action_results).replay_action_kwargs(node_id) -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::messages::ast as ir; - use crate::waymark_core::runner::state::{RunnerState, VariableValue}; - use crate::waymark_core::runner::value_visitor::ValueExpr; - - fn action_plus_two_expr() -> ir::Expr { - ir::Expr { - kind: Some(ir::expr::Kind::BinaryOp(Box::new(ir::BinaryOp { - left: Some(Box::new(ir::Expr { - kind: Some(ir::expr::Kind::Variable(ir::Variable { - name: "action_result".to_string(), - })), - span: None, - })), - op: ir::BinaryOperator::BinaryOpAdd as i32, - right: Some(Box::new(ir::Expr { - kind: Some(ir::expr::Kind::Literal(ir::Literal { - value: Some(ir::literal::Value::IntValue(2)), - })), - span: None, - })), - }))), - span: None, - } - } - - #[test] - fn test_replay_variables_resolves_action_results() { - let mut state = RunnerState::new(None, None, None, true); 
- - let action0 = state - .queue_action( - "action", - Some(vec!["action_result".to_string()]), - None, - None, - Some(0), - ) - .expect("queue action"); - let first_list = ir::Expr { - kind: Some(ir::expr::Kind::List(ir::ListExpr { - elements: vec![action_plus_two_expr()], - })), - span: None, - }; - state - .record_assignment(vec!["results".to_string()], &first_list, None, None) - .expect("record assignment"); - - let action1 = state - .queue_action( - "action", - Some(vec!["action_result".to_string()]), - None, - None, - Some(1), - ) - .expect("queue action"); - let second_list = ir::Expr { - kind: Some(ir::expr::Kind::List(ir::ListExpr { - elements: vec![action_plus_two_expr()], - })), - span: None, - }; - let concat_expr = ir::Expr { - kind: Some(ir::expr::Kind::BinaryOp(Box::new(ir::BinaryOp { - left: Some(Box::new(ir::Expr { - kind: Some(ir::expr::Kind::Variable(ir::Variable { - name: "results".to_string(), - })), - span: None, - })), - op: ir::BinaryOperator::BinaryOpAdd as i32, - right: Some(Box::new(second_list)), - }))), - span: None, - }; - state - .record_assignment(vec!["results".to_string()], &concat_expr, None, None) - .expect("record assignment"); - - let replayed = replay_variables( - &state, - &HashMap::from([ - (action0.node_id, Value::Number(1.into())), - (action1.node_id, Value::Number(2.into())), - ]), - ) - .expect("replay"); - - assert_eq!( - replayed.variables.get("results"), - Some(&Value::Array(vec![3.into(), 4.into()])), - ); - } - - #[test] - fn test_replay_action_kwargs_resolves_variable_inputs() { - let mut state = RunnerState::new(None, None, None, true); - - let number_expr = ir::Expr { - kind: Some(ir::expr::Kind::Literal(ir::Literal { - value: Some(ir::literal::Value::IntValue(7)), - })), - span: None, - }; - state - .record_assignment( - vec!["number".to_string()], - &number_expr, - None, - Some("number = 7".to_string()), - ) - .expect("record assignment"); - - let kwargs = HashMap::from([( - "value".to_string(), - 
ValueExpr::Variable(VariableValue { - name: "number".to_string(), - }), - )]); - - let action = state - .queue_action( - "compute", - Some(vec!["result".to_string()]), - Some(kwargs), - Some("tests".to_string()), - None, - ) - .expect("queue action"); - - let kwargs = replay_action_kwargs( - &state, - &HashMap::from([(action.node_id, Value::Number(14.into()))]), - action.node_id, - ) - .expect("replay kwargs"); - - assert_eq!(kwargs.get("value"), Some(&Value::Number(7.into()))); - } -} diff --git a/crates/waymark/src/waymark_core/runner/retry.rs b/crates/waymark/src/waymark_core/runner/retry.rs deleted file mode 100644 index e6fb4d70..00000000 --- a/crates/waymark/src/waymark_core/runner/retry.rs +++ /dev/null @@ -1,137 +0,0 @@ -//! Retry/timeout policy helpers shared by runner components. - -use crate::messages::ast as ir; - -#[derive(Clone, Debug)] -pub(crate) struct RetryDecision { - pub(crate) should_retry: bool, -} - -pub(crate) struct RetryPolicyEvaluator<'a> { - policies: &'a [ir::PolicyBracket], - exception_name: Option<&'a str>, -} - -fn is_synthetic_runtime_exception(exception_name: Option<&str>) -> bool { - matches!(exception_name, Some("ExecutorResume" | "ActionTimeout")) -} - -impl<'a> RetryPolicyEvaluator<'a> { - pub(crate) fn new(policies: &'a [ir::PolicyBracket], exception_name: Option<&'a str>) -> Self { - Self { - policies, - exception_name, - } - } - - pub(crate) fn decision(&self, attempt: i32) -> RetryDecision { - let mut max_retries: i32 = 0; - let mut matched_policy = false; - - for policy in self.policies { - let Some(ir::policy_bracket::Kind::Retry(retry)) = policy.kind.as_ref() else { - continue; - }; - let matches_exception = if retry.exception_types.is_empty() { - // Synthetic runtime exceptions (resume/timeout) can represent in-flight - // work that may still be running out-of-band. Require explicit opt-in - // exception filters before retrying these cases. 
- !is_synthetic_runtime_exception(self.exception_name) - } else if let Some(name) = self.exception_name { - retry.exception_types.iter().any(|value| value == name) - } else { - false - }; - if !matches_exception { - continue; - } - matched_policy = true; - max_retries = max_retries.max(retry.max_retries as i32); - } - - let should_retry = matched_policy && attempt - 1 < max_retries; - - RetryDecision { should_retry } - } -} - -pub(crate) fn timeout_seconds_from_policies(policies: &[ir::PolicyBracket]) -> Option { - let mut timeout_seconds: Option = None; - for policy in policies { - let Some(ir::policy_bracket::Kind::Timeout(timeout)) = policy.kind.as_ref() else { - continue; - }; - let seconds = timeout - .timeout - .as_ref() - .map(|duration| duration.seconds) - .unwrap_or(0); - if seconds == 0 { - continue; - } - timeout_seconds = Some(match timeout_seconds { - Some(existing) => existing.min(seconds), - None => seconds, - }); - } - timeout_seconds.map(|seconds| seconds.min(u64::from(u32::MAX)) as u32) -} - -#[cfg(test)] -mod tests { - use super::*; - - fn retry_policy(max_retries: u32, exception_types: Vec<&str>) -> ir::PolicyBracket { - ir::PolicyBracket { - kind: Some(ir::policy_bracket::Kind::Retry(ir::RetryPolicy { - exception_types: exception_types - .into_iter() - .map(ToString::to_string) - .collect(), - max_retries, - backoff: None, - })), - } - } - - fn timeout_policy(seconds: u64) -> ir::PolicyBracket { - ir::PolicyBracket { - kind: Some(ir::policy_bracket::Kind::Timeout(ir::TimeoutPolicy { - timeout: Some(ir::Duration { seconds }), - })), - } - } - - #[test] - fn retry_policy_evaluator_happy_path() { - let policies = vec![ - retry_policy(1, vec!["ValueError"]), - retry_policy(3, Vec::new()), - ]; - let decision = RetryPolicyEvaluator::new(&policies, Some("ValueError")).decision(2); - assert!(decision.should_retry); - - let exhausted = RetryPolicyEvaluator::new(&policies, Some("ValueError")).decision(4); - assert!(!exhausted.should_retry); - } - - 
#[test] - fn retry_policy_evaluator_wildcard_does_not_retry_synthetic_timeout() { - let policies = vec![retry_policy(3, Vec::new())]; - let decision = RetryPolicyEvaluator::new(&policies, Some("ActionTimeout")).decision(1); - assert!(!decision.should_retry); - } - - #[test] - fn retry_policy_evaluator_explicit_timeout_retry_happy_path() { - let policies = vec![retry_policy(2, vec!["ActionTimeout"])]; - let decision = RetryPolicyEvaluator::new(&policies, Some("ActionTimeout")).decision(1); - assert!(decision.should_retry); - } - - #[test] - fn timeout_seconds_from_policies_happy_path() { - let policies = vec![timeout_policy(30), timeout_policy(10), timeout_policy(0)]; - assert_eq!(timeout_seconds_from_policies(&policies), Some(10)); - } -} diff --git a/crates/waymark/src/waymark_core/runner/state.rs b/crates/waymark/src/waymark_core/runner/state.rs deleted file mode 100644 index dd9e68be..00000000 --- a/crates/waymark/src/waymark_core/runner/state.rs +++ /dev/null @@ -1,2201 +0,0 @@ -//! Execution-time DAG state with unrolled nodes and symbolic values. - -use std::collections::{HashMap, HashSet}; -use std::fmt; -use std::sync::Arc; - -use chrono::{DateTime, Utc}; -use serde::{Deserialize, Serialize}; -use uuid::Uuid; - -use crate::messages::ast as ir; -use crate::waymark_core::runner::expression_evaluator::is_truthy; -use crate::waymark_core::runner::value_visitor::{ - ValueExpr, collect_value_sources, resolve_value_tree, -}; -use waymark_dag::{ - ActionCallNode, AggregatorNode, AssignmentNode, DAG, DAGNode, EdgeType, FnCallNode, JoinNode, - ReturnNode, SleepNode, -}; - -/// Raised when the runner state cannot be updated safely. 
-#[derive(Debug, thiserror::Error)] -#[error("{0}")] -pub struct RunnerStateError(pub String); - -#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] -pub struct ActionCallSpec { - pub action_name: String, - pub module_name: Option, - pub kwargs: HashMap, -} - -#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] -pub struct LiteralValue { - pub value: serde_json::Value, -} - -#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] -pub struct VariableValue { - pub name: String, -} - -#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] -pub struct ActionResultValue { - pub node_id: Uuid, - pub action_name: String, - pub iteration_index: Option, - pub result_index: Option, -} - -impl ActionResultValue { - pub fn label(&self) -> String { - let mut label = self.action_name.clone(); - if let Some(idx) = self.iteration_index { - label = format!("{label}[{idx}]"); - } - if let Some(idx) = self.result_index { - label = format!("{label}[{idx}]"); - } - label - } -} - -#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] -pub struct BinaryOpValue { - pub left: Box, - pub op: i32, - pub right: Box, -} - -#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] -pub struct UnaryOpValue { - pub op: i32, - pub operand: Box, -} - -#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] -pub struct ListValue { - pub elements: Vec, -} - -#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] -pub struct DictEntryValue { - pub key: ValueExpr, - pub value: ValueExpr, -} - -#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] -pub struct DictValue { - pub entries: Vec, -} - -#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] -pub struct IndexValue { - pub object: Box, - pub index: Box, -} - -#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] -pub struct DotValue { - pub object: Box, - pub attribute: String, -} - -#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] -pub struct FunctionCallValue { - pub name: String, - 
pub args: Vec, - pub kwargs: HashMap, - pub global_function: Option, -} - -#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] -pub struct SpreadValue { - pub collection: Box, - pub loop_var: String, - pub action: ActionCallSpec, -} - -#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] -#[serde(tag = "type", content = "data")] -pub enum NodeStatus { - Queued, - Running, - Completed, - Failed, -} - -#[derive(Clone, Copy, Debug, PartialEq, Eq)] -pub enum ExecutionNodeType { - Input, - Output, - Assignment, - ActionCall, - FnCall, - Parallel, - Aggregator, - Branch, - Join, - Return, - Break, - Continue, - Sleep, - Expression, -} - -impl ExecutionNodeType { - pub fn as_str(&self) -> &'static str { - match self { - ExecutionNodeType::Input => "input", - ExecutionNodeType::Output => "output", - ExecutionNodeType::Assignment => "assignment", - ExecutionNodeType::ActionCall => "action_call", - ExecutionNodeType::FnCall => "fn_call", - ExecutionNodeType::Parallel => "parallel", - ExecutionNodeType::Aggregator => "aggregator", - ExecutionNodeType::Branch => "branch", - ExecutionNodeType::Join => "join", - ExecutionNodeType::Return => "return", - ExecutionNodeType::Break => "break", - ExecutionNodeType::Continue => "continue", - ExecutionNodeType::Sleep => "sleep", - ExecutionNodeType::Expression => "expression", - } - } -} - -impl TryFrom<&str> for ExecutionNodeType { - type Error = RunnerStateError; - - fn try_from(value: &str) -> Result { - match value { - "input" => Ok(ExecutionNodeType::Input), - "output" => Ok(ExecutionNodeType::Output), - "assignment" => Ok(ExecutionNodeType::Assignment), - "action_call" => Ok(ExecutionNodeType::ActionCall), - "fn_call" => Ok(ExecutionNodeType::FnCall), - "parallel" => Ok(ExecutionNodeType::Parallel), - "aggregator" => Ok(ExecutionNodeType::Aggregator), - "branch" => Ok(ExecutionNodeType::Branch), - "join" => Ok(ExecutionNodeType::Join), - "return" => Ok(ExecutionNodeType::Return), - "break" => 
Ok(ExecutionNodeType::Break), - "continue" => Ok(ExecutionNodeType::Continue), - "sleep" => Ok(ExecutionNodeType::Sleep), - "expression" => Ok(ExecutionNodeType::Expression), - _ => Err(RunnerStateError(format!( - "unknown execution node type: {value}" - ))), - } - } -} - -#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] -pub struct ExecutionNode { - pub node_id: Uuid, - pub node_type: String, - pub label: String, - pub status: NodeStatus, - pub template_id: Option, - pub targets: Vec, - pub action: Option, - pub value_expr: Option, - pub assignments: HashMap, - pub action_attempt: i32, - #[serde(default)] - pub started_at: Option>, - #[serde(default)] - pub completed_at: Option>, - #[serde(default)] - pub scheduled_at: Option>, -} - -impl ExecutionNode { - pub fn node_type_enum(&self) -> Result { - ExecutionNodeType::try_from(self.node_type.as_str()) - } - - pub fn is_action_call(&self) -> bool { - matches!( - ExecutionNodeType::try_from(self.node_type.as_str()), - Ok(ExecutionNodeType::ActionCall) - ) - } - - pub fn is_sleep(&self) -> bool { - matches!( - ExecutionNodeType::try_from(self.node_type.as_str()), - Ok(ExecutionNodeType::Sleep) - ) - } -} - -#[derive(Clone, Debug, Default)] -pub struct QueueNodeParams { - pub node_id: Option, - pub template_id: Option, - pub targets: Option>, - pub action: Option, - pub value_expr: Option, - pub scheduled_at: Option>, -} - -#[derive(Clone, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)] -pub struct ExecutionEdge { - pub source: Uuid, - pub target: Uuid, - pub edge_type: EdgeType, -} - -/// Track queued/executed DAG nodes with an unrolled, symbolic state. -/// -/// Design overview: -/// - The runner state is not a variable heap; it is the runtime graph itself, -/// unrolled to the exact nodes that have been queued or executed. -/// - Each execution node stores assignments as symbolic expressions so action -/// results can be replayed later without having the concrete payloads. 
-/// - Data-flow edges encode which execution node supplies a value to another, -/// while state-machine edges encode execution ordering and control flow. This -/// mirrors how the ground truth IR->DAG functions. -/// -/// Expected usage: -/// - Callers queue nodes as the program executes (ie. the DAG template is -/// walked) so loops and spreads expand into explicit iterations. -/// - Callers never mutate variables directly; they record assignments on nodes -/// and let replay walk the graph to reconstruct values. -/// - Persisted state can be rehydrated only with nodes/edges. The constructor will -/// rebuild in-memory cache (like timeline ordering and latest assignment tracking). -/// -/// In short, RunnerState is the ground-truth runtime DAG: symbolic assignments -/// plus control/data edges, suitable for replay and visualization. -/// -/// Action nodes represent our "frontier" nodes. Because of how we construct the graph and always -/// greedily walk the state until we hit the next actions that are possible to run, we guarantee that -/// leaf nodes are only ever actions. -/// -/// Cycle walkthrough (mid-loop example): -/// Suppose we are partway through: -/// - results = [] -/// - for item in items: -/// - action_result = @action(item) -/// - results = results + [action_result + 1] -/// -/// On a single iteration update: -/// 1) The runner queues an action node for @action(item). -/// - A new execution node is created with a UUID id. -/// - Its assignments map action_result -> ActionResultValue(node_id). -/// - Data-flow edges are added from the node that last defined `item`. -/// 2) The runner queues the assignment node for results update. -/// - The RHS expression is materialized: -/// results + [action_result + 1] becomes a BinaryOpValue whose tree -/// contains the ActionResultValue from step (1), plus a LiteralValue(1). -/// - Data-flow edges are added from the prior results definition node and -/// from the action node created in step (1). 
-/// - Latest assignment tracking is updated so `results` now points to this -/// new execution node. -/// -/// After this iteration, the state graph has explicit nodes for the current -/// action and the results update. Subsequent iterations repeat the same -/// sequence, producing a chain of assignments where replay can reconstruct the -/// incremental `results` value by following data-flow edges. -#[derive(Clone, Debug, Serialize, Deserialize)] -pub struct RunnerState { - #[serde(skip, default)] - pub dag: Option>, - pub nodes: HashMap, - pub edges: HashSet, - pub ready_queue: Vec, - pub timeline: Vec, - link_queued_nodes: bool, - latest_assignments: HashMap, - graph_dirty: bool, -} - -impl RunnerState { - pub fn new( - dag: Option>, - nodes: Option>, - edges: Option>, - link_queued_nodes: bool, - ) -> Self { - let mut state = Self { - dag, - nodes: nodes.unwrap_or_default(), - edges: edges.unwrap_or_default(), - ready_queue: Vec::new(), - timeline: Vec::new(), - link_queued_nodes, - latest_assignments: HashMap::new(), - graph_dirty: false, - }; - if !state.nodes.is_empty() || !state.edges.is_empty() { - state.rehydrate_state(); - } - state - } - - pub(crate) fn set_link_queued_nodes(&mut self, value: bool) { - self.link_queued_nodes = value; - } - - pub(crate) fn latest_assignment(&self, name: &str) -> Option { - self.latest_assignments.get(name).copied() - } - - /// Queue a runtime node based on the DAG template and apply its effects. - /// - /// Use this when stepping through a compiled DAG so the runtime state mirrors - /// the template node (assignments, action results, and data-flow edges). - /// - /// Example IR: - /// - total = a + b - /// When the AssignmentNode template is queued, the execution node records - /// the symbolic BinaryOpValue and updates data-flow edges from a/b. 
- pub fn queue_template_node( - &mut self, - template_id: &str, - iteration_index: Option, - ) -> Result { - let dag = self - .dag - .as_ref() - .ok_or_else(|| RunnerStateError("runner state has no DAG template".to_string()))?; - let template = dag - .nodes - .get(template_id) - .ok_or_else(|| RunnerStateError(format!("template node not found: {template_id}")))? - .clone(); - - let node_id = Uuid::new_v4(); - let node = ExecutionNode { - node_id, - node_type: template.node_type().to_string(), - label: template.label(), - status: NodeStatus::Queued, - template_id: Some(template_id.to_string()), - targets: self.node_targets(&template), - action: if let DAGNode::ActionCall(action_node) = &template { - Some(self.action_spec_from_node(action_node)) - } else { - None - }, - value_expr: None, - assignments: HashMap::new(), - action_attempt: if matches!(template, DAGNode::ActionCall(_)) { - 1 - } else { - 0 - }, - started_at: None, - completed_at: None, - scheduled_at: None, - }; - - self.register_node(node.clone())?; - self.apply_template_node(&node, &template, iteration_index)?; - Ok(node) - } - - /// Create a runtime node directly without a DAG template. - /// - /// Use this for ad-hoc nodes (tests, synthetic steps) and as a common - /// builder for higher-level queue helpers like queue_action. 
- /// - /// Example: - /// - queue_node(node_type="assignment", label="results = []") - pub fn queue_node( - &mut self, - node_type: &str, - label: &str, - params: QueueNodeParams, - ) -> Result { - let node_type_enum = ExecutionNodeType::try_from(node_type)?; - let QueueNodeParams { - node_id, - template_id, - targets, - action, - value_expr, - scheduled_at, - } = params; - let node_id = node_id.unwrap_or_else(Uuid::new_v4); - let action_attempt = if matches!(node_type_enum, ExecutionNodeType::ActionCall) { - 1 - } else { - 0 - }; - let node = ExecutionNode { - node_id, - node_type: node_type.to_string(), - label: label.to_string(), - status: NodeStatus::Queued, - template_id, - targets: targets.unwrap_or_default(), - action, - value_expr, - assignments: HashMap::new(), - action_attempt, - started_at: None, - completed_at: None, - scheduled_at, - }; - self.register_node(node.clone())?; - Ok(node) - } - - /// Queue an action call from IR, respecting a local scope for loop vars. - /// - /// Use this during IR -> runner-state conversion (including spreads) so - /// action arguments are converted to symbolic expressions. - /// - /// Example IR: - /// - @double(value=item) - /// With local_scope={"item": LiteralValue(2)}, the queued action uses a - /// literal argument and links data-flow to the literal's source nodes. 
- pub fn queue_action_call( - &mut self, - action: &ir::ActionCall, - targets: Option>, - iteration_index: Option, - local_scope: Option<&HashMap>, - ) -> Result { - let spec = self.action_spec_from_ir(action, local_scope); - let node = self.queue_node( - ExecutionNodeType::ActionCall.as_str(), - &format!("@{}()", spec.action_name), - QueueNodeParams { - targets: targets.clone(), - action: Some(spec.clone()), - ..QueueNodeParams::default() - }, - )?; - for value in spec.kwargs.values() { - self.record_data_flow_from_value(node.node_id, value); - } - let result = self.assign_action_results( - &node, - &spec.action_name, - targets.as_deref(), - iteration_index, - true, - )?; - if let Some(node_mut) = self.nodes.get_mut(&node.node_id) { - node_mut.value_expr = Some(ValueExpr::ActionResult(result.clone())); - } - Ok(result) - } - - pub fn mark_running(&mut self, node_id: Uuid) -> Result<(), RunnerStateError> { - let is_action = { - let node = self.get_node_mut(node_id)?; - node.status = NodeStatus::Running; - let is_action = node.is_action_call(); - if is_action { - node.started_at = Some(Utc::now()); - node.completed_at = None; - } - is_action - }; - self.ready_queue.retain(|id| id != &node_id); - if is_action { - self.mark_graph_dirty(); - } - Ok(()) - } - - pub fn mark_completed(&mut self, node_id: Uuid) -> Result<(), RunnerStateError> { - let is_action = { - let node = self.get_node_mut(node_id)?; - node.status = NodeStatus::Completed; - let is_action = node.is_action_call(); - if is_action { - node.completed_at = Some(Utc::now()); - } - node.scheduled_at = None; - is_action - }; - self.ready_queue.retain(|id| id != &node_id); - if is_action { - self.mark_graph_dirty(); - } - Ok(()) - } - - pub fn mark_failed(&mut self, node_id: Uuid) -> Result<(), RunnerStateError> { - let is_action = { - let node = self.get_node_mut(node_id)?; - node.status = NodeStatus::Failed; - let is_action = node.is_action_call(); - if is_action { - node.completed_at = Some(Utc::now()); - } 
- node.scheduled_at = None; - is_action - }; - self.ready_queue.retain(|id| id != &node_id); - if is_action { - self.mark_graph_dirty(); - } - Ok(()) - } - - pub fn set_node_scheduled_at( - &mut self, - node_id: Uuid, - scheduled_at: Option>, - ) -> Result<(), RunnerStateError> { - let node = self.get_node_mut(node_id)?; - node.scheduled_at = scheduled_at; - self.mark_graph_dirty(); - Ok(()) - } - - pub fn increment_action_attempt(&mut self, node_id: Uuid) -> Result<(), RunnerStateError> { - let node = self.get_node_mut(node_id)?; - if !node.is_action_call() { - return Err(RunnerStateError( - "action attempt increment requires an action_call node".to_string(), - )); - } - node.action_attempt += 1; - self.mark_graph_dirty(); - Ok(()) - } - - /// Return and clear the graph dirty bit for durable execution. - /// - /// Only action nodes and their retry parameters must be persisted; other - /// nodes are deterministic from the ground-truth DAG definition. - pub fn consume_graph_dirty_for_durable_execution(&mut self) -> bool { - let dirty = self.graph_dirty; - self.graph_dirty = false; - dirty - } - - pub fn add_edge(&mut self, source: Uuid, target: Uuid, edge_type: EdgeType) { - self.register_edge(ExecutionEdge { - source, - target, - edge_type, - }); - } - - /// Insert a node into the runtime bookkeeping and optional control flow. - /// - /// Use this for all queued nodes so the ready queue, timeline, and implicit - /// state-machine edge ordering remain consistent. - /// - /// Example: - /// - queue node A then node B with link_queued_nodes=True - /// This creates a state-machine edge A -> B automatically. 
- fn register_node(&mut self, node: ExecutionNode) -> Result<(), RunnerStateError> { - if self.nodes.contains_key(&node.node_id) { - return Err(RunnerStateError(format!( - "execution node already queued: {}", - node.node_id - ))); - } - self.nodes.insert(node.node_id, node.clone()); - self.ready_queue.push(node.node_id); - if node.is_action_call() { - self.mark_graph_dirty(); - } - if self.link_queued_nodes - && let Some(last) = self.timeline.last() - { - self.register_edge(ExecutionEdge { - source: *last, - target: node.node_id, - edge_type: EdgeType::StateMachine, - }); - } - self.timeline.push(node.node_id); - Ok(()) - } - - fn register_edge(&mut self, edge: ExecutionEdge) { - self.edges.insert(edge); - } - - fn mark_graph_dirty(&mut self) { - self.graph_dirty = true; - } - - /// Rebuild derived structures from persisted nodes and edges. - /// - /// Use this when loading a snapshot so timeline ordering, latest assignment - /// tracking, and ready queue reflect the current node set. - /// - /// Example: - /// - Given nodes {A, B} and edge A -> B, rehydration restores timeline - /// [A, B] and marks the latest assignment targets from node B. 
- fn rehydrate_state(&mut self) { - self.timeline = self.build_timeline(); - self.latest_assignments.clear(); - for node_id in &self.timeline { - if let Some(node) = self.nodes.get(node_id) { - for target in node.assignments.keys() { - self.latest_assignments.insert(target.clone(), *node_id); - } - } - } - if self.ready_queue.is_empty() { - self.ready_queue = self - .timeline - .iter() - .filter(|node_id| { - self.nodes - .get(node_id) - .map(|node| node.status == NodeStatus::Queued) - .unwrap_or(false) - }) - .cloned() - .collect(); - } - } - - fn build_timeline(&self) -> Vec { - if self.edges.is_empty() { - return self.nodes.keys().cloned().collect(); - } - let mut adjacency: HashMap> = self - .nodes - .keys() - .map(|node_id| (*node_id, Vec::new())) - .collect(); - let mut in_degree: HashMap = - self.nodes.keys().map(|node_id| (*node_id, 0)).collect(); - let mut edges: Vec<&ExecutionEdge> = self.edges.iter().collect(); - edges.sort_by_key(|edge| (edge.source, edge.target)); - for edge in edges { - if edge.edge_type != EdgeType::StateMachine { - continue; - } - if adjacency.contains_key(&edge.source) && adjacency.contains_key(&edge.target) { - adjacency.entry(edge.source).or_default().push(edge.target); - *in_degree.entry(edge.target).or_insert(0) += 1; - } - } - let mut queue: Vec = in_degree - .iter() - .filter(|(_, degree)| **degree == 0) - .map(|(node_id, _)| *node_id) - .collect(); - queue.sort_by_key(|id| id.to_string()); - let mut order: Vec = Vec::new(); - while !queue.is_empty() { - let node_id = queue.remove(0); - order.push(node_id); - if let Some(neighbors) = adjacency.get(&node_id) { - let mut sorted = neighbors.clone(); - sorted.sort_by_key(|id| id.to_string()); - for neighbor in sorted { - if let Some(degree) = in_degree.get_mut(&neighbor) { - *degree -= 1; - if *degree == 0 { - queue.push(neighbor); - } - } - } - queue.sort_by_key(|id| id.to_string()); - } - } - let mut remaining: Vec = self - .nodes - .keys() - .filter(|node_id| 
!order.contains(node_id)) - .cloned() - .collect(); - remaining.sort_by_key(|id| id.to_string()); - order.extend(remaining); - order - } - - fn get_node_mut(&mut self, node_id: Uuid) -> Result<&mut ExecutionNode, RunnerStateError> { - self.nodes - .get_mut(&node_id) - .ok_or_else(|| RunnerStateError(format!("execution node not found: {node_id}"))) - } - - fn node_targets(&self, node: &DAGNode) -> Vec { - match node { - DAGNode::Assignment(AssignmentNode { - targets, target, .. - }) => { - if !targets.is_empty() { - return targets.clone(); - } - target.clone().map(|item| vec![item]).unwrap_or_default() - } - DAGNode::ActionCall(ActionCallNode { - targets, target, .. - }) => { - if let Some(list) = targets - && !list.is_empty() - { - return list.clone(); - } - target.clone().map(|item| vec![item]).unwrap_or_default() - } - DAGNode::FnCall(FnCallNode { - targets, target, .. - }) => { - if let Some(list) = targets - && !list.is_empty() - { - return list.clone(); - } - target.clone().map(|item| vec![item]).unwrap_or_default() - } - DAGNode::Join(JoinNode { - targets, target, .. - }) => { - if let Some(list) = targets - && !list.is_empty() - { - return list.clone(); - } - target.clone().map(|item| vec![item]).unwrap_or_default() - } - DAGNode::Aggregator(AggregatorNode { - targets, target, .. - }) => { - if let Some(list) = targets - && !list.is_empty() - { - return list.clone(); - } - target.clone().map(|item| vec![item]).unwrap_or_default() - } - DAGNode::Return(ReturnNode { - targets, target, .. - }) => { - if let Some(list) = targets - && !list.is_empty() - { - return list.clone(); - } - target.clone().map(|item| vec![item]).unwrap_or_default() - } - _ => Vec::new(), - } - } - - /// Apply DAG template semantics to a queued execution node. - /// - /// Use this right after queue_template_node so assignments, action result - /// references, and data-flow edges are populated from the template. 
- /// - /// Example IR: - /// - total = @sum(values=items) - /// The ActionCallNode template produces an ActionResultValue and defines - /// total via assignments on the execution node. - fn apply_template_node( - &mut self, - exec_node: &ExecutionNode, - template: &DAGNode, - iteration_index: Option, - ) -> Result<(), RunnerStateError> { - match template { - DAGNode::Assignment(AssignmentNode { - assign_expr: Some(expr), - .. - }) => { - let value_expr = self.expr_to_value(expr, None)?; - if let Some(node_mut) = self.nodes.get_mut(&exec_node.node_id) { - node_mut.value_expr = Some(value_expr.clone()); - } - self.record_data_flow_from_value(exec_node.node_id, &value_expr); - let assignments = - self.build_assignments(&self.node_targets(template), &value_expr)?; - if let Some(node) = self.nodes.get_mut(&exec_node.node_id) { - node.assignments.extend(assignments.clone()); - } - self.mark_latest_assignments(exec_node.node_id, &assignments); - return Ok(()); - } - DAGNode::ActionCall(ActionCallNode { - action_name, - targets, - target, - .. - }) => { - let kwarg_values = self - .nodes - .get(&exec_node.node_id) - .and_then(|node| node.action.as_ref()) - .map(|action| action.kwargs.values().cloned().collect::>()) - .unwrap_or_default(); - for expr in &kwarg_values { - self.record_data_flow_from_value(exec_node.node_id, expr); - } - let targets = targets - .clone() - .or_else(|| target.clone().map(|item| vec![item])); - let result = self.assign_action_results( - exec_node, - action_name, - targets.as_deref(), - iteration_index, - true, - )?; - if let Some(node_mut) = self.nodes.get_mut(&exec_node.node_id) { - node_mut.value_expr = Some(ValueExpr::ActionResult(result)); - } - return Ok(()); - } - DAGNode::Sleep(SleepNode { - duration_expr: Some(expr), - .. 
- }) => { - let value_expr = self.expr_to_value(expr, None)?; - if let Some(node_mut) = self.nodes.get_mut(&exec_node.node_id) { - node_mut.value_expr = Some(value_expr.clone()); - } - self.record_data_flow_from_value(exec_node.node_id, &value_expr); - return Ok(()); - } - DAGNode::FnCall(FnCallNode { - assign_expr: Some(expr), - .. - }) => { - let value_expr = self.expr_to_value(expr, None)?; - if let Some(node_mut) = self.nodes.get_mut(&exec_node.node_id) { - node_mut.value_expr = Some(value_expr.clone()); - } - self.record_data_flow_from_value(exec_node.node_id, &value_expr); - let assignments = - self.build_assignments(&self.node_targets(template), &value_expr)?; - if let Some(node) = self.nodes.get_mut(&exec_node.node_id) { - node.assignments.extend(assignments.clone()); - } - self.mark_latest_assignments(exec_node.node_id, &assignments); - return Ok(()); - } - DAGNode::Return(ReturnNode { - assign_expr: Some(expr), - target, - .. - }) => { - let value_expr = self.expr_to_value(expr, None)?; - if let Some(node_mut) = self.nodes.get_mut(&exec_node.node_id) { - node_mut.value_expr = Some(value_expr.clone()); - } - self.record_data_flow_from_value(exec_node.node_id, &value_expr); - let target = target.clone().unwrap_or_else(|| "result".to_string()); - let assignments = self.build_assignments(&[target], &value_expr)?; - if let Some(node) = self.nodes.get_mut(&exec_node.node_id) { - node.assignments.extend(assignments.clone()); - } - self.mark_latest_assignments(exec_node.node_id, &assignments); - return Ok(()); - } - _ => {} - } - Ok(()) - } - - /// Create symbolic action results and map them to targets. - /// - /// Use this when an action produces one or more results that are assigned - /// to variables (including tuple unpacking). - /// - /// `update_latest` controls whether assigned targets are published into - /// `latest_assignments` for downstream variable/data-flow resolution. 
- /// - /// Use `update_latest = true` for user-visible assignments so later nodes - /// can resolve those target names through `latest_assignments`. - /// - /// Use `update_latest = false` for internal/synthetic bindings that should - /// not become globally visible variable definitions. Example: spread action - /// unroll nodes can bind an internal `_spread_result`, and the aggregator - /// later publishes the final user target. - /// - /// Example IR: - /// - a, b = @pair() - /// This yields ActionResultValue(node_id, result_index=0/1) for a and b. - pub(crate) fn assign_action_results( - &mut self, - node: &ExecutionNode, - action_name: &str, - targets: Option<&[String]>, - iteration_index: Option, - update_latest: bool, - ) -> Result { - let result_ref = ActionResultValue { - node_id: node.node_id, - action_name: action_name.to_string(), - iteration_index, - result_index: None, - }; - let targets = targets.unwrap_or(&[]); - let assignments = - self.build_assignments(targets, &ValueExpr::ActionResult(result_ref.clone()))?; - if !assignments.is_empty() { - if let Some(node) = self.nodes.get_mut(&node.node_id) { - node.assignments.extend(assignments.clone()); - } - if update_latest { - self.mark_latest_assignments(node.node_id, &assignments); - } - } - Ok(result_ref) - } - - /// Expand an assignment into per-target symbolic values. - /// - /// Use this for single-target assignments, tuple unpacking, and action - /// multi-result binding to keep definitions explicit. - /// - /// Example IR: - /// - a, b = [1, 2] - /// Produces {"a": LiteralValue(1), "b": LiteralValue(2)}. 
- fn build_assignments( - &self, - targets: &[String], - value: &ValueExpr, - ) -> Result, RunnerStateError> { - if targets.is_empty() { - return Ok(HashMap::new()); - } - if targets.len() == 1 { - let mut map = HashMap::new(); - // Keep single-target assignments symbolic to avoid recursively - // embedding prior values into each update (which can explode - // persisted runner_instances.state size/depth in loops). - map.insert(targets[0].clone(), value.clone()); - return Ok(map); - } - let value = self.materialize_value(value.clone()); - - match value { - ValueExpr::List(ListValue { elements }) => { - if elements.len() != targets.len() { - return Err(RunnerStateError("tuple unpacking mismatch".to_string())); - } - let mut map = HashMap::new(); - for (target, item) in targets.iter().zip(elements.into_iter()) { - map.insert(target.clone(), item); - } - Ok(map) - } - ValueExpr::ActionResult(action_value) => { - let mut map = HashMap::new(); - for (idx, target) in targets.iter().enumerate() { - map.insert( - target.clone(), - ValueExpr::ActionResult(ActionResultValue { - node_id: action_value.node_id, - action_name: action_value.action_name.clone(), - iteration_index: action_value.iteration_index, - result_index: Some(idx as i32), - }), - ); - } - Ok(map) - } - ValueExpr::FunctionCall(func_value) => { - let mut map = HashMap::new(); - for (idx, target) in targets.iter().enumerate() { - map.insert( - target.clone(), - ValueExpr::Index(IndexValue { - object: Box::new(ValueExpr::FunctionCall(func_value.clone())), - index: Box::new(ValueExpr::Literal(LiteralValue { - value: serde_json::Value::Number((idx as i64).into()), - })), - }), - ); - } - Ok(map) - } - ValueExpr::Index(index_value) => { - let mut map = HashMap::new(); - for (idx, target) in targets.iter().enumerate() { - map.insert( - target.clone(), - ValueExpr::Index(IndexValue { - object: Box::new(ValueExpr::Index(index_value.clone())), - index: Box::new(ValueExpr::Literal(LiteralValue { - value: 
serde_json::Value::Number((idx as i64).into()), - })), - }), - ); - } - Ok(map) - } - _ => Err(RunnerStateError("tuple unpacking mismatch".to_string())), - } - } - - /// Inline variable references and apply light constant folding. - /// - /// Use this before storing assignments so values are self-contained and - /// list concatenations are simplified. - /// - /// Example IR: - /// - xs = [1] - /// - ys = xs + [2] - /// Materialization turns ys into ListValue([1, 2]) rather than keeping xs. - pub(crate) fn materialize_value(&self, value: ValueExpr) -> ValueExpr { - let resolved = resolve_value_tree(&value, &|name, seen| { - self.resolve_variable_value(name, seen) - }); - if let ValueExpr::BinaryOp(BinaryOpValue { left, op, right }) = &resolved - && ir::BinaryOperator::try_from(*op).ok() == Some(ir::BinaryOperator::BinaryOpAdd) - && let (ValueExpr::List(left_list), ValueExpr::List(right_list)) = (&**left, &**right) - { - let mut elements = left_list.elements.clone(); - elements.extend(right_list.elements.clone()); - return ValueExpr::List(ListValue { elements }); - } - resolved - } - - /// Resolve a variable name to its latest symbolic definition. - /// - /// Use this when materializing expressions so variables become their - /// defining expression while guarding against cycles. - /// - /// Example IR: - /// - x = 1 - /// - y = x + 2 - /// When materializing y, the VariableValue("x") is replaced with the - /// LiteralValue(1), yielding a BinaryOpValue(1 + 2) instead of a reference - /// to x. This makes downstream replay use the symbolic expression rather - /// than requiring a separate variable lookup. 
- fn resolve_variable_value(&self, name: &str, seen: &mut HashSet) -> ValueExpr { - if seen.contains(name) { - return ValueExpr::Variable(VariableValue { - name: name.to_string(), - }); - } - let node_id = match self.latest_assignments.get(name) { - Some(node_id) => *node_id, - None => { - return ValueExpr::Variable(VariableValue { - name: name.to_string(), - }); - } - }; - let node = match self.nodes.get(&node_id) { - Some(node) => node, - None => { - return ValueExpr::Variable(VariableValue { - name: name.to_string(), - }); - } - }; - let assigned = match node.assignments.get(name) { - Some(value) => value.clone(), - None => { - return ValueExpr::Variable(VariableValue { - name: name.to_string(), - }); - } - }; - // Avoid inlining self-referential updates such as `i = i + 1`. - // Returning the raw assignment here would inject one "extra step" - // into materialized consumers (e.g. loop guards), causing off-by-one - // behavior and deep recursive expression trees. - if value_expr_contains_variable(&assigned, name) { - return ValueExpr::Variable(VariableValue { - name: name.to_string(), - }); - } - if let ValueExpr::Variable(var) = &assigned { - seen.insert(name.to_string()); - return self.resolve_variable_value(&var.name, seen); - } - assigned - } - - pub(crate) fn mark_latest_assignments( - &mut self, - node_id: Uuid, - assignments: &HashMap, - ) { - for target in assignments.keys() { - self.latest_assignments.insert(target.clone(), node_id); - } - } - - /// Add data-flow edges implied by a value expression. - /// - /// Use this when a node consumes an expression so upstream dependencies are - /// encoded in the runtime graph. - /// - /// Example IR: - /// - total = @sum(values) - /// A data-flow edge is added from the values assignment node to the action. 
- pub(crate) fn record_data_flow_from_value(&mut self, node_id: Uuid, value: &ValueExpr) { - let source_ids = - collect_value_sources(value, &|name| self.latest_assignments.get(name).copied()); - self.record_data_flow_edges(node_id, &source_ids); - } - - /// Register data-flow edges from sources to the given node. - /// - /// Example: - /// - sources {A, B} and node C produce edges A -> C and B -> C. - fn record_data_flow_edges(&mut self, node_id: Uuid, source_ids: &HashSet) { - for source_id in source_ids { - if *source_id == node_id { - continue; - } - self.register_edge(ExecutionEdge { - source: *source_id, - target: node_id, - edge_type: EdgeType::DataFlow, - }); - } - } - - /// Convert an IR expression into a symbolic ValueExpr tree. - /// - /// Use this when interpreting IR statements or DAG templates into the - /// runtime state; it queues actions and spreads as needed. - /// - /// Example IR: - /// - total = base + 1 - /// Produces BinaryOpValue(VariableValue("base"), LiteralValue(1)). 
- pub fn expr_to_value( - &mut self, - expr: &ir::Expr, - local_scope: Option<&HashMap>, - ) -> Result { - match expr.kind.as_ref() { - Some(ir::expr::Kind::Literal(lit)) => Ok(ValueExpr::Literal(LiteralValue { - value: literal_value(lit), - })), - Some(ir::expr::Kind::Variable(var)) => { - if let Some(scope) = local_scope - && let Some(value) = scope.get(&var.name) - { - return Ok(value.clone()); - } - Ok(ValueExpr::Variable(VariableValue { - name: var.name.clone(), - })) - } - Some(ir::expr::Kind::BinaryOp(op)) => { - let left = op - .left - .as_ref() - .ok_or_else(|| RunnerStateError("binary op missing left".to_string()))?; - let right = op - .right - .as_ref() - .ok_or_else(|| RunnerStateError("binary op missing right".to_string()))?; - let left_value = self.expr_to_value(left, local_scope)?; - let right_value = self.expr_to_value(right, local_scope)?; - Ok(self.binary_op_value(op.op, left_value, right_value)) - } - Some(ir::expr::Kind::UnaryOp(op)) => { - let operand = op - .operand - .as_ref() - .ok_or_else(|| RunnerStateError("unary op missing operand".to_string()))?; - let operand_value = self.expr_to_value(operand, local_scope)?; - Ok(self.unary_op_value(op.op, operand_value)) - } - Some(ir::expr::Kind::List(list)) => { - let elements = list - .elements - .iter() - .map(|item| self.expr_to_value(item, local_scope)) - .collect::, RunnerStateError>>()?; - Ok(ValueExpr::List(ListValue { elements })) - } - Some(ir::expr::Kind::Dict(dict_expr)) => { - let mut entries = Vec::new(); - for entry in &dict_expr.entries { - let key_expr = entry - .key - .as_ref() - .ok_or_else(|| RunnerStateError("dict entry missing key".to_string()))?; - let value_expr = entry - .value - .as_ref() - .ok_or_else(|| RunnerStateError("dict entry missing value".to_string()))?; - entries.push(DictEntryValue { - key: self.expr_to_value(key_expr, local_scope)?, - value: self.expr_to_value(value_expr, local_scope)?, - }); - } - Ok(ValueExpr::Dict(DictValue { entries })) - } - 
Some(ir::expr::Kind::Index(index)) => { - let object = index - .object - .as_ref() - .ok_or_else(|| RunnerStateError("index access missing object".to_string()))?; - let index_expr = index - .index - .as_ref() - .ok_or_else(|| RunnerStateError("index access missing index".to_string()))?; - let object_value = self.expr_to_value(object, local_scope)?; - let index_value = self.expr_to_value(index_expr, local_scope)?; - Ok(self.index_value(object_value, index_value)) - } - Some(ir::expr::Kind::Dot(dot)) => { - let object = dot - .object - .as_ref() - .ok_or_else(|| RunnerStateError("dot access missing object".to_string()))?; - Ok(ValueExpr::Dot(DotValue { - object: Box::new(self.expr_to_value(object, local_scope)?), - attribute: dot.attribute.clone(), - })) - } - Some(ir::expr::Kind::FunctionCall(call)) => { - let args = call - .args - .iter() - .map(|arg| self.expr_to_value(arg, local_scope)) - .collect::, RunnerStateError>>()?; - let mut kwargs = HashMap::new(); - for kw in &call.kwargs { - if let Some(value) = &kw.value { - kwargs.insert(kw.name.clone(), self.expr_to_value(value, local_scope)?); - } - } - let global_fn = if call.global_function != 0 { - Some(call.global_function) - } else { - None - }; - Ok(ValueExpr::FunctionCall(FunctionCallValue { - name: call.name.clone(), - args, - kwargs, - global_function: global_fn, - })) - } - Some(ir::expr::Kind::ActionCall(action)) => { - let result = self.queue_action_call(action, None, None, local_scope)?; - Ok(ValueExpr::ActionResult(result)) - } - Some(ir::expr::Kind::ParallelExpr(parallel)) => { - let mut calls = Vec::new(); - for call in ¶llel.calls { - calls.push(self.call_to_value(call, local_scope)?); - } - Ok(ValueExpr::List(ListValue { elements: calls })) - } - Some(ir::expr::Kind::SpreadExpr(spread)) => self.spread_expr_value(spread, local_scope), - None => Ok(ValueExpr::Literal(LiteralValue { - value: serde_json::Value::Null, - })), - } - } - - /// Convert an IR call (action/function) into a ValueExpr. 
- /// - /// Use this for parallel expressions that contain mixed call types. - /// - /// Example IR: - /// - parallel { @double(x), helper(x) } - /// Action calls become ActionResultValue nodes; function calls become - /// FunctionCallValue expressions. - fn call_to_value( - &mut self, - call: &ir::Call, - local_scope: Option<&HashMap>, - ) -> Result { - match call.kind.as_ref() { - Some(ir::call::Kind::Action(action)) => Ok(ValueExpr::ActionResult( - self.queue_action_call(action, None, None, local_scope)?, - )), - Some(ir::call::Kind::Function(function)) => self.expr_to_value( - &ir::Expr { - kind: Some(ir::expr::Kind::FunctionCall(function.clone())), - span: None, - }, - local_scope, - ), - None => Ok(ValueExpr::Literal(LiteralValue { - value: serde_json::Value::Null, - })), - } - } - - /// Materialize a spread expression into concrete calls or a symbolic spread. - /// - /// Use this when converting IR spreads so known list collections unroll to - /// explicit action calls, while unknown collections stay symbolic. - /// - /// Example IR: - /// - spread [1, 2]:item -> @double(value=item) - /// Produces a ListValue of ActionResultValue entries for each item. 
- fn spread_expr_value( - &mut self, - spread: &ir::SpreadExpr, - local_scope: Option<&HashMap>, - ) -> Result { - let collection = self.expr_to_value( - spread - .collection - .as_ref() - .ok_or_else(|| RunnerStateError("spread collection missing".to_string()))?, - local_scope, - )?; - if let ValueExpr::List(list) = &collection { - let mut results = Vec::new(); - for (idx, item) in list.elements.iter().enumerate() { - let mut scope = HashMap::new(); - scope.insert(spread.loop_var.clone(), item.clone()); - let result = self.queue_action_call( - spread - .action - .as_ref() - .ok_or_else(|| RunnerStateError("spread action missing".to_string()))?, - None, - Some(idx as i32), - Some(&scope), - )?; - results.push(ValueExpr::ActionResult(result)); - } - return Ok(ValueExpr::List(ListValue { elements: results })); - } - - let action_spec = self.action_spec_from_ir( - spread - .action - .as_ref() - .ok_or_else(|| RunnerStateError("spread action missing".to_string()))?, - None, - ); - Ok(ValueExpr::Spread(SpreadValue { - collection: Box::new(collection), - loop_var: spread.loop_var.clone(), - action: action_spec, - })) - } - - /// Build a binary-op value with simple constant folding. - /// - /// Use this when converting IR so literals and list concatenations are - /// simplified early. - /// - /// Example IR: - /// - total = 1 + 2 - /// Produces LiteralValue(3) instead of a BinaryOpValue. 
- fn binary_op_value(&self, op: i32, left: ValueExpr, right: ValueExpr) -> ValueExpr { - if ir::BinaryOperator::try_from(op).ok() == Some(ir::BinaryOperator::BinaryOpAdd) - && let (ValueExpr::List(left_list), ValueExpr::List(right_list)) = (&left, &right) - { - let mut elements = left_list.elements.clone(); - elements.extend(right_list.elements.clone()); - return ValueExpr::List(ListValue { elements }); - } - if let (ValueExpr::Literal(left_val), ValueExpr::Literal(right_val)) = (&left, &right) - && let Some(folded) = fold_literal_binary(op, &left_val.value, &right_val.value) - { - return ValueExpr::Literal(LiteralValue { value: folded }); - } - ValueExpr::BinaryOp(BinaryOpValue { - left: Box::new(left), - op, - right: Box::new(right), - }) - } - - /// Build a unary-op value with constant folding for literals. - /// - /// Example IR: - /// - neg = -1 - /// Produces LiteralValue(-1) instead of UnaryOpValue. - fn unary_op_value(&self, op: i32, operand: ValueExpr) -> ValueExpr { - if let ValueExpr::Literal(lit) = &operand - && let Some(folded) = fold_literal_unary(op, &lit.value) - { - return ValueExpr::Literal(LiteralValue { value: folded }); - } - ValueExpr::UnaryOp(UnaryOpValue { - op, - operand: Box::new(operand), - }) - } - - /// Build an index value, folding list literals when possible. - /// - /// Example IR: - /// - first = [10, 20][0] - /// Produces LiteralValue(10) when the list is fully literal. - fn index_value(&self, object: ValueExpr, index: ValueExpr) -> ValueExpr { - if let (ValueExpr::List(list), ValueExpr::Literal(idx)) = (&object, &index) - && let Some(idx) = idx.value.as_i64() - && idx >= 0 - && (idx as usize) < list.elements.len() - { - return list.elements[idx as usize].clone(); - } - ValueExpr::Index(IndexValue { - object: Box::new(object), - index: Box::new(index), - }) - } - - /// Extract an action call spec from a DAG node. - /// - /// Use this when queueing nodes from the DAG template. 
- /// - /// Example: - /// - ActionCallNode(action_name="double", kwargs={"value": "$x"}) - /// Produces ActionCallSpec(action_name="double", kwargs={"value": VariableValue("x")}). - fn action_spec_from_node(&mut self, node: &ActionCallNode) -> ActionCallSpec { - let kwargs = node - .kwarg_exprs - .iter() - .map(|(name, expr)| (name.clone(), self.expr_to_value(expr, None).unwrap())) - .collect(); - ActionCallSpec { - action_name: node.action_name.clone(), - module_name: node.module_name.clone(), - kwargs, - } - } - - /// Extract an action call spec from IR, applying local scope bindings. - /// - /// Example IR: - /// - @double(value=item) with local_scope["item"]=LiteralValue(2) - /// Produces kwargs {"value": LiteralValue(2)}. - fn action_spec_from_ir( - &mut self, - action: &ir::ActionCall, - local_scope: Option<&HashMap>, - ) -> ActionCallSpec { - let kwargs = action - .kwargs - .iter() - .filter_map(|kw| kw.value.as_ref().map(|value| (kw.name.clone(), value))) - .map(|(name, value)| (name, self.expr_to_value(value, local_scope).unwrap())) - .collect(); - ActionCallSpec { - action_name: action.action_name.clone(), - module_name: action.module_name.clone(), - kwargs, - } - } - - /// Queue an action call from raw parameters and return a symbolic result. - /// - /// Use this when constructing runner state programmatically without IR - /// objects, while still wiring data-flow edges and assignments. - /// - /// Example: - /// - queue_action("double", targets=["out"], kwargs={"value": LiteralValue(2)}) - /// Defines out via an ActionResultValue and records data-flow from the literal. 
- pub fn queue_action( - &mut self, - action_name: &str, - targets: Option>, - kwargs: Option>, - module_name: Option, - iteration_index: Option, - ) -> Result { - let spec = ActionCallSpec { - action_name: action_name.to_string(), - module_name, - kwargs: kwargs.unwrap_or_default(), - }; - let node = self.queue_node( - ExecutionNodeType::ActionCall.as_str(), - &format!("@{}()", spec.action_name), - QueueNodeParams { - targets: targets.clone(), - action: Some(spec.clone()), - ..QueueNodeParams::default() - }, - )?; - for value in spec.kwargs.values() { - self.record_data_flow_from_value(node.node_id, value); - } - let result = self.assign_action_results( - &node, - &spec.action_name, - targets.as_deref(), - iteration_index, - true, - )?; - if let Some(node) = self.nodes.get_mut(&node.node_id) { - node.value_expr = Some(ValueExpr::ActionResult(result.clone())); - } - Ok(result) - } - - /// Record an IR assignment as a runtime node with symbolic values. - /// - /// Use this when interpreting IR statements into the unrolled runtime graph. - /// - /// Example IR: - /// - results = [] - /// Produces an assignment node with targets ["results"] and a ListValue([]). - pub fn record_assignment( - &mut self, - targets: Vec, - expr: &ir::Expr, - node_id: Option, - label: Option, - ) -> Result { - let value_expr = self.expr_to_value(expr, None)?; - self.record_assignment_value(targets, value_expr, node_id, label) - } - - /// Record a symbolic assignment node and update data-flow/definitions. - /// - /// Use this for assignments created programmatically after ValueExpr - /// construction (tests or state rewrites). - /// - /// Example: - /// - record_assignment_value(targets=["x"], value_expr=LiteralValue(1)) - /// Creates an assignment node with x bound to LiteralValue(1). 
- pub fn record_assignment_value( - &mut self, - targets: Vec, - value_expr: ValueExpr, - node_id: Option, - label: Option, - ) -> Result { - let exec_node_id = node_id.unwrap_or_else(Uuid::new_v4); - let node = self.queue_node( - "assignment", - label.as_deref().unwrap_or("assignment"), - QueueNodeParams { - node_id: Some(exec_node_id), - targets: Some(targets.clone()), - value_expr: Some(value_expr.clone()), - ..QueueNodeParams::default() - }, - )?; - self.record_data_flow_from_value(exec_node_id, &value_expr); - let assignments = self.build_assignments(&targets, &value_expr)?; - if let Some(node_mut) = self.nodes.get_mut(&node.node_id) { - node_mut.assignments.extend(assignments.clone()); - } - self.mark_latest_assignments(node.node_id, &assignments); - Ok(node) - } -} - -/// Render a ValueExpr to a python-like string for debugging/visualization. -/// -/// Example: -/// - BinaryOpValue(VariableValue("a"), +, LiteralValue(1)) -> "a + 1" -pub fn format_value(expr: &ValueExpr) -> String { - format_value_inner(expr, 0) -} - -/// Recursive ValueExpr formatter with operator precedence handling. -/// -/// Example: -/// - (a + b) * c renders with parentheses when needed. 
-fn format_value_inner(expr: &ValueExpr, parent_prec: i32) -> String { - match expr { - ValueExpr::Literal(lit) => format_literal(&lit.value), - ValueExpr::Variable(var) => var.name.clone(), - ValueExpr::ActionResult(value) => value.label(), - ValueExpr::BinaryOp(value) => { - let (op_str, prec) = binary_operator(value.op); - let left = format_value_inner(&value.left, prec); - let right = format_value_inner(&value.right, prec + 1); - let rendered = format!("{left} {op_str} {right}"); - if prec < parent_prec { - format!("({rendered})") - } else { - rendered - } - } - ValueExpr::UnaryOp(value) => { - let (op_str, prec) = unary_operator(value.op); - let operand = format_value_inner(&value.operand, prec); - let rendered = format!("{op_str}{operand}"); - if prec < parent_prec { - format!("({rendered})") - } else { - rendered - } - } - ValueExpr::List(value) => { - let items: Vec = value - .elements - .iter() - .map(|item| format_value_inner(item, 0)) - .collect(); - format!("[{}]", items.join(", ")) - } - ValueExpr::Dict(value) => { - let entries: Vec = value - .entries - .iter() - .map(|entry| { - format!( - "{}: {}", - format_value_inner(&entry.key, 0), - format_value_inner(&entry.value, 0) - ) - }) - .collect(); - format!("{{{}}}", entries.join(", ")) - } - ValueExpr::Index(value) => { - let prec = precedence("index"); - let obj = format_value_inner(&value.object, prec); - let idx = format_value_inner(&value.index, 0); - let rendered = format!("{obj}[{idx}]"); - if prec < parent_prec { - format!("({rendered})") - } else { - rendered - } - } - ValueExpr::Dot(value) => { - let prec = precedence("dot"); - let obj = format_value_inner(&value.object, prec); - let rendered = format!("{obj}.{}", value.attribute); - if prec < parent_prec { - format!("({rendered})") - } else { - rendered - } - } - ValueExpr::FunctionCall(value) => { - let mut args: Vec = value - .args - .iter() - .map(|arg| format_value_inner(arg, 0)) - .collect(); - for (name, val) in &value.kwargs { - 
args.push(format!("{name}={}", format_value_inner(val, 0))); - } - format!("{}({})", value.name, args.join(", ")) - } - ValueExpr::Spread(value) => { - let collection = format_value_inner(&value.collection, 0); - let mut args: Vec = Vec::new(); - for (name, val) in &value.action.kwargs { - args.push(format!("{name}={}", format_value_inner(val, 0))); - } - let call = format!("@{}({})", value.action.action_name, args.join(", ")); - format!("spread {collection}:{} -> {call}", value.loop_var) - } - } -} - -fn value_expr_contains_variable(expr: &ValueExpr, name: &str) -> bool { - match expr { - ValueExpr::Variable(var) => var.name == name, - ValueExpr::BinaryOp(value) => { - value_expr_contains_variable(&value.left, name) - || value_expr_contains_variable(&value.right, name) - } - ValueExpr::UnaryOp(value) => value_expr_contains_variable(&value.operand, name), - ValueExpr::List(value) => value - .elements - .iter() - .any(|item| value_expr_contains_variable(item, name)), - ValueExpr::Dict(value) => value.entries.iter().any(|entry| { - value_expr_contains_variable(&entry.key, name) - || value_expr_contains_variable(&entry.value, name) - }), - ValueExpr::Index(value) => { - value_expr_contains_variable(&value.object, name) - || value_expr_contains_variable(&value.index, name) - } - ValueExpr::Dot(value) => value_expr_contains_variable(&value.object, name), - ValueExpr::FunctionCall(value) => { - value - .args - .iter() - .any(|arg| value_expr_contains_variable(arg, name)) - || value - .kwargs - .values() - .any(|kwarg| value_expr_contains_variable(kwarg, name)) - } - ValueExpr::Spread(value) => { - value_expr_contains_variable(&value.collection, name) - || value - .action - .kwargs - .values() - .any(|kwarg| value_expr_contains_variable(kwarg, name)) - } - ValueExpr::Literal(_) | ValueExpr::ActionResult(_) => false, - } -} - -/// Map binary operator enums to (symbol, precedence) for formatting. 
-fn binary_operator(op: i32) -> (&'static str, i32) { - match ir::BinaryOperator::try_from(op).ok() { - Some(ir::BinaryOperator::BinaryOpOr) => ("or", 10), - Some(ir::BinaryOperator::BinaryOpAnd) => ("and", 20), - Some(ir::BinaryOperator::BinaryOpEq) => ("==", 30), - Some(ir::BinaryOperator::BinaryOpNe) => ("!=", 30), - Some(ir::BinaryOperator::BinaryOpLt) => ("<", 30), - Some(ir::BinaryOperator::BinaryOpLe) => ("<=", 30), - Some(ir::BinaryOperator::BinaryOpGt) => (">", 30), - Some(ir::BinaryOperator::BinaryOpGe) => (">=", 30), - Some(ir::BinaryOperator::BinaryOpIn) => ("in", 30), - Some(ir::BinaryOperator::BinaryOpNotIn) => ("not in", 30), - Some(ir::BinaryOperator::BinaryOpAdd) => ("+", 40), - Some(ir::BinaryOperator::BinaryOpSub) => ("-", 40), - Some(ir::BinaryOperator::BinaryOpMul) => ("*", 50), - Some(ir::BinaryOperator::BinaryOpDiv) => ("/", 50), - Some(ir::BinaryOperator::BinaryOpFloorDiv) => ("//", 50), - Some(ir::BinaryOperator::BinaryOpMod) => ("%", 50), - _ => ("?", 0), - } -} - -/// Map unary operator enums to (symbol, precedence) for formatting. -fn unary_operator(op: i32) -> (&'static str, i32) { - match ir::UnaryOperator::try_from(op).ok() { - Some(ir::UnaryOperator::UnaryOpNeg) => ("-", 60), - Some(ir::UnaryOperator::UnaryOpNot) => ("not ", 60), - _ => ("?", 0), - } -} - -/// Return precedence for non-operator constructs like index/dot. -fn precedence(kind: &str) -> i32 { - match kind { - "index" | "dot" => 80, - _ => 0, - } -} - -/// Format Python literals as source-like text. -fn format_literal(value: &serde_json::Value) -> String { - match value { - serde_json::Value::Null => "None".to_string(), - serde_json::Value::Bool(value) => { - if *value { - "True".to_string() - } else { - "False".to_string() - } - } - serde_json::Value::String(value) => { - serde_json::to_string(value).unwrap_or_else(|_| format!("\"{value}\"")) - } - _ => value.to_string(), - } -} - -/// Convert an IR literal into a Python value. 
-/// -/// Example IR: -/// - Literal(int_value=3) -> 3 -pub(crate) fn literal_value(lit: &ir::Literal) -> serde_json::Value { - match lit.value.as_ref() { - Some(ir::literal::Value::IntValue(value)) => serde_json::Value::Number((*value).into()), - Some(ir::literal::Value::FloatValue(value)) => serde_json::Number::from_f64(*value) - .map(serde_json::Value::Number) - .unwrap_or(serde_json::Value::Null), - Some(ir::literal::Value::StringValue(value)) => serde_json::Value::String(value.clone()), - Some(ir::literal::Value::BoolValue(value)) => serde_json::Value::Bool(*value), - Some(ir::literal::Value::IsNone(_)) => serde_json::Value::Null, - None => serde_json::Value::Null, - } -} - -/// Try to fold a literal binary operation to a concrete value. -/// -/// Example: -/// - (1, 2, BINARY_OP_ADD) -> 3 -fn fold_literal_binary( - op: i32, - left: &serde_json::Value, - right: &serde_json::Value, -) -> Option { - match ir::BinaryOperator::try_from(op).ok() { - Some(ir::BinaryOperator::BinaryOpAdd) => { - if let (Some(left), Some(right)) = (left.as_i64(), right.as_i64()) { - return Some(serde_json::Value::Number((left + right).into())); - } - if let (Some(left), Some(right)) = (left.as_f64(), right.as_f64()) { - return serde_json::Number::from_f64(left + right).map(serde_json::Value::Number); - } - if let (Some(left), Some(right)) = (left.as_str(), right.as_str()) { - return Some(serde_json::Value::String(format!("{left}{right}"))); - } - None - } - Some(ir::BinaryOperator::BinaryOpSub) => { - if let (Some(left), Some(right)) = (left.as_f64(), right.as_f64()) { - return serde_json::Number::from_f64(left - right).map(serde_json::Value::Number); - } - None - } - Some(ir::BinaryOperator::BinaryOpMul) => { - if let (Some(left), Some(right)) = (left.as_f64(), right.as_f64()) { - return serde_json::Number::from_f64(left * right).map(serde_json::Value::Number); - } - None - } - Some(ir::BinaryOperator::BinaryOpDiv) => { - if let (Some(left), Some(right)) = (left.as_f64(), 
right.as_f64()) { - return serde_json::Number::from_f64(left / right).map(serde_json::Value::Number); - } - None - } - Some(ir::BinaryOperator::BinaryOpFloorDiv) => { - if let (Some(left), Some(right)) = (left.as_f64(), right.as_f64()) { - if right == 0.0 { - return None; - } - let value = (left / right).floor(); - return serde_json::Number::from_f64(value).map(serde_json::Value::Number); - } - None - } - Some(ir::BinaryOperator::BinaryOpMod) => { - if let (Some(left), Some(right)) = (left.as_f64(), right.as_f64()) { - return serde_json::Number::from_f64(left % right).map(serde_json::Value::Number); - } - None - } - _ => None, - } -} - -/// Try to fold a literal unary operation to a concrete value. -/// -/// Example: -/// - (UNARY_OP_NEG, 4) -> -4 -fn fold_literal_unary(op: i32, operand: &serde_json::Value) -> Option { - match ir::UnaryOperator::try_from(op).ok() { - Some(ir::UnaryOperator::UnaryOpNeg) => operand - .as_f64() - .and_then(|value| serde_json::Number::from_f64(-value).map(serde_json::Value::Number)), - Some(ir::UnaryOperator::UnaryOpNot) => Some(serde_json::Value::Bool(!is_truthy(operand))), - _ => None, - } -} - -impl fmt::Display for NodeStatus { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let value = match self { - NodeStatus::Queued => "queued", - NodeStatus::Running => "running", - NodeStatus::Completed => "completed", - NodeStatus::Failed => "failed", - }; - write!(f, "{value}") - } -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::messages::ast as ir; - use serde_json::Value; - - fn action_plus_two_expr() -> ir::Expr { - ir::Expr { - kind: Some(ir::expr::Kind::BinaryOp(Box::new(ir::BinaryOp { - left: Some(Box::new(ir::Expr { - kind: Some(ir::expr::Kind::Variable(ir::Variable { - name: "action_result".to_string(), - })), - span: None, - })), - op: ir::BinaryOperator::BinaryOpAdd as i32, - right: Some(Box::new(ir::Expr { - kind: Some(ir::expr::Kind::Literal(ir::Literal { - value: Some(ir::literal::Value::IntValue(2)), 
- })), - span: None, - })), - }))), - span: None, - } - } - - #[test] - fn test_runner_state_unrolls_loop_assignments() { - let mut state = RunnerState::new(None, None, None, true); - - state - .queue_action( - "action", - Some(vec!["action_result".to_string()]), - None, - None, - Some(0), - ) - .expect("queue action"); - let first_list = ir::Expr { - kind: Some(ir::expr::Kind::List(ir::ListExpr { - elements: vec![action_plus_two_expr()], - })), - span: None, - }; - state - .record_assignment(vec!["results".to_string()], &first_list, None, None) - .expect("record assignment"); - - state - .queue_action( - "action", - Some(vec!["action_result".to_string()]), - None, - None, - Some(1), - ) - .expect("queue action"); - let second_list = ir::Expr { - kind: Some(ir::expr::Kind::List(ir::ListExpr { - elements: vec![action_plus_two_expr()], - })), - span: None, - }; - let concat_expr = ir::Expr { - kind: Some(ir::expr::Kind::BinaryOp(Box::new(ir::BinaryOp { - left: Some(Box::new(ir::Expr { - kind: Some(ir::expr::Kind::Variable(ir::Variable { - name: "results".to_string(), - })), - span: None, - })), - op: ir::BinaryOperator::BinaryOpAdd as i32, - right: Some(Box::new(second_list)), - }))), - span: None, - }; - state - .record_assignment(vec!["results".to_string()], &concat_expr, None, None) - .expect("record assignment"); - - let mut results: Option = None; - for node_id in state.timeline.iter().rev() { - let node = state.nodes.get(node_id).unwrap(); - if let Some(value) = node.assignments.get("results") { - results = Some(value.clone()); - break; - } - } - - let results = results.expect("results assignment"); - let binary = match results { - ValueExpr::BinaryOp(value) => value, - other => panic!("expected BinaryOpValue, got {other:?}"), - }; - - match binary.left.as_ref() { - ValueExpr::Variable(value) => assert_eq!(value.name, "results"), - other => panic!("expected VariableValue, got {other:?}"), - } - - let right_list = match binary.right.as_ref() { - 
ValueExpr::List(value) => value, - other => panic!("expected ListValue, got {other:?}"), - }; - assert_eq!(right_list.elements.len(), 1); - - let item_bin = match &right_list.elements[0] { - ValueExpr::BinaryOp(value) => value, - other => panic!("expected BinaryOpValue, got {other:?}"), - }; - - match item_bin.left.as_ref() { - ValueExpr::Variable(value) => assert_eq!(value.name, "action_result"), - other => panic!("expected VariableValue, got {other:?}"), - } - - match item_bin.right.as_ref() { - ValueExpr::Literal(value) => assert_eq!(value.value, Value::Number(2.into())), - other => panic!("expected LiteralValue, got {other:?}"), - } - } - - #[test] - fn test_runner_state_single_target_assignments_stay_symbolic() { - let mut state = RunnerState::new(None, None, None, true); - - let initial = ValueExpr::Dict(DictValue { - entries: vec![DictEntryValue { - key: ValueExpr::Literal(LiteralValue { - value: Value::String("result".to_string()), - }), - value: ValueExpr::Literal(LiteralValue { - value: Value::Number(1.into()), - }), - }], - }); - state - .record_assignment_value(vec!["result".to_string()], initial, None, None) - .expect("record initial assignment"); - - let wrapped = ValueExpr::Dict(DictValue { - entries: vec![DictEntryValue { - key: ValueExpr::Literal(LiteralValue { - value: Value::String("result".to_string()), - }), - value: ValueExpr::Variable(VariableValue { - name: "result".to_string(), - }), - }], - }); - state - .record_assignment_value(vec!["result".to_string()], wrapped, None, None) - .expect("record wrapped assignment"); - - let mut latest: Option = None; - for node_id in state.timeline.iter().rev() { - let node = state.nodes.get(node_id).expect("node"); - if let Some(value) = node.assignments.get("result") { - latest = Some(value.clone()); - break; - } - } - let latest = latest.expect("latest assignment"); - let dict = match latest { - ValueExpr::Dict(value) => value, - other => panic!("expected DictValue, got {other:?}"), - }; - 
assert_eq!(dict.entries.len(), 1); - match &dict.entries[0].value { - ValueExpr::Variable(value) => assert_eq!(value.name, "result"), - other => panic!("expected VariableValue, got {other:?}"), - } - } - - #[test] - fn test_materialize_value_keeps_self_referential_variable_symbolic() { - let mut state = RunnerState::new(None, None, None, true); - state - .record_assignment_value( - vec!["count".to_string()], - ValueExpr::Literal(LiteralValue { - value: Value::Number(0.into()), - }), - None, - None, - ) - .expect("record initial count"); - state - .record_assignment_value( - vec!["count".to_string()], - ValueExpr::BinaryOp(BinaryOpValue { - left: Box::new(ValueExpr::Variable(VariableValue { - name: "count".to_string(), - })), - op: ir::BinaryOperator::BinaryOpAdd as i32, - right: Box::new(ValueExpr::Literal(LiteralValue { - value: Value::Number(1.into()), - })), - }), - None, - None, - ) - .expect("record count update"); - - let materialized = state.materialize_value(ValueExpr::Variable(VariableValue { - name: "count".to_string(), - })); - match materialized { - ValueExpr::Variable(value) => assert_eq!(value.name, "count"), - other => panic!("expected VariableValue, got {other:?}"), - } - } - - #[test] - fn test_runner_state_graph_dirty_for_action_updates() { - let mut state = RunnerState::new(None, None, None, true); - assert!(!state.consume_graph_dirty_for_durable_execution()); - - let action_result = state - .queue_action( - "action", - Some(vec!["action_result".to_string()]), - None, - None, - None, - ) - .expect("queue action"); - assert!(state.consume_graph_dirty_for_durable_execution()); - assert!(!state.consume_graph_dirty_for_durable_execution()); - - state - .increment_action_attempt(action_result.node_id) - .expect("increment action attempt"); - assert!(state.consume_graph_dirty_for_durable_execution()); - } - - #[test] - fn test_runner_state_graph_dirty_not_set_for_assignments() { - let mut state = RunnerState::new(None, None, None, true); - let 
value_expr = ValueExpr::Literal(LiteralValue { - value: Value::Number(1.into()), - }); - state - .record_assignment_value(vec!["value".to_string()], value_expr, None, None) - .expect("record assignment"); - - assert!(!state.consume_graph_dirty_for_durable_execution()); - } - - #[test] - fn test_runner_state_records_action_start_stop_timestamps() { - let mut state = RunnerState::new(None, None, None, true); - let action_result = state - .queue_action( - "action", - Some(vec!["action_result".to_string()]), - None, - None, - None, - ) - .expect("queue action"); - - // Clear queue-time dirty bit so lifecycle transitions are isolated. - assert!(state.consume_graph_dirty_for_durable_execution()); - - state - .mark_running(action_result.node_id) - .expect("mark running"); - let started_at = state - .nodes - .get(&action_result.node_id) - .and_then(|node| node.started_at); - assert!( - started_at.is_some(), - "running action should record started_at" - ); - assert!( - state - .nodes - .get(&action_result.node_id) - .and_then(|node| node.completed_at) - .is_none(), - "running action should clear completed_at" - ); - assert!( - !state.ready_queue.contains(&action_result.node_id), - "running action should be removed from ready_queue" - ); - assert!(state.consume_graph_dirty_for_durable_execution()); - - state - .mark_completed(action_result.node_id) - .expect("mark completed"); - let completed_at = state - .nodes - .get(&action_result.node_id) - .and_then(|node| node.completed_at); - assert!( - completed_at.is_some(), - "completed action should record completed_at" - ); - assert!( - completed_at >= started_at, - "completed_at should be at or after started_at" - ); - assert!(state.consume_graph_dirty_for_durable_execution()); - } -} diff --git a/crates/waymark/src/waymark_core/runner/synthetic_exceptions.rs b/crates/waymark/src/waymark_core/runner/synthetic_exceptions.rs deleted file mode 100644 index df89b71f..00000000 --- 
a/crates/waymark/src/waymark_core/runner/synthetic_exceptions.rs +++ /dev/null @@ -1,90 +0,0 @@ -//! Synthetic exception helpers produced by Rust runtime coordination paths. - -use serde_json::Value; - -#[derive(Clone, Copy, Debug, PartialEq, Eq)] -pub(crate) enum SyntheticExceptionType { - ExecutorResume, - ActionTimeout, -} - -impl SyntheticExceptionType { - pub(crate) fn as_type_str(self) -> &'static str { - match self { - Self::ExecutorResume => "ExecutorResume", - Self::ActionTimeout => "ActionTimeout", - } - } - - fn from_type_str(value: &str) -> Option { - match value { - "ExecutorResume" => Some(Self::ExecutorResume), - "ActionTimeout" => Some(Self::ActionTimeout), - _ => None, - } - } - - pub(crate) fn from_value(value: &Value) -> Option { - let Value::Object(map) = value else { - return None; - }; - map.get("type") - .and_then(Value::as_str) - .and_then(Self::from_type_str) - } -} - -pub(crate) fn build_synthetic_exception_value( - exception_type: SyntheticExceptionType, - message: impl Into, - fields: Vec<(String, Value)>, -) -> Value { - let mut map = serde_json::Map::new(); - map.insert( - "type".to_string(), - Value::String(exception_type.as_type_str().to_string()), - ); - map.insert("message".to_string(), Value::String(message.into())); - for (key, value) in fields { - map.insert(key, value); - } - Value::Object(map) -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn synthetic_exception_from_value_happy_path() { - let value = serde_json::json!({"type": "ActionTimeout", "message": "x"}); - assert_eq!( - SyntheticExceptionType::from_value(&value), - Some(SyntheticExceptionType::ActionTimeout) - ); - } - - #[test] - fn build_synthetic_exception_value_happy_path() { - let value = build_synthetic_exception_value( - SyntheticExceptionType::ExecutorResume, - "resume", - vec![( - "attempt".to_string(), - Value::Number(serde_json::Number::from(2)), - )], - ); - let Value::Object(map) = value else { - panic!("expected object value"); - }; - 
assert_eq!( - map.get("type"), - Some(&Value::String("ExecutorResume".to_string())) - ); - assert_eq!( - map.get("message"), - Some(&Value::String("resume".to_string())) - ); - assert_eq!(map.get("attempt"), Some(&Value::Number(2.into()))); - } -} diff --git a/crates/waymark/src/waymark_core/runner/value_visitor.rs b/crates/waymark/src/waymark_core/runner/value_visitor.rs deleted file mode 100644 index 82f02db1..00000000 --- a/crates/waymark/src/waymark_core/runner/value_visitor.rs +++ /dev/null @@ -1,533 +0,0 @@ -//! Shared ValueExpr visitors for traversal, resolution, and evaluation. - -use std::collections::{HashMap, HashSet}; - -use serde::{Deserialize, Serialize}; -use uuid::Uuid; - -use super::state::{ - ActionCallSpec, ActionResultValue, BinaryOpValue, DictEntryValue, DictValue, DotValue, - FunctionCallValue, IndexValue, ListValue, LiteralValue, SpreadValue, UnaryOpValue, - VariableValue, -}; - -#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] -#[serde(tag = "type", content = "data")] -pub enum ValueExpr { - Literal(LiteralValue), - Variable(VariableValue), - ActionResult(ActionResultValue), - BinaryOp(BinaryOpValue), - UnaryOp(UnaryOpValue), - List(ListValue), - Dict(DictValue), - Index(IndexValue), - Dot(DotValue), - FunctionCall(FunctionCallValue), - Spread(SpreadValue), -} - -/// Resolve variables inside a ValueExpr tree without executing actions. -/// -/// Example IR: -/// - y = x + 1 (where x -> LiteralValue(2)) -/// Produces BinaryOpValue(LiteralValue(2), +, LiteralValue(1)). 
-pub struct ValueExprResolver<'a> { - resolve_variable: &'a dyn Fn(&str, &mut HashSet) -> ValueExpr, - seen: &'a mut HashSet, -} - -impl<'a> ValueExprResolver<'a> { - pub fn new( - resolve_variable: &'a dyn Fn(&str, &mut HashSet) -> ValueExpr, - seen: &'a mut HashSet, - ) -> Self { - Self { - resolve_variable, - seen, - } - } - - pub fn visit(&mut self, expr: &ValueExpr) -> ValueExpr { - match expr { - ValueExpr::Literal(value) => ValueExpr::Literal(value.clone()), - ValueExpr::Variable(value) => (self.resolve_variable)(&value.name, self.seen), - ValueExpr::ActionResult(value) => ValueExpr::ActionResult(value.clone()), - ValueExpr::BinaryOp(value) => ValueExpr::BinaryOp(BinaryOpValue { - left: Box::new(self.visit(&value.left)), - op: value.op, - right: Box::new(self.visit(&value.right)), - }), - ValueExpr::UnaryOp(value) => ValueExpr::UnaryOp(UnaryOpValue { - op: value.op, - operand: Box::new(self.visit(&value.operand)), - }), - ValueExpr::List(value) => ValueExpr::List(ListValue { - elements: value.elements.iter().map(|item| self.visit(item)).collect(), - }), - ValueExpr::Dict(value) => ValueExpr::Dict(DictValue { - entries: value - .entries - .iter() - .map(|entry| DictEntryValue { - key: self.visit(&entry.key), - value: self.visit(&entry.value), - }) - .collect(), - }), - ValueExpr::Index(value) => ValueExpr::Index(IndexValue { - object: Box::new(self.visit(&value.object)), - index: Box::new(self.visit(&value.index)), - }), - ValueExpr::Dot(value) => ValueExpr::Dot(DotValue { - object: Box::new(self.visit(&value.object)), - attribute: value.attribute.clone(), - }), - ValueExpr::FunctionCall(value) => ValueExpr::FunctionCall(FunctionCallValue { - name: value.name.clone(), - args: value.args.iter().map(|arg| self.visit(arg)).collect(), - kwargs: value - .kwargs - .iter() - .map(|(name, arg)| (name.clone(), self.visit(arg))) - .collect(), - global_function: value.global_function, - }), - ValueExpr::Spread(value) => { - let kwargs = value - .action - .kwargs - 
.iter() - .map(|(name, arg)| (name.clone(), self.visit(arg))) - .collect::>(); - let action = ActionCallSpec { - action_name: value.action.action_name.clone(), - module_name: value.action.module_name.clone(), - kwargs, - }; - ValueExpr::Spread(SpreadValue { - collection: Box::new(self.visit(&value.collection)), - loop_var: value.loop_var.clone(), - action, - }) - } - } - } -} - -/// Collect execution node ids that supply data to a ValueExpr tree. -/// -/// Example IR: -/// - total = a + @sum(values) -/// Returns the node ids that last defined `a` and the action node for sum(). -pub struct ValueExprSourceCollector<'a> { - resolve_variable: &'a dyn Fn(&str) -> Option, -} - -impl<'a> ValueExprSourceCollector<'a> { - pub fn new(resolve_variable: &'a dyn Fn(&str) -> Option) -> Self { - Self { resolve_variable } - } - - pub fn visit(&self, expr: &ValueExpr) -> HashSet { - match expr { - ValueExpr::Literal(_) => HashSet::new(), - ValueExpr::Variable(value) => { - (self.resolve_variable)(&value.name).into_iter().collect() - } - ValueExpr::ActionResult(value) => [value.node_id].into_iter().collect(), - ValueExpr::BinaryOp(value) => { - let mut sources = self.visit(&value.left); - sources.extend(self.visit(&value.right)); - sources - } - ValueExpr::UnaryOp(value) => self.visit(&value.operand), - ValueExpr::List(value) => { - let mut sources = HashSet::new(); - for item in &value.elements { - sources.extend(self.visit(item)); - } - sources - } - ValueExpr::Dict(value) => { - let mut sources = HashSet::new(); - for entry in &value.entries { - sources.extend(self.visit(&entry.key)); - sources.extend(self.visit(&entry.value)); - } - sources - } - ValueExpr::Index(value) => { - let mut sources = self.visit(&value.object); - sources.extend(self.visit(&value.index)); - sources - } - ValueExpr::Dot(value) => self.visit(&value.object), - ValueExpr::FunctionCall(value) => { - let mut sources = HashSet::new(); - for arg in &value.args { - sources.extend(self.visit(arg)); - } - for arg 
in value.kwargs.values() { - sources.extend(self.visit(arg)); - } - sources - } - ValueExpr::Spread(value) => { - let mut sources = self.visit(&value.collection); - for arg in value.action.kwargs.values() { - sources.extend(self.visit(arg)); - } - sources - } - } - } -} - -/// Evaluate ValueExpr nodes into concrete Python values. -/// -/// Example: -/// - BinaryOpValue(VariableValue("a"), +, LiteralValue(1)) becomes the -/// current value of a plus 1. -pub struct ValueExprEvaluator<'a, E> { - resolve_variable: &'a dyn Fn(&str) -> Result, - resolve_action_result: &'a dyn Fn(&ActionResultValue) -> Result, - resolve_function_call: &'a ResolveFunctionCall<'a, E>, - apply_binary: - &'a dyn Fn(i32, serde_json::Value, serde_json::Value) -> Result, - apply_unary: &'a dyn Fn(i32, serde_json::Value) -> Result, - error_factory: &'a dyn Fn(&str) -> E, -} - -type ResolveFunctionCall<'a, E> = dyn Fn( - &FunctionCallValue, - Vec, - HashMap, - ) -> Result - + 'a; - -impl<'a, E> ValueExprEvaluator<'a, E> { - pub fn new( - resolve_variable: &'a dyn Fn(&str) -> Result, - resolve_action_result: &'a dyn Fn(&ActionResultValue) -> Result, - resolve_function_call: &'a ResolveFunctionCall<'a, E>, - apply_binary: &'a dyn Fn( - i32, - serde_json::Value, - serde_json::Value, - ) -> Result, - apply_unary: &'a dyn Fn(i32, serde_json::Value) -> Result, - error_factory: &'a dyn Fn(&str) -> E, - ) -> Self { - Self { - resolve_variable, - resolve_action_result, - resolve_function_call, - apply_binary, - apply_unary, - error_factory, - } - } - - pub fn visit(&self, expr: &ValueExpr) -> Result { - match expr { - ValueExpr::Literal(value) => Ok(value.value.clone()), - ValueExpr::Variable(value) => (self.resolve_variable)(&value.name), - ValueExpr::ActionResult(value) => (self.resolve_action_result)(value), - ValueExpr::BinaryOp(value) => { - let left = self.visit(&value.left)?; - let right = self.visit(&value.right)?; - (self.apply_binary)(value.op, left, right) - } - ValueExpr::UnaryOp(value) => { - 
let operand = self.visit(&value.operand)?; - (self.apply_unary)(value.op, operand) - } - ValueExpr::List(value) => { - let mut items = Vec::with_capacity(value.elements.len()); - for item in &value.elements { - items.push(self.visit(item)?); - } - Ok(serde_json::Value::Array(items)) - } - ValueExpr::Dict(value) => { - let mut map = serde_json::Map::with_capacity(value.entries.len()); - for entry in &value.entries { - let key_value = self.visit(&entry.key)?; - let key = key_value - .as_str() - .map(|value| value.to_string()) - .unwrap_or_else(|| key_value.to_string()); - let entry_value = self.visit(&entry.value)?; - map.insert(key, entry_value); - } - Ok(serde_json::Value::Object(map)) - } - ValueExpr::Index(value) => { - let object = self.visit(&value.object)?; - let index = self.visit(&value.index)?; - match (object, index) { - (serde_json::Value::Array(items), serde_json::Value::Number(idx)) => { - let idx = idx.as_i64().unwrap_or(-1); - if idx < 0 || idx as usize >= items.len() { - return Err((self.error_factory)("index out of range")); - } - Ok(items[idx as usize].clone()) - } - (serde_json::Value::Object(map), serde_json::Value::String(key)) => map - .get(&key) - .cloned() - .or_else(|| lookup_exception_value(&map, &key)) - .ok_or_else(|| (self.error_factory)("dict has no key")), - _ => Err((self.error_factory)("unsupported index operation")), - } - } - ValueExpr::Dot(value) => { - let object = self.visit(&value.object)?; - if let serde_json::Value::Object(map) = object { - return map - .get(&value.attribute) - .cloned() - .or_else(|| lookup_exception_value(&map, &value.attribute)) - .ok_or_else(|| (self.error_factory)("dict has no key")); - } - Err((self.error_factory)("attribute not found")) - } - ValueExpr::FunctionCall(value) => { - let mut args = Vec::with_capacity(value.args.len()); - for arg in &value.args { - args.push(self.visit(arg)?); - } - let mut kwargs = HashMap::new(); - for (name, arg) in &value.kwargs { - kwargs.insert(name.clone(), 
self.visit(arg)?); - } - (self.resolve_function_call)(value, args, kwargs) - } - ValueExpr::Spread(_) => Err((self.error_factory)( - "cannot replay unresolved spread expression", - )), - } - } -} - -fn lookup_exception_value( - map: &serde_json::Map, - key: &str, -) -> Option { - if !(map.contains_key("type") && map.contains_key("message")) { - return None; - } - map.get("values") - .and_then(|value| value.as_object()) - .and_then(|values| values.get(key)) - .cloned() -} - -/// Recursively resolve variable references throughout a value tree. -/// -/// Use this as the core materialization step before assignment storage. -/// -/// Example IR: -/// - z = (x + y) * 2 -/// The tree walk replaces VariableValue("x")/("y") with their latest -/// symbolic definitions before storing z. -pub fn resolve_value_tree( - value: &ValueExpr, - resolve_variable: &dyn Fn(&str, &mut HashSet) -> ValueExpr, -) -> ValueExpr { - let mut seen = HashSet::new(); - let mut resolver = ValueExprResolver::new(resolve_variable, &mut seen); - resolver.visit(value) -} - -/// Find execution node ids that supply data to the given value. -/// -/// Example IR: -/// - total = a + @sum(values) -/// Returns the latest assignment node for a and the action node for sum(). 
-pub fn collect_value_sources( - value: &ValueExpr, - resolve_variable: &dyn Fn(&str) -> Option, -) -> HashSet { - let collector = ValueExprSourceCollector::new(resolve_variable); - collector.visit(value) -} - -#[cfg(test)] -mod tests { - use std::collections::{HashMap, HashSet}; - - use serde_json::Value; - use uuid::Uuid; - - use super::*; - use crate::messages::ast as ir; - - fn literal_int(value: i64) -> ValueExpr { - ValueExpr::Literal(LiteralValue { - value: Value::Number(value.into()), - }) - } - - #[test] - fn test_value_expr_resolver_visit_happy_path() { - let mut seen = HashSet::new(); - let resolve = |name: &str, _: &mut HashSet| { - if name == "x" { - literal_int(3) - } else { - literal_int(0) - } - }; - let mut resolver = ValueExprResolver::new(&resolve, &mut seen); - let expr = ValueExpr::BinaryOp(BinaryOpValue { - left: Box::new(ValueExpr::Variable(VariableValue { - name: "x".to_string(), - })), - op: ir::BinaryOperator::BinaryOpAdd as i32, - right: Box::new(literal_int(1)), - }); - - let resolved = resolver.visit(&expr); - match resolved { - ValueExpr::BinaryOp(value) => { - assert!(matches!(*value.left, ValueExpr::Literal(_))); - assert!(matches!(*value.right, ValueExpr::Literal(_))); - } - other => panic!("expected binary value, got {other:?}"), - } - } - - #[test] - fn test_value_expr_source_collector_visit_happy_path() { - let variable_source = Uuid::new_v4(); - let action_source = Uuid::new_v4(); - let resolve = |name: &str| { - if name == "x" { - Some(variable_source) - } else { - None - } - }; - let collector = ValueExprSourceCollector::new(&resolve); - let expr = ValueExpr::BinaryOp(BinaryOpValue { - left: Box::new(ValueExpr::Variable(VariableValue { - name: "x".to_string(), - })), - op: ir::BinaryOperator::BinaryOpAdd as i32, - right: Box::new(ValueExpr::ActionResult(ActionResultValue { - node_id: action_source, - action_name: "fetch".to_string(), - iteration_index: None, - result_index: None, - })), - }); - - let sources = 
collector.visit(&expr); - assert!(sources.contains(&variable_source)); - assert!(sources.contains(&action_source)); - } - - #[test] - fn test_value_expr_evaluator_visit_happy_path() { - let resolve_variable = |name: &str| -> Result { - if name == "x" { - Ok(Value::Number(2.into())) - } else { - Err(format!("unknown variable: {name}")) - } - }; - let resolve_action_result = - |_value: &ActionResultValue| -> Result { Ok(Value::Number(0.into())) }; - let resolve_function_call = - |_call: &FunctionCallValue, - args: Vec, - _kwargs: HashMap| - -> Result { Ok(Value::Number((args.len() as i64).into())) }; - let apply_binary = |_op: i32, left: Value, right: Value| -> Result { - match (left.as_i64(), right.as_i64()) { - (Some(left), Some(right)) => Ok(Value::Number((left + right).into())), - _ => Err("bad operands".to_string()), - } - }; - let apply_unary = |_op: i32, value: Value| -> Result { - Ok(Value::Bool(!value.as_bool().unwrap_or(false))) - }; - let error_factory = |message: &str| message.to_string(); - - let evaluator = ValueExprEvaluator::new( - &resolve_variable, - &resolve_action_result, - &resolve_function_call, - &apply_binary, - &apply_unary, - &error_factory, - ); - let expr = ValueExpr::BinaryOp(BinaryOpValue { - left: Box::new(ValueExpr::Variable(VariableValue { - name: "x".to_string(), - })), - op: ir::BinaryOperator::BinaryOpAdd as i32, - right: Box::new(literal_int(5)), - }); - - let value = evaluator.visit(&expr).expect("evaluate expression"); - assert_eq!(value, Value::Number(7.into())); - } - - #[test] - fn test_resolve_value_tree_happy_path() { - let expr = ValueExpr::List(ListValue { - elements: vec![ValueExpr::Variable(VariableValue { - name: "user_id".to_string(), - })], - }); - let resolve = |name: &str, _seen: &mut HashSet| { - if name == "user_id" { - ValueExpr::Literal(LiteralValue { - value: Value::String("abc".to_string()), - }) - } else { - ValueExpr::Literal(LiteralValue { value: Value::Null }) - } - }; - - let resolved = 
resolve_value_tree(&expr, &resolve); - match resolved { - ValueExpr::List(list) => { - assert_eq!(list.elements.len(), 1); - assert!(matches!(list.elements[0], ValueExpr::Literal(_))); - } - other => panic!("expected list value, got {other:?}"), - } - } - - #[test] - fn test_collect_value_sources_happy_path() { - let source_a = Uuid::new_v4(); - let source_b = Uuid::new_v4(); - let expr = ValueExpr::FunctionCall(FunctionCallValue { - name: "sum".to_string(), - args: vec![ValueExpr::Variable(VariableValue { - name: "a".to_string(), - })], - kwargs: HashMap::from([( - "other".to_string(), - ValueExpr::ActionResult(ActionResultValue { - node_id: source_b, - action_name: "compute".to_string(), - iteration_index: None, - result_index: None, - }), - )]), - global_function: None, - }); - let resolve = |name: &str| if name == "a" { Some(source_a) } else { None }; - - let sources = collect_value_sources(&expr, &resolve); - assert_eq!(sources.len(), 2); - assert!(sources.contains(&source_a)); - assert!(sources.contains(&source_b)); - } -} diff --git a/crates/waymark/src/webapp/server.rs b/crates/waymark/src/webapp/server.rs index 43818ca3..9afda350 100644 --- a/crates/waymark/src/webapp/server.rs +++ b/crates/waymark/src/webapp/server.rs @@ -16,12 +16,15 @@ use tera::{Context as TeraContext, Tera}; use tokio::net::TcpListener; use tracing::{error, info}; use uuid::Uuid; +use waymark_webapp_backend::WebappBackend; +use waymark_webapp_core::WorkerStatus; -use super::types::{ +use waymark_webapp_core::{ ActionLogsResponse, FilterValuesResponse, HealthResponse, InstanceExportInfo, TimelineEntry, - WebappConfig, WorkflowInstanceExport, WorkflowRunDataResponse, + WorkflowInstanceExport, WorkflowRunDataResponse, }; -use crate::backends::WebappBackend; + +use crate::WebappConfig; // Embed templates at compile time const TEMPLATE_BASE: &str = include_str!("../../templates/base.html"); @@ -367,7 +370,7 @@ async fn get_action_logs( let logs: Vec<_> = timeline .into_iter() .filter(|e| 
e.action_id == action_id_str) - .map(|e| super::types::ActionLogEntry { + .map(|e| waymark_webapp_core::ActionLogEntry { action_id: e.action_id, action_name: e.action_name, module_name: e.module_name, @@ -736,7 +739,7 @@ struct InvocationRow { fn render_invocations_page( templates: &Tera, - instances: &[super::types::InstanceSummary], + instances: &[waymark_webapp_core::InstanceSummary], current_page: i64, total_pages: i64, search_query: Option, @@ -812,8 +815,8 @@ struct GraphNode { fn render_instance_detail_page( templates: &Tera, - instance: &super::types::InstanceDetail, - graph: Option, + instance: &waymark_webapp_core::InstanceDetail, + graph: Option, ) -> String { let graph_data = graph .as_ref() @@ -843,8 +846,8 @@ fn render_instance_detail_page( render_template(templates, "workflow_run.html", &context) } -fn build_graph_data(graph: &super::types::ExecutionGraphView) -> GraphData { - let action_nodes: Vec<&super::types::ExecutionNodeView> = graph +fn build_graph_data(graph: &waymark_webapp_core::ExecutionGraphView) -> GraphData { + let action_nodes: Vec<&waymark_webapp_core::ExecutionNodeView> = graph .nodes .iter() .filter(|node| is_action_node(&node.node_type)) @@ -1055,7 +1058,7 @@ struct ScheduleRow { fn render_schedules_page( templates: &Tera, - schedules: &[super::types::ScheduleSummary], + schedules: &[waymark_webapp_core::ScheduleSummary], current_page: i64, total_pages: i64, total_count: i64, @@ -1136,8 +1139,8 @@ struct ScheduleInvocationRow { fn render_schedule_detail_page( templates: &Tera, - schedule: &super::types::ScheduleDetail, - invocations: &[super::types::ScheduleInvocationSummary], + schedule: &waymark_webapp_core::ScheduleDetail, + invocations: &[waymark_webapp_core::ScheduleInvocationSummary], current_page: i64, total_pages: i64, ) -> String { @@ -1234,11 +1237,7 @@ struct WorkerInstanceRowView { updated_at: String, } -fn render_workers_page( - templates: &Tera, - statuses: &[super::WorkerStatus], - window_minutes: i64, -) -> String { 
+fn render_workers_page(templates: &Tera, statuses: &[WorkerStatus], window_minutes: i64) -> String { use crate::pool_status::PoolTimeSeries; // Build action rows @@ -1373,13 +1372,15 @@ mod tests { use sqlx::postgres::PgPoolOptions; use tower::util::ServiceExt; use uuid::Uuid; + use waymark_backend_memory::MemoryBackend; + use waymark_backend_postgres::PostgresBackend; + use waymark_webapp_backend::WebappBackend; + use waymark_worker_status_backend::{WorkerStatusBackend as _, WorkerStatusUpdate}; use super::{WebappState, build_graph_data, build_router, init_templates}; - use crate::backends::{ - MemoryBackend, PostgresBackend, WebappBackend, WorkerStatusBackend, WorkerStatusUpdate, - }; - use crate::test_support::postgres_setup; - use crate::webapp::{ExecutionEdgeView, ExecutionGraphView, ExecutionNodeView}; + + use waymark_test_support::postgres_setup; + use waymark_webapp_core::{ExecutionEdgeView, ExecutionGraphView, ExecutionNodeView}; #[test] fn build_graph_data_projects_internal_nodes_to_action_dependencies() { diff --git a/crates/waymark/src/webapp/types.rs b/crates/waymark/src/webapp/types.rs index 7805c428..0b2ec6e8 100644 --- a/crates/waymark/src/webapp/types.rs +++ b/crates/waymark/src/webapp/types.rs @@ -1,8 +1,4 @@ -//! Shared types for the webapp. - -use chrono::{DateTime, Utc}; -use serde::{Deserialize, Serialize}; -use uuid::Uuid; +//! Shared types for the webapp server. /// Configuration for the webapp server. #[derive(Debug, Clone)] @@ -55,245 +51,3 @@ impl WebappConfig { format!("{}:{}", self.host, self.port) } } - -/// Instance status. 
-#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] -#[serde(rename_all = "lowercase")] -pub enum InstanceStatus { - Queued, - Running, - Completed, - Failed, -} - -impl std::fmt::Display for InstanceStatus { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - Self::Queued => write!(f, "queued"), - Self::Running => write!(f, "running"), - Self::Completed => write!(f, "completed"), - Self::Failed => write!(f, "failed"), - } - } -} - -/// Summary of a workflow instance for listing. -#[derive(Debug, Clone, Serialize)] -pub struct InstanceSummary { - pub id: Uuid, - pub entry_node: Uuid, - pub created_at: DateTime, - pub status: InstanceStatus, - pub workflow_name: Option, - pub input_preview: String, -} - -/// Full details of a workflow instance. -#[derive(Debug, Clone, Serialize)] -pub struct InstanceDetail { - pub id: Uuid, - pub entry_node: Uuid, - pub created_at: DateTime, - pub status: InstanceStatus, - pub workflow_name: Option, - pub input_payload: String, - pub result_payload: String, - pub error_payload: Option, -} - -/// Node in the execution graph for display. -#[derive(Debug, Clone, Serialize)] -pub struct ExecutionNodeView { - pub id: String, - pub node_type: String, - pub label: String, - pub status: String, - pub action_name: Option, - pub module_name: Option, -} - -/// Edge in the execution graph for display. -#[derive(Debug, Clone, Serialize)] -pub struct ExecutionEdgeView { - pub source: String, - pub target: String, - pub edge_type: String, -} - -/// Execution graph data for rendering. -#[derive(Debug, Clone, Serialize)] -pub struct ExecutionGraphView { - pub nodes: Vec, - pub edges: Vec, -} - -/// Timeline entry for an action execution. 
-#[derive(Debug, Clone, Serialize)] -pub struct TimelineEntry { - pub action_id: String, - pub action_name: String, - pub module_name: Option, - pub status: String, - pub attempt_number: i32, - pub dispatched_at: Option, - pub completed_at: Option, - pub duration_ms: Option, - pub request_preview: String, - pub response_preview: String, - pub error: Option, -} - -/// Action log entry with full details. -#[derive(Debug, Clone, Serialize)] -pub struct ActionLogEntry { - pub action_id: String, - pub action_name: String, - pub module_name: Option, - pub status: String, - pub attempt_number: i32, - pub dispatched_at: Option, - pub completed_at: Option, - pub duration_ms: Option, - pub request: String, - pub response: String, - pub error: Option, -} - -/// Response for the workflow run data API. -#[derive(Debug, Serialize)] -pub struct WorkflowRunDataResponse { - pub nodes: Vec, - pub timeline: Vec, - pub page: i64, - pub per_page: i64, - pub total: i64, - pub has_more: bool, -} - -/// Response for action logs API. -#[derive(Debug, Serialize)] -pub struct ActionLogsResponse { - pub logs: Vec, -} - -/// Filter values response. -#[derive(Debug, Serialize)] -pub struct FilterValuesResponse { - pub values: Vec, -} - -/// Health check response. -#[derive(Debug, Serialize)] -pub struct HealthResponse { - pub status: &'static str, - pub service: &'static str, -} - -/// Export format for a workflow instance. -#[derive(Debug, Serialize)] -pub struct WorkflowInstanceExport { - pub export_version: &'static str, - pub exported_at: String, - pub instance: InstanceExportInfo, - pub nodes: Vec, - pub timeline: Vec, -} - -/// Full worker status for webapp display. 
-#[derive(Debug, Clone)] -pub struct WorkerStatus { - pub pool_id: Uuid, - pub active_workers: i32, - pub throughput_per_min: f64, - pub actions_per_sec: f64, - pub total_completed: i64, - pub last_action_at: Option>, - pub updated_at: DateTime, - pub median_dequeue_ms: Option, - pub median_handling_ms: Option, - pub dispatch_queue_size: Option, - pub total_in_flight: Option, - pub median_instance_duration_secs: Option, - pub active_instance_count: i32, - pub total_instances_completed: i64, - pub instances_per_sec: f64, - pub instances_per_min: f64, - pub time_series: Option>, -} - -/// Worker action stats row for display. -#[derive(Debug, Clone)] -pub struct WorkerActionRow { - pub pool_id: String, - pub active_workers: i64, - pub actions_per_sec: String, - pub throughput_per_min: i64, - pub total_completed: i64, - pub median_dequeue_ms: Option, - pub median_handling_ms: Option, - pub last_action_at: Option, - pub updated_at: String, -} - -/// Aggregate worker stats for overview cards. -#[derive(Debug, Clone)] -pub struct WorkerAggregateStats { - pub active_worker_count: i64, - pub actions_per_sec: String, - pub total_in_flight: i64, - pub total_queue_depth: i64, -} - -/// Instance info for export. -#[derive(Debug, Serialize)] -pub struct InstanceExportInfo { - pub id: String, - pub status: String, - pub created_at: String, - pub input_payload: String, - pub result_payload: String, -} - -/// Schedule summary for listing. -#[derive(Debug, Clone, Serialize)] -pub struct ScheduleSummary { - pub id: String, - pub workflow_name: String, - pub schedule_name: String, - pub schedule_type: String, - pub cron_expression: Option, - pub interval_seconds: Option, - pub status: String, - pub next_run_at: Option, - pub last_run_at: Option, - pub created_at: String, -} - -/// Full schedule details. 
-#[derive(Debug, Clone, Serialize)] -pub struct ScheduleDetail { - pub id: String, - pub workflow_name: String, - pub schedule_name: String, - pub schedule_type: String, - pub cron_expression: Option, - pub interval_seconds: Option, - pub jitter_seconds: i64, - pub status: String, - pub next_run_at: Option, - pub last_run_at: Option, - pub last_instance_id: Option, - pub created_at: String, - pub updated_at: String, - pub priority: i32, - pub allow_duplicate: bool, - pub input_payload: Option, -} - -/// Invocation summary row for schedule detail pages. -#[derive(Debug, Clone, Serialize)] -pub struct ScheduleInvocationSummary { - pub id: Uuid, - pub created_at: DateTime, - pub status: InstanceStatus, -} diff --git a/crates/waymark/src/workers/status.rs b/crates/waymark/src/workers/status.rs index c9428602..03fec671 100644 --- a/crates/waymark/src/workers/status.rs +++ b/crates/waymark/src/workers/status.rs @@ -9,8 +9,8 @@ use std::time::Duration; use chrono::{DateTime, Utc}; use tracing::{info, warn}; use uuid::Uuid; +use waymark_worker_status_backend::{WorkerStatusBackend, WorkerStatusUpdate}; -use crate::backends::{WorkerStatusBackend, WorkerStatusUpdate}; use crate::pool_status::{PoolTimeSeries, TimeSeriesEntry}; #[derive(Debug, Clone)]