diff --git a/.gitignore b/.gitignore
index 1cf1d602..5a5e2a84 100644
--- a/.gitignore
+++ b/.gitignore
@@ -26,6 +26,9 @@ share/python-wheels/
 *.egg
 MANIFEST
 
+# Re-include crates/lib so broader ignore patterns do not exclude the library crates.
+!/crates/lib
+
 # PyInstaller
 #  Usually these files are written by a python script from a template
 #  before PyInstaller builds the exe, so as to inject date/other infos into it.
diff --git a/Cargo.lock b/Cargo.lock
index bf98727e..08e29a10 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1517,9 +1517,9 @@ checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273"
 
 [[package]]
 name = "metrics"
-version = "0.24.2"
+version = "0.24.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "25dea7ac8057892855ec285c440160265225438c3c45072613c25a4b26e98ef5"
+checksum = "5d5312e9ba3771cfa961b585728215e3d972c950a3eed9252aa093d6301277e8"
 dependencies = [
  "ahash",
  "portable-atomic",
@@ -1847,9 +1847,9 @@ checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c"
 
 [[package]]
 name = "portable-atomic"
-version = "1.11.1"
+version = "1.13.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f84267b20a16ea918e43c6a88433c2d54fa145c92a811b5b047ccbe153674483"
+checksum = "c33a9471896f1c69cecef8d20cbe2f7accd12527ce60845ff44c153bb2a21b49"
 
 [[package]]
 name = "potential_utf"
@@ -2529,7 +2529,7 @@ dependencies = [
  "serde_json",
  "sha2",
  "smallvec",
- "thiserror 2.0.17",
+ "thiserror",
  "tokio",
  "tokio-stream",
  "tracing",
@@ -2614,7 +2614,7 @@ dependencies = [
  "smallvec",
  "sqlx-core",
  "stringprep",
- "thiserror 2.0.17",
+ "thiserror",
  "tracing",
  "uuid",
  "whoami",
@@ -2653,7 +2653,7 @@ dependencies = [
  "smallvec",
  "sqlx-core",
  "stringprep",
- "thiserror 2.0.17",
+ "thiserror",
  "tracing",
  "uuid",
  "whoami",
@@ -2679,7 +2679,7 @@ dependencies = [
  "serde",
  "serde_urlencoded",
  "sqlx-core",
- "thiserror 2.0.17",
+ "thiserror",
  "tracing",
  "url",
  "uuid",
@@ -2783,33 +2783,13 @@ dependencies = [
  "unicode-segmentation",
 ]
 
-[[package]]
-name = "thiserror"
-version = "1.0.69"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52"
-dependencies = [
- "thiserror-impl 1.0.69",
-]
-
 [[package]]
 name = "thiserror"
 version = "2.0.17"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "f63587ca0f12b72a0600bcba1d40081f830876000bb46dd2337a3051618f4fc8"
 dependencies = [
- "thiserror-impl 2.0.17",
-]
-
-[[package]]
-name = "thiserror-impl"
-version = "1.0.69"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1"
-dependencies = [
- "proc-macro2",
- "quote",
- "syn",
+ "thiserror-impl",
 ]
 
 [[package]]
@@ -3375,9 +3355,10 @@ dependencies = [
  "sha2",
  "sqlx",
  "tera",
- "thiserror 1.0.69",
+ "thiserror",
  "tokio",
  "tokio-stream",
+ "tokio-util",
  "tonic 0.11.0",
  "tonic-health",
  "tower 0.5.2",
@@ -3385,9 +3366,123 @@ dependencies = [
  "tracing-chrome",
  "tracing-subscriber",
  "uuid",
+ "waymark-backend-fault-injection",
+ "waymark-backend-memory",
+ "waymark-backend-postgres",
+ "waymark-backend-postgres-migrations",
+ "waymark-backends-core",
+ "waymark-core-backend",
  "waymark-dag",
- "waymark-observability-macros",
+ "waymark-garbage-collector-backend",
+ "waymark-integration-support",
+ "waymark-ir-parser",
+ "waymark-observability",
  "waymark-proto",
+ "waymark-runner",
+ "waymark-runner-state",
+ "waymark-scheduler-backend",
+ "waymark-scheduler-core",
+ "waymark-test-support",
+ "waymark-webapp-backend",
+ "waymark-webapp-core",
+ "waymark-worker-status-backend",
+ "waymark-workflow-registry-backend",
+]
+
+[[package]]
+name = "waymark-backend-fault-injection"
+version = "0.1.0"
+dependencies = [
+ "async-trait",
+ "uuid",
+ "waymark-backend-memory",
+ "waymark-backends-core",
+ "waymark-core-backend",
+ "waymark-workflow-registry-backend",
+]
+
+[[package]]
+name = "waymark-backend-memory"
+version = "0.1.0"
+dependencies = [
+ "async-trait",
+ "chrono",
+ "rmp-serde",
+ "serde_json",
+ "uuid",
+ "waymark-backends-core",
+ "waymark-core-backend",
+ "waymark-garbage-collector-backend",
+ "waymark-scheduler-backend",
+ "waymark-scheduler-core",
+ "waymark-webapp-backend",
+ "waymark-webapp-core",
+ "waymark-worker-status-backend",
+ "waymark-workflow-registry-backend",
+]
+
+[[package]]
+name = "waymark-backend-postgres"
+version = "0.1.0"
+dependencies = [
+ "async-trait",
+ "chrono",
+ "prost 0.12.6",
+ "rmp-serde",
+ "serde",
+ "serde_json",
+ "serial_test",
+ "sqlx",
+ "tokio",
+ "tracing",
+ "uuid",
+ "waymark-backend-postgres-migrations",
+ "waymark-backends-core",
+ "waymark-core-backend",
+ "waymark-dag",
+ "waymark-garbage-collector-backend",
+ "waymark-ir-parser",
+ "waymark-observability",
+ "waymark-proto",
+ "waymark-runner",
+ "waymark-runner-state",
+ "waymark-scheduler-backend",
+ "waymark-scheduler-core",
+ "waymark-test-support",
+ "waymark-webapp-backend",
+ "waymark-webapp-core",
+ "waymark-worker-status-backend",
+ "waymark-workflow-registry-backend",
+]
+
+[[package]]
+name = "waymark-backend-postgres-migrations"
+version = "0.1.0"
+dependencies = [
+ "sqlx",
+]
+
+[[package]]
+name = "waymark-backends-core"
+version = "0.1.0"
+dependencies = [
+ "serde_json",
+ "sqlx",
+ "thiserror",
+]
+
+[[package]]
+name = "waymark-core-backend"
+version = "0.1.0"
+dependencies = [
+ "async-trait",
+ "chrono",
+ "serde",
+ "serde_json",
+ "uuid",
+ "waymark-backends-core",
+ "waymark-dag",
+ "waymark-runner-state",
 ]
 
 [[package]]
@@ -3396,9 +3491,9 @@ version = "0.1.0"
 dependencies = [
  "rustc-hash",
  "serde",
- "thiserror 1.0.69",
+ "thiserror",
  "uuid",
- "waymark",
+ "waymark-ir-parser",
  "waymark-proto",
 ]
 
@@ -3415,7 +3510,47 @@ dependencies = [
  "tokio",
  "uuid",
  "waymark",
+ "waymark-backend-memory",
+ "waymark-core-backend",
  "waymark-dag",
+ "waymark-ir-parser",
+ "waymark-runner-state",
+ "waymark-workflow-registry-backend",
+]
+
+[[package]]
+name = "waymark-garbage-collector-backend"
+version = "0.1.0"
+dependencies = [
+ "async-trait",
+ "chrono",
+ "waymark-backends-core",
+]
+
+[[package]]
+name = "waymark-integration-support"
+version = "0.1.0"
+dependencies = [
+ "anyhow",
+ "sqlx",
+ "tokio",
+ "waymark-backend-postgres-migrations",
+]
+
+[[package]]
+name = "waymark-ir-parser"
+version = "0.1.0"
+dependencies = [
+ "regex",
+ "waymark-proto",
+]
+
+[[package]]
+name = "waymark-observability"
+version = "0.1.0"
+dependencies = [
+ "tracing",
+ "waymark-observability-macros",
 ]
 
 [[package]]
@@ -3438,6 +3573,105 @@ dependencies = [
  "tonic-build",
 ]
 
+[[package]]
+name = "waymark-runner"
+version = "0.1.0"
+dependencies = [
+ "chrono",
+ "rustc-hash",
+ "serde_json",
+ "thiserror",
+ "tracing",
+ "uuid",
+ "waymark-backend-memory",
+ "waymark-core-backend",
+ "waymark-dag",
+ "waymark-ir-parser",
+ "waymark-observability",
+ "waymark-proto",
+ "waymark-runner-state",
+]
+
+[[package]]
+name = "waymark-runner-state"
+version = "0.1.0"
+dependencies = [
+ "chrono",
+ "serde",
+ "serde_json",
+ "thiserror",
+ "uuid",
+ "waymark-dag",
+ "waymark-proto",
+]
+
+[[package]]
+name = "waymark-scheduler-backend"
+version = "0.1.0"
+dependencies = [
+ "async-trait",
+ "uuid",
+ "waymark-backends-core",
+ "waymark-scheduler-core",
+]
+
+[[package]]
+name = "waymark-scheduler-core"
+version = "0.1.0"
+dependencies = [
+ "chrono",
+ "cron",
+ "rand 0.8.5",
+ "serde",
+ "uuid",
+]
+
+[[package]]
+name = "waymark-test-support"
+version = "0.1.0"
+dependencies = [
+ "sqlx",
+ "waymark-integration-support",
+]
+
+[[package]]
+name = "waymark-webapp-backend"
+version = "0.1.0"
+dependencies = [
+ "async-trait",
+ "uuid",
+ "waymark-backends-core",
+ "waymark-webapp-core",
+]
+
+[[package]]
+name = "waymark-webapp-core"
+version = "0.1.0"
+dependencies = [
+ "chrono",
+ "serde",
+ "uuid",
+]
+
+[[package]]
+name = "waymark-worker-status-backend"
+version = "0.1.0"
+dependencies = [
+ "async-trait",
+ "chrono",
+ "uuid",
+ "waymark-backends-core",
+]
+
+[[package]]
+name = "waymark-workflow-registry-backend"
+version = "0.1.0"
+dependencies = [
+ "async-trait",
+ "uuid",
+ "waymark-backends-core",
+]
+
 [[package]]
 name = "webpki-roots"
 version = "0.26.11"
diff --git a/Cargo.toml b/Cargo.toml
index c75a7299..853e1067 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,23 +1,52 @@
 [workspace]
 resolver = "3"
-members = ["crates/*"]
+members = ["crates/lib/*", "crates/bin/*", "crates/waymark"]
 
 [workspace.dependencies]
 waymark = { path = "crates/waymark" }
-waymark-dag = { path = "crates/dag" }
-waymark-proto = { path = "crates/proto" }
+waymark-backend-fault-injection = { path = "crates/lib/backend-fault-injection" }
+waymark-backend-memory = { path = "crates/lib/backend-memory" }
+waymark-backend-postgres = { path = "crates/lib/backend-postgres" }
+waymark-backend-postgres-migrations = { path = "crates/lib/backend-postgres-migrations" }
+waymark-backends-core = { path = "crates/lib/backends-core" }
+waymark-core-backend = { path = "crates/lib/core-backend" }
+waymark-dag = { path = "crates/lib/dag" }
+waymark-garbage-collector-backend = { path = "crates/lib/garbage-collector-backend" }
+waymark-integration-support = { path = "crates/lib/integration-support" }
+waymark-ir-parser = { path = "crates/lib/ir-parser" }
+waymark-observability = { path = "crates/lib/observability" }
+waymark-observability-macros = { path = "crates/lib/observability-macros" }
+waymark-proto = { path = "crates/lib/proto" }
+waymark-runner = { path = "crates/lib/runner" }
+waymark-runner-state = { path = "crates/lib/runner-state" }
+waymark-scheduler-backend = { path = "crates/lib/scheduler-backend" }
+waymark-scheduler-core = { path = "crates/lib/scheduler-core" }
+waymark-test-support = { path = "crates/lib/test-support" }
+waymark-webapp-backend = { path = "crates/lib/webapp-backend" }
+waymark-webapp-core = { path = "crates/lib/webapp-core" }
+waymark-worker-status-backend = { path = "crates/lib/worker-status-backend" }
+waymark-workflow-registry-backend = { path = "crates/lib/workflow-registry-backend" }
 
 anyhow = "1"
+async-trait = "0.1"
+chrono = { version = "0.4", default-features = false }
 clap = "4.5"
+cron = "0.12"
 proptest = "1.9"
 prost = "0.12"
 prost-types = "0.12"
+rand = "0.8"
+regex = "1"
+rmp-serde = "1"
 rustc-hash = "2"
 serde = "1"
 serde_json = "1"
+serial_test = "2"
 sha2 = "0.10"
-thiserror = "1"
+sqlx = { version = "0.8", default-features = false }
+thiserror = "2"
 tokio = "1"
 tonic = "0.11"
 tonic-build = "0.11"
+tracing = "0.1"
 uuid = "1"
diff --git a/crates/fuzzer/Cargo.toml b/crates/bin/fuzzer/Cargo.toml
similarity index 65%
rename from crates/fuzzer/Cargo.toml
rename to crates/bin/fuzzer/Cargo.toml
index e31f9970..8c7c039d 100644
--- a/crates/fuzzer/Cargo.toml
+++ b/crates/bin/fuzzer/Cargo.toml
@@ -14,3 +14,8 @@ uuid = { workspace = true, features = ["serde", "v4"] }
 tokio = { workspace = true }
 waymark = { workspace = true }
 waymark-dag = { workspace = true }
+waymark-ir-parser = { workspace = true }
+waymark-runner-state = { workspace = true }
+waymark-backend-memory = { workspace = true }
+waymark-core-backend = { workspace = true }
+waymark-workflow-registry-backend = { workspace = true }
diff --git a/crates/fuzzer/src/bin/waymark-fuzz.rs b/crates/bin/fuzzer/src/bin/waymark-fuzz.rs
similarity index 100%
rename from crates/fuzzer/src/bin/waymark-fuzz.rs
rename to crates/bin/fuzzer/src/bin/waymark-fuzz.rs
diff --git a/crates/fuzzer/src/generator.rs b/crates/bin/fuzzer/src/generator.rs
similarity index 100%
rename from crates/fuzzer/src/generator.rs
rename to crates/bin/fuzzer/src/generator.rs
diff --git a/crates/fuzzer/src/harness.rs b/crates/bin/fuzzer/src/harness.rs
similarity index 96%
rename from crates/fuzzer/src/harness.rs
rename to crates/bin/fuzzer/src/harness.rs
index 242d2924..2bec4043 100644
--- a/crates/fuzzer/src/harness.rs
+++ b/crates/bin/fuzzer/src/harness.rs
@@ -9,17 +9,17 @@ use prost::Message;
 use serde_json::Value;
 use sha2::{Digest, Sha256};
 use uuid::Uuid;
+use waymark_backend_memory::MemoryBackend;
+use waymark_core_backend::QueuedInstance;
+use waymark_workflow_registry_backend::{WorkflowRegistration, WorkflowRegistryBackend as _};
 
 use super::generator::GeneratedCase;
-use waymark::backends::{
-    MemoryBackend, QueuedInstance, WorkflowRegistration, WorkflowRegistryBackend,
-};
 use waymark::messages::ast as ir;
-use waymark::waymark_core::ir_parser::parse_program;
 use waymark::waymark_core::runloop::{RunLoop, RunLoopSupervisorConfig};
-use waymark::waymark_core::runner::RunnerState;
 use waymark::workers::{ActionCallable, InlineWorkerPool, WorkerPoolError};
 use waymark_dag::convert_to_dag;
+use waymark_ir_parser::parse_program;
+use waymark_runner_state::RunnerState;
 
 pub async fn run_case(case_index: usize, case: &GeneratedCase) -> Result<()> {
     let program = parse_program(case.source.trim()).map_err(|err| {
diff --git a/crates/fuzzer/src/lib.rs b/crates/bin/fuzzer/src/lib.rs
similarity index 100%
rename from crates/fuzzer/src/lib.rs
rename to crates/bin/fuzzer/src/lib.rs
diff --git a/crates/lib/backend-fault-injection/Cargo.toml b/crates/lib/backend-fault-injection/Cargo.toml
new file mode 100644
index 00000000..1b592ba1
--- /dev/null
+++ b/crates/lib/backend-fault-injection/Cargo.toml
@@ -0,0 +1,12 @@
+[package]
+name = "waymark-backend-fault-injection"
+version = "0.1.0"
+edition = "2024"
+
+[dependencies]
+async-trait = { workspace = true }
+uuid = { workspace = true }
+waymark-backends-core = { workspace = true }
+waymark-backend-memory = { workspace = true }
+waymark-core-backend = { workspace = true }
+waymark-workflow-registry-backend = { workspace = true }
diff --git a/crates/lib/backend-fault-injection/src/lib.rs b/crates/lib/backend-fault-injection/src/lib.rs
new file mode 100644
index 00000000..4a43d305
--- /dev/null
+++ b/crates/lib/backend-fault-injection/src/lib.rs
@@ -0,0 +1,128 @@
+use std::sync::{
+    Arc,
+    atomic::{AtomicBool, AtomicUsize, Ordering as AtomicOrdering},
+};
+
+use uuid::Uuid;
+use waymark_backend_memory::MemoryBackend;
+use waymark_backends_core::{BackendError, BackendResult};
+use waymark_core_backend::{
+    CoreBackend, GraphUpdate, InstanceDone, InstanceLockStatus, LockClaim, QueuedInstanceBatch,
+};
+use waymark_workflow_registry_backend::{
+    WorkflowRegistration, WorkflowRegistryBackend, WorkflowVersion,
+};
+
+#[derive(Clone)]
+pub struct FaultInjectingBackend {
+    inner: MemoryBackend,
+    fail_get_queued_instances_with_depth_limit: Arc<AtomicBool>,
+    get_queued_instances_calls: Arc<AtomicUsize>,
+}
+
+impl FaultInjectingBackend {
+    pub fn with_depth_limit_poll_failures(inner: MemoryBackend) -> Self {
+        Self {
+            inner,
+            fail_get_queued_instances_with_depth_limit: Arc::new(AtomicBool::new(true)),
+            get_queued_instances_calls: Arc::new(AtomicUsize::new(0)),
+        }
+    }
+
+    pub fn get_queued_instances_calls(&self) -> usize {
+        self.get_queued_instances_calls.load(AtomicOrdering::SeqCst)
+    }
+
+    pub fn queue_len(&self) -> usize {
+        // A backend created without a shared queue reports zero queued instances.
+        match self.inner.instance_queue() {
+            Some(shared) => shared.lock().expect("queue poisoned").len(),
+            None => 0,
+        }
+    }
+
+    pub fn instances_done_len(&self) -> usize {
+        self.inner.instances_done().len()
+    }
+}
+
+#[async_trait::async_trait]
+impl CoreBackend for FaultInjectingBackend {
+    fn clone_box(&self) -> Box<dyn CoreBackend> {
+        Box::new(self.clone())
+    }
+
+    async fn save_graphs(
+        &self,
+        claim: LockClaim,
+        graphs: &[GraphUpdate],
+    ) -> BackendResult<Vec<InstanceLockStatus>> {
+        self.inner.save_graphs(claim, graphs).await
+    }
+
+    async fn save_actions_done(
+        &self,
+        actions: &[waymark_core_backend::ActionDone],
+    ) -> BackendResult<()> {
+        self.inner.save_actions_done(actions).await
+    }
+
+    async fn save_instances_done(&self, instances: &[InstanceDone]) -> BackendResult<()> {
+        self.inner.save_instances_done(instances).await
+    }
+
+    async fn get_queued_instances(
+        &self,
+        size: usize,
+        claim: LockClaim,
+    ) -> BackendResult<QueuedInstanceBatch> {
+        self.get_queued_instances_calls
+            .fetch_add(1, AtomicOrdering::SeqCst);
+        let inject_failure = self
+            .fail_get_queued_instances_with_depth_limit
+            .load(AtomicOrdering::SeqCst);
+        if inject_failure {
+            return Err(BackendError::Message("depth limit exceeded".to_string()));
+        }
+        self.inner.get_queued_instances(size, claim).await
+    }
+
+    async fn queue_instances(
+        &self,
+        instances: &[waymark_core_backend::QueuedInstance],
+    ) -> BackendResult<()> {
+        self.inner.queue_instances(instances).await
+    }
+
+    async fn refresh_instance_locks(
+        &self,
+        claim: LockClaim,
+        instance_ids: &[Uuid],
+    ) -> BackendResult<Vec<InstanceLockStatus>> {
+        self.inner.refresh_instance_locks(claim, instance_ids).await
+    }
+
+    async fn release_instance_locks(
+        &self,
+        lock_uuid: Uuid,
+        instance_ids: &[Uuid],
+    ) -> BackendResult<()> {
+        self.inner
+            .release_instance_locks(lock_uuid, instance_ids)
+            .await
+    }
+}
+
+#[async_trait::async_trait]
+impl WorkflowRegistryBackend for FaultInjectingBackend {
+    async fn upsert_workflow_version(
+        &self,
+        registration: &WorkflowRegistration,
+    ) -> BackendResult<Uuid> {
+        self.inner.upsert_workflow_version(registration).await
+    }
+
+    async fn get_workflow_versions(&self, ids: &[Uuid]) -> BackendResult<Vec<WorkflowVersion>> {
+        self.inner.get_workflow_versions(ids).await
+    }
+}
diff --git a/crates/lib/backend-memory/Cargo.toml b/crates/lib/backend-memory/Cargo.toml
new file mode 100644
index 00000000..203e0f35
--- /dev/null
+++ b/crates/lib/backend-memory/Cargo.toml
@@ -0,0 +1,37 @@
+[package]
+name = "waymark-backend-memory"
+version = "0.1.0"
+edition = "2024"
+
+[dependencies]
+async-trait = { workspace = true }
+chrono = { workspace = true, features = ["clock"] } # this crate calls `Utc::now()`
+rmp-serde = { workspace = true }
+serde_json = { workspace = true }
+uuid = { workspace = true }
+waymark-backends-core = { workspace = true }
+waymark-core-backend = { workspace = true }
+waymark-garbage-collector-backend = { workspace = true, optional = true }
+waymark-scheduler-backend = { workspace = true, optional = true }
+waymark-scheduler-core = { workspace = true }
+waymark-worker-status-backend = { workspace = true }
+waymark-workflow-registry-backend = { workspace = true }
+waymark-webapp-backend = { workspace = true, optional = true }
+waymark-webapp-core = { workspace = true, optional = true }
+
+[features]
+default = [
+  "core-backend",
+  "worker-status-backend",
+  "workflow-registry-backend",
+  "scheduler-backend",
+  "garbage-collector-backend",
+  "webapp-backend",
+]
+
+core-backend = []
+garbage-collector-backend = ["dep:waymark-garbage-collector-backend"]
+scheduler-backend = ["dep:waymark-scheduler-backend"]
+worker-status-backend = []
+workflow-registry-backend = []
+webapp-backend = ["dep:waymark-webapp-backend", "dep:waymark-webapp-core"]
diff --git a/crates/lib/backend-memory/src/core_backend.rs b/crates/lib/backend-memory/src/core_backend.rs
new file mode 100644
index 00000000..49a40330
--- /dev/null
+++ b/crates/lib/backend-memory/src/core_backend.rs
@@ -0,0 +1,159 @@
+use chrono::Utc;
+use uuid::Uuid;
+use waymark_backends_core::{BackendError, BackendResult};
+use waymark_core_backend::{
+    ActionDone, GraphUpdate, InstanceDone, InstanceLockStatus, LockClaim, QueuedInstance,
+    QueuedInstanceBatch,
+};
+
+#[async_trait::async_trait]
+impl waymark_core_backend::CoreBackend for crate::MemoryBackend {
+    fn clone_box(&self) -> Box<dyn waymark_core_backend::CoreBackend> {
+        Box::new(self.clone())
+    }
+
+    async fn save_graphs(
+        &self,
+        claim: LockClaim,
+        graphs: &[GraphUpdate],
+    ) -> BackendResult<Vec<InstanceLockStatus>> {
+        let mut stored = self.graph_updates.lock().expect("graph updates poisoned");
+        stored.extend(graphs.iter().cloned());
+        let mut guard = self.instance_locks.lock().expect("instance locks poisoned");
+        let mut locks = Vec::with_capacity(graphs.len());
+        for graph in graphs {
+            if let Some((Some(lock_uuid), lock_expires_at)) = guard.get_mut(&graph.instance_id)
+                && *lock_uuid == claim.lock_uuid
+                && lock_expires_at.is_none_or(|expires_at| expires_at < claim.lock_expires_at)
+            {
+                *lock_expires_at = Some(claim.lock_expires_at);
+            }
+            let (lock_uuid, lock_expires_at) = guard
+                .get(&graph.instance_id)
+                .cloned()
+                .unwrap_or((None, None));
+            locks.push(InstanceLockStatus {
+                instance_id: graph.instance_id,
+                lock_uuid,
+                lock_expires_at,
+            });
+        }
+        Ok(locks)
+    }
+
+    async fn save_actions_done(&self, actions: &[ActionDone]) -> BackendResult<()> {
+        let mut recorded = self.actions_done.lock().expect("actions done poisoned");
+        recorded.extend_from_slice(actions);
+        Ok(())
+    }
+
+    async fn save_instances_done(&self, instances: &[InstanceDone]) -> BackendResult<()> {
+        let mut stored = self.instances_done.lock().expect("instances done poisoned");
+        stored.extend(instances.iter().cloned());
+        if !instances.is_empty() {
+            let mut locks = self.instance_locks.lock().expect("instance locks poisoned");
+            for instance in instances {
+                locks.remove(&instance.executor_id);
+            }
+        }
+        Ok(())
+    }
+
+    /// Dequeues up to `size` instances and records `claim` as the lock on each.
+    ///
+    /// Returns an empty batch when `size` is zero or no queue is attached.
+    /// Dequeuing stops at the first instance whose `scheduled_at` is still in
+    /// the future.
+    async fn get_queued_instances(
+        &self,
+        size: usize,
+        claim: LockClaim,
+    ) -> BackendResult<QueuedInstanceBatch> {
+        // Both early-outs produce the same empty batch.
+        let queue = match self.instance_queue.as_ref() {
+            Some(queue) if size > 0 => queue,
+            _ => {
+                return Ok(QueuedInstanceBatch {
+                    instances: Vec::new(),
+                });
+            }
+        };
+        let mut pending = queue.lock().expect("instance queue poisoned");
+        let now = Utc::now();
+        let mut instances = Vec::new();
+        while instances.len() < size {
+            // Only the head is inspected, so a future-dated head also holds back
+            // whatever is queued behind it.
+            let head_is_due = match pending.front() {
+                Some(head) => head.scheduled_at.is_none_or(|at| at <= now),
+                None => false,
+            };
+            if !head_is_due {
+                break;
+            }
+            instances.push(pending.pop_front().expect("instance queue empty"));
+        }
+        // Everything handed out is now locked by the caller's claim.
+        if !instances.is_empty() {
+            let mut held = self.instance_locks.lock().expect("instance locks poisoned");
+            held.extend(instances.iter().map(|instance| {
+                let lock = (Some(claim.lock_uuid), Some(claim.lock_expires_at));
+                (instance.instance_id, lock)
+            }));
+        }
+        Ok(QueuedInstanceBatch { instances })
+    }
+
+    async fn queue_instances(&self, instances: &[QueuedInstance]) -> BackendResult<()> {
+        if instances.is_empty() {
+            return Ok(());
+        }
+        let queue = self.instance_queue.as_ref().ok_or_else(|| {
+            BackendError::Message("memory backend missing instance queue".to_string())
+        })?;
+        let mut guard = queue.lock().expect("instance queue poisoned");
+        for instance in instances {
+            guard.push_back(instance.clone());
+        }
+        Ok(())
+    }
+
+    async fn refresh_instance_locks(
+        &self,
+        claim: LockClaim,
+        instance_ids: &[Uuid],
+    ) -> BackendResult<Vec<InstanceLockStatus>> {
+        let mut held = self.instance_locks.lock().expect("instance locks poisoned");
+        let mut statuses = Vec::with_capacity(instance_ids.len());
+        for &instance_id in instance_ids {
+            let (owner, expires_at) = held
+                .entry(instance_id)
+                .or_insert((Some(claim.lock_uuid), Some(claim.lock_expires_at)));
+            if *owner == Some(claim.lock_uuid) {
+                *expires_at = Some(claim.lock_expires_at);
+            }
+            statuses.push(InstanceLockStatus {
+                instance_id,
+                lock_uuid: *owner,
+                lock_expires_at: *expires_at,
+            });
+        }
+        Ok(statuses)
+    }
+
+    async fn release_instance_locks(
+        &self,
+        lock_uuid: Uuid,
+        instance_ids: &[Uuid],
+    ) -> BackendResult<()> {
+        let mut held = self.instance_locks.lock().expect("instance locks poisoned");
+        // Only entries still owned by `lock_uuid` are dropped; others are left alone.
+        for id in instance_ids {
+            let owned = matches!(held.get(id), Some((Some(owner), _)) if *owner == lock_uuid);
+            if owned {
+                held.remove(id);
+            }
+        }
+        Ok(())
+    }
+}
diff --git a/crates/lib/backend-memory/src/garbage_collector_backend.rs b/crates/lib/backend-memory/src/garbage_collector_backend.rs
new file mode 100644
index 00000000..6a4cda66
--- /dev/null
+++ b/crates/lib/backend-memory/src/garbage_collector_backend.rs
@@ -0,0 +1,14 @@
+use chrono::{DateTime, Utc};
+use waymark_backends_core::BackendResult;
+use waymark_garbage_collector_backend::{GarbageCollectionResult, GarbageCollectorBackend};
+
+#[async_trait::async_trait]
+impl GarbageCollectorBackend for crate::MemoryBackend {
+    async fn collect_done_instances(
+        &self,
+        _older_than: DateTime<Utc>,
+        _limit: usize,
+    ) -> BackendResult<GarbageCollectionResult> {
+        Ok(GarbageCollectionResult::default())
+    }
+}
diff --git a/crates/lib/backend-memory/src/lib.rs b/crates/lib/backend-memory/src/lib.rs
new file mode 100644
index 00000000..e2ef56e4
--- /dev/null
+++ b/crates/lib/backend-memory/src/lib.rs
@@ -0,0 +1,111 @@
+//! In-memory backend that records persistence operations for tests and local runs.
+
+#[cfg(feature = "core-backend")]
+mod core_backend;
+
+#[cfg(feature = "garbage-collector-backend")]
+mod garbage_collector_backend;
+
+#[cfg(feature = "scheduler-backend")]
+mod scheduler_backend;
+
+#[cfg(feature = "webapp-backend")]
+mod webapp_backend;
+
+#[cfg(feature = "worker-status-backend")]
+mod worker_status_backend;
+
+#[cfg(feature = "workflow-registry-backend")]
+mod workflow_registry_backend;
+
+use std::collections::{HashMap, VecDeque};
+use std::sync::{Arc, Mutex};
+
+use chrono::{DateTime, Utc};
+use uuid::Uuid;
+
+use waymark_core_backend::{ActionDone, GraphUpdate, InstanceDone, QueuedInstance};
+use waymark_scheduler_core::{ScheduleId, WorkflowSchedule};
+use waymark_worker_status_backend::WorkerStatusUpdate;
+use waymark_workflow_registry_backend::WorkflowRegistration;
+
+type WorkflowVersionKey = (String, String);
+type WorkflowVersionValue = (Uuid, WorkflowRegistration);
+type WorkflowVersionStore = HashMap<WorkflowVersionKey, WorkflowVersionValue>;
+type InstanceLockStore = HashMap<Uuid, (Option<Uuid>, Option<DateTime<Utc>>)>;
+
+/// Backend that stores updates in memory for tests or local runs.
+#[derive(Clone)]
+pub struct MemoryBackend {
+    instance_queue: Option<Arc<Mutex<VecDeque<QueuedInstance>>>>,
+    graph_updates: Arc<Mutex<Vec<GraphUpdate>>>,
+    actions_done: Arc<Mutex<Vec<ActionDone>>>,
+    instances_done: Arc<Mutex<Vec<InstanceDone>>>,
+    worker_status_updates: Arc<Mutex<Vec<WorkerStatusUpdate>>>,
+    #[cfg_attr(not(feature = "workflow-registry-backend"), allow(dead_code))]
+    workflow_versions: Arc<Mutex<WorkflowVersionStore>>,
+    #[cfg_attr(not(feature = "scheduler-backend"), allow(dead_code))]
+    schedules: Arc<Mutex<HashMap<ScheduleId, WorkflowSchedule>>>,
+    #[cfg_attr(not(feature = "core-backend"), allow(dead_code))]
+    instance_locks: Arc<Mutex<InstanceLockStore>>,
+}
+
+impl Default for MemoryBackend {
+    fn default() -> Self {
+        Self {
+            instance_queue: None,
+            graph_updates: Arc::new(Mutex::new(Vec::new())),
+            actions_done: Arc::new(Mutex::new(Vec::new())),
+            instances_done: Arc::new(Mutex::new(Vec::new())),
+            worker_status_updates: Arc::new(Mutex::new(Vec::new())),
+            workflow_versions: Arc::new(Mutex::new(HashMap::new())),
+            schedules: Arc::new(Mutex::new(HashMap::new())),
+            instance_locks: Arc::new(Mutex::new(HashMap::new())),
+        }
+    }
+}
+
+impl MemoryBackend {
+    pub fn new() -> Self {
+        Self::default()
+    }
+
+    pub fn with_queue(queue: Arc<Mutex<VecDeque<QueuedInstance>>>) -> Self {
+        Self {
+            instance_queue: Some(queue),
+            ..Self::default()
+        }
+    }
+
+    pub fn instance_queue(&self) -> Option<Arc<Mutex<VecDeque<QueuedInstance>>>> {
+        self.instance_queue.clone()
+    }
+
+    pub fn graph_updates(&self) -> Vec<GraphUpdate> {
+        self.graph_updates
+            .lock()
+            .expect("graph updates poisoned")
+            .clone()
+    }
+
+    pub fn actions_done(&self) -> Vec<ActionDone> {
+        self.actions_done
+            .lock()
+            .expect("actions done poisoned")
+            .clone()
+    }
+
+    pub fn instances_done(&self) -> Vec<InstanceDone> {
+        self.instances_done
+            .lock()
+            .expect("instances done poisoned")
+            .clone()
+    }
+
+    pub fn worker_status_updates(&self) -> Vec<WorkerStatusUpdate> {
+        self.worker_status_updates
+            .lock()
+            .expect("worker status updates poisoned")
+            .clone()
+    }
+}
diff --git a/crates/lib/backend-memory/src/scheduler_backend.rs b/crates/lib/backend-memory/src/scheduler_backend.rs
new file mode 100644
index 00000000..3764f489
--- /dev/null
+++ b/crates/lib/backend-memory/src/scheduler_backend.rs
@@ -0,0 +1,209 @@
+use chrono::Utc;
+use uuid::Uuid;
+use waymark_backends_core::{BackendError, BackendResult};
+use waymark_scheduler_backend::SchedulerBackend;
+use waymark_scheduler_core::{
+    CreateScheduleParams, ScheduleId, ScheduleType, WorkflowSchedule, compute_next_run,
+};
+
+#[async_trait::async_trait]
+impl SchedulerBackend for crate::MemoryBackend {
+    /// Inserts or replaces the schedule identified by (workflow name, schedule name).
+    ///
+    /// A matching entry keeps its id, `created_at`, `last_run_at`, `last_instance_id` and any
+    /// `next_run_at` it already has; every other field is taken from `params` and the status
+    /// is set to "active". The name lookup does not filter on status, so an entry marked
+    /// "deleted" is matched and becomes active again.
+    ///
+    /// # Errors
+    /// Returns `BackendError::Message` if `compute_next_run` fails for a schedule without a
+    /// stored `next_run_at`; nothing is written in that case.
+    async fn upsert_schedule(&self, params: &CreateScheduleParams) -> BackendResult<ScheduleId> {
+        let mut guard = self.schedules.lock().expect("schedules poisoned");
+        // The map is keyed by id, so finding a schedule by name is a linear scan.
+        let existing = guard
+            .iter()
+            .find(|(_, schedule)| {
+                schedule.workflow_name == params.workflow_name
+                    && schedule.schedule_name == params.schedule_name
+            })
+            .map(|(id, schedule)| (*id, schedule.clone()));
+        let previous = existing.as_ref().map(|(_, schedule)| schedule);
+        let schedule_id = match &existing {
+            Some((id, _)) => *id,
+            None => ScheduleId::new(),
+        };
+        let now = Utc::now();
+        // Compute a first run time only when there is none to carry over.
+        let next_run_at = match previous.and_then(|schedule| schedule.next_run_at) {
+            Some(carried_over) => carried_over,
+            None => compute_next_run(
+                params.schedule_type,
+                params.cron_expression.as_deref(),
+                params.interval_seconds,
+                params.jitter_seconds,
+                None,
+            )
+            .map_err(BackendError::Message)?,
+        };
+        // Run history and creation time carry over; configuration comes from `params`.
+        let schedule = WorkflowSchedule {
+            id: schedule_id.0,
+            workflow_name: params.workflow_name.clone(),
+            schedule_name: params.schedule_name.clone(),
+            schedule_type: params.schedule_type.as_str().to_string(),
+            cron_expression: params.cron_expression.clone(),
+            interval_seconds: params.interval_seconds,
+            jitter_seconds: params.jitter_seconds,
+            input_payload: params.input_payload.clone(),
+            status: "active".to_string(),
+            next_run_at: Some(next_run_at),
+            last_run_at: previous.and_then(|schedule| schedule.last_run_at),
+            last_instance_id: previous.and_then(|schedule| schedule.last_instance_id),
+            created_at: previous.map_or(now, |schedule| schedule.created_at),
+            updated_at: now,
+            priority: params.priority,
+            allow_duplicate: params.allow_duplicate,
+        };
+        guard.insert(schedule_id, schedule);
+        Ok(schedule_id)
+    }
+
+    /// Returns a copy of the schedule stored under `id`, regardless of its status.
+    /// Fails with `BackendError::Message` when no such schedule exists.
+    async fn get_schedule(&self, id: ScheduleId) -> BackendResult<WorkflowSchedule> {
+        let guard = self.schedules.lock().expect("schedules poisoned");
+        let found = guard.get(&id).cloned();
+        found.ok_or_else(|| BackendError::Message(format!("schedule not found: {id}")))
+    }
+
+    /// Finds the schedule with the given workflow and schedule name, ignoring entries whose
+    /// status is "deleted". Returns `Ok(None)` when nothing matches.
+    async fn get_schedule_by_name(
+        &self,
+        workflow_name: &str,
+        schedule_name: &str,
+    ) -> BackendResult<Option<WorkflowSchedule>> {
+        let guard = self.schedules.lock().expect("schedules poisoned");
+        let found = guard.values().find(|schedule| {
+            schedule.status != "deleted"
+                && schedule.workflow_name == workflow_name
+                && schedule.schedule_name == schedule_name
+        });
+        Ok(found.cloned())
+    }
+
+    /// Returns one page of non-deleted schedules ordered by workflow name, then schedule name.
+    /// Negative `limit` or `offset` values are treated as zero.
+    async fn list_schedules(
+        &self,
+        limit: i64,
+        offset: i64,
+    ) -> BackendResult<Vec<WorkflowSchedule>> {
+        let guard = self.schedules.lock().expect("schedules poisoned");
+        let mut visible: Vec<_> = guard
+            .values()
+            .filter(|schedule| schedule.status != "deleted")
+            .cloned()
+            .collect();
+        // HashMap iteration order is unspecified; sort so that pages are deterministic.
+        visible.sort_by(|left, right| {
+            let left_key = (&left.workflow_name, &left.schedule_name);
+            left_key.cmp(&(&right.workflow_name, &right.schedule_name))
+        });
+        let skipped = offset.max(0) as usize;
+        let page_size = limit.max(0) as usize;
+        Ok(visible.into_iter().skip(skipped).take(page_size).collect())
+    }
+
+    /// Counts the stored schedules whose status is anything other than "deleted".
+    /// Entries marked "deleted" stay in the map, so they are filtered out here.
+    async fn count_schedules(&self) -> BackendResult<i64> {
+        let guard = self.schedules.lock().expect("schedules poisoned");
+        let visible = guard.values().filter(|entry| entry.status != "deleted");
+        Ok(visible.count() as i64)
+    }
+
+    /// Sets the status of schedule `id` and bumps `updated_at`; `Ok(false)` if the id is unknown.
+    async fn update_schedule_status(&self, id: ScheduleId, status: &str) -> BackendResult<bool> {
+        let mut guard = self.schedules.lock().expect("schedules poisoned");
+        let Some(schedule) = guard.get_mut(&id) else {
+            return Ok(false);
+        };
+        schedule.status = status.to_string();
+        schedule.updated_at = Utc::now();
+        Ok(true)
+    }
+
+    async fn delete_schedule(&self, id: ScheduleId) -> BackendResult<bool> {
+        SchedulerBackend::update_schedule_status(self, id, "deleted").await // soft delete only
+    }
+
+    /// Returns up to `limit` active schedules whose `next_run_at` is at or before the current
+    /// time, earliest first. A negative `limit` yields an empty list.
+    async fn find_due_schedules(&self, limit: i32) -> BackendResult<Vec<WorkflowSchedule>> {
+        // `limit as usize` would wrap a negative value into a huge count; clamp it to zero.
+        let limit = usize::try_from(limit).unwrap_or(0);
+        let guard = self.schedules.lock().expect("schedules poisoned");
+        let now = Utc::now();
+        let mut due: Vec<_> = guard
+            .values()
+            .filter(|entry| {
+                entry.status == "active" && entry.next_run_at.is_some_and(|next| next <= now)
+            })
+            .cloned()
+            .collect();
+        due.sort_by_key(|entry| entry.next_run_at);
+        Ok(due.into_iter().take(limit).collect())
+    }
+
+    async fn has_running_instance(&self, _schedule_id: ScheduleId) -> BackendResult<bool> {
+        Ok(false) // stub: always reports that no instance is running; the id is ignored
+    }
+
+    async fn mark_schedule_executed(
+        &self,
+        schedule_id: ScheduleId,
+        instance_id: Uuid,
+    ) -> BackendResult<()> { // stores `instance_id` as the last run and recomputes `next_run_at`
+        let mut guard = self.schedules.lock().expect("schedules poisoned");
+        let schedule = guard
+            .get_mut(&schedule_id)
+            .ok_or_else(|| BackendError::Message(format!("schedule not found: {schedule_id}")))?;
+        let schedule_type = ScheduleType::parse(&schedule.schedule_type)
+            .ok_or_else(|| BackendError::Message("invalid schedule type".to_string()))?;
+        let next_run_at = compute_next_run(
+            schedule_type,
+            schedule.cron_expression.as_deref(),
+            schedule.interval_seconds,
+            schedule.jitter_seconds,
+            Some(Utc::now()), // presumably the reference time for the next run; confirm in compute_next_run
+        )
+        .map_err(BackendError::Message)?; // every failure returns before the schedule is modified
+        schedule.last_run_at = Some(Utc::now()); // NOTE(review): the clock is read again for each timestamp
+        schedule.last_instance_id = Some(instance_id);
+        schedule.next_run_at = Some(next_run_at);
+        schedule.updated_at = Utc::now();
+        Ok(())
+    }
+
+    async fn skip_schedule_run(&self, schedule_id: ScheduleId) -> BackendResult<()> {
+        let mut guard = self.schedules.lock().expect("schedules poisoned");
+        let schedule = guard
+            .get_mut(&schedule_id)
+            .ok_or_else(|| BackendError::Message(format!("schedule not found: {schedule_id}")))?;
+        let schedule_type = ScheduleType::parse(&schedule.schedule_type)
+            .ok_or_else(|| BackendError::Message("invalid schedule type".to_string()))?;
+        let next_run_at = compute_next_run(
+            schedule_type,
+            schedule.cron_expression.as_deref(),
+            schedule.interval_seconds,
+            schedule.jitter_seconds,
+            Some(Utc::now()), // presumably the reference time for the next run; confirm in compute_next_run
+        )
+        .map_err(BackendError::Message)?; // every failure returns before the schedule is modified
+        schedule.next_run_at = Some(next_run_at); // only the next run moves; last_run_at and last_instance_id stay as they were
+        schedule.updated_at = Utc::now();
+        Ok(())
+    }
+}
diff --git a/crates/lib/backend-memory/src/webapp_backend.rs b/crates/lib/backend-memory/src/webapp_backend.rs
new file mode 100644
index 00000000..5bcca7c1
--- /dev/null
+++ b/crates/lib/backend-memory/src/webapp_backend.rs
@@ -0,0 +1,295 @@
+use std::collections::HashMap;
+
+use chrono::Utc;
+use uuid::Uuid;
+use waymark_backends_core::{BackendError, BackendResult};
+use waymark_webapp_backend::WebappBackend;
+use waymark_webapp_core::{
+    ExecutionGraphView, InstanceDetail, InstanceStatus, InstanceSummary, ScheduleDetail,
+    ScheduleInvocationSummary, ScheduleSummary, TimelineEntry, WorkerActionRow,
+    WorkerAggregateStats, WorkerStatus,
+};
+use waymark_worker_status_backend::WorkerStatusUpdate;
+
+#[async_trait::async_trait]
+impl WebappBackend for crate::MemoryBackend {
+    async fn count_instances(&self, _search: Option<&str>) -> BackendResult<i64> {
+        Ok(0) // stub: always reports zero instances; `_search` is ignored
+    }
+
+    async fn list_instances(
+        &self,
+        _search: Option<&str>,
+        _limit: i64,
+        _offset: i64,
+    ) -> BackendResult<Vec<InstanceSummary>> {
+        Ok(Vec::new()) // stub: always an empty page; all arguments are ignored
+    }
+
+    /// Always fails with a "not found" message that names `instance_id`.
+    async fn get_instance(&self, instance_id: Uuid) -> BackendResult<InstanceDetail> {
+        let message = format!("instance not found: {instance_id}");
+        Err(BackendError::Message(message))
+    }
+
+    async fn get_execution_graph(
+        &self,
+        _instance_id: Uuid,
+    ) -> BackendResult<Option<ExecutionGraphView>> {
+        Ok(None) // stub: always reports that no execution graph is available
+    }
+
+    async fn get_workflow_graph(
+        &self,
+        _instance_id: Uuid,
+    ) -> BackendResult<Option<ExecutionGraphView>> {
+        Ok(None) // stub: always reports that no workflow graph is available
+    }
+
+    async fn get_action_results(&self, _instance_id: Uuid) -> BackendResult<Vec<TimelineEntry>> {
+        Ok(Vec::new()) // stub: always an empty timeline; the id is ignored
+    }
+
+    async fn get_distinct_workflows(&self) -> BackendResult<Vec<String>> {
+        Ok(Vec::new()) // stub: always reports no workflow names
+    }
+
+    async fn get_distinct_statuses(&self) -> BackendResult<Vec<String>> {
+        Ok(vec![
+            InstanceStatus::Queued.to_string(),
+            InstanceStatus::Running.to_string(),
+            InstanceStatus::Completed.to_string(),
+            InstanceStatus::Failed.to_string(),
+        ]) // fixed list in this order; not derived from stored data
+    }
+
+    /// Counts the stored schedules whose status is anything other than "deleted".
+    /// The result is the element count cast to `i64`.
+    async fn count_schedules(&self) -> BackendResult<i64> {
+        let guard = self.schedules.lock().expect("schedules poisoned");
+        let visible = guard.values().filter(|entry| entry.status != "deleted");
+        Ok(visible.count() as i64)
+    }
+
+    /// Returns one page of non-deleted schedule summaries; negative arguments count as zero.
+    async fn list_schedules(&self, limit: i64, offset: i64) -> BackendResult<Vec<ScheduleSummary>> {
+        let guard = self.schedules.lock().expect("schedules poisoned");
+        let mut visible: Vec<_> = guard
+            .values()
+            .filter(|schedule| schedule.status != "deleted")
+            .cloned()
+            .collect();
+        // Order by workflow name, then schedule name, so that pages are deterministic.
+        visible.sort_by(|left, right| {
+            let left_key = (&left.workflow_name, &left.schedule_name);
+            left_key.cmp(&(&right.workflow_name, &right.schedule_name))
+        });
+        Ok(visible
+            .into_iter()
+            .skip(offset.max(0) as usize)
+            .take(limit.max(0) as usize)
+            .map(|entry| ScheduleSummary {
+                id: entry.id.to_string(),
+                workflow_name: entry.workflow_name,
+                schedule_name: entry.schedule_name,
+                schedule_type: entry.schedule_type,
+                cron_expression: entry.cron_expression,
+                interval_seconds: entry.interval_seconds,
+                status: entry.status,
+                next_run_at: entry.next_run_at.map(|dt| dt.to_rfc3339()),
+                last_run_at: entry.last_run_at.map(|dt| dt.to_rfc3339()),
+                created_at: entry.created_at.to_rfc3339(),
+            })
+            .collect())
+    }
+
+    /// Returns the detail view of the schedule whose id is `schedule_id`, whatever its status.
+    async fn get_schedule(&self, schedule_id: Uuid) -> BackendResult<ScheduleDetail> {
+        let guard = self.schedules.lock().expect("schedules poisoned");
+        let found = guard.values().find(|schedule| schedule.id == schedule_id);
+        let Some(schedule) = found.cloned() else {
+            return Err(BackendError::Message(format!("schedule not found: {schedule_id}")));
+        };
+
+        // A payload that cannot be decoded or rendered is reported as absent, not as an error.
+        let input_payload = schedule.input_payload.as_ref().and_then(|bytes| {
+            let value: serde_json::Value = rmp_serde::from_slice(bytes).ok()?;
+            serde_json::to_string_pretty(&value).ok()
+        });
+
+        Ok(ScheduleDetail {
+            id: schedule.id.to_string(),
+            workflow_name: schedule.workflow_name,
+            schedule_name: schedule.schedule_name,
+            schedule_type: schedule.schedule_type,
+            cron_expression: schedule.cron_expression,
+            interval_seconds: schedule.interval_seconds,
+            jitter_seconds: schedule.jitter_seconds,
+            status: schedule.status,
+            next_run_at: schedule.next_run_at.map(|dt| dt.to_rfc3339()),
+            last_run_at: schedule.last_run_at.map(|dt| dt.to_rfc3339()),
+            last_instance_id: schedule.last_instance_id.map(|id| id.to_string()),
+            created_at: schedule.created_at.to_rfc3339(),
+            updated_at: schedule.updated_at.to_rfc3339(),
+            priority: schedule.priority,
+            allow_duplicate: schedule.allow_duplicate,
+            input_payload,
+        })
+    }
+
+    async fn count_schedule_invocations(&self, _schedule_id: Uuid) -> BackendResult<i64> {
+        Ok(0) // stub: always reports zero invocations; the id is ignored
+    }
+
+    async fn list_schedule_invocations(
+        &self,
+        _schedule_id: Uuid,
+        _limit: i64,
+        _offset: i64,
+    ) -> BackendResult<Vec<ScheduleInvocationSummary>> {
+        Ok(Vec::new()) // stub: always an empty page; all arguments are ignored
+    }
+
+    /// Sets the status of the schedule whose id is `schedule_id`; `Ok(false)` if none matches.
+    async fn update_schedule_status(&self, schedule_id: Uuid, status: &str) -> BackendResult<bool> {
+        let mut guard = self.schedules.lock().expect("schedules poisoned");
+        match guard.values_mut().find(|entry| entry.id == schedule_id) {
+            Some(entry) => {
+                entry.status = status.to_string();
+                entry.updated_at = Utc::now();
+                Ok(true)
+            }
+            None => Ok(false),
+        }
+    }
+
+    async fn get_distinct_schedule_statuses(&self) -> BackendResult<Vec<String>> {
+        Ok(vec!["active".to_string(), "paused".to_string()]) // fixed list; "deleted" is not offered
+    }
+
+    async fn get_distinct_schedule_types(&self) -> BackendResult<Vec<String>> {
+        Ok(vec!["cron".to_string(), "interval".to_string()]) // fixed list; not derived from stored data
+    }
+
+    /// Builds one row per worker pool from that pool's most recent status update.
+    /// `_window_minutes` is ignored: no time-window filtering happens here.
+    async fn get_worker_action_stats(
+        &self,
+        _window_minutes: i64,
+    ) -> BackendResult<Vec<WorkerActionRow>> {
+        let updates = self
+            .worker_status_updates
+            .lock()
+            .expect("worker status updates poisoned");
+        let statuses = latest_worker_statuses(&updates);
+
+        // Rates are pre-formatted with one decimal place; timestamps become RFC 3339 strings.
+        let rows = statuses.into_iter().map(|status| WorkerActionRow {
+            pool_id: status.pool_id.to_string(),
+            active_workers: status.active_workers as i64,
+            actions_per_sec: format!("{:.1}", status.actions_per_sec),
+            throughput_per_min: status.throughput_per_min as i64,
+            total_completed: status.total_completed,
+            median_dequeue_ms: status.median_dequeue_ms,
+            median_handling_ms: status.median_handling_ms,
+            last_action_at: status.last_action_at.map(|dt| dt.to_rfc3339()),
+            updated_at: status.updated_at.to_rfc3339(),
+        });
+        Ok(rows.collect())
+    }
+
+    /// Sums the latest per-pool worker statuses into fleet-wide totals.
+    ///
+    /// `_window_minutes` is ignored. Status fields that are `None` are skipped by the sums.
+    async fn get_worker_aggregate_stats(
+        &self,
+        _window_minutes: i64,
+    ) -> BackendResult<WorkerAggregateStats> {
+        let updates = self
+            .worker_status_updates
+            .lock()
+            .expect("worker status updates poisoned");
+        let statuses = latest_worker_statuses(&updates);
+
+        // Fold from +0.0 rather than `.sum::<f64>()`: an empty float sum can yield -0.0,
+        // which would be rendered as "-0.0" when no worker has reported yet.
+        let actions_per_sec = statuses
+            .iter()
+            .fold(0.0_f64, |total, status| total + status.actions_per_sec);
+
+        Ok(WorkerAggregateStats {
+            active_worker_count: statuses
+                .iter()
+                .map(|status| status.active_workers as i64)
+                .sum(),
+            actions_per_sec: format!("{:.1}", actions_per_sec),
+            total_in_flight: statuses
+                .iter()
+                .filter_map(|status| status.total_in_flight)
+                .sum(),
+            total_queue_depth: statuses
+                .iter()
+                .filter_map(|status| status.dispatch_queue_size)
+                .sum(),
+        })
+    }
+
+    async fn worker_status_table_exists(&self) -> bool {
+        !self
+            .worker_status_updates
+            .lock()
+            .expect("worker status updates poisoned")
+            .is_empty() // no real table here: "exists" means at least one update has been stored
+    }
+
+    /// Reports whether at least one schedule is stored.
+    /// Entries marked "deleted" still count, because they remain in the map.
+    /// There is no real table here: an empty map is reported as `false`.
+    async fn schedules_table_exists(&self) -> bool {
+        let guard = self.schedules.lock().expect("schedules poisoned");
+        !guard.is_empty()
+    }
+
+    /// Returns the latest status per worker pool; `_window_minutes` is ignored.
+    async fn get_worker_statuses(&self, _window_minutes: i64) -> BackendResult<Vec<WorkerStatus>> {
+        let updates = self
+            .worker_status_updates
+            .lock()
+            .expect("worker status updates poisoned");
+        Ok(latest_worker_statuses(&updates))
+    }
+}
+
+/// Reduces `updates` to one `WorkerStatus` per pool (last entry wins), highest rate first.
+fn latest_worker_statuses(updates: &[WorkerStatusUpdate]) -> Vec<WorkerStatus> {
+    let by_pool: HashMap<Uuid, WorkerStatusUpdate> = updates
+        .iter()
+        .map(|update| (update.pool_id, update.clone()))
+        .collect();
+    // `updated_at` is stamped with the time of this call rather than read from the update.
+    let now = Utc::now();
+    let mut statuses: Vec<_> = by_pool
+        .into_values()
+        .map(|latest| WorkerStatus {
+            pool_id: latest.pool_id,
+            active_workers: latest.active_workers,
+            throughput_per_min: latest.throughput_per_min,
+            actions_per_sec: latest.actions_per_sec,
+            total_completed: latest.total_completed,
+            last_action_at: latest.last_action_at,
+            updated_at: now,
+            median_dequeue_ms: latest.median_dequeue_ms,
+            median_handling_ms: latest.median_handling_ms,
+            dispatch_queue_size: Some(latest.dispatch_queue_size),
+            total_in_flight: Some(latest.total_in_flight),
+            median_instance_duration_secs: latest.median_instance_duration_secs,
+            active_instance_count: latest.active_instance_count,
+            total_instances_completed: latest.total_instances_completed,
+            instances_per_sec: latest.instances_per_sec,
+            instances_per_min: latest.instances_per_min,
+            time_series: latest.time_series,
+        })
+        .collect();
+    statuses.sort_by(|a, b| b.actions_per_sec.total_cmp(&a.actions_per_sec));
+    statuses
+}
diff --git a/crates/lib/backend-memory/src/worker_status_backend.rs b/crates/lib/backend-memory/src/worker_status_backend.rs
new file mode 100644
index 00000000..dbca9794
--- /dev/null
+++ b/crates/lib/backend-memory/src/worker_status_backend.rs
@@ -0,0 +1,13 @@
+use waymark_worker_status_backend::{BackendResult, WorkerStatusBackend, WorkerStatusUpdate};
+
+#[async_trait::async_trait]
+impl WorkerStatusBackend for crate::MemoryBackend {
+    /// Appends a copy of `status` to the in-memory list; earlier entries are left in place.
+    async fn upsert_worker_status(&self, status: &WorkerStatusUpdate) -> BackendResult<()> {
+        self.worker_status_updates
+            .lock()
+            .expect("worker status updates poisoned")
+            .push(status.clone());
+        Ok(())
+    }
+}
diff --git a/crates/lib/backend-memory/src/workflow_registry_backend.rs b/crates/lib/backend-memory/src/workflow_registry_backend.rs
new file mode 100644
index 00000000..e820b5a9
--- /dev/null
+++ b/crates/lib/backend-memory/src/workflow_registry_backend.rs
@@ -0,0 +1,58 @@
+use uuid::Uuid;
+use waymark_workflow_registry_backend::{
+    BackendError, BackendResult, WorkflowRegistration, WorkflowRegistryBackend, WorkflowVersion,
+};
+
+#[async_trait::async_trait]
+impl WorkflowRegistryBackend for crate::MemoryBackend {
+    /// Registers (name, version) or returns its existing id; a differing IR hash is an error.
+    async fn upsert_workflow_version(
+        &self,
+        registration: &WorkflowRegistration,
+    ) -> BackendResult<Uuid> {
+        let mut guard = self
+            .workflow_versions
+            .lock()
+            .expect("workflow versions poisoned");
+        let key = (
+            registration.workflow_name.clone(),
+            registration.workflow_version.clone(),
+        );
+        match guard.get(&key) {
+            Some((id, existing)) if existing.ir_hash == registration.ir_hash => Ok(*id),
+            Some(_) => Err(BackendError::Message(format!(
+                "workflow version already exists with different IR hash: {}@{}",
+                registration.workflow_name, registration.workflow_version
+            ))),
+            None => {
+                let id = Uuid::new_v4();
+                guard.insert(key, (id, registration.clone()));
+                Ok(id)
+            }
+        }
+    }
+
+    /// Returns the registered versions whose id is in `ids`; ids with no match are omitted.
+    async fn get_workflow_versions(&self, ids: &[Uuid]) -> BackendResult<Vec<WorkflowVersion>> {
+        if ids.is_empty() {
+            return Ok(Vec::new());
+        }
+        let guard = self
+            .workflow_versions
+            .lock()
+            .expect("workflow versions poisoned");
+        let versions = guard
+            .values()
+            .filter(|(id, _)| ids.contains(id))
+            .map(|(id, registration)| WorkflowVersion {
+                id: *id,
+                workflow_name: registration.workflow_name.clone(),
+                workflow_version: registration.workflow_version.clone(),
+                ir_hash: registration.ir_hash.clone(),
+                program_proto: registration.program_proto.clone(),
+                concurrent: registration.concurrent,
+            })
+            .collect();
+        Ok(versions)
+    }
+}
diff --git a/crates/lib/backend-postgres-migrations/Cargo.toml b/crates/lib/backend-postgres-migrations/Cargo.toml
new file mode 100644
index 00000000..f84ad14c
--- /dev/null
+++ b/crates/lib/backend-postgres-migrations/Cargo.toml
@@ -0,0 +1,7 @@
+[package]
+name = "waymark-backend-postgres-migrations"
+version = "0.1.0"
+edition = "2024"
+
+[dependencies]
+sqlx = { workspace = true, features = ["postgres", "macros", "migrate"] }
diff --git a/crates/lib/backend-postgres-migrations/build.rs b/crates/lib/backend-postgres-migrations/build.rs
new file mode 100644
index 00000000..3a8149ef
--- /dev/null
+++ b/crates/lib/backend-postgres-migrations/build.rs
@@ -0,0 +1,3 @@
+fn main() {
+    println!("cargo:rerun-if-changed=migrations"); // ask Cargo to rerun this script when `migrations` changes
+}
diff --git a/crates/waymark/migrations/0001_init.sql b/crates/lib/backend-postgres-migrations/migrations/0001_init.sql
similarity index 100%
rename from crates/waymark/migrations/0001_init.sql
rename to crates/lib/backend-postgres-migrations/migrations/0001_init.sql
diff --git a/crates/waymark/migrations/0002_runner_actions_done_execution_id.sql b/crates/lib/backend-postgres-migrations/migrations/0002_runner_actions_done_execution_id.sql
similarity index 100%
rename from crates/waymark/migrations/0002_runner_actions_done_execution_id.sql
rename to crates/lib/backend-postgres-migrations/migrations/0002_runner_actions_done_execution_id.sql
diff --git a/crates/waymark/migrations/0003_instance_locks.sql b/crates/lib/backend-postgres-migrations/migrations/0003_instance_locks.sql
similarity index 100%
rename from crates/waymark/migrations/0003_instance_locks.sql
rename to crates/lib/backend-postgres-migrations/migrations/0003_instance_locks.sql
diff --git a/crates/waymark/migrations/0004_workflow_versions.sql b/crates/lib/backend-postgres-migrations/migrations/0004_workflow_versions.sql
similarity index 100%
rename from crates/waymark/migrations/0004_workflow_versions.sql
rename to crates/lib/backend-postgres-migrations/migrations/0004_workflow_versions.sql
diff --git a/crates/waymark/migrations/0005_runner_instances_workflow_version_id.sql b/crates/lib/backend-postgres-migrations/migrations/0005_runner_instances_workflow_version_id.sql
similarity index 100%
rename from crates/waymark/migrations/0005_runner_instances_workflow_version_id.sql
rename to crates/lib/backend-postgres-migrations/migrations/0005_runner_instances_workflow_version_id.sql
diff --git a/crates/waymark/migrations/0006_drop_unused_runner_tables.sql b/crates/lib/backend-postgres-migrations/migrations/0006_drop_unused_runner_tables.sql
similarity index 100%
rename from crates/waymark/migrations/0006_drop_unused_runner_tables.sql
rename to crates/lib/backend-postgres-migrations/migrations/0006_drop_unused_runner_tables.sql
diff --git a/crates/waymark/migrations/0007_runner_instances_schedule_id.sql b/crates/lib/backend-postgres-migrations/migrations/0007_runner_instances_schedule_id.sql
similarity index 100%
rename from crates/waymark/migrations/0007_runner_instances_schedule_id.sql
rename to crates/lib/backend-postgres-migrations/migrations/0007_runner_instances_schedule_id.sql
diff --git a/crates/waymark/migrations/0008_runner_actions_done_timing.sql b/crates/lib/backend-postgres-migrations/migrations/0008_runner_actions_done_timing.sql
similarity index 100%
rename from crates/waymark/migrations/0008_runner_actions_done_timing.sql
rename to crates/lib/backend-postgres-migrations/migrations/0008_runner_actions_done_timing.sql
diff --git a/crates/waymark/migrations/0009_instance_search_columns.sql b/crates/lib/backend-postgres-migrations/migrations/0009_instance_search_columns.sql
similarity index 100%
rename from crates/waymark/migrations/0009_instance_search_columns.sql
rename to crates/lib/backend-postgres-migrations/migrations/0009_instance_search_columns.sql
diff --git a/crates/lib/backend-postgres-migrations/src/lib.rs b/crates/lib/backend-postgres-migrations/src/lib.rs
new file mode 100644
index 00000000..82495aeb
--- /dev/null
+++ b/crates/lib/backend-postgres-migrations/src/lib.rs
@@ -0,0 +1,8 @@
+//! Migrations for the postgres backend.
+
+use sqlx::PgPool;
+
+/// Runs the migrations embedded by `sqlx::migrate!()` against `pool`.
+pub async fn run(pool: &PgPool) -> Result<(), sqlx::migrate::MigrateError> {
+    sqlx::migrate!().run(pool).await
+}
diff --git a/crates/lib/backend-postgres/Cargo.toml b/crates/lib/backend-postgres/Cargo.toml
new file mode 100644
index 00000000..f61f582f
--- /dev/null
+++ b/crates/lib/backend-postgres/Cargo.toml
@@ -0,0 +1,39 @@
+[package]
+name = "waymark-backend-postgres"
+version = "0.1.0"
+edition = "2024"
+
+[dependencies]
+async-trait = { workspace = true }
+chrono = { workspace = true }
+rmp-serde = { workspace = true }
+serde = { workspace = true, features = ["derive"] }
+serde_json = { workspace = true }
+sqlx = { workspace = true, features = ["uuid", "chrono"] }
+tokio = { workspace = true, features = ["macros"] }
+tracing = { workspace = true }
+uuid = { workspace = true }
+waymark-backend-postgres-migrations = { workspace = true }
+waymark-backends-core = { workspace = true }
+waymark-core-backend = { workspace = true }
+waymark-dag = { workspace = true }
+waymark-proto = { workspace = true }
+waymark-garbage-collector-backend = { workspace = true }
+waymark-observability = { workspace = true }
+waymark-runner = { workspace = true }
+waymark-runner-state = { workspace = true }
+waymark-scheduler-backend = { workspace = true }
+waymark-scheduler-core = { workspace = true }
+waymark-webapp-backend = { workspace = true }
+waymark-webapp-core = { workspace = true }
+waymark-worker-status-backend = { workspace = true }
+waymark-workflow-registry-backend = { workspace = true }
+prost = { workspace = true }
+
+[dev-dependencies]
+serial_test = { workspace = true }
+waymark-test-support = { workspace = true }
+waymark-ir-parser = { workspace = true }
+
+[features]
+trace = []
diff --git a/crates/waymark/src/backends/postgres/core.rs b/crates/lib/backend-postgres/src/core.rs
similarity index 98%
rename from crates/waymark/src/backends/postgres/core.rs
rename to crates/lib/backend-postgres/src/core.rs
index c827b4c0..bc46747e 100644
--- a/crates/waymark/src/backends/postgres/core.rs
+++ b/crates/lib/backend-postgres/src/core.rs
@@ -4,18 +4,19 @@ use std::time::Duration as StdDuration;
 
 use chrono::{DateTime, Utc};
 use sqlx::{Postgres, QueryBuilder, Row};
-use tonic::async_trait;
 use tracing::warn;
 use uuid::Uuid;
+use waymark_garbage_collector_backend::{GarbageCollectionResult, GarbageCollectorBackend};
+use waymark_scheduler_backend::{BackendError, BackendResult};
+use waymark_worker_status_backend::{WorkerStatusBackend, WorkerStatusUpdate};
 
 use super::PostgresBackend;
-use crate::backends::base::{
-    ActionDone, BackendError, BackendResult, CoreBackend, GarbageCollectionResult,
-    GarbageCollectorBackend, GraphUpdate, InstanceDone, InstanceLockStatus, LockClaim,
-    QueuedInstance, QueuedInstanceBatch, WorkerStatusBackend, WorkerStatusUpdate,
+use waymark_core_backend::{
+    ActionDone, GraphUpdate, InstanceDone, InstanceLockStatus, LockClaim, QueuedInstance,
+    QueuedInstanceBatch,
 };
-use crate::observability::obs;
-use crate::waymark_core::runner::state::RunnerState;
+use waymark_observability::obs;
+use waymark_runner_state::RunnerState;
 
 const INSTANCE_STATUS_QUEUED: &str = "queued";
 const INSTANCE_STATUS_RUNNING: &str = "running";
@@ -59,7 +60,7 @@ fn is_transient_sqlstate(code: &str) -> bool {
 
 fn is_transient_backend_error(err: &BackendError) -> bool {
     match err {
-        BackendError::Sqlx(sqlx::Error::Database(db_err)) => {
+        BackendError::Inner(sqlx::Error::Database(db_err)) => {
             db_err.code().as_deref().is_some_and(is_transient_sqlstate)
         }
         // Fallback for cases where sqlstate is not preserved in wrapping.
@@ -842,16 +843,16 @@ impl PostgresBackend {
     }
 }
 
-#[async_trait]
-impl CoreBackend for PostgresBackend {
-    fn clone_box(&self) -> Box<dyn CoreBackend> {
+#[async_trait::async_trait]
+impl waymark_core_backend::CoreBackend for PostgresBackend {
+    fn clone_box(&self) -> Box<dyn waymark_core_backend::CoreBackend> {
         Box::new(self.clone())
     }
 
     async fn save_graphs(
         &self,
-        claim: LockClaim,
-        graphs: &[GraphUpdate],
+        claim: waymark_core_backend::LockClaim,
+        graphs: &[waymark_core_backend::GraphUpdate],
     ) -> BackendResult<Vec<InstanceLockStatus>> {
         self.save_graphs_impl(claim, graphs).await
     }
@@ -935,7 +936,10 @@ impl CoreBackend for PostgresBackend {
         Ok(())
     }
 
-    async fn queue_instances(&self, instances: &[QueuedInstance]) -> BackendResult<()> {
+    async fn queue_instances(
+        &self,
+        instances: &[waymark_core_backend::QueuedInstance],
+    ) -> BackendResult<()> {
         PostgresBackend::queue_instances(self, instances).await
     }
 }
@@ -988,7 +992,7 @@ impl PostgresBackend {
     }
 }
 
-#[async_trait]
+#[async_trait::async_trait]
 impl GarbageCollectorBackend for PostgresBackend {
     async fn collect_done_instances(
         &self,
@@ -999,7 +1003,7 @@ impl GarbageCollectorBackend for PostgresBackend {
     }
 }
 
-#[async_trait]
+#[async_trait::async_trait]
 impl WorkerStatusBackend for PostgresBackend {
     async fn upsert_worker_status(&self, status: &WorkerStatusUpdate) -> BackendResult<()> {
         PostgresBackend::upsert_worker_status(self, status).await
@@ -1017,14 +1021,13 @@ mod tests {
     use serial_test::serial;
     use sqlx::Row;
     use uuid::Uuid;
+    use waymark_core_backend::{ActionAttemptStatus, CoreBackend};
 
     use super::super::test_helpers::setup_backend;
     use super::*;
-    use crate::backends::{
-        ActionAttemptStatus, CoreBackend, GarbageCollectorBackend, WorkerStatusBackend,
-    };
-    use crate::waymark_core::runner::state::{ActionCallSpec, ExecutionNode, NodeStatus};
+
     use waymark_dag::EdgeType;
+    use waymark_runner_state::{ActionCallSpec, ExecutionNode, NodeStatus};
 
     fn sample_runner_state() -> RunnerState {
         RunnerState::new(None, None, None, false)
@@ -1343,13 +1346,11 @@ mod tests {
         let graph = GraphUpdate {
             instance_id,
             nodes,
-            edges: std::collections::HashSet::from([
-                crate::waymark_core::runner::state::ExecutionEdge {
-                    source: execution_id,
-                    target: execution_id,
-                    edge_type: EdgeType::StateMachine,
-                },
-            ]),
+            edges: std::collections::HashSet::from([waymark_runner_state::ExecutionEdge {
+                source: execution_id,
+                target: execution_id,
+                edge_type: EdgeType::StateMachine,
+            }]),
         };
         let extended_claim = LockClaim {
             lock_uuid: claim.lock_uuid,
diff --git a/crates/waymark/src/backends/postgres/mod.rs b/crates/lib/backend-postgres/src/lib.rs
similarity index 86%
rename from crates/waymark/src/backends/postgres/mod.rs
rename to crates/lib/backend-postgres/src/lib.rs
index 4bec275d..2b4e3821 100644
--- a/crates/waymark/src/backends/postgres/mod.rs
+++ b/crates/lib/backend-postgres/src/lib.rs
@@ -11,11 +11,8 @@ use std::collections::HashMap;
 use std::sync::{Arc, Mutex};
 
 use sqlx::PgPool;
-
-use crate::db;
-use crate::observability::obs;
-
-use super::base::{BackendError, BackendResult};
+use waymark_backends_core::{BackendError, BackendResult};
+use waymark_observability::obs;
 
 /// Persist runner state and action results in Postgres.
 #[derive(Clone)]
@@ -37,7 +34,9 @@ impl PostgresBackend {
     #[obs]
     pub async fn connect(dsn: &str) -> BackendResult<Self> {
         let pool = PgPool::connect(dsn).await?;
-        db::run_migrations(&pool).await?;
+        waymark_backend_postgres_migrations::run(&pool)
+            .await
+            .map_err(|err| BackendError::Message(err.to_string()))?;
         Ok(Self::new(pool))
     }
 
@@ -86,12 +85,12 @@ impl PostgresBackend {
             .clone()
     }
 
-    pub(super) fn count_query(counts: &Arc<Mutex<HashMap<String, usize>>>, label: &str) {
+    pub(crate) fn count_query(counts: &Arc<Mutex<HashMap<String, usize>>>, label: &str) {
         let mut guard = counts.lock().expect("query counts poisoned");
         *guard.entry(label.to_string()).or_insert(0) += 1;
     }
 
-    pub(super) fn count_batch_size(
+    pub(crate) fn count_batch_size(
         counts: &Arc<Mutex<HashMap<String, HashMap<usize, usize>>>>,
         label: &str,
         size: usize,
@@ -104,11 +103,11 @@ impl PostgresBackend {
         *entry.entry(size).or_insert(0) += 1;
     }
 
-    pub(super) fn serialize<T: serde::Serialize>(value: &T) -> Result<Vec<u8>, BackendError> {
+    pub(crate) fn serialize<T: serde::Serialize>(value: &T) -> Result<Vec<u8>, BackendError> {
         rmp_serde::to_vec_named(value).map_err(|e| BackendError::Message(e.to_string()))
     }
 
-    pub(super) fn deserialize<T: serde::de::DeserializeOwned>(
+    pub(crate) fn deserialize<T: serde::de::DeserializeOwned>(
         payload: &[u8],
     ) -> Result<T, BackendError> {
         rmp_serde::from_slice(payload).map_err(|e| BackendError::Message(e.to_string()))
diff --git a/crates/waymark/src/backends/postgres/registry.rs b/crates/lib/backend-postgres/src/registry.rs
similarity index 94%
rename from crates/waymark/src/backends/postgres/registry.rs
rename to crates/lib/backend-postgres/src/registry.rs
index c8fb5a68..94fc1e2c 100644
--- a/crates/waymark/src/backends/postgres/registry.rs
+++ b/crates/lib/backend-postgres/src/registry.rs
@@ -1,13 +1,13 @@
 use sqlx::Row;
-use tonic::async_trait;
 use uuid::Uuid;
+use waymark_backends_core::{BackendError, BackendResult};
+use waymark_workflow_registry_backend::{
+    WorkflowRegistration, WorkflowRegistryBackend, WorkflowVersion,
+};
 
 use super::PostgresBackend;
-use crate::backends::base::{
-    BackendError, BackendResult, WorkflowRegistration, WorkflowRegistryBackend, WorkflowVersion,
-};
 
-#[async_trait]
+#[async_trait::async_trait]
 impl WorkflowRegistryBackend for PostgresBackend {
     async fn upsert_workflow_version(
         &self,
@@ -95,7 +95,7 @@ mod tests {
     use serial_test::serial;
 
     use super::super::test_helpers::setup_backend;
-    use crate::backends::{WorkflowRegistration, WorkflowRegistryBackend};
+    use waymark_workflow_registry_backend::{WorkflowRegistration, WorkflowRegistryBackend};
 
     fn sample_registration(version: &str) -> WorkflowRegistration {
         WorkflowRegistration {
diff --git a/crates/waymark/src/backends/postgres/scheduler.rs b/crates/lib/backend-postgres/src/scheduler.rs
similarity index 97%
rename from crates/waymark/src/backends/postgres/scheduler.rs
rename to crates/lib/backend-postgres/src/scheduler.rs
index 5eb00735..e47f2114 100644
--- a/crates/waymark/src/backends/postgres/scheduler.rs
+++ b/crates/lib/backend-postgres/src/scheduler.rs
@@ -1,15 +1,14 @@
 use chrono::{DateTime, Utc};
 use sqlx::Row;
-use tonic::async_trait;
 use uuid::Uuid;
+use waymark_backends_core::{BackendError, BackendResult};
+use waymark_scheduler_backend::SchedulerBackend;
 
-use super::PostgresBackend;
-use crate::backends::base::{BackendError, BackendResult, SchedulerBackend};
-use crate::scheduler::compute_next_run;
-use crate::scheduler::{CreateScheduleParams, ScheduleId, ScheduleType, WorkflowSchedule};
+use waymark_scheduler_core::compute_next_run;
+use waymark_scheduler_core::{CreateScheduleParams, ScheduleId, ScheduleType, WorkflowSchedule};
 
-#[async_trait]
-impl SchedulerBackend for PostgresBackend {
+#[async_trait::async_trait]
+impl SchedulerBackend for crate::PostgresBackend {
     async fn upsert_schedule(&self, params: &CreateScheduleParams) -> BackendResult<ScheduleId> {
         let next_run_at = compute_next_run(
             params.schedule_type,
@@ -307,10 +306,12 @@ mod tests {
     use sqlx::Row;
     use uuid::Uuid;
 
+    use crate::PostgresBackend;
+
     use super::super::test_helpers::setup_backend;
     use super::*;
-    use crate::backends::SchedulerBackend;
-    use crate::scheduler::CreateScheduleParams;
+    use waymark_scheduler_backend::SchedulerBackend;
+    use waymark_scheduler_core::CreateScheduleParams;
 
     fn sample_params(schedule_name: &str) -> CreateScheduleParams {
         CreateScheduleParams {
diff --git a/crates/waymark/src/backends/postgres/test_helpers.rs b/crates/lib/backend-postgres/src/test_helpers.rs
similarity index 93%
rename from crates/waymark/src/backends/postgres/test_helpers.rs
rename to crates/lib/backend-postgres/src/test_helpers.rs
index dd03cd7f..addb1ad4 100644
--- a/crates/waymark/src/backends/postgres/test_helpers.rs
+++ b/crates/lib/backend-postgres/src/test_helpers.rs
@@ -1,7 +1,7 @@
 use sqlx::PgPool;
 
 use super::PostgresBackend;
-use crate::test_support::postgres_setup;
+use waymark_test_support::postgres_setup;
 
 pub(super) async fn setup_backend() -> PostgresBackend {
     let pool = postgres_setup().await;
diff --git a/crates/waymark/src/backends/postgres/webapp.rs b/crates/lib/backend-postgres/src/webapp.rs
similarity index 98%
rename from crates/waymark/src/backends/postgres/webapp.rs
rename to crates/lib/backend-postgres/src/webapp.rs
index 8f6b932f..e3f50ced 100644
--- a/crates/waymark/src/backends/postgres/webapp.rs
+++ b/crates/lib/backend-postgres/src/webapp.rs
@@ -4,20 +4,22 @@ use chrono::{DateTime, Utc};
 use prost::Message;
 use serde_json::Value;
 use sqlx::{Postgres, QueryBuilder, Row};
-use tonic::async_trait;
+
 use uuid::Uuid;
 
-use super::PostgresBackend;
-use crate::backends::base::{BackendError, BackendResult, GraphUpdate, WebappBackend};
-use crate::messages::ast as ir;
-use crate::waymark_core::runner::state::{ActionCallSpec, ExecutionNode, NodeStatus};
-use crate::waymark_core::runner::{RunnerState, ValueExpr, format_value, replay_action_kwargs};
-use crate::webapp::{
+use waymark_backends_core::{BackendError, BackendResult};
+use waymark_core_backend::GraphUpdate;
+use waymark_dag::{DAGNode, EdgeType, convert_to_dag};
+use waymark_proto::ast as ir;
+use waymark_runner::replay_action_kwargs;
+use waymark_runner_state::{
+    ActionCallSpec, ExecutionNode, NodeStatus, RunnerState, format_value, value_visitor::ValueExpr,
+};
+use waymark_webapp_core::{
     ExecutionEdgeView, ExecutionGraphView, ExecutionNodeView, InstanceDetail, InstanceStatus,
     InstanceSummary, ScheduleDetail, ScheduleInvocationSummary, ScheduleSummary, TimelineEntry,
     WorkerActionRow, WorkerAggregateStats, WorkerStatus,
 };
-use waymark_dag::{DAGNode, EdgeType, convert_to_dag};
 
 const INSTANCE_STATUS_FALLBACK_SQL: &str = r#"
 CASE
@@ -263,8 +265,8 @@ fn parse_instance_status(status: &str) -> Option<InstanceStatus> {
     }
 }
 
-#[async_trait]
-impl WebappBackend for PostgresBackend {
+#[async_trait::async_trait]
+impl waymark_webapp_backend::WebappBackend for crate::PostgresBackend {
     async fn count_instances(&self, search: Option<&str>) -> BackendResult<i64> {
         let mut builder: QueryBuilder<Postgres> = QueryBuilder::new(
             r#"
@@ -1407,20 +1409,23 @@ mod tests {
     use prost::Message;
     use serial_test::serial;
     use uuid::Uuid;
+    use waymark_scheduler_backend::SchedulerBackend;
+    use waymark_webapp_backend::WebappBackend;
+    use waymark_worker_status_backend::{WorkerStatusBackend, WorkerStatusUpdate};
+    use waymark_workflow_registry_backend::{WorkflowRegistration, WorkflowRegistryBackend};
+
+    use crate::PostgresBackend;
 
     use super::super::test_helpers::setup_backend;
     use super::*;
-    use crate::backends::{
-        SchedulerBackend, WebappBackend, WorkerStatusBackend, WorkerStatusUpdate,
-        WorkflowRegistration, WorkflowRegistryBackend,
-    };
-    use crate::scheduler::{CreateScheduleParams, ScheduleType};
-    use crate::waymark_core::ir_parser::parse_program;
-    use crate::waymark_core::runner::ValueExpr;
-    use crate::waymark_core::runner::state::{
+
+    use waymark_dag::EdgeType;
+    use waymark_ir_parser::parse_program;
+    use waymark_runner_state::{
         ActionCallSpec, ExecutionEdge, ExecutionNode, LiteralValue, NodeStatus,
+        value_visitor::ValueExpr,
     };
-    use waymark_dag::EdgeType;
+    use waymark_scheduler_core::{CreateScheduleParams, ScheduleType};
 
     #[test]
     fn format_extracted_inputs_happy_path() {
diff --git a/crates/lib/backends-core/Cargo.toml b/crates/lib/backends-core/Cargo.toml
new file mode 100644
index 00000000..194062c0
--- /dev/null
+++ b/crates/lib/backends-core/Cargo.toml
@@ -0,0 +1,15 @@
+[package]
+name = "waymark-backends-core"
+version = "0.1.0"
+edition = "2024"
+
+[dependencies]
+serde_json = { workspace = true }
+thiserror = { workspace = true }
+sqlx = { workspace = true, optional = true }
+
+[features]
+default = ["sqlx-error"]
+
+# TODO: this has to be abstracted away since not all backends will use sqlx.
+sqlx-error = ["dep:sqlx"]
diff --git a/crates/lib/backends-core/src/lib.rs b/crates/lib/backends-core/src/lib.rs
new file mode 100644
index 00000000..ee49d385
--- /dev/null
+++ b/crates/lib/backends-core/src/lib.rs
@@ -0,0 +1,36 @@
+//! Core primitives for various waymark subsystem backends.
+
+/// The common backend error.
+///
+/// TODO: move away from a shared notion of backend error to use concrete error
+/// type per-operation (rather than per-subsystem or per-crate).
+#[derive(Debug, thiserror::Error)]
+pub enum BackendError<Inner = InnerError> {
+    #[error("{0}")]
+    Message(String),
+
+    #[error(transparent)]
+    Inner(Inner),
+
+    #[error(transparent)]
+    Serialization(serde_json::Error),
+}
+
+#[cfg(feature = "sqlx-error")]
+pub type InnerError = sqlx::Error;
+
+#[cfg(not(feature = "sqlx-error"))]
+pub type InnerError = ();
+
+/// Utility type alias for backend results.
+///
+/// TODO: move away from the single-`Result` type aliases as we want to vary
+/// errors per-call.
+pub type BackendResult<T, E = InnerError> = Result<T, BackendError<E>>;
+
+#[cfg(feature = "sqlx-error")]
+impl From<sqlx::Error> for BackendError<sqlx::Error> {
+    fn from(value: sqlx::Error) -> Self {
+        Self::Inner(value)
+    }
+}
diff --git a/crates/lib/core-backend/Cargo.toml b/crates/lib/core-backend/Cargo.toml
new file mode 100644
index 00000000..da2aa394
--- /dev/null
+++ b/crates/lib/core-backend/Cargo.toml
@@ -0,0 +1,14 @@
+[package]
+name = "waymark-core-backend"
+version = "0.1.0"
+edition = "2024"
+
+[dependencies]
+async-trait = { workspace = true }
+uuid = { workspace = true }
+serde = { workspace = true, features = ["derive"] }
+serde_json = { workspace = true }
+chrono = { workspace = true }
+waymark-runner-state = { workspace = true }
+waymark-dag = { workspace = true }
+waymark-backends-core = { workspace = true }
diff --git a/crates/lib/core-backend/src/data.rs b/crates/lib/core-backend/src/data.rs
new file mode 100644
index 00000000..d9320e71
--- /dev/null
+++ b/crates/lib/core-backend/src/data.rs
@@ -0,0 +1,150 @@
+// The models that we use for our backends are similar to the ones that we
+// have specified in our database/Postgres backend, but not 1:1. It's better for
+// us to internally convert within the given backend.
+
+use std::{
+    collections::{HashMap, HashSet},
+    sync::Arc,
+};
+
+use chrono::{DateTime, Utc};
+use serde::{Deserialize, Serialize};
+use uuid::Uuid;
+use waymark_dag::DAG;
+use waymark_runner_state::{ExecutionEdge, ExecutionNode, NodeStatus, RunnerState};
+
+#[derive(Clone, Debug, Serialize, Deserialize)]
+/// Queued instance payload for the run loop.
+pub struct QueuedInstance {
+    pub workflow_version_id: Uuid,
+    #[serde(default)]
+    pub schedule_id: Option<Uuid>,
+    #[serde(skip, default)]
+    pub dag: Option<Arc<DAG>>,
+    pub entry_node: Uuid,
+    pub state: Option<RunnerState>,
+    #[serde(
+        default = "default_action_results",
+        deserialize_with = "deserialize_action_results"
+    )]
+    pub action_results: HashMap<Uuid, serde_json::Value>,
+    #[serde(default = "default_instance_id")]
+    pub instance_id: Uuid,
+    #[serde(default)]
+    pub scheduled_at: Option<DateTime<Utc>>,
+}
+
+#[derive(Clone, Debug)]
+/// Result payload for queued instance polling.
+pub struct QueuedInstanceBatch {
+    pub instances: Vec<QueuedInstance>,
+}
+
+#[derive(Clone, Debug)]
+/// Lock claim settings for owned instances.
+pub struct LockClaim {
+    pub lock_uuid: Uuid,
+    pub lock_expires_at: DateTime<Utc>,
+}
+
+#[derive(Clone, Debug)]
+/// Current lock status for an instance.
+pub struct InstanceLockStatus {
+    pub instance_id: Uuid,
+    pub lock_uuid: Option<Uuid>,
+    pub lock_expires_at: Option<DateTime<Utc>>,
+}
+
+#[derive(Clone, Debug, Serialize, Deserialize)]
+/// Completed instance payload with result or exception.
+pub struct InstanceDone {
+    pub executor_id: Uuid,
+    pub entry_node: Uuid,
+    pub result: Option<serde_json::Value>,
+    pub error: Option<serde_json::Value>,
+}
+
+#[derive(Clone, Debug, Serialize, Deserialize)]
+/// Batch payload representing an updated execution graph snapshot.
+///
+/// This intentionally stores only runtime nodes and edges (no DAG template or
+/// derived caches) so persistence stays lightweight.
+pub struct GraphUpdate {
+    pub instance_id: Uuid,
+    pub nodes: HashMap<Uuid, ExecutionNode>,
+    pub edges: HashSet<ExecutionEdge>,
+}
+
+impl GraphUpdate {
+    pub fn from_state(instance_id: Uuid, state: &RunnerState) -> Self {
+        Self {
+            instance_id,
+            nodes: state.nodes.clone(),
+            edges: state.edges.clone(),
+        }
+    }
+
+    pub fn next_scheduled_at(&self) -> DateTime<Utc> {
+        let mut next: Option<DateTime<Utc>> = None;
+        for node in self.nodes.values() {
+            if matches!(node.status, NodeStatus::Completed | NodeStatus::Failed) {
+                continue;
+            }
+            if let Some(scheduled_at) = node.scheduled_at {
+                next = Some(match next {
+                    Some(existing) => existing.min(scheduled_at),
+                    None => scheduled_at,
+                });
+            }
+        }
+        next.unwrap_or_else(Utc::now)
+    }
+}
+
+#[derive(Clone, Debug, Serialize, Deserialize)]
+/// Batch payload representing a finished action attempt (success or failure).
+pub struct ActionDone {
+    pub execution_id: Uuid,
+    pub attempt: i32,
+    pub status: ActionAttemptStatus,
+    pub started_at: Option<DateTime<Utc>>,
+    pub completed_at: Option<DateTime<Utc>>,
+    pub duration_ms: Option<i64>,
+    pub result: serde_json::Value,
+}
+
+#[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)]
+#[serde(rename_all = "snake_case")]
+pub enum ActionAttemptStatus {
+    Completed,
+    Failed,
+    TimedOut,
+}
+
+impl std::fmt::Display for ActionAttemptStatus {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            Self::Completed => write!(f, "completed"),
+            Self::Failed => write!(f, "failed"),
+            Self::TimedOut => write!(f, "timed_out"),
+        }
+    }
+}
+
+fn default_instance_id() -> Uuid {
+    Uuid::new_v4()
+}
+
+fn default_action_results() -> HashMap<Uuid, serde_json::Value> {
+    HashMap::new()
+}
+
+fn deserialize_action_results<'de, D>(
+    deserializer: D,
+) -> Result<HashMap<Uuid, serde_json::Value>, D::Error>
+where
+    D: serde::Deserializer<'de>,
+{
+    let value = Option::<HashMap<Uuid, serde_json::Value>>::deserialize(deserializer)?;
+    Ok(value.unwrap_or_default())
+}
diff --git a/crates/lib/core-backend/src/lib.rs b/crates/lib/core-backend/src/lib.rs
new file mode 100644
index 00000000..5e876b29
--- /dev/null
+++ b/crates/lib/core-backend/src/lib.rs
@@ -0,0 +1,58 @@
+//! Core backend traits for waymark.
+
+mod data;
+
+use uuid::Uuid;
+
+use waymark_backends_core::BackendResult;
+
+pub use self::data::*;
+
+/// Abstract persistence backend for runner state.
+#[async_trait::async_trait]
+pub trait CoreBackend: Send + Sync {
+    fn clone_box(&self) -> Box<dyn CoreBackend>;
+
+    /// Persist updated execution graphs.
+    async fn save_graphs(
+        &self,
+        claim: LockClaim,
+        graphs: &[GraphUpdate],
+    ) -> BackendResult<Vec<InstanceLockStatus>>;
+
+    /// Persist finished action attempts (success or failure).
+    async fn save_actions_done(&self, actions: &[ActionDone]) -> BackendResult<()>;
+
+    /// Return up to `size` queued instances without blocking.
+    async fn get_queued_instances(
+        &self,
+        size: usize,
+        claim: LockClaim,
+    ) -> BackendResult<QueuedInstanceBatch>;
+
+    /// Refresh lock expiry for owned instances.
+    async fn refresh_instance_locks(
+        &self,
+        claim: LockClaim,
+        instance_ids: &[Uuid],
+    ) -> BackendResult<Vec<InstanceLockStatus>>;
+
+    /// Release instance locks when evicting from memory.
+    async fn release_instance_locks(
+        &self,
+        lock_uuid: Uuid,
+        instance_ids: &[Uuid],
+    ) -> BackendResult<()>;
+
+    /// Persist completed workflow instances.
+    async fn save_instances_done(&self, instances: &[InstanceDone]) -> BackendResult<()>;
+
+    /// Insert queued instances for run-loop consumption.
+    async fn queue_instances(&self, instances: &[QueuedInstance]) -> BackendResult<()>;
+}
+
+impl Clone for Box<dyn CoreBackend> {
+    fn clone(&self) -> Self {
+        self.clone_box()
+    }
+}
diff --git a/crates/dag/Cargo.toml b/crates/lib/dag/Cargo.toml
similarity index 89%
rename from crates/dag/Cargo.toml
rename to crates/lib/dag/Cargo.toml
index 54abfd13..a8bda894 100644
--- a/crates/dag/Cargo.toml
+++ b/crates/lib/dag/Cargo.toml
@@ -11,4 +11,4 @@ uuid = { workspace = true, features = ["serde", "v4"]  }
 waymark-proto = { workspace = true, features = ["serde"] }
 
 [dev-dependencies]
-waymark = { workspace = true }
+waymark-ir-parser = { workspace = true }
diff --git a/crates/dag/src/builder/assignments.rs b/crates/lib/dag/src/builder/assignments.rs
similarity index 100%
rename from crates/dag/src/builder/assignments.rs
rename to crates/lib/dag/src/builder/assignments.rs
diff --git a/crates/dag/src/builder/conditionals.rs b/crates/lib/dag/src/builder/conditionals.rs
similarity index 100%
rename from crates/dag/src/builder/conditionals.rs
rename to crates/lib/dag/src/builder/conditionals.rs
diff --git a/crates/dag/src/builder/converter.rs b/crates/lib/dag/src/builder/converter.rs
similarity index 100%
rename from crates/dag/src/builder/converter.rs
rename to crates/lib/dag/src/builder/converter.rs
diff --git a/crates/dag/src/builder/data_flow.rs b/crates/lib/dag/src/builder/data_flow.rs
similarity index 100%
rename from crates/dag/src/builder/data_flow.rs
rename to crates/lib/dag/src/builder/data_flow.rs
diff --git a/crates/dag/src/builder/exceptions.rs b/crates/lib/dag/src/builder/exceptions.rs
similarity index 100%
rename from crates/dag/src/builder/exceptions.rs
rename to crates/lib/dag/src/builder/exceptions.rs
diff --git a/crates/dag/src/builder/expansion.rs b/crates/lib/dag/src/builder/expansion.rs
similarity index 100%
rename from crates/dag/src/builder/expansion.rs
rename to crates/lib/dag/src/builder/expansion.rs
diff --git a/crates/dag/src/builder/loops.rs b/crates/lib/dag/src/builder/loops.rs
similarity index 100%
rename from crates/dag/src/builder/loops.rs
rename to crates/lib/dag/src/builder/loops.rs
diff --git a/crates/dag/src/builder/mod.rs b/crates/lib/dag/src/builder/mod.rs
similarity index 100%
rename from crates/dag/src/builder/mod.rs
rename to crates/lib/dag/src/builder/mod.rs
diff --git a/crates/dag/src/builder/spreads.rs b/crates/lib/dag/src/builder/spreads.rs
similarity index 100%
rename from crates/dag/src/builder/spreads.rs
rename to crates/lib/dag/src/builder/spreads.rs
diff --git a/crates/dag/src/builder/test_helpers.rs b/crates/lib/dag/src/builder/test_helpers.rs
similarity index 96%
rename from crates/dag/src/builder/test_helpers.rs
rename to crates/lib/dag/src/builder/test_helpers.rs
index ed33ca00..0c9811c1 100644
--- a/crates/dag/src/builder/test_helpers.rs
+++ b/crates/lib/dag/src/builder/test_helpers.rs
@@ -1,5 +1,5 @@
 use crate::{DAG, DAGConverter, convert_to_dag};
-use waymark::waymark_core::ir_parser::parse_program;
+use waymark_ir_parser::parse_program;
 use waymark_proto::ast as ir;
 
 pub(super) fn dedent(source: &str) -> String {
diff --git a/crates/dag/src/builder/utils.rs b/crates/lib/dag/src/builder/utils.rs
similarity index 100%
rename from crates/dag/src/builder/utils.rs
rename to crates/lib/dag/src/builder/utils.rs
diff --git a/crates/dag/src/lib.rs b/crates/lib/dag/src/lib.rs
similarity index 100%
rename from crates/dag/src/lib.rs
rename to crates/lib/dag/src/lib.rs
diff --git a/crates/dag/src/models.rs b/crates/lib/dag/src/models.rs
similarity index 100%
rename from crates/dag/src/models.rs
rename to crates/lib/dag/src/models.rs
diff --git a/crates/dag/src/nodes.rs b/crates/lib/dag/src/nodes.rs
similarity index 100%
rename from crates/dag/src/nodes.rs
rename to crates/lib/dag/src/nodes.rs
diff --git a/crates/dag/src/validate.rs b/crates/lib/dag/src/validate.rs
similarity index 99%
rename from crates/dag/src/validate.rs
rename to crates/lib/dag/src/validate.rs
index b6aa67e1..0e48504e 100644
--- a/crates/dag/src/validate.rs
+++ b/crates/lib/dag/src/validate.rs
@@ -361,7 +361,7 @@ fn collect_expr_variables(expr: &ir::Expr, vars: &mut HashSet<String>) {
 mod tests {
     use super::validate_dag;
     use crate::convert_to_dag;
-    use waymark::waymark_core::ir_parser::parse_program;
+    use waymark_ir_parser::parse_program;
 
     #[test]
     fn validate_dag_rejects_unresolved_variable_reference() {
diff --git a/crates/lib/garbage-collector-backend/Cargo.toml b/crates/lib/garbage-collector-backend/Cargo.toml
new file mode 100644
index 00000000..e1e4f300
--- /dev/null
+++ b/crates/lib/garbage-collector-backend/Cargo.toml
@@ -0,0 +1,9 @@
+[package]
+name = "waymark-garbage-collector-backend"
+version = "0.1.0"
+edition = "2024"
+
+[dependencies]
+async-trait = { workspace = true }
+chrono = { workspace = true }
+waymark-backends-core = { workspace = true }
diff --git a/crates/lib/garbage-collector-backend/src/lib.rs b/crates/lib/garbage-collector-backend/src/lib.rs
new file mode 100644
index 00000000..af8badff
--- /dev/null
+++ b/crates/lib/garbage-collector-backend/src/lib.rs
@@ -0,0 +1,20 @@
+use chrono::{DateTime, Utc};
+
+use waymark_backends_core::BackendResult;
+
+#[derive(Clone, Copy, Debug, Default)]
+/// Summary of a garbage collection sweep.
+pub struct GarbageCollectionResult {
+    pub deleted_instances: usize,
+    pub deleted_actions: usize,
+}
+
+/// Backend capability for deleting old finished workflow data.
+#[async_trait::async_trait]
+pub trait GarbageCollectorBackend: Send + Sync {
+    async fn collect_done_instances(
+        &self,
+        older_than: DateTime<Utc>,
+        limit: usize,
+    ) -> BackendResult<GarbageCollectionResult>;
+}
diff --git a/crates/lib/integration-support/Cargo.toml b/crates/lib/integration-support/Cargo.toml
new file mode 100644
index 00000000..1e24644c
--- /dev/null
+++ b/crates/lib/integration-support/Cargo.toml
@@ -0,0 +1,10 @@
+[package]
+name = "waymark-integration-support"
+version = "0.1.0"
+edition = "2024"
+
+[dependencies]
+anyhow = { workspace = true }
+sqlx = { workspace = true }
+tokio = { workspace = true, features = ["process", "time", "sync"] }
+waymark-backend-postgres-migrations = { workspace = true }
diff --git a/crates/waymark/src/integration_support/mod.rs b/crates/lib/integration-support/src/lib.rs
similarity index 100%
rename from crates/waymark/src/integration_support/mod.rs
rename to crates/lib/integration-support/src/lib.rs
diff --git a/crates/waymark/src/integration_support/postgres.rs b/crates/lib/integration-support/src/postgres.rs
similarity index 96%
rename from crates/waymark/src/integration_support/postgres.rs
rename to crates/lib/integration-support/src/postgres.rs
index 5a876f2d..59c9caec 100644
--- a/crates/waymark/src/integration_support/postgres.rs
+++ b/crates/lib/integration-support/src/postgres.rs
@@ -8,8 +8,6 @@ use sqlx::{PgPool, postgres::PgPoolOptions};
 use tokio::process::Command;
 use tokio::sync::OnceCell;
 
-use crate::db;
-
 pub const LOCAL_POSTGRES_DSN: &str = "postgresql://waymark:waymark@127.0.0.1:5433/waymark";
 
 const READY_TIMEOUT: Duration = Duration::from_secs(45);
@@ -41,7 +39,7 @@ pub async fn connect_pool(dsn: &str) -> Result<PgPool> {
 
 async fn ensure_local_postgres_impl() -> Result<()> {
     if let Ok(pool) = connect_pool(LOCAL_POSTGRES_DSN).await {
-        db::run_migrations(&pool)
+        waymark_backend_postgres_migrations::run(&pool)
             .await
             .context("run migrations for existing local postgres")?;
         pool.close().await;
@@ -50,7 +48,7 @@ async fn ensure_local_postgres_impl() -> Result<()> {
 
     run_compose_up().await?;
     let pool = wait_for_postgres(LOCAL_POSTGRES_DSN).await?;
-    db::run_migrations(&pool)
+    waymark_backend_postgres_migrations::run(&pool)
         .await
         .context("run migrations for local postgres")?;
     pool.close().await;
diff --git a/crates/lib/ir-parser/Cargo.toml b/crates/lib/ir-parser/Cargo.toml
new file mode 100644
index 00000000..fee9b094
--- /dev/null
+++ b/crates/lib/ir-parser/Cargo.toml
@@ -0,0 +1,8 @@
+[package]
+name = "waymark-ir-parser"
+version = "0.1.0"
+edition = "2024"
+
+[dependencies]
+waymark-proto = { workspace = true }
+regex = { workspace = true }
diff --git a/crates/waymark/src/waymark_core/ir_parser.rs b/crates/lib/ir-parser/src/lib.rs
similarity index 99%
rename from crates/waymark/src/waymark_core/ir_parser.rs
rename to crates/lib/ir-parser/src/lib.rs
index a3f2bbf7..1af43324 100644
--- a/crates/waymark/src/waymark_core/ir_parser.rs
+++ b/crates/lib/ir-parser/src/lib.rs
@@ -2,7 +2,7 @@
 
 use std::fmt;
 
-use crate::messages::ast as ir;
+use waymark_proto::ast as ir;
 
 /// Raised when parsing the IR source representation fails.
 #[derive(Debug, Clone)]
diff --git a/crates/observability-macros/Cargo.toml b/crates/lib/observability-macros/Cargo.toml
similarity index 100%
rename from crates/observability-macros/Cargo.toml
rename to crates/lib/observability-macros/Cargo.toml
diff --git a/crates/observability-macros/src/lib.rs b/crates/lib/observability-macros/src/lib.rs
similarity index 62%
rename from crates/observability-macros/src/lib.rs
rename to crates/lib/observability-macros/src/lib.rs
index 9fc1df7b..e10c19ec 100644
--- a/crates/observability-macros/src/lib.rs
+++ b/crates/lib/observability-macros/src/lib.rs
@@ -6,10 +6,10 @@ use syn::{ItemFn, parse_macro_input};
 pub fn obs(args: TokenStream, input: TokenStream) -> TokenStream {
     let mut item = parse_macro_input!(input as ItemFn);
     let attr = if args.is_empty() {
-        syn::parse_quote!(#[cfg_attr(feature = "trace", tracing::instrument(skip_all))])
+        syn::parse_quote!(#[cfg_attr(feature = "trace", ::waymark_observability::__inner::tracing::instrument(skip_all))])
     } else {
         let args = proc_macro2::TokenStream::from(args);
-        syn::parse_quote!(#[cfg_attr(feature = "trace", tracing::instrument(#args))])
+        syn::parse_quote!(#[cfg_attr(feature = "trace", ::waymark_observability::__inner::tracing::instrument(#args))])
     };
     item.attrs.push(attr);
     TokenStream::from(quote!(#item))
diff --git a/crates/lib/observability/Cargo.toml b/crates/lib/observability/Cargo.toml
new file mode 100644
index 00000000..bc27b66c
--- /dev/null
+++ b/crates/lib/observability/Cargo.toml
@@ -0,0 +1,8 @@
+[package]
+name = "waymark-observability"
+version = "0.1.0"
+edition = "2024"
+
+[dependencies]
+waymark-observability-macros = { workspace = true }
+tracing = { workspace = true }
diff --git a/crates/lib/observability/src/lib.rs b/crates/lib/observability/src/lib.rs
new file mode 100644
index 00000000..d2fa50f6
--- /dev/null
+++ b/crates/lib/observability/src/lib.rs
@@ -0,0 +1,8 @@
+pub use waymark_observability_macros::obs;
+
+#[doc(hidden)]
+pub mod __inner {
+    pub mod tracing {
+        pub use tracing::instrument;
+    }
+}
diff --git a/crates/proto/Cargo.toml b/crates/lib/proto/Cargo.toml
similarity index 88%
rename from crates/proto/Cargo.toml
rename to crates/lib/proto/Cargo.toml
index 277e7df3..79ea21bf 100644
--- a/crates/proto/Cargo.toml
+++ b/crates/lib/proto/Cargo.toml
@@ -3,6 +3,9 @@ name = "waymark-proto"
 version = "0.1.0"
 edition = "2024"
 
+[package.metadata.cargo-shear]
+ignored = ["prost"]
+
 [dependencies]
 prost = "0.12"
 prost-types = "0.12"
diff --git a/crates/proto/build.rs b/crates/lib/proto/build.rs
similarity index 97%
rename from crates/proto/build.rs
rename to crates/lib/proto/build.rs
index 9cbb4369..33a80357 100644
--- a/crates/proto/build.rs
+++ b/crates/lib/proto/build.rs
@@ -1,4 +1,4 @@
-const PROTO_DIR: &str = "../../proto";
+const PROTO_DIR: &str = "../../../proto";
 
 fn if_feature_enabled(
     builder: tonic_build::Builder,
diff --git a/crates/proto/src/lib.rs b/crates/lib/proto/src/lib.rs
similarity index 100%
rename from crates/proto/src/lib.rs
rename to crates/lib/proto/src/lib.rs
diff --git a/crates/lib/runner-state/Cargo.toml b/crates/lib/runner-state/Cargo.toml
new file mode 100644
index 00000000..6a64d994
--- /dev/null
+++ b/crates/lib/runner-state/Cargo.toml
@@ -0,0 +1,16 @@
+[package]
+name = "waymark-runner-state"
+version = "0.1.0"
+edition = "2024"
+
+[dependencies]
+chrono = { workspace = true, features = ["serde", "clock"] }
+serde = { workspace = true, features = ["derive"] }
+serde_json = { workspace = true }
+thiserror = { workspace = true }
+uuid = { workspace = true }
+waymark-dag = { workspace = true }
+waymark-proto = { workspace = true }
+
+[features]
+trace = []
diff --git a/crates/lib/runner-state/src/lib.rs b/crates/lib/runner-state/src/lib.rs
new file mode 100644
index 00000000..5c7ae36b
--- /dev/null
+++ b/crates/lib/runner-state/src/lib.rs
@@ -0,0 +1,5 @@
+mod state;
+mod util;
+pub mod value_visitor;
+
+pub use self::state::*;
diff --git a/crates/waymark/src/waymark_core/runner/state.rs b/crates/lib/runner-state/src/state.rs
similarity index 99%
rename from crates/waymark/src/waymark_core/runner/state.rs
rename to crates/lib/runner-state/src/state.rs
index dd9e68be..da418624 100644
--- a/crates/waymark/src/waymark_core/runner/state.rs
+++ b/crates/lib/runner-state/src/state.rs
@@ -8,15 +8,13 @@ use chrono::{DateTime, Utc};
 use serde::{Deserialize, Serialize};
 use uuid::Uuid;
 
-use crate::messages::ast as ir;
-use crate::waymark_core::runner::expression_evaluator::is_truthy;
-use crate::waymark_core::runner::value_visitor::{
-    ValueExpr, collect_value_sources, resolve_value_tree,
-};
+use crate::util::is_truthy;
+use crate::value_visitor::{ValueExpr, collect_value_sources, resolve_value_tree};
 use waymark_dag::{
     ActionCallNode, AggregatorNode, AssignmentNode, DAG, DAGNode, EdgeType, FnCallNode, JoinNode,
     ReturnNode, SleepNode,
 };
+use waymark_proto::ast as ir;
 
 /// Raised when the runner state cannot be updated safely.
 #[derive(Debug, thiserror::Error)]
@@ -335,11 +333,13 @@ impl RunnerState {
         state
     }
 
-    pub(crate) fn set_link_queued_nodes(&mut self, value: bool) {
+    /// TODO: make this `pub(crate)` again
+    pub fn set_link_queued_nodes(&mut self, value: bool) {
         self.link_queued_nodes = value;
     }
 
-    pub(crate) fn latest_assignment(&self, name: &str) -> Option<Uuid> {
+    /// TODO: make this `pub(crate)` again
+    pub fn latest_assignment(&self, name: &str) -> Option<Uuid> {
         self.latest_assignments.get(name).copied()
     }
 
@@ -915,7 +915,9 @@ impl RunnerState {
     /// Example IR:
     /// - a, b = @pair()
     ///   This yields ActionResultValue(node_id, result_index=0/1) for a and b.
-    pub(crate) fn assign_action_results(
+    ///
+    /// TODO: make this `pub(crate)` again
+    pub fn assign_action_results(
         &mut self,
         node: &ExecutionNode,
         action_name: &str,
@@ -1038,7 +1040,7 @@ impl RunnerState {
     /// - xs = [1]
     /// - ys = xs + [2]
     ///   Materialization turns ys into ListValue([1, 2]) rather than keeping xs.
-    pub(crate) fn materialize_value(&self, value: ValueExpr) -> ValueExpr {
+    pub fn materialize_value(&self, value: ValueExpr) -> ValueExpr {
         let resolved = resolve_value_tree(&value, &|name, seen| {
             self.resolve_variable_value(name, seen)
         });
@@ -1111,7 +1113,8 @@ impl RunnerState {
         assigned
     }
 
-    pub(crate) fn mark_latest_assignments(
+    /// TODO: make this `pub(crate)` again
+    pub fn mark_latest_assignments(
         &mut self,
         node_id: Uuid,
         assignments: &HashMap<String, ValueExpr>,
@@ -1129,7 +1132,9 @@ impl RunnerState {
     /// Example IR:
     /// - total = @sum(values)
     ///   A data-flow edge is added from the values assignment node to the action.
-    pub(crate) fn record_data_flow_from_value(&mut self, node_id: Uuid, value: &ValueExpr) {
+    ///
+    /// TODO: make this `pub(crate)` again
+    pub fn record_data_flow_from_value(&mut self, node_id: Uuid, value: &ValueExpr) {
         let source_ids =
             collect_value_sources(value, &|name| self.latest_assignments.get(name).copied());
         self.record_data_flow_edges(node_id, &source_ids);
@@ -1790,7 +1795,7 @@ fn format_literal(value: &serde_json::Value) -> String {
 ///
 /// Example IR:
 /// - Literal(int_value=3) -> 3
-pub(crate) fn literal_value(lit: &ir::Literal) -> serde_json::Value {
+pub fn literal_value(lit: &ir::Literal) -> serde_json::Value {
     match lit.value.as_ref() {
         Some(ir::literal::Value::IntValue(value)) => serde_json::Value::Number((*value).into()),
         Some(ir::literal::Value::FloatValue(value)) => serde_json::Number::from_f64(*value)
@@ -1892,8 +1897,8 @@ impl fmt::Display for NodeStatus {
 #[cfg(test)]
 mod tests {
     use super::*;
-    use crate::messages::ast as ir;
     use serde_json::Value;
+    use waymark_proto::ast as ir;
 
     fn action_plus_two_expr() -> ir::Expr {
         ir::Expr {
diff --git a/crates/lib/runner-state/src/util.rs b/crates/lib/runner-state/src/util.rs
new file mode 100644
index 00000000..20768070
--- /dev/null
+++ b/crates/lib/runner-state/src/util.rs
@@ -0,0 +1,12 @@
+/// JSON truthiness: null, `false`, zero (or no `f64` form) and empty string/array/object are falsy.
+pub(crate) fn is_truthy(value: &serde_json::Value) -> bool {
+    use serde_json::Value;
+    match value {
+        Value::Null => false,
+        Value::Bool(flag) => *flag,
+        Value::Number(number) => number.as_f64().is_some_and(|n| n != 0.0),
+        Value::String(text) => !text.is_empty(),
+        Value::Array(items) => !items.is_empty(),
+        Value::Object(entries) => !entries.is_empty(),
+    }
+}
diff --git a/crates/waymark/src/waymark_core/runner/value_visitor.rs b/crates/lib/runner-state/src/value_visitor.rs
similarity index 99%
rename from crates/waymark/src/waymark_core/runner/value_visitor.rs
rename to crates/lib/runner-state/src/value_visitor.rs
index 82f02db1..fbc7736a 100644
--- a/crates/waymark/src/waymark_core/runner/value_visitor.rs
+++ b/crates/lib/runner-state/src/value_visitor.rs
@@ -362,7 +362,7 @@ mod tests {
     use uuid::Uuid;
 
     use super::*;
-    use crate::messages::ast as ir;
+    use waymark_proto::ast as ir;
 
     fn literal_int(value: i64) -> ValueExpr {
         ValueExpr::Literal(LiteralValue {
diff --git a/crates/lib/runner/Cargo.toml b/crates/lib/runner/Cargo.toml
new file mode 100644
index 00000000..115de256
--- /dev/null
+++ b/crates/lib/runner/Cargo.toml
@@ -0,0 +1,24 @@
+[package]
+name = "waymark-runner"
+version = "0.1.0"
+edition = "2024"
+
+[dependencies]
+chrono = { workspace = true, features = ["serde"]  }
+rustc-hash = { workspace = true }
+serde_json = { workspace = true }
+thiserror = { workspace = true }
+uuid = { workspace = true }
+waymark-dag = { workspace = true }
+waymark-proto = { workspace = true }
+waymark-observability = { workspace = true }
+waymark-runner-state = { workspace = true }
+waymark-core-backend = { workspace = true }
+tracing = { workspace = true }
+
+[dev-dependencies]
+waymark-ir-parser = { workspace = true }
+waymark-backend-memory = { workspace = true }
+
+[features]
+trace = []
diff --git a/crates/waymark/src/waymark_core/runner/executor.rs b/crates/lib/runner/src/executor.rs
similarity index 98%
rename from crates/waymark/src/waymark_core/runner/executor.rs
rename to crates/lib/runner/src/executor.rs
index e0572688..bae9a9c2 100644
--- a/crates/waymark/src/waymark_core/runner/executor.rs
+++ b/crates/lib/runner/src/executor.rs
@@ -10,24 +10,20 @@ use rustc_hash::FxHashMap;
 use serde_json::Value;
 use uuid::Uuid;
 
-use crate::backends::{ActionAttemptStatus, ActionDone, CoreBackend, GraphUpdate};
-use crate::messages::ast as ir;
-use crate::observability::obs;
-use crate::waymark_core::runner::expression_evaluator::is_exception_value;
-use crate::waymark_core::runner::retry::{
-    RetryDecision, RetryPolicyEvaluator, timeout_seconds_from_policies,
+use crate::expression_evaluator::is_exception_value;
+use crate::retry::{RetryDecision, RetryPolicyEvaluator, timeout_seconds_from_policies};
+use crate::synthetic_exceptions::{SyntheticExceptionType, build_synthetic_exception_value};
+use waymark_core_backend::{ActionAttemptStatus, ActionDone, CoreBackend, GraphUpdate};
+use waymark_dag::{
+    ActionCallNode, AggregatorNode, DAG, DAGEdge, DagEdgeIndex, EXCEPTION_SCOPE_VAR, EdgeType,
 };
-use crate::waymark_core::runner::state::{
+use waymark_observability::obs;
+use waymark_proto::ast as ir;
+use waymark_runner_state::value_visitor::ValueExpr;
+use waymark_runner_state::{
     ActionCallSpec, ExecutionEdge, ExecutionNode, ExecutionNodeType, IndexValue, ListValue,
     LiteralValue, NodeStatus, QueueNodeParams, RunnerState, RunnerStateError,
 };
-use crate::waymark_core::runner::synthetic_exceptions::{
-    SyntheticExceptionType, build_synthetic_exception_value,
-};
-use crate::waymark_core::runner::value_visitor::ValueExpr;
-use waymark_dag::{
-    ActionCallNode, AggregatorNode, DAG, DAGEdge, DagEdgeIndex, EXCEPTION_SCOPE_VAR, EdgeType,
-};
 
 /// Raised when the runner executor cannot advance safely.
 #[derive(Debug, thiserror::Error)]
@@ -1505,16 +1501,14 @@ mod tests {
     use std::collections::{HashMap, HashSet};
     use std::sync::Arc;
 
-    use crate::backends::MemoryBackend;
-    use crate::messages::ast as ir;
-    use crate::waymark_core::ir_parser::parse_program;
-    use crate::waymark_core::runner::state::{
-        ExecutionEdge, ExecutionNode, NodeStatus, RunnerState,
-    };
+    use waymark_backend_memory::MemoryBackend;
     use waymark_dag::{
         ActionCallNode, ActionCallParams, AggregatorNode, AssignmentNode, DAG, DAGEdge,
         convert_to_dag,
     };
+    use waymark_ir_parser::parse_program;
+    use waymark_proto::ast as ir;
+    use waymark_runner_state::{ExecutionEdge, ExecutionNode, NodeStatus, RunnerState};
 
     fn variable(name: &str) -> ir::Expr {
         ir::Expr {
@@ -1992,16 +1986,12 @@ mod tests {
                 rehydrated.state().ready_queue.is_empty()
             );
 
-            let replay_canonical = crate::waymark_core::runner::replay_variables(
-                canonical.state(),
-                canonical.action_results(),
-            )
-            .expect("replay canonical");
-            let replay_rehydrated = crate::waymark_core::runner::replay_variables(
-                rehydrated.state(),
-                rehydrated.action_results(),
-            )
-            .expect("replay rehydrated");
+            let replay_canonical =
+                crate::replay_variables(canonical.state(), canonical.action_results())
+                    .expect("replay canonical");
+            let replay_rehydrated =
+                crate::replay_variables(rehydrated.state(), rehydrated.action_results())
+                    .expect("replay rehydrated");
 
             let mut assignment_counts: HashMap<String, usize> = HashMap::new();
             for node in canonical.state().nodes.values() {
@@ -2712,21 +2702,15 @@ fn main(input: [], output: [done]):
         let mut executor = RunnerExecutor::new(dag.clone(), state, action_results, None);
         executor.increment(&[exec1.node_id]).expect("increment");
 
-        let orig_replay = crate::waymark_core::runner::replay_variables(
-            executor.state(),
-            executor.action_results(),
-        )
-        .expect("replay");
+        let orig_replay =
+            crate::replay_variables(executor.state(), executor.action_results()).expect("replay");
 
         let (nodes_snap, edges_snap, results_snap) =
             snapshot_state(executor.state(), executor.action_results());
         let rehydrated = create_rehydrated_executor(&dag, nodes_snap, edges_snap, results_snap);
 
-        let rehy_replay = crate::waymark_core::runner::replay_variables(
-            rehydrated.state(),
-            rehydrated.action_results(),
-        )
-        .expect("replay");
+        let rehy_replay = crate::replay_variables(rehydrated.state(), rehydrated.action_results())
+            .expect("replay");
         assert_eq!(orig_replay.variables, rehy_replay.variables);
         assert_eq!(
             rehy_replay.variables.get("doubled"),
diff --git a/crates/waymark/src/waymark_core/runner/expression_evaluator.rs b/crates/lib/runner/src/expression_evaluator.rs
similarity index 98%
rename from crates/waymark/src/waymark_core/runner/expression_evaluator.rs
rename to crates/lib/runner/src/expression_evaluator.rs
index 96d908e2..dac989a9 100644
--- a/crates/waymark/src/waymark_core/runner/expression_evaluator.rs
+++ b/crates/lib/runner/src/expression_evaluator.rs
@@ -5,15 +5,15 @@ use std::rc::Rc;
 use serde_json::Value;
 use uuid::Uuid;
 
-use crate::messages::ast as ir;
-use crate::observability::obs;
-use crate::waymark_core::runner::state::{
+use waymark_dag::{DAGEdge, EdgeType};
+use waymark_observability::obs;
+use waymark_proto::ast as ir;
+use waymark_runner_state::{
     ActionCallSpec, ActionResultValue, BinaryOpValue, DictEntryValue, DictValue, DotValue,
     FunctionCallValue, IndexValue, ListValue, LiteralValue, UnaryOpValue, VariableValue,
     literal_value,
+    value_visitor::{ValueExpr, ValueExprEvaluator},
 };
-use crate::waymark_core::runner::value_visitor::{ValueExpr, ValueExprEvaluator};
-use waymark_dag::{DAGEdge, EdgeType};
 
 use super::{RunnerExecutor, RunnerExecutorError};
 
@@ -674,15 +674,13 @@ mod tests {
     use uuid::Uuid;
 
     use super::*;
-    use crate::messages::ast as ir;
-    use crate::waymark_core::ir_parser::IRParser;
-    use crate::waymark_core::runner::RunnerState;
-    use crate::waymark_core::runner::state::{
+    use waymark_dag::{DAG, DAGEdge};
+    use waymark_ir_parser::IRParser;
+    use waymark_proto::ast as ir;
+    use waymark_runner_state::{
         ActionCallSpec, ActionResultValue, BinaryOpValue, FunctionCallValue, LiteralValue,
-        VariableValue,
+        RunnerState, VariableValue, value_visitor::ValueExpr,
     };
-    use crate::waymark_core::runner::value_visitor::ValueExpr;
-    use waymark_dag::{DAG, DAGEdge};
 
     fn parse_expr(source: &str) -> ir::Expr {
         IRParser::new("    ")
@@ -801,7 +799,7 @@ mod tests {
     #[test]
     fn test_evaluate_value_expr_happy_path() {
         let executor = executor_with_assignment("x", literal_int(3));
-        let expr = ValueExpr::BinaryOp(crate::waymark_core::runner::state::BinaryOpValue {
+        let expr = ValueExpr::BinaryOp(waymark_runner_state::BinaryOpValue {
             left: Box::new(ValueExpr::Variable(VariableValue {
                 name: "x".to_string(),
             })),
diff --git a/crates/waymark/src/waymark_core/runner/mod.rs b/crates/lib/runner/src/lib.rs
similarity index 53%
rename from crates/waymark/src/waymark_core/runner/mod.rs
rename to crates/lib/runner/src/lib.rs
index 4e7a491d..684a49a0 100644
--- a/crates/waymark/src/waymark_core/runner/mod.rs
+++ b/crates/lib/runner/src/lib.rs
@@ -4,16 +4,11 @@ pub mod executor;
 pub mod expression_evaluator;
 pub mod replay;
 pub(crate) mod retry;
-pub mod state;
-pub(crate) mod synthetic_exceptions;
-pub mod value_visitor;
+
+/// TODO: make this `pub(crate)` again
+pub mod synthetic_exceptions;
 
 pub use executor::{
     DurableUpdates, ExecutorStep, RunnerExecutor, RunnerExecutorError, SleepRequest,
 };
 pub use replay::{ReplayError, ReplayResult, replay_action_kwargs, replay_variables};
-pub use state::{
-    ActionCallSpec, ActionResultValue, ExecutionEdge, ExecutionNode, NodeStatus, RunnerState,
-    RunnerStateError, format_value,
-};
-pub use value_visitor::ValueExpr;
diff --git a/crates/waymark/src/waymark_core/runner/replay.rs b/crates/lib/runner/src/replay.rs
similarity index 98%
rename from crates/waymark/src/waymark_core/runner/replay.rs
rename to crates/lib/runner/src/replay.rs
index 246caace..ffb413a1 100644
--- a/crates/waymark/src/waymark_core/runner/replay.rs
+++ b/crates/lib/runner/src/replay.rs
@@ -7,14 +7,16 @@ use std::rc::Rc;
 use serde_json::Value;
 use uuid::Uuid;
 
-use crate::messages::ast as ir;
-use crate::waymark_core::runner::expression_evaluator::{
+use crate::expression_evaluator::{
     add_values, compare_values, int_value, is_exception_value, is_truthy, len_of_value, numeric_op,
     range_from_args, value_in,
 };
-use crate::waymark_core::runner::state::{ActionResultValue, FunctionCallValue, RunnerState};
-use crate::waymark_core::runner::value_visitor::{ValueExpr, ValueExprEvaluator};
 use waymark_dag::{EXCEPTION_SCOPE_VAR, EdgeType};
+use waymark_proto::ast as ir;
+use waymark_runner_state::{
+    ActionResultValue, FunctionCallValue, RunnerState,
+    value_visitor::{ValueExpr, ValueExprEvaluator},
+};
 
 /// Raised when replay cannot reconstruct variable values.
 #[derive(Debug, thiserror::Error)]
@@ -515,9 +517,8 @@ pub fn replay_action_kwargs(
 #[cfg(test)]
 mod tests {
     use super::*;
-    use crate::messages::ast as ir;
-    use crate::waymark_core::runner::state::{RunnerState, VariableValue};
-    use crate::waymark_core::runner::value_visitor::ValueExpr;
+    use waymark_proto::ast as ir;
+    use waymark_runner_state::{RunnerState, VariableValue, value_visitor::ValueExpr};
 
     fn action_plus_two_expr() -> ir::Expr {
         ir::Expr {
diff --git a/crates/waymark/src/waymark_core/runner/retry.rs b/crates/lib/runner/src/retry.rs
similarity index 99%
rename from crates/waymark/src/waymark_core/runner/retry.rs
rename to crates/lib/runner/src/retry.rs
index e6fb4d70..a24f7a2d 100644
--- a/crates/waymark/src/waymark_core/runner/retry.rs
+++ b/crates/lib/runner/src/retry.rs
@@ -1,6 +1,6 @@
 //! Retry/timeout policy helpers shared by runner components.
 
-use crate::messages::ast as ir;
+use waymark_proto::ast as ir;
 
 #[derive(Clone, Debug)]
 pub(crate) struct RetryDecision {
diff --git a/crates/waymark/src/waymark_core/runner/synthetic_exceptions.rs b/crates/lib/runner/src/synthetic_exceptions.rs
similarity index 91%
rename from crates/waymark/src/waymark_core/runner/synthetic_exceptions.rs
rename to crates/lib/runner/src/synthetic_exceptions.rs
index df89b71f..5bd2be0d 100644
--- a/crates/waymark/src/waymark_core/runner/synthetic_exceptions.rs
+++ b/crates/lib/runner/src/synthetic_exceptions.rs
@@ -3,13 +3,13 @@
 use serde_json::Value;
 
 #[derive(Clone, Copy, Debug, PartialEq, Eq)]
-pub(crate) enum SyntheticExceptionType {
+pub enum SyntheticExceptionType {
     ExecutorResume,
     ActionTimeout,
 }
 
 impl SyntheticExceptionType {
-    pub(crate) fn as_type_str(self) -> &'static str {
+    pub fn as_type_str(self) -> &'static str {
         match self {
             Self::ExecutorResume => "ExecutorResume",
             Self::ActionTimeout => "ActionTimeout",
@@ -24,7 +24,7 @@ impl SyntheticExceptionType {
         }
     }
 
-    pub(crate) fn from_value(value: &Value) -> Option<Self> {
+    pub fn from_value(value: &Value) -> Option<Self> {
         let Value::Object(map) = value else {
             return None;
         };
@@ -34,7 +34,7 @@ impl SyntheticExceptionType {
     }
 }
 
-pub(crate) fn build_synthetic_exception_value(
+pub fn build_synthetic_exception_value(
     exception_type: SyntheticExceptionType,
     message: impl Into<String>,
     fields: Vec<(String, Value)>,
diff --git a/crates/lib/scheduler-backend/Cargo.toml b/crates/lib/scheduler-backend/Cargo.toml
new file mode 100644
index 00000000..6af1c2bb
--- /dev/null
+++ b/crates/lib/scheduler-backend/Cargo.toml
@@ -0,0 +1,10 @@
+[package]
+name = "waymark-scheduler-backend"
+version = "0.1.0"
+edition = "2024"
+
+[dependencies]
+async-trait = { workspace = true }
+uuid = { workspace = true }
+waymark-backends-core = { workspace = true }
+waymark-scheduler-core = { workspace = true }
diff --git a/crates/lib/scheduler-backend/src/lib.rs b/crates/lib/scheduler-backend/src/lib.rs
new file mode 100644
index 00000000..613cc4ac
--- /dev/null
+++ b/crates/lib/scheduler-backend/src/lib.rs
@@ -0,0 +1,29 @@
+use uuid::Uuid;
+
+pub use waymark_backends_core::{BackendError, BackendResult};
+use waymark_scheduler_core::{CreateScheduleParams, ScheduleId, WorkflowSchedule};
+
+/// Backend capability for workflow schedule persistence; every method returns a `BackendResult`.
+#[async_trait::async_trait]
+pub trait SchedulerBackend: Send + Sync {
+    async fn upsert_schedule(&self, params: &CreateScheduleParams) -> BackendResult<ScheduleId>;
+    async fn get_schedule(&self, id: ScheduleId) -> BackendResult<WorkflowSchedule>;
+    async fn get_schedule_by_name(
+        &self,
+        workflow_name: &str,
+        schedule_name: &str,
+    ) -> BackendResult<Option<WorkflowSchedule>>; // unlike `get_schedule`, wrapped in `Option`
+    async fn list_schedules(&self, limit: i64, offset: i64)
+    -> BackendResult<Vec<WorkflowSchedule>>; // paged via `limit`/`offset` (both `i64`)
+    async fn count_schedules(&self) -> BackendResult<i64>;
+    async fn update_schedule_status(&self, id: ScheduleId, status: &str) -> BackendResult<bool>;
+    async fn delete_schedule(&self, id: ScheduleId) -> BackendResult<bool>;
+    async fn find_due_schedules(&self, limit: i32) -> BackendResult<Vec<WorkflowSchedule>>;
+    async fn has_running_instance(&self, schedule_id: ScheduleId) -> BackendResult<bool>;
+    async fn mark_schedule_executed(
+        &self,
+        schedule_id: ScheduleId,
+        instance_id: Uuid,
+    ) -> BackendResult<()>;
+    async fn skip_schedule_run(&self, schedule_id: ScheduleId) -> BackendResult<()>;
+}
diff --git a/crates/lib/scheduler-core/Cargo.toml b/crates/lib/scheduler-core/Cargo.toml
new file mode 100644
index 00000000..9659e878
--- /dev/null
+++ b/crates/lib/scheduler-core/Cargo.toml
@@ -0,0 +1,14 @@
+[package]
+name = "waymark-scheduler-core"
+version = "0.1.0"
+edition = "2024"
+
+[dependencies]
+uuid = { workspace = true, features = ["serde", "v4"] }
+chrono = { workspace = true, features = ["serde"] }
+serde = { workspace = true, features = ["derive"] }
+cron = { workspace = true }
+rand = { workspace = true }
+
+[dev-dependencies]
+chrono = { workspace = true, features = ["clock"] }
diff --git a/crates/lib/scheduler-core/src/lib.rs b/crates/lib/scheduler-core/src/lib.rs
new file mode 100644
index 00000000..02d2783b
--- /dev/null
+++ b/crates/lib/scheduler-core/src/lib.rs
@@ -0,0 +1,6 @@
+mod types;
+mod utils;
+
+pub use self::types::*;
+
+pub use self::utils::*;
diff --git a/crates/waymark/src/scheduler/types.rs b/crates/lib/scheduler-core/src/types.rs
similarity index 100%
rename from crates/waymark/src/scheduler/types.rs
rename to crates/lib/scheduler-core/src/types.rs
diff --git a/crates/waymark/src/scheduler/utils.rs b/crates/lib/scheduler-core/src/utils.rs
similarity index 100%
rename from crates/waymark/src/scheduler/utils.rs
rename to crates/lib/scheduler-core/src/utils.rs
diff --git a/crates/lib/test-support/Cargo.toml b/crates/lib/test-support/Cargo.toml
new file mode 100644
index 00000000..829d2395
--- /dev/null
+++ b/crates/lib/test-support/Cargo.toml
@@ -0,0 +1,8 @@
+[package]
+name = "waymark-test-support"
+version = "0.1.0"
+edition = "2024"
+
+[dependencies]
+sqlx = { workspace = true }
+waymark-integration-support = { workspace = true }
diff --git a/crates/waymark/src/test_support/mod.rs b/crates/lib/test-support/src/lib.rs
similarity index 100%
rename from crates/waymark/src/test_support/mod.rs
rename to crates/lib/test-support/src/lib.rs
diff --git a/crates/waymark/src/test_support/postgres.rs b/crates/lib/test-support/src/postgres.rs
similarity index 83%
rename from crates/waymark/src/test_support/postgres.rs
rename to crates/lib/test-support/src/postgres.rs
index 1fb9e50f..e76bf812 100644
--- a/crates/waymark/src/test_support/postgres.rs
+++ b/crates/lib/test-support/src/postgres.rs
@@ -2,7 +2,7 @@
 
 use sqlx::PgPool;
 
-use crate::integration_support::{LOCAL_POSTGRES_DSN, connect_pool, ensure_local_postgres};
+use waymark_integration_support::{LOCAL_POSTGRES_DSN, connect_pool, ensure_local_postgres};
 
 /// Ensure test Postgres is available and migrated, then return a pooled connection.
 pub async fn postgres_setup() -> PgPool {
diff --git a/crates/lib/webapp-backend/Cargo.toml b/crates/lib/webapp-backend/Cargo.toml
new file mode 100644
index 00000000..735810b3
--- /dev/null
+++ b/crates/lib/webapp-backend/Cargo.toml
@@ -0,0 +1,10 @@
+[package]
+name = "waymark-webapp-backend"
+version = "0.1.0"
+edition = "2024"
+
+[dependencies]
+async-trait = { workspace = true }
+uuid = { workspace = true }
+waymark-backends-core = { workspace = true }
+waymark-webapp-core = { workspace = true }
diff --git a/crates/lib/webapp-backend/src/lib.rs b/crates/lib/webapp-backend/src/lib.rs
new file mode 100644
index 00000000..bc8f365c
--- /dev/null
+++ b/crates/lib/webapp-backend/src/lib.rs
@@ -0,0 +1,54 @@
+use uuid::Uuid;
+use waymark_backends_core::BackendResult;
+use waymark_webapp_core::{
+    ExecutionGraphView, InstanceDetail, InstanceSummary, ScheduleDetail, ScheduleInvocationSummary,
+    ScheduleSummary, TimelineEntry, WorkerActionRow, WorkerAggregateStats, WorkerStatus,
+};
+
+/// Backend capability for webapp-specific queries (instances, schedules, worker stats).
+#[async_trait::async_trait]
+pub trait WebappBackend: Send + Sync {
+    async fn count_instances(&self, search: Option<&str>) -> BackendResult<i64>;
+    async fn list_instances(
+        &self,
+        search: Option<&str>,
+        limit: i64,
+        offset: i64,
+    ) -> BackendResult<Vec<InstanceSummary>>;
+    async fn get_instance(&self, instance_id: Uuid) -> BackendResult<InstanceDetail>;
+    async fn get_execution_graph(
+        &self,
+        instance_id: Uuid,
+    ) -> BackendResult<Option<ExecutionGraphView>>;
+    async fn get_workflow_graph(
+        &self,
+        instance_id: Uuid,
+    ) -> BackendResult<Option<ExecutionGraphView>>; // same view type as `get_execution_graph`
+    async fn get_action_results(&self, instance_id: Uuid) -> BackendResult<Vec<TimelineEntry>>;
+    async fn get_distinct_workflows(&self) -> BackendResult<Vec<String>>;
+    async fn get_distinct_statuses(&self) -> BackendResult<Vec<String>>;
+    async fn count_schedules(&self) -> BackendResult<i64>;
+    async fn list_schedules(&self, limit: i64, offset: i64) -> BackendResult<Vec<ScheduleSummary>>;
+    async fn get_schedule(&self, schedule_id: Uuid) -> BackendResult<ScheduleDetail>;
+    async fn count_schedule_invocations(&self, schedule_id: Uuid) -> BackendResult<i64>;
+    async fn list_schedule_invocations(
+        &self,
+        schedule_id: Uuid,
+        limit: i64,
+        offset: i64,
+    ) -> BackendResult<Vec<ScheduleInvocationSummary>>;
+    async fn update_schedule_status(&self, schedule_id: Uuid, status: &str) -> BackendResult<bool>;
+    async fn get_distinct_schedule_statuses(&self) -> BackendResult<Vec<String>>;
+    async fn get_distinct_schedule_types(&self) -> BackendResult<Vec<String>>;
+    async fn get_worker_action_stats(
+        &self,
+        window_minutes: i64,
+    ) -> BackendResult<Vec<WorkerActionRow>>;
+    async fn get_worker_aggregate_stats(
+        &self,
+        window_minutes: i64,
+    ) -> BackendResult<WorkerAggregateStats>;
+    async fn worker_status_table_exists(&self) -> bool; // NOTE(review): no `BackendResult` here
+    async fn schedules_table_exists(&self) -> bool; // NOTE(review): no `BackendResult` here
+    async fn get_worker_statuses(&self, window_minutes: i64) -> BackendResult<Vec<WorkerStatus>>;
+}
diff --git a/crates/lib/webapp-core/Cargo.toml b/crates/lib/webapp-core/Cargo.toml
new file mode 100644
index 00000000..2b51dc6d
--- /dev/null
+++ b/crates/lib/webapp-core/Cargo.toml
@@ -0,0 +1,9 @@
+[package]
+name = "waymark-webapp-core"
+version = "0.1.0"
+edition = "2024"
+
+[dependencies]
+uuid = { workspace = true, features = ["serde"] }
+chrono = { workspace = true, features = ["serde"] }
+serde = { workspace = true, features = ["derive"] }
diff --git a/crates/lib/webapp-core/src/lib.rs b/crates/lib/webapp-core/src/lib.rs
new file mode 100644
index 00000000..61a4a453
--- /dev/null
+++ b/crates/lib/webapp-core/src/lib.rs
@@ -0,0 +1,247 @@
+//! Shared types for the webapp.
+
+use chrono::{DateTime, Utc};
+use serde::{Deserialize, Serialize};
+use uuid::Uuid;
+
+/// Instance status; serialized and deserialized as the lowercase variant name.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
+#[serde(rename_all = "lowercase")]
+pub enum InstanceStatus {
+    Queued,
+    Running,
+    Completed,
+    Failed,
+}
+
+impl std::fmt::Display for InstanceStatus {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.write_str(match self {
+            Self::Queued => "queued",
+            Self::Running => "running",
+            Self::Completed => "completed",
+            Self::Failed => "failed",
+        }) // lowercase label per variant, written as-is
+    }
+}
+
+/// Summary of a workflow instance for listing; serialize-only, with an input preview string.
+#[derive(Debug, Clone, Serialize)]
+pub struct InstanceSummary {
+    pub id: Uuid,
+    pub entry_node: Uuid,
+    pub created_at: DateTime<Utc>,
+    pub status: InstanceStatus,
+    pub workflow_name: Option<String>,
+    pub input_preview: String,
+}
+
+/// Full details of a workflow instance: payloads as strings, plus an optional error payload.
+#[derive(Debug, Clone, Serialize)]
+pub struct InstanceDetail {
+    pub id: Uuid,
+    pub entry_node: Uuid,
+    pub created_at: DateTime<Utc>,
+    pub status: InstanceStatus,
+    pub workflow_name: Option<String>,
+    pub input_payload: String,
+    pub result_payload: String,
+    pub error_payload: Option<String>,
+}
+
+/// Node in the execution graph for display; all fields are strings, action/module names optional.
+#[derive(Debug, Clone, Serialize)]
+pub struct ExecutionNodeView {
+    pub id: String,
+    pub node_type: String,
+    pub label: String,
+    pub status: String,
+    pub action_name: Option<String>,
+    pub module_name: Option<String>,
+}
+
+/// Edge in the execution graph for display; endpoints and edge type are plain strings.
+#[derive(Debug, Clone, Serialize)]
+pub struct ExecutionEdgeView {
+    pub source: String,
+    pub target: String,
+    pub edge_type: String,
+}
+
+/// Execution graph data for rendering: a list of node views and a list of edge views.
+#[derive(Debug, Clone, Serialize)]
+pub struct ExecutionGraphView {
+    pub nodes: Vec<ExecutionNodeView>,
+    pub edges: Vec<ExecutionEdgeView>,
+}
+
+/// Timeline entry for an action execution; request/response are carried as preview strings.
+#[derive(Debug, Clone, Serialize)]
+pub struct TimelineEntry {
+    pub action_id: String,
+    pub action_name: String,
+    pub module_name: Option<String>,
+    pub status: String,
+    pub attempt_number: i32,
+    pub dispatched_at: Option<String>, // NOTE(review): string format not shown here
+    pub completed_at: Option<String>, // NOTE(review): string format not shown here
+    pub duration_ms: Option<i64>,
+    pub request_preview: String,
+    pub response_preview: String,
+    pub error: Option<String>,
+}
+
+/// Action log entry with full details; shaped like `TimelineEntry` but with `request`/`response`.
+#[derive(Debug, Clone, Serialize)]
+pub struct ActionLogEntry {
+    pub action_id: String,
+    pub action_name: String,
+    pub module_name: Option<String>,
+    pub status: String,
+    pub attempt_number: i32,
+    pub dispatched_at: Option<String>,
+    pub completed_at: Option<String>,
+    pub duration_ms: Option<i64>,
+    pub request: String,
+    pub response: String,
+    pub error: Option<String>,
+}
+
+/// Response for the workflow run data API: nodes, timeline, and pagination fields.
+#[derive(Debug, Serialize)]
+pub struct WorkflowRunDataResponse {
+    pub nodes: Vec<ExecutionNodeView>,
+    pub timeline: Vec<TimelineEntry>,
+    pub page: i64,
+    pub per_page: i64,
+    pub total: i64,
+    pub has_more: bool,
+}
+
+/// Response for action logs API; wraps the list of `ActionLogEntry` values under `logs`.
+#[derive(Debug, Serialize)]
+pub struct ActionLogsResponse {
+    pub logs: Vec<ActionLogEntry>,
+}
+
+/// Filter values response; wraps a list of strings under `values`.
+#[derive(Debug, Serialize)]
+pub struct FilterValuesResponse {
+    pub values: Vec<String>,
+}
+
+/// Health check response built from static strings; derives `Serialize` only.
+#[derive(Debug, Serialize)]
+pub struct HealthResponse {
+    pub status: &'static str,
+    pub service: &'static str,
+}
+
+/// Export format for a workflow instance: version tag, export time, instance info, nodes, timeline.
+#[derive(Debug, Serialize)]
+pub struct WorkflowInstanceExport {
+    pub export_version: &'static str,
+    pub exported_at: String,
+    pub instance: InstanceExportInfo,
+    pub nodes: Vec<ExecutionNodeView>,
+    pub timeline: Vec<TimelineEntry>,
+}
+
+/// Full worker status for webapp display; derives only `Debug`/`Clone` (no serde).
+#[derive(Debug, Clone)]
+pub struct WorkerStatus {
+    pub pool_id: Uuid,
+    pub active_workers: i32,
+    pub throughput_per_min: f64,
+    pub actions_per_sec: f64,
+    pub total_completed: i64,
+    pub last_action_at: Option<DateTime<Utc>>,
+    pub updated_at: DateTime<Utc>,
+    pub median_dequeue_ms: Option<i64>,
+    pub median_handling_ms: Option<i64>,
+    pub dispatch_queue_size: Option<i64>,
+    pub total_in_flight: Option<i64>,
+    pub median_instance_duration_secs: Option<f64>,
+    pub active_instance_count: i32,
+    pub total_instances_completed: i64,
+    pub instances_per_sec: f64,
+    pub instances_per_min: f64,
+    pub time_series: Option<Vec<u8>>, // NOTE(review): opaque bytes; encoding not shown here
+}
+
+/// Worker action stats row for display; `pool_id`, `actions_per_sec` and `*_at` fields are strings.
+#[derive(Debug, Clone)]
+pub struct WorkerActionRow {
+    pub pool_id: String,
+    pub active_workers: i64,
+    pub actions_per_sec: String,
+    pub throughput_per_min: i64,
+    pub total_completed: i64,
+    pub median_dequeue_ms: Option<i64>,
+    pub median_handling_ms: Option<i64>,
+    pub last_action_at: Option<String>,
+    pub updated_at: String,
+}
+
+/// Aggregate worker stats for overview cards; `actions_per_sec` is a string, the rest are `i64`.
+#[derive(Debug, Clone)]
+pub struct WorkerAggregateStats {
+    pub active_worker_count: i64,
+    pub actions_per_sec: String,
+    pub total_in_flight: i64,
+    pub total_queue_depth: i64,
+}
+
+/// Instance info for export; every field is a `String`.
+#[derive(Debug, Serialize)]
+pub struct InstanceExportInfo {
+    pub id: String,
+    pub status: String,
+    pub created_at: String,
+    pub input_payload: String,
+    pub result_payload: String,
+}
+
+/// Schedule summary for listing; `id` and the `*_at` fields are strings, cron/interval optional.
+#[derive(Debug, Clone, Serialize)]
+pub struct ScheduleSummary {
+    pub id: String,
+    pub workflow_name: String,
+    pub schedule_name: String,
+    pub schedule_type: String,
+    pub cron_expression: Option<String>,
+    pub interval_seconds: Option<i64>,
+    pub status: String,
+    pub next_run_at: Option<String>,
+    pub last_run_at: Option<String>,
+    pub created_at: String,
+}
+
+/// Full schedule details: a superset of the `ScheduleSummary` fields (jitter, priority, payload…).
+#[derive(Debug, Clone, Serialize)]
+pub struct ScheduleDetail {
+    pub id: String,
+    pub workflow_name: String,
+    pub schedule_name: String,
+    pub schedule_type: String,
+    pub cron_expression: Option<String>,
+    pub interval_seconds: Option<i64>,
+    pub jitter_seconds: i64,
+    pub status: String,
+    pub next_run_at: Option<String>,
+    pub last_run_at: Option<String>,
+    pub last_instance_id: Option<String>,
+    pub created_at: String,
+    pub updated_at: String,
+    pub priority: i32,
+    pub allow_duplicate: bool,
+    pub input_payload: Option<String>,
+}
+
+/// Invocation summary row for schedule detail pages: id, creation time, and `InstanceStatus`.
+#[derive(Debug, Clone, Serialize)]
+pub struct ScheduleInvocationSummary {
+    pub id: Uuid,
+    pub created_at: DateTime<Utc>,
+    pub status: InstanceStatus,
+}
diff --git a/crates/lib/worker-status-backend/Cargo.toml b/crates/lib/worker-status-backend/Cargo.toml
new file mode 100644
index 00000000..ff50466a
--- /dev/null
+++ b/crates/lib/worker-status-backend/Cargo.toml
@@ -0,0 +1,10 @@
+[package]
+name = "waymark-worker-status-backend"
+version = "0.1.0"
+edition = "2024"
+
+[dependencies]
+async-trait = { workspace = true }
+uuid = { workspace = true }
+chrono = { workspace = true }
+waymark-backends-core = { workspace = true }
diff --git a/crates/lib/worker-status-backend/src/lib.rs b/crates/lib/worker-status-backend/src/lib.rs
new file mode 100644
index 00000000..bc23eb4e
--- /dev/null
+++ b/crates/lib/worker-status-backend/src/lib.rs
@@ -0,0 +1,32 @@
+//! Worker status backend.
+
+use uuid::Uuid;
+
+pub use waymark_backends_core::{BackendError, BackendResult};
+
+/// Worker status update for persistence; the argument of `WorkerStatusBackend::upsert_worker_status`.
+#[derive(Clone, Debug)]
+pub struct WorkerStatusUpdate {
+    pub pool_id: Uuid,
+    pub throughput_per_min: f64,
+    pub total_completed: i64,
+    pub last_action_at: Option<chrono::DateTime<chrono::Utc>>, // UTC timestamp, optional
+    pub median_dequeue_ms: Option<i64>, // presumably `None` until samples exist — TODO confirm
+    pub median_handling_ms: Option<i64>, // presumably `None` until samples exist — TODO confirm
+    pub dispatch_queue_size: i64,
+    pub total_in_flight: i64,
+    pub active_workers: i32,
+    pub actions_per_sec: f64, // NOTE(review): relation to `throughput_per_min` is not visible here
+    pub median_instance_duration_secs: Option<f64>,
+    pub active_instance_count: i32,
+    pub total_instances_completed: i64,
+    pub instances_per_sec: f64,
+    pub instances_per_min: f64,
+    pub time_series: Option<Vec<u8>>, // opaque bytes; NOTE(review): encoding not visible here — confirm
+}
+
+/// Backend capability for recording worker status metrics; implementors must be `Send + Sync`.
+#[async_trait::async_trait]
+pub trait WorkerStatusBackend: Send + Sync {
+    async fn upsert_worker_status(&self, status: &WorkerStatusUpdate) -> BackendResult<()>;
+}
diff --git a/crates/lib/workflow-registry-backend/Cargo.toml b/crates/lib/workflow-registry-backend/Cargo.toml
new file mode 100644
index 00000000..2dc85a4d
--- /dev/null
+++ b/crates/lib/workflow-registry-backend/Cargo.toml
@@ -0,0 +1,9 @@
+[package]
+name = "waymark-workflow-registry-backend"
+version = "0.1.0"
+edition = "2024"
+
+[dependencies]
+async-trait = { workspace = true }
+uuid = { workspace = true }
+waymark-backends-core = { workspace = true }
diff --git a/crates/lib/workflow-registry-backend/src/lib.rs b/crates/lib/workflow-registry-backend/src/lib.rs
new file mode 100644
index 00000000..041c8482
--- /dev/null
+++ b/crates/lib/workflow-registry-backend/src/lib.rs
@@ -0,0 +1,35 @@
+use uuid::Uuid;
+
+pub use waymark_backends_core::{BackendError, BackendResult};
+
+/// Registration payload for workflow DAG metadata; the input to `upsert_workflow_version`.
+#[derive(Clone, Debug)]
+pub struct WorkflowRegistration {
+    pub workflow_name: String,
+    pub workflow_version: String,
+    pub ir_hash: String, // NOTE(review): hash algorithm/format is not visible here — confirm
+    pub program_proto: Vec<u8>, // raw bytes; presumably a serialized protobuf program — TODO confirm
+    pub concurrent: bool,
+}
+
+/// Stored workflow version metadata and IR payload; the element type of `get_workflow_versions`.
+#[derive(Clone, Debug)]
+pub struct WorkflowVersion {
+    pub id: Uuid, // the only field that `WorkflowRegistration` does not also carry
+    pub workflow_name: String,
+    pub workflow_version: String,
+    pub ir_hash: String, // NOTE(review): hash algorithm/format is not visible here — confirm
+    pub program_proto: Vec<u8>, // raw bytes; presumably a serialized protobuf program — TODO confirm
+    pub concurrent: bool,
+}
+
+/// Backend capability for registering workflow DAGs and reading stored versions back by id.
+#[async_trait::async_trait]
+pub trait WorkflowRegistryBackend: Send + Sync {
+    async fn upsert_workflow_version(
+        &self,
+        registration: &WorkflowRegistration,
+    ) -> BackendResult<Uuid>;
+
+    async fn get_workflow_versions(&self, ids: &[Uuid]) -> BackendResult<Vec<WorkflowVersion>>;
+}
diff --git a/crates/waymark/Cargo.toml b/crates/waymark/Cargo.toml
index 998ba4f9..91b817bf 100644
--- a/crates/waymark/Cargo.toml
+++ b/crates/waymark/Cargo.toml
@@ -17,8 +17,25 @@ name = "smoke"
 path = "src/bin/smoke.rs"
 
 [dependencies]
-waymark-proto = { workspace = true, features = ["serde", "client", "server"] }
+waymark-core-backend = { workspace = true }
 waymark-dag = { workspace = true }
+waymark-ir-parser = { workspace = true }
+waymark-observability = { workspace = true }
+waymark-proto = { workspace = true, features = ["serde", "client", "server"] }
+waymark-runner = { workspace = true }
+waymark-runner-state = { workspace = true }
+waymark-webapp-backend = { workspace = true }
+waymark-webapp-core = { workspace = true }
+waymark-garbage-collector-backend = { workspace = true }
+waymark-scheduler-backend = { workspace = true }
+waymark-scheduler-core = { workspace = true }
+waymark-backends-core = { workspace = true }
+waymark-integration-support = { workspace = true }
+waymark-backend-postgres = { workspace = true }
+waymark-backend-postgres-migrations = { workspace = true }
+waymark-workflow-registry-backend = { workspace = true }
+waymark-worker-status-backend = { workspace = true }
+waymark-backend-memory = { workspace = true }
 
 anyhow = "1"
 axum = "0.8"
@@ -39,9 +56,10 @@ sqlx = { version = "0.8", default-features = false, features = ["runtime-tokio-r
 tera = "1"
 uuid = { version = "1", features = ["serde", "v4"] }
 dotenvy = "0.15"
-thiserror = "1"
+thiserror = { workspace = true }
 tokio = { version = "1", features = ["full"] }
 tokio-stream = { version = "0.1", features = ["net"] }
+tokio-util = "0.7"
 tonic = { version = "0.11", features = ["transport"] }
 tonic-health = "0.11"
 tracing = "0.1"
@@ -49,7 +67,6 @@ tracing-subscriber = { version = "0.3", features = ["env-filter"] }
 tracing-chrome = "0.7"
 metrics = "0.24"
 regex = "1"
-waymark-observability-macros = { path = "../observability-macros" }
 console-subscriber = { version = "0.5", optional = true }
 
 [features]
@@ -57,6 +74,10 @@ trace = []
 observability = ["trace", "dep:console-subscriber"]
 
 [dev-dependencies]
+waymark-backend-fault-injection = { workspace = true }
+waymark-backend-memory = { workspace = true } # NOTE(review): also added under [dependencies]; one entry is likely redundant
+waymark-test-support = { workspace = true }
+
 serial_test = "2"
 tower = { version = "0.5", features = ["util"] }
 http-body-util = "0.1"
diff --git a/crates/waymark/src/backends/base.rs b/crates/waymark/src/backends/base.rs
deleted file mode 100644
index 92c17a3f..00000000
--- a/crates/waymark/src/backends/base.rs
+++ /dev/null
@@ -1,366 +0,0 @@
-//! Backend interfaces for persisting runner state and action results.
-
-use std::collections::{HashMap, HashSet};
-use std::sync::Arc;
-
-use chrono::{DateTime, Utc};
-use serde::{Deserialize, Deserializer, Serialize};
-use serde_json::Value;
-use tonic::async_trait;
-use uuid::Uuid;
-
-use crate::scheduler::{CreateScheduleParams, ScheduleId, WorkflowSchedule};
-use crate::waymark_core::runner::state::{ExecutionEdge, ExecutionNode, NodeStatus, RunnerState};
-use crate::webapp::{
-    ExecutionGraphView, InstanceDetail, InstanceSummary, ScheduleDetail, ScheduleInvocationSummary,
-    ScheduleSummary, TimelineEntry, WorkerActionRow, WorkerAggregateStats, WorkerStatus,
-};
-use waymark_dag::DAG;
-
-#[derive(Debug, thiserror::Error)]
-pub enum BackendError {
-    #[error("{0}")]
-    Message(String),
-    #[error(transparent)]
-    Sqlx(#[from] sqlx::Error),
-    #[error(transparent)]
-    Serialization(#[from] serde_json::Error),
-}
-
-pub type BackendResult<T> = Result<T, BackendError>;
-
-fn default_instance_id() -> Uuid {
-    Uuid::new_v4()
-}
-
-fn default_action_results() -> HashMap<Uuid, Value> {
-    HashMap::new()
-}
-
-fn deserialize_action_results<'de, D>(deserializer: D) -> Result<HashMap<Uuid, Value>, D::Error>
-where
-    D: Deserializer<'de>,
-{
-    let value = Option::<HashMap<Uuid, Value>>::deserialize(deserializer)?;
-    Ok(value.unwrap_or_default())
-}
-
-// The models that we use for our backends are similar to the ones that we
-// have specified in our database/Postgres backend, but not 1:1. It's better for
-// us to internally convert within the given backend
-
-#[derive(Clone, Debug, Serialize, Deserialize)]
-/// Queued instance payload for the run loop.
-pub struct QueuedInstance {
-    pub workflow_version_id: Uuid,
-    #[serde(default)]
-    pub schedule_id: Option<Uuid>,
-    #[serde(skip, default)]
-    pub dag: Option<Arc<DAG>>,
-    pub entry_node: Uuid,
-    pub state: Option<RunnerState>,
-    #[serde(
-        default = "default_action_results",
-        deserialize_with = "deserialize_action_results"
-    )]
-    pub action_results: HashMap<Uuid, Value>,
-    #[serde(default = "default_instance_id")]
-    pub instance_id: Uuid,
-    #[serde(default)]
-    pub scheduled_at: Option<DateTime<Utc>>,
-}
-
-#[derive(Clone, Debug)]
-/// Result payload for queued instance polling.
-pub struct QueuedInstanceBatch {
-    pub instances: Vec<QueuedInstance>,
-}
-
-#[derive(Clone, Debug)]
-/// Lock claim settings for owned instances.
-pub struct LockClaim {
-    pub lock_uuid: Uuid,
-    pub lock_expires_at: DateTime<Utc>,
-}
-
-#[derive(Clone, Debug)]
-/// Current lock status for an instance.
-pub struct InstanceLockStatus {
-    pub instance_id: Uuid,
-    pub lock_uuid: Option<Uuid>,
-    pub lock_expires_at: Option<DateTime<Utc>>,
-}
-
-#[derive(Clone, Debug, Serialize, Deserialize)]
-/// Completed instance payload with result or exception.
-pub struct InstanceDone {
-    pub executor_id: Uuid,
-    pub entry_node: Uuid,
-    pub result: Option<Value>,
-    pub error: Option<Value>,
-}
-
-#[derive(Clone, Debug, Serialize, Deserialize)]
-/// Batch payload representing an updated execution graph snapshot.
-///
-/// This intentionally stores only runtime nodes and edges (no DAG template or
-/// derived caches) so persistence stays lightweight.
-pub struct GraphUpdate {
-    pub instance_id: Uuid,
-    pub nodes: HashMap<Uuid, ExecutionNode>,
-    pub edges: HashSet<ExecutionEdge>,
-}
-
-impl GraphUpdate {
-    pub fn from_state(instance_id: Uuid, state: &RunnerState) -> Self {
-        Self {
-            instance_id,
-            nodes: state.nodes.clone(),
-            edges: state.edges.clone(),
-        }
-    }
-
-    pub fn next_scheduled_at(&self) -> DateTime<Utc> {
-        let mut next: Option<DateTime<Utc>> = None;
-        for node in self.nodes.values() {
-            if matches!(node.status, NodeStatus::Completed | NodeStatus::Failed) {
-                continue;
-            }
-            if let Some(scheduled_at) = node.scheduled_at {
-                next = Some(match next {
-                    Some(existing) => existing.min(scheduled_at),
-                    None => scheduled_at,
-                });
-            }
-        }
-        next.unwrap_or_else(Utc::now)
-    }
-}
-
-#[derive(Clone, Debug, Serialize, Deserialize)]
-/// Batch payload representing a finished action attempt (success or failure).
-pub struct ActionDone {
-    pub execution_id: Uuid,
-    pub attempt: i32,
-    pub status: ActionAttemptStatus,
-    pub started_at: Option<DateTime<Utc>>,
-    pub completed_at: Option<DateTime<Utc>>,
-    pub duration_ms: Option<i64>,
-    pub result: Value,
-}
-
-#[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)]
-#[serde(rename_all = "snake_case")]
-pub enum ActionAttemptStatus {
-    Completed,
-    Failed,
-    TimedOut,
-}
-
-impl std::fmt::Display for ActionAttemptStatus {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        match self {
-            Self::Completed => write!(f, "completed"),
-            Self::Failed => write!(f, "failed"),
-            Self::TimedOut => write!(f, "timed_out"),
-        }
-    }
-}
-
-/// Worker status update for persistence.
-#[derive(Clone, Debug)]
-pub struct WorkerStatusUpdate {
-    pub pool_id: Uuid,
-    pub throughput_per_min: f64,
-    pub total_completed: i64,
-    pub last_action_at: Option<chrono::DateTime<chrono::Utc>>,
-    pub median_dequeue_ms: Option<i64>,
-    pub median_handling_ms: Option<i64>,
-    pub dispatch_queue_size: i64,
-    pub total_in_flight: i64,
-    pub active_workers: i32,
-    pub actions_per_sec: f64,
-    pub median_instance_duration_secs: Option<f64>,
-    pub active_instance_count: i32,
-    pub total_instances_completed: i64,
-    pub instances_per_sec: f64,
-    pub instances_per_min: f64,
-    pub time_series: Option<Vec<u8>>,
-}
-
-/// Backend capability for recording worker status metrics.
-#[async_trait]
-pub trait WorkerStatusBackend: Send + Sync {
-    async fn upsert_worker_status(&self, status: &WorkerStatusUpdate) -> BackendResult<()>;
-}
-
-/// Abstract persistence backend for runner state.
-#[async_trait]
-pub trait CoreBackend: Send + Sync {
-    fn clone_box(&self) -> Box<dyn CoreBackend>;
-
-    /// Persist updated execution graphs.
-    async fn save_graphs(
-        &self,
-        claim: LockClaim,
-        graphs: &[GraphUpdate],
-    ) -> BackendResult<Vec<InstanceLockStatus>>;
-
-    /// Persist finished action attempts (success or failure).
-    async fn save_actions_done(&self, actions: &[ActionDone]) -> BackendResult<()>;
-
-    /// Return up to size queued instances without blocking.
-    async fn get_queued_instances(
-        &self,
-        size: usize,
-        claim: LockClaim,
-    ) -> BackendResult<QueuedInstanceBatch>;
-
-    /// Refresh lock expiry for owned instances.
-    async fn refresh_instance_locks(
-        &self,
-        claim: LockClaim,
-        instance_ids: &[Uuid],
-    ) -> BackendResult<Vec<InstanceLockStatus>>;
-
-    /// Release instance locks when evicting from memory.
-    async fn release_instance_locks(
-        &self,
-        lock_uuid: Uuid,
-        instance_ids: &[Uuid],
-    ) -> BackendResult<()>;
-
-    /// Persist completed workflow instances.
-    async fn save_instances_done(&self, instances: &[InstanceDone]) -> BackendResult<()>;
-
-    /// Insert queued instances for run-loop consumption.
-    async fn queue_instances(&self, instances: &[QueuedInstance]) -> BackendResult<()>;
-}
-
-/// Registration payload for storing workflow DAG metadata.
-#[derive(Clone, Debug)]
-pub struct WorkflowRegistration {
-    pub workflow_name: String,
-    pub workflow_version: String,
-    pub ir_hash: String,
-    pub program_proto: Vec<u8>,
-    pub concurrent: bool,
-}
-
-#[derive(Clone, Debug)]
-/// Stored workflow version metadata and IR payload.
-pub struct WorkflowVersion {
-    pub id: Uuid,
-    pub workflow_name: String,
-    pub workflow_version: String,
-    pub ir_hash: String,
-    pub program_proto: Vec<u8>,
-    pub concurrent: bool,
-}
-
-/// Backend capability for registering workflow DAGs.
-#[async_trait]
-pub trait WorkflowRegistryBackend: Send + Sync {
-    async fn upsert_workflow_version(
-        &self,
-        registration: &WorkflowRegistration,
-    ) -> BackendResult<Uuid>;
-
-    async fn get_workflow_versions(&self, ids: &[Uuid]) -> BackendResult<Vec<WorkflowVersion>>;
-}
-
-/// Backend capability for workflow schedule persistence.
-#[async_trait]
-pub trait SchedulerBackend: Send + Sync {
-    async fn upsert_schedule(&self, params: &CreateScheduleParams) -> BackendResult<ScheduleId>;
-    async fn get_schedule(&self, id: ScheduleId) -> BackendResult<WorkflowSchedule>;
-    async fn get_schedule_by_name(
-        &self,
-        workflow_name: &str,
-        schedule_name: &str,
-    ) -> BackendResult<Option<WorkflowSchedule>>;
-    async fn list_schedules(&self, limit: i64, offset: i64)
-    -> BackendResult<Vec<WorkflowSchedule>>;
-    async fn count_schedules(&self) -> BackendResult<i64>;
-    async fn update_schedule_status(&self, id: ScheduleId, status: &str) -> BackendResult<bool>;
-    async fn delete_schedule(&self, id: ScheduleId) -> BackendResult<bool>;
-    async fn find_due_schedules(&self, limit: i32) -> BackendResult<Vec<WorkflowSchedule>>;
-    async fn has_running_instance(&self, schedule_id: ScheduleId) -> BackendResult<bool>;
-    async fn mark_schedule_executed(
-        &self,
-        schedule_id: ScheduleId,
-        instance_id: Uuid,
-    ) -> BackendResult<()>;
-    async fn skip_schedule_run(&self, schedule_id: ScheduleId) -> BackendResult<()>;
-}
-
-#[derive(Clone, Copy, Debug, Default)]
-/// Summary of a garbage collection sweep.
-pub struct GarbageCollectionResult {
-    pub deleted_instances: usize,
-    pub deleted_actions: usize,
-}
-
-/// Backend capability for deleting old finished workflow data.
-#[async_trait]
-pub trait GarbageCollectorBackend: Send + Sync {
-    async fn collect_done_instances(
-        &self,
-        older_than: DateTime<Utc>,
-        limit: usize,
-    ) -> BackendResult<GarbageCollectionResult>;
-}
-
-/// Backend capability for webapp-specific queries.
-#[async_trait]
-pub trait WebappBackend: Send + Sync {
-    async fn count_instances(&self, search: Option<&str>) -> BackendResult<i64>;
-    async fn list_instances(
-        &self,
-        search: Option<&str>,
-        limit: i64,
-        offset: i64,
-    ) -> BackendResult<Vec<InstanceSummary>>;
-    async fn get_instance(&self, instance_id: Uuid) -> BackendResult<InstanceDetail>;
-    async fn get_execution_graph(
-        &self,
-        instance_id: Uuid,
-    ) -> BackendResult<Option<ExecutionGraphView>>;
-    async fn get_workflow_graph(
-        &self,
-        instance_id: Uuid,
-    ) -> BackendResult<Option<ExecutionGraphView>>;
-    async fn get_action_results(&self, instance_id: Uuid) -> BackendResult<Vec<TimelineEntry>>;
-    async fn get_distinct_workflows(&self) -> BackendResult<Vec<String>>;
-    async fn get_distinct_statuses(&self) -> BackendResult<Vec<String>>;
-    async fn count_schedules(&self) -> BackendResult<i64>;
-    async fn list_schedules(&self, limit: i64, offset: i64) -> BackendResult<Vec<ScheduleSummary>>;
-    async fn get_schedule(&self, schedule_id: Uuid) -> BackendResult<ScheduleDetail>;
-    async fn count_schedule_invocations(&self, schedule_id: Uuid) -> BackendResult<i64>;
-    async fn list_schedule_invocations(
-        &self,
-        schedule_id: Uuid,
-        limit: i64,
-        offset: i64,
-    ) -> BackendResult<Vec<ScheduleInvocationSummary>>;
-    async fn update_schedule_status(&self, schedule_id: Uuid, status: &str) -> BackendResult<bool>;
-    async fn get_distinct_schedule_statuses(&self) -> BackendResult<Vec<String>>;
-    async fn get_distinct_schedule_types(&self) -> BackendResult<Vec<String>>;
-    async fn get_worker_action_stats(
-        &self,
-        window_minutes: i64,
-    ) -> BackendResult<Vec<WorkerActionRow>>;
-    async fn get_worker_aggregate_stats(
-        &self,
-        window_minutes: i64,
-    ) -> BackendResult<WorkerAggregateStats>;
-    async fn worker_status_table_exists(&self) -> bool;
-    async fn schedules_table_exists(&self) -> bool;
-    async fn get_worker_statuses(&self, window_minutes: i64) -> BackendResult<Vec<WorkerStatus>>;
-}
-
-impl Clone for Box<dyn CoreBackend> {
-    fn clone(&self) -> Self {
-        self.clone_box()
-    }
-}
diff --git a/crates/waymark/src/backends/memory.rs b/crates/waymark/src/backends/memory.rs
deleted file mode 100644
index c49bc6e0..00000000
--- a/crates/waymark/src/backends/memory.rs
+++ /dev/null
@@ -1,814 +0,0 @@
-//! In-memory backend that prints persistence operations.
-
-use std::collections::{HashMap, VecDeque};
-use std::sync::{Arc, Mutex};
-
-use chrono::{DateTime, Utc};
-use uuid::Uuid;
-
-use super::base::{
-    ActionDone, BackendError, BackendResult, CoreBackend, GarbageCollectionResult,
-    GarbageCollectorBackend, GraphUpdate, InstanceDone, InstanceLockStatus, LockClaim,
-    QueuedInstance, QueuedInstanceBatch, SchedulerBackend, WebappBackend, WorkerStatusBackend,
-    WorkerStatusUpdate, WorkflowRegistration, WorkflowRegistryBackend, WorkflowVersion,
-};
-use crate::scheduler::compute_next_run;
-use crate::scheduler::{CreateScheduleParams, ScheduleId, ScheduleType, WorkflowSchedule};
-use crate::webapp::{
-    ExecutionGraphView, InstanceDetail, InstanceStatus, InstanceSummary, ScheduleDetail,
-    ScheduleInvocationSummary, ScheduleSummary, TimelineEntry, WorkerActionRow,
-    WorkerAggregateStats, WorkerStatus,
-};
-use tonic::async_trait;
-
-type WorkflowVersionKey = (String, String);
-type WorkflowVersionValue = (Uuid, WorkflowRegistration);
-type WorkflowVersionStore = HashMap<WorkflowVersionKey, WorkflowVersionValue>;
-type InstanceLockStore = HashMap<Uuid, (Option<Uuid>, Option<DateTime<Utc>>)>;
-
-/// Backend that stores updates in memory for tests or local runs.
-#[derive(Clone)]
-pub struct MemoryBackend {
-    instance_queue: Option<Arc<Mutex<VecDeque<QueuedInstance>>>>,
-    graph_updates: Arc<Mutex<Vec<GraphUpdate>>>,
-    actions_done: Arc<Mutex<Vec<ActionDone>>>,
-    instances_done: Arc<Mutex<Vec<InstanceDone>>>,
-    worker_status_updates: Arc<Mutex<Vec<WorkerStatusUpdate>>>,
-    workflow_versions: Arc<Mutex<WorkflowVersionStore>>,
-    schedules: Arc<Mutex<HashMap<ScheduleId, WorkflowSchedule>>>,
-    instance_locks: Arc<Mutex<InstanceLockStore>>,
-}
-
-impl Default for MemoryBackend {
-    fn default() -> Self {
-        Self {
-            instance_queue: None,
-            graph_updates: Arc::new(Mutex::new(Vec::new())),
-            actions_done: Arc::new(Mutex::new(Vec::new())),
-            instances_done: Arc::new(Mutex::new(Vec::new())),
-            worker_status_updates: Arc::new(Mutex::new(Vec::new())),
-            workflow_versions: Arc::new(Mutex::new(HashMap::new())),
-            schedules: Arc::new(Mutex::new(HashMap::new())),
-            instance_locks: Arc::new(Mutex::new(HashMap::new())),
-        }
-    }
-}
-
-impl MemoryBackend {
-    pub fn new() -> Self {
-        Self::default()
-    }
-
-    pub fn with_queue(queue: Arc<Mutex<VecDeque<QueuedInstance>>>) -> Self {
-        Self {
-            instance_queue: Some(queue),
-            ..Self::default()
-        }
-    }
-
-    pub fn instance_queue(&self) -> Option<Arc<Mutex<VecDeque<QueuedInstance>>>> {
-        self.instance_queue.clone()
-    }
-
-    pub fn graph_updates(&self) -> Vec<GraphUpdate> {
-        self.graph_updates
-            .lock()
-            .expect("graph updates poisoned")
-            .clone()
-    }
-
-    pub fn actions_done(&self) -> Vec<ActionDone> {
-        self.actions_done
-            .lock()
-            .expect("actions done poisoned")
-            .clone()
-    }
-
-    pub fn instances_done(&self) -> Vec<InstanceDone> {
-        self.instances_done
-            .lock()
-            .expect("instances done poisoned")
-            .clone()
-    }
-
-    pub fn worker_status_updates(&self) -> Vec<WorkerStatusUpdate> {
-        self.worker_status_updates
-            .lock()
-            .expect("worker status updates poisoned")
-            .clone()
-    }
-}
-
-#[async_trait]
-impl CoreBackend for MemoryBackend {
-    fn clone_box(&self) -> Box<dyn CoreBackend> {
-        Box::new(self.clone())
-    }
-
-    async fn save_graphs(
-        &self,
-        claim: LockClaim,
-        graphs: &[GraphUpdate],
-    ) -> BackendResult<Vec<InstanceLockStatus>> {
-        let mut stored = self.graph_updates.lock().expect("graph updates poisoned");
-        stored.extend(graphs.iter().cloned());
-        let mut guard = self.instance_locks.lock().expect("instance locks poisoned");
-        let mut locks = Vec::with_capacity(graphs.len());
-        for graph in graphs {
-            if let Some((Some(lock_uuid), lock_expires_at)) = guard.get_mut(&graph.instance_id)
-                && *lock_uuid == claim.lock_uuid
-                && lock_expires_at.is_none_or(|expires_at| expires_at < claim.lock_expires_at)
-            {
-                *lock_expires_at = Some(claim.lock_expires_at);
-            }
-            let (lock_uuid, lock_expires_at) = guard
-                .get(&graph.instance_id)
-                .cloned()
-                .unwrap_or((None, None));
-            locks.push(InstanceLockStatus {
-                instance_id: graph.instance_id,
-                lock_uuid,
-                lock_expires_at,
-            });
-        }
-        Ok(locks)
-    }
-
-    async fn save_actions_done(&self, actions: &[ActionDone]) -> BackendResult<()> {
-        let mut stored = self.actions_done.lock().expect("actions done poisoned");
-        stored.extend(actions.iter().cloned());
-        Ok(())
-    }
-
-    async fn save_instances_done(&self, instances: &[InstanceDone]) -> BackendResult<()> {
-        let mut stored = self.instances_done.lock().expect("instances done poisoned");
-        stored.extend(instances.iter().cloned());
-        if !instances.is_empty() {
-            let mut locks = self.instance_locks.lock().expect("instance locks poisoned");
-            for instance in instances {
-                locks.remove(&instance.executor_id);
-            }
-        }
-        Ok(())
-    }
-
-    async fn get_queued_instances(
-        &self,
-        size: usize,
-        claim: LockClaim,
-    ) -> BackendResult<QueuedInstanceBatch> {
-        if size == 0 {
-            return Ok(QueuedInstanceBatch {
-                instances: Vec::new(),
-            });
-        }
-        let queue = match &self.instance_queue {
-            Some(queue) => queue,
-            None => {
-                return Ok(QueuedInstanceBatch {
-                    instances: Vec::new(),
-                });
-            }
-        };
-        let mut guard = queue.lock().expect("instance queue poisoned");
-        let now = Utc::now();
-        let mut instances = Vec::new();
-        while instances.len() < size {
-            let Some(instance) = guard.front() else {
-                break;
-            };
-            if let Some(scheduled_at) = instance.scheduled_at
-                && scheduled_at > now
-            {
-                break;
-            }
-            let instance = guard.pop_front().expect("instance queue empty");
-            instances.push(instance);
-        }
-        if !instances.is_empty() {
-            let mut locks = self.instance_locks.lock().expect("instance locks poisoned");
-            for instance in &instances {
-                locks.insert(
-                    instance.instance_id,
-                    (Some(claim.lock_uuid), Some(claim.lock_expires_at)),
-                );
-            }
-        }
-        Ok(QueuedInstanceBatch { instances })
-    }
-
-    async fn queue_instances(&self, instances: &[QueuedInstance]) -> BackendResult<()> {
-        if instances.is_empty() {
-            return Ok(());
-        }
-        let queue = self.instance_queue.as_ref().ok_or_else(|| {
-            BackendError::Message("memory backend missing instance queue".to_string())
-        })?;
-        let mut guard = queue.lock().expect("instance queue poisoned");
-        for instance in instances {
-            guard.push_back(instance.clone());
-        }
-        Ok(())
-    }
-
-    async fn refresh_instance_locks(
-        &self,
-        claim: LockClaim,
-        instance_ids: &[Uuid],
-    ) -> BackendResult<Vec<InstanceLockStatus>> {
-        let mut guard = self.instance_locks.lock().expect("instance locks poisoned");
-        let mut locks = Vec::new();
-        for instance_id in instance_ids {
-            let entry = guard
-                .entry(*instance_id)
-                .or_insert((Some(claim.lock_uuid), Some(claim.lock_expires_at)));
-            if entry.0 == Some(claim.lock_uuid) {
-                entry.1 = Some(claim.lock_expires_at);
-            }
-            locks.push(InstanceLockStatus {
-                instance_id: *instance_id,
-                lock_uuid: entry.0,
-                lock_expires_at: entry.1,
-            });
-        }
-        Ok(locks)
-    }
-
-    async fn release_instance_locks(
-        &self,
-        lock_uuid: Uuid,
-        instance_ids: &[Uuid],
-    ) -> BackendResult<()> {
-        let mut guard = self.instance_locks.lock().expect("instance locks poisoned");
-        for instance_id in instance_ids {
-            if let Some((current_lock, _)) = guard.get(instance_id)
-                && *current_lock == Some(lock_uuid)
-            {
-                guard.remove(instance_id);
-            }
-        }
-        Ok(())
-    }
-}
-
-#[async_trait]
-impl WorkerStatusBackend for MemoryBackend {
-    async fn upsert_worker_status(&self, status: &WorkerStatusUpdate) -> BackendResult<()> {
-        let mut stored = self
-            .worker_status_updates
-            .lock()
-            .expect("worker status updates poisoned");
-        stored.push(status.clone());
-        Ok(())
-    }
-}
-
-#[async_trait]
-impl WorkflowRegistryBackend for MemoryBackend {
-    async fn upsert_workflow_version(
-        &self,
-        registration: &WorkflowRegistration,
-    ) -> BackendResult<Uuid> {
-        let mut guard = self
-            .workflow_versions
-            .lock()
-            .expect("workflow versions poisoned");
-        let key = (
-            registration.workflow_name.clone(),
-            registration.workflow_version.clone(),
-        );
-        if let Some((id, existing)) = guard.get(&key) {
-            if existing.ir_hash != registration.ir_hash {
-                return Err(BackendError::Message(format!(
-                    "workflow version already exists with different IR hash: {}@{}",
-                    registration.workflow_name, registration.workflow_version
-                )));
-            }
-            return Ok(*id);
-        }
-
-        let id = Uuid::new_v4();
-        guard.insert(key, (id, registration.clone()));
-        Ok(id)
-    }
-
-    async fn get_workflow_versions(&self, ids: &[Uuid]) -> BackendResult<Vec<WorkflowVersion>> {
-        if ids.is_empty() {
-            return Ok(Vec::new());
-        }
-        let guard = self
-            .workflow_versions
-            .lock()
-            .expect("workflow versions poisoned");
-        let mut versions = Vec::new();
-        for (id, registration) in guard.values() {
-            if ids.contains(id) {
-                versions.push(WorkflowVersion {
-                    id: *id,
-                    workflow_name: registration.workflow_name.clone(),
-                    workflow_version: registration.workflow_version.clone(),
-                    ir_hash: registration.ir_hash.clone(),
-                    program_proto: registration.program_proto.clone(),
-                    concurrent: registration.concurrent,
-                });
-            }
-        }
-        Ok(versions)
-    }
-}
-
-#[async_trait]
-impl SchedulerBackend for MemoryBackend {
-    async fn upsert_schedule(&self, params: &CreateScheduleParams) -> BackendResult<ScheduleId> {
-        let mut guard = self.schedules.lock().expect("schedules poisoned");
-        let existing_schedule = guard.iter().find_map(|(id, schedule)| {
-            if schedule.workflow_name == params.workflow_name
-                && schedule.schedule_name == params.schedule_name
-            {
-                Some((*id, schedule.clone()))
-            } else {
-                None
-            }
-        });
-        let schedule_id = existing_schedule
-            .as_ref()
-            .map(|(id, _)| *id)
-            .unwrap_or_else(ScheduleId::new);
-        let now = Utc::now();
-        let next_run_at = match existing_schedule
-            .as_ref()
-            .and_then(|(_, schedule)| schedule.next_run_at)
-        {
-            Some(next_run_at) => Some(next_run_at),
-            None => Some(
-                compute_next_run(
-                    params.schedule_type,
-                    params.cron_expression.as_deref(),
-                    params.interval_seconds,
-                    params.jitter_seconds,
-                    None,
-                )
-                .map_err(BackendError::Message)?,
-            ),
-        };
-        let schedule = WorkflowSchedule {
-            id: schedule_id.0,
-            workflow_name: params.workflow_name.clone(),
-            schedule_name: params.schedule_name.clone(),
-            schedule_type: params.schedule_type.as_str().to_string(),
-            cron_expression: params.cron_expression.clone(),
-            interval_seconds: params.interval_seconds,
-            jitter_seconds: params.jitter_seconds,
-            input_payload: params.input_payload.clone(),
-            status: "active".to_string(),
-            next_run_at,
-            last_run_at: existing_schedule
-                .as_ref()
-                .and_then(|(_, schedule)| schedule.last_run_at),
-            last_instance_id: existing_schedule
-                .as_ref()
-                .and_then(|(_, schedule)| schedule.last_instance_id),
-            created_at: existing_schedule
-                .as_ref()
-                .map(|(_, schedule)| schedule.created_at)
-                .unwrap_or(now),
-            updated_at: now,
-            priority: params.priority,
-            allow_duplicate: params.allow_duplicate,
-        };
-        guard.insert(schedule_id, schedule);
-        Ok(schedule_id)
-    }
-
-    async fn get_schedule(&self, id: ScheduleId) -> BackendResult<WorkflowSchedule> {
-        let guard = self.schedules.lock().expect("schedules poisoned");
-        guard
-            .get(&id)
-            .cloned()
-            .ok_or_else(|| BackendError::Message(format!("schedule not found: {id}")))
-    }
-
-    async fn get_schedule_by_name(
-        &self,
-        workflow_name: &str,
-        schedule_name: &str,
-    ) -> BackendResult<Option<WorkflowSchedule>> {
-        let guard = self.schedules.lock().expect("schedules poisoned");
-        Ok(guard
-            .values()
-            .find(|schedule| {
-                schedule.workflow_name == workflow_name
-                    && schedule.schedule_name == schedule_name
-                    && schedule.status != "deleted"
-            })
-            .cloned())
-    }
-
-    async fn list_schedules(
-        &self,
-        limit: i64,
-        offset: i64,
-    ) -> BackendResult<Vec<WorkflowSchedule>> {
-        let guard = self.schedules.lock().expect("schedules poisoned");
-        let mut schedules: Vec<_> = guard
-            .values()
-            .filter(|schedule| schedule.status != "deleted")
-            .cloned()
-            .collect();
-        schedules.sort_by(|a, b| {
-            (&a.workflow_name, &a.schedule_name).cmp(&(&b.workflow_name, &b.schedule_name))
-        });
-        let start = offset.max(0) as usize;
-        let end = start.saturating_add(limit.max(0) as usize);
-        Ok(schedules
-            .into_iter()
-            .skip(start)
-            .take(end - start)
-            .collect())
-    }
-
-    async fn count_schedules(&self) -> BackendResult<i64> {
-        let guard = self.schedules.lock().expect("schedules poisoned");
-        Ok(guard
-            .values()
-            .filter(|schedule| schedule.status != "deleted")
-            .count() as i64)
-    }
-
-    async fn update_schedule_status(&self, id: ScheduleId, status: &str) -> BackendResult<bool> {
-        let mut guard = self.schedules.lock().expect("schedules poisoned");
-        if let Some(schedule) = guard.get_mut(&id) {
-            schedule.status = status.to_string();
-            schedule.updated_at = Utc::now();
-            Ok(true)
-        } else {
-            Ok(false)
-        }
-    }
-
-    async fn delete_schedule(&self, id: ScheduleId) -> BackendResult<bool> {
-        SchedulerBackend::update_schedule_status(self, id, "deleted").await
-    }
-
-    async fn find_due_schedules(&self, limit: i32) -> BackendResult<Vec<WorkflowSchedule>> {
-        let guard = self.schedules.lock().expect("schedules poisoned");
-        let now = Utc::now();
-        let mut schedules: Vec<_> = guard
-            .values()
-            .filter(|schedule| {
-                schedule.status == "active"
-                    && schedule
-                        .next_run_at
-                        .map(|next| next <= now)
-                        .unwrap_or(false)
-            })
-            .cloned()
-            .collect();
-        schedules.sort_by_key(|schedule| schedule.next_run_at);
-        Ok(schedules.into_iter().take(limit as usize).collect())
-    }
-
-    async fn has_running_instance(&self, _schedule_id: ScheduleId) -> BackendResult<bool> {
-        Ok(false)
-    }
-
-    async fn mark_schedule_executed(
-        &self,
-        schedule_id: ScheduleId,
-        instance_id: Uuid,
-    ) -> BackendResult<()> {
-        let mut guard = self.schedules.lock().expect("schedules poisoned");
-        let schedule = guard
-            .get_mut(&schedule_id)
-            .ok_or_else(|| BackendError::Message(format!("schedule not found: {schedule_id}")))?;
-        let schedule_type = ScheduleType::parse(&schedule.schedule_type)
-            .ok_or_else(|| BackendError::Message("invalid schedule type".to_string()))?;
-        let next_run_at = compute_next_run(
-            schedule_type,
-            schedule.cron_expression.as_deref(),
-            schedule.interval_seconds,
-            schedule.jitter_seconds,
-            Some(Utc::now()),
-        )
-        .map_err(BackendError::Message)?;
-        schedule.last_run_at = Some(Utc::now());
-        schedule.last_instance_id = Some(instance_id);
-        schedule.next_run_at = Some(next_run_at);
-        schedule.updated_at = Utc::now();
-        Ok(())
-    }
-
-    async fn skip_schedule_run(&self, schedule_id: ScheduleId) -> BackendResult<()> {
-        let mut guard = self.schedules.lock().expect("schedules poisoned");
-        let schedule = guard
-            .get_mut(&schedule_id)
-            .ok_or_else(|| BackendError::Message(format!("schedule not found: {schedule_id}")))?;
-        let schedule_type = ScheduleType::parse(&schedule.schedule_type)
-            .ok_or_else(|| BackendError::Message("invalid schedule type".to_string()))?;
-        let next_run_at = compute_next_run(
-            schedule_type,
-            schedule.cron_expression.as_deref(),
-            schedule.interval_seconds,
-            schedule.jitter_seconds,
-            Some(Utc::now()),
-        )
-        .map_err(BackendError::Message)?;
-        schedule.next_run_at = Some(next_run_at);
-        schedule.updated_at = Utc::now();
-        Ok(())
-    }
-}
-
-#[async_trait]
-impl GarbageCollectorBackend for MemoryBackend {
-    async fn collect_done_instances(
-        &self,
-        _older_than: DateTime<Utc>,
-        _limit: usize,
-    ) -> BackendResult<GarbageCollectionResult> {
-        Ok(GarbageCollectionResult::default())
-    }
-}
-
-#[async_trait]
-impl WebappBackend for MemoryBackend {
-    async fn count_instances(&self, _search: Option<&str>) -> BackendResult<i64> {
-        Ok(0)
-    }
-
-    async fn list_instances(
-        &self,
-        _search: Option<&str>,
-        _limit: i64,
-        _offset: i64,
-    ) -> BackendResult<Vec<InstanceSummary>> {
-        Ok(Vec::new())
-    }
-
-    async fn get_instance(&self, instance_id: Uuid) -> BackendResult<InstanceDetail> {
-        Err(BackendError::Message(format!(
-            "instance not found: {instance_id}"
-        )))
-    }
-
-    async fn get_execution_graph(
-        &self,
-        _instance_id: Uuid,
-    ) -> BackendResult<Option<ExecutionGraphView>> {
-        Ok(None)
-    }
-
-    async fn get_workflow_graph(
-        &self,
-        _instance_id: Uuid,
-    ) -> BackendResult<Option<ExecutionGraphView>> {
-        Ok(None)
-    }
-
-    async fn get_action_results(&self, _instance_id: Uuid) -> BackendResult<Vec<TimelineEntry>> {
-        Ok(Vec::new())
-    }
-
-    async fn get_distinct_workflows(&self) -> BackendResult<Vec<String>> {
-        Ok(Vec::new())
-    }
-
-    async fn get_distinct_statuses(&self) -> BackendResult<Vec<String>> {
-        Ok(vec![
-            InstanceStatus::Queued.to_string(),
-            InstanceStatus::Running.to_string(),
-            InstanceStatus::Completed.to_string(),
-            InstanceStatus::Failed.to_string(),
-        ])
-    }
-
-    async fn count_schedules(&self) -> BackendResult<i64> {
-        let guard = self.schedules.lock().expect("schedules poisoned");
-        Ok(guard
-            .values()
-            .filter(|schedule| schedule.status != "deleted")
-            .count() as i64)
-    }
-
-    async fn list_schedules(&self, limit: i64, offset: i64) -> BackendResult<Vec<ScheduleSummary>> {
-        let guard = self.schedules.lock().expect("schedules poisoned");
-        let mut schedules: Vec<_> = guard
-            .values()
-            .filter(|schedule| schedule.status != "deleted")
-            .cloned()
-            .collect();
-        schedules.sort_by(|a, b| {
-            (&a.workflow_name, &a.schedule_name).cmp(&(&b.workflow_name, &b.schedule_name))
-        });
-
-        let start = offset.max(0) as usize;
-        let page_limit = limit.max(0) as usize;
-        Ok(schedules
-            .into_iter()
-            .skip(start)
-            .take(page_limit)
-            .map(|schedule| ScheduleSummary {
-                id: schedule.id.to_string(),
-                workflow_name: schedule.workflow_name,
-                schedule_name: schedule.schedule_name,
-                schedule_type: schedule.schedule_type,
-                cron_expression: schedule.cron_expression,
-                interval_seconds: schedule.interval_seconds,
-                status: schedule.status,
-                next_run_at: schedule.next_run_at.map(|dt| dt.to_rfc3339()),
-                last_run_at: schedule.last_run_at.map(|dt| dt.to_rfc3339()),
-                created_at: schedule.created_at.to_rfc3339(),
-            })
-            .collect())
-    }
-
-    async fn get_schedule(&self, schedule_id: Uuid) -> BackendResult<ScheduleDetail> {
-        let guard = self.schedules.lock().expect("schedules poisoned");
-        let schedule = guard
-            .values()
-            .find(|schedule| schedule.id == schedule_id)
-            .cloned()
-            .ok_or_else(|| BackendError::Message(format!("schedule not found: {schedule_id}")))?;
-
-        let input_payload = schedule.input_payload.as_ref().and_then(|bytes| {
-            rmp_serde::from_slice::<serde_json::Value>(bytes)
-                .ok()
-                .and_then(|value| serde_json::to_string_pretty(&value).ok())
-        });
-
-        Ok(ScheduleDetail {
-            id: schedule.id.to_string(),
-            workflow_name: schedule.workflow_name,
-            schedule_name: schedule.schedule_name,
-            schedule_type: schedule.schedule_type,
-            cron_expression: schedule.cron_expression,
-            interval_seconds: schedule.interval_seconds,
-            jitter_seconds: schedule.jitter_seconds,
-            status: schedule.status,
-            next_run_at: schedule.next_run_at.map(|dt| dt.to_rfc3339()),
-            last_run_at: schedule.last_run_at.map(|dt| dt.to_rfc3339()),
-            last_instance_id: schedule.last_instance_id.map(|id| id.to_string()),
-            created_at: schedule.created_at.to_rfc3339(),
-            updated_at: schedule.updated_at.to_rfc3339(),
-            priority: schedule.priority,
-            allow_duplicate: schedule.allow_duplicate,
-            input_payload,
-        })
-    }
-
-    async fn count_schedule_invocations(&self, _schedule_id: Uuid) -> BackendResult<i64> {
-        Ok(0)
-    }
-
-    async fn list_schedule_invocations(
-        &self,
-        _schedule_id: Uuid,
-        _limit: i64,
-        _offset: i64,
-    ) -> BackendResult<Vec<ScheduleInvocationSummary>> {
-        Ok(Vec::new())
-    }
-
-    async fn update_schedule_status(&self, schedule_id: Uuid, status: &str) -> BackendResult<bool> {
-        let mut guard = self.schedules.lock().expect("schedules poisoned");
-        let Some(schedule) = guard
-            .values_mut()
-            .find(|schedule| schedule.id == schedule_id)
-        else {
-            return Ok(false);
-        };
-        schedule.status = status.to_string();
-        schedule.updated_at = Utc::now();
-        Ok(true)
-    }
-
-    async fn get_distinct_schedule_statuses(&self) -> BackendResult<Vec<String>> {
-        Ok(vec!["active".to_string(), "paused".to_string()])
-    }
-
-    async fn get_distinct_schedule_types(&self) -> BackendResult<Vec<String>> {
-        Ok(vec!["cron".to_string(), "interval".to_string()])
-    }
-
-    async fn get_worker_action_stats(
-        &self,
-        _window_minutes: i64,
-    ) -> BackendResult<Vec<WorkerActionRow>> {
-        let statuses = latest_worker_statuses(
-            &self
-                .worker_status_updates
-                .lock()
-                .expect("worker status updates poisoned"),
-        );
-
-        Ok(statuses
-            .into_iter()
-            .map(|status| WorkerActionRow {
-                pool_id: status.pool_id.to_string(),
-                active_workers: status.active_workers as i64,
-                actions_per_sec: format!("{:.1}", status.actions_per_sec),
-                throughput_per_min: status.throughput_per_min as i64,
-                total_completed: status.total_completed,
-                median_dequeue_ms: status.median_dequeue_ms,
-                median_handling_ms: status.median_handling_ms,
-                last_action_at: status.last_action_at.map(|dt| dt.to_rfc3339()),
-                updated_at: status.updated_at.to_rfc3339(),
-            })
-            .collect())
-    }
-
-    async fn get_worker_aggregate_stats(
-        &self,
-        _window_minutes: i64,
-    ) -> BackendResult<WorkerAggregateStats> {
-        let statuses = latest_worker_statuses(
-            &self
-                .worker_status_updates
-                .lock()
-                .expect("worker status updates poisoned"),
-        );
-
-        let active_worker_count = statuses
-            .iter()
-            .map(|status| status.active_workers as i64)
-            .sum();
-        let total_in_flight = statuses
-            .iter()
-            .filter_map(|status| status.total_in_flight)
-            .sum();
-        let total_queue_depth = statuses
-            .iter()
-            .filter_map(|status| status.dispatch_queue_size)
-            .sum();
-        let actions_per_sec = statuses
-            .iter()
-            .map(|status| status.actions_per_sec)
-            .sum::<f64>();
-
-        Ok(WorkerAggregateStats {
-            active_worker_count,
-            actions_per_sec: format!("{:.1}", actions_per_sec),
-            total_in_flight,
-            total_queue_depth,
-        })
-    }
-
-    async fn worker_status_table_exists(&self) -> bool {
-        !self
-            .worker_status_updates
-            .lock()
-            .expect("worker status updates poisoned")
-            .is_empty()
-    }
-
-    async fn schedules_table_exists(&self) -> bool {
-        !self
-            .schedules
-            .lock()
-            .expect("schedules poisoned")
-            .is_empty()
-    }
-
-    async fn get_worker_statuses(&self, _window_minutes: i64) -> BackendResult<Vec<WorkerStatus>> {
-        Ok(latest_worker_statuses(
-            &self
-                .worker_status_updates
-                .lock()
-                .expect("worker status updates poisoned"),
-        ))
-    }
-}
-
-fn latest_worker_statuses(updates: &[WorkerStatusUpdate]) -> Vec<WorkerStatus> {
-    let mut by_pool: HashMap<Uuid, WorkerStatusUpdate> = HashMap::new();
-    for update in updates {
-        by_pool.insert(update.pool_id, update.clone());
-    }
-
-    let now = Utc::now();
-    let mut statuses: Vec<_> = by_pool
-        .into_values()
-        .map(|status| WorkerStatus {
-            pool_id: status.pool_id,
-            active_workers: status.active_workers,
-            throughput_per_min: status.throughput_per_min,
-            actions_per_sec: status.actions_per_sec,
-            total_completed: status.total_completed,
-            last_action_at: status.last_action_at,
-            updated_at: now,
-            median_dequeue_ms: status.median_dequeue_ms,
-            median_handling_ms: status.median_handling_ms,
-            dispatch_queue_size: Some(status.dispatch_queue_size),
-            total_in_flight: Some(status.total_in_flight),
-            median_instance_duration_secs: status.median_instance_duration_secs,
-            active_instance_count: status.active_instance_count,
-            total_instances_completed: status.total_instances_completed,
-            instances_per_sec: status.instances_per_sec,
-            instances_per_min: status.instances_per_min,
-            time_series: status.time_series,
-        })
-        .collect();
-
-    statuses.sort_by(|left, right| right.actions_per_sec.total_cmp(&left.actions_per_sec));
-    statuses
-}
diff --git a/crates/waymark/src/backends/mod.rs b/crates/waymark/src/backends/mod.rs
deleted file mode 100644
index 7fbd84ad..00000000
--- a/crates/waymark/src/backends/mod.rs
+++ /dev/null
@@ -1,15 +0,0 @@
-//! Backend implementations for runner persistence.
-
-mod base;
-mod memory;
-mod postgres;
-
-pub use base::{
-    ActionAttemptStatus, ActionDone, BackendError, BackendResult, CoreBackend,
-    GarbageCollectionResult, GarbageCollectorBackend, GraphUpdate, InstanceDone,
-    InstanceLockStatus, LockClaim, QueuedInstance, QueuedInstanceBatch, SchedulerBackend,
-    WebappBackend, WorkerStatusBackend, WorkerStatusUpdate, WorkflowRegistration,
-    WorkflowRegistryBackend, WorkflowVersion,
-};
-pub use memory::MemoryBackend;
-pub use postgres::PostgresBackend;
diff --git a/crates/waymark/src/bin/integration_test.rs b/crates/waymark/src/bin/integration_test.rs
index 35f86fdf..e7faa3ae 100644
--- a/crates/waymark/src/bin/integration_test.rs
+++ b/crates/waymark/src/bin/integration_test.rs
@@ -19,17 +19,16 @@ use serde_json::Value;
 use sqlx::Row;
 use uuid::Uuid;
 
-use waymark::backends::{
-    CoreBackend, MemoryBackend, PostgresBackend, QueuedInstance, WorkflowRegistration,
-    WorkflowRegistryBackend,
-};
-use waymark::db;
-use waymark::integration_support::{LOCAL_POSTGRES_DSN, connect_pool, ensure_local_postgres};
 use waymark::messages::ast as ir;
 use waymark::waymark_core::runloop::{RunLoop, RunLoopSupervisorConfig};
-use waymark::waymark_core::runner::RunnerState;
 use waymark::workers::{PythonWorkerConfig, RemoteWorkerPool};
+use waymark_backend_memory::MemoryBackend;
+use waymark_backend_postgres::PostgresBackend;
+use waymark_core_backend::{CoreBackend, QueuedInstance};
 use waymark_dag::{DAG, convert_to_dag};
+use waymark_integration_support::{LOCAL_POSTGRES_DSN, connect_pool, ensure_local_postgres};
+use waymark_runner_state::RunnerState;
+use waymark_workflow_registry_backend::{WorkflowRegistration, WorkflowRegistryBackend};
 
 #[derive(Parser, Debug)]
 #[command(name = "integration_test")]
@@ -452,7 +451,7 @@ async fn connect_postgres_backend() -> Result<PostgresBackend> {
     let pool = connect_pool(&dsn)
         .await
         .with_context(|| format!("connect postgres backend: {dsn}"))?;
-    db::run_migrations(&pool)
+    waymark_backend_postgres_migrations::run(&pool)
         .await
         .context("run postgres migrations for integration runner")?;
     Ok(PostgresBackend::new(pool))
diff --git a/crates/waymark/src/bin/soak-harness.rs b/crates/waymark/src/bin/soak-harness.rs
index 3503fe94..2bccbb4e 100644
--- a/crates/waymark/src/bin/soak-harness.rs
+++ b/crates/waymark/src/bin/soak-harness.rs
@@ -29,14 +29,13 @@ use tokio::process::{Child, Command};
 use tracing::{error, info, warn};
 use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt};
 use uuid::Uuid;
-use waymark::backends::{
-    PostgresBackend, QueuedInstance, WorkflowRegistration, WorkflowRegistryBackend,
-};
-use waymark::db;
 use waymark::messages::ast as ir;
-use waymark::waymark_core::ir_parser::parse_program;
-use waymark::waymark_core::runner::RunnerState;
+use waymark_backend_postgres::PostgresBackend;
+use waymark_core_backend::QueuedInstance;
 use waymark_dag::{DAG, convert_to_dag};
+use waymark_ir_parser::parse_program;
+use waymark_runner_state::RunnerState;
+use waymark_workflow_registry_backend::{WorkflowRegistration, WorkflowRegistryBackend as _};
 
 const DEFAULT_DSN: &str = "postgresql://waymark:waymark@127.0.0.1:5433/waymark";
 const DEFAULT_WORKFLOW_NAME: &str = "waymark_soak_timeout_mix_v1";
@@ -287,7 +286,7 @@ async fn main() -> Result<()> {
     }
 
     let pool = wait_for_database(&args.dsn, DB_READY_TIMEOUT).await?;
-    db::run_migrations(&pool)
+    waymark_backend_postgres_migrations::run(&pool)
         .await
         .context("run migrations before soak")?;
 
diff --git a/crates/waymark/src/bin/start-workers.rs b/crates/waymark/src/bin/start-workers.rs
index 20773cb5..8aa1c493 100644
--- a/crates/waymark/src/bin/start-workers.rs
+++ b/crates/waymark/src/bin/start-workers.rs
@@ -39,21 +39,16 @@ use anyhow::Result;
 use prost::Message;
 use sqlx::{PgPool, Row};
 use tokio::signal;
-use tokio::sync::watch;
 use tracing::{error, info, warn};
 use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt};
 
 use uuid::Uuid;
-use waymark::backends::PostgresBackend;
 use waymark::config::WorkerConfig;
-use waymark::db;
 use waymark::messages::ast as ir;
 use waymark::scheduler::{DagResolver, WorkflowDag};
 use waymark::waymark_core::runloop::{RunLoopSupervisorConfig, runloop_supervisor};
-use waymark::{
-    PythonWorkerConfig, RemoteWorkerPool, WebappServer, spawn_garbage_collector, spawn_scheduler,
-    spawn_status_reporter,
-};
+use waymark::{PythonWorkerConfig, RemoteWorkerPool, WebappServer, spawn_status_reporter};
+use waymark_backend_postgres::PostgresBackend;
 use waymark_dag::convert_to_dag;
 
 #[tokio::main]
@@ -86,9 +81,12 @@ async fn main() -> Result<()> {
         "starting worker infrastructure"
     );
 
+    // Wire shutdown coordination.
+    let shutdown_token = tokio_util::sync::CancellationToken::new();
+
     // Initialize the database and backend.
     let pool = PgPool::connect(&config.database_url).await?;
-    db::run_migrations(&pool).await?;
+    waymark_backend_postgres_migrations::run(&pool).await?;
     let backend = PostgresBackend::new(pool);
 
     // Start the worker pool (bridge + python workers).
@@ -117,15 +115,29 @@ async fn main() -> Result<()> {
 
     // Start the scheduler loop.
     let dag_resolver = build_dag_resolver(backend.pool().clone());
-    let (scheduler_handle, scheduler_shutdown) =
-        spawn_scheduler(backend.clone(), config.scheduler.clone(), dag_resolver);
+    let scheduler_handle = {
+        let shutdown = shutdown_token.clone().cancelled_owned();
+        let task = waymark::SchedulerTask {
+            backend: backend.clone(),
+            config: config.scheduler.clone(),
+            dag_resolver,
+        };
+        tokio::spawn(task.run(shutdown))
+    };
     info!(
         poll_interval_ms = config.scheduler.poll_interval.as_millis(),
         batch_size = config.scheduler.batch_size,
         "scheduler task started"
     );
-    let (garbage_collector_handle, garbage_collector_shutdown) =
-        spawn_garbage_collector(backend.clone(), config.garbage_collector.clone());
+
+    let garbage_collector_handle = {
+        let shutdown = shutdown_token.clone().cancelled_owned();
+        let task = waymark::GarbageCollectorTask {
+            backend: backend.clone(),
+            config: config.garbage_collector.clone(),
+        };
+        tokio::spawn(task.run(shutdown))
+    };
     info!(
         interval_ms = config.garbage_collector.interval.as_millis(),
         batch_size = config.garbage_collector.batch_size,
@@ -133,8 +145,6 @@ async fn main() -> Result<()> {
         "garbage collector task started"
     );
 
-    // Wire shutdown coordination.
-    let (shutdown_tx, shutdown_rx) = watch::channel(false);
     let active_instance_gauge = Arc::new(AtomicUsize::new(0));
 
     // Start status reporting.
@@ -145,28 +155,24 @@ async fn main() -> Result<()> {
         remote_pool.clone(),
         active_instance_gauge.clone(),
         config.profile_interval,
-        shutdown_rx.clone(),
+        shutdown_token.clone().cancelled_owned(),
     );
     let expired_lock_reclaimer_handle = spawn_expired_lock_reclaimer(
         backend.clone(),
         config.expired_lock_reclaimer_interval,
         config.expired_lock_reclaimer_batch_size,
-        shutdown_rx.clone(),
+        shutdown_token.clone().cancelled_owned(),
     );
 
     let shutdown_handle = tokio::spawn({
-        let shutdown_tx = shutdown_tx.clone();
-        let scheduler_shutdown = scheduler_shutdown.clone();
-        let garbage_collector_shutdown = garbage_collector_shutdown.clone();
+        let shutdown_token = shutdown_token.clone();
         async move {
             if let Err(err) = wait_for_shutdown().await {
                 error!(error = %err, "shutdown signal listener failed");
                 return;
             }
             info!("shutdown signal received");
-            let _ = shutdown_tx.send(true);
-            let _ = scheduler_shutdown.send(true);
-            let _ = garbage_collector_shutdown.send(true);
+            shutdown_token.cancel();
         }
     });
 
@@ -188,7 +194,7 @@ async fn main() -> Result<()> {
             skip_sleep: false,
             active_instance_gauge: Some(active_instance_gauge),
         },
-        shutdown_rx,
+        shutdown_token,
     )
     .await;
 
@@ -248,7 +254,7 @@ fn spawn_expired_lock_reclaimer(
     backend: PostgresBackend,
     interval: Duration,
     batch_size: usize,
-    mut shutdown_rx: watch::Receiver<bool>,
+    shutdown: tokio_util::sync::WaitForCancellationFutureOwned,
 ) -> tokio::task::JoinHandle<()> {
     tokio::spawn(async move {
         let mut ticker = tokio::time::interval(interval);
@@ -257,6 +263,7 @@ fn spawn_expired_lock_reclaimer(
             interval_ms = interval.as_millis(),
             batch_size, "expired lock reclaimer started"
         );
+        let mut shutdown = std::pin::pin!(shutdown);
         loop {
             tokio::select! {
                 _ = ticker.tick() => {
@@ -282,11 +289,9 @@ fn spawn_expired_lock_reclaimer(
                         );
                     }
                 }
-                _ = shutdown_rx.changed() => {
-                    if *shutdown_rx.borrow() {
-                        info!("expired lock reclaimer shutting down");
-                        break;
-                    }
+                _ = &mut shutdown => {
+                    info!("expired lock reclaimer shutting down");
+                    break;
                 }
             }
         }
diff --git a/crates/waymark/src/bin/waymark-bridge.rs b/crates/waymark/src/bin/waymark-bridge.rs
index 1bc6ac18..878e6a6e 100644
--- a/crates/waymark/src/bin/waymark-bridge.rs
+++ b/crates/waymark/src/bin/waymark-bridge.rs
@@ -29,18 +29,22 @@ use tracing::{debug, info};
 use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt};
 use uuid::Uuid;
 
-use waymark::backends::{
-    ActionDone, BackendError, BackendResult, CoreBackend, GraphUpdate, InstanceDone,
-    InstanceLockStatus, LockClaim, PostgresBackend, QueuedInstance, QueuedInstanceBatch,
-    SchedulerBackend, WorkflowRegistration, WorkflowRegistryBackend, WorkflowVersion,
-};
-use waymark::db;
 use waymark::messages::{self, ast as ir, proto};
-use waymark::scheduler::{CreateScheduleParams, ScheduleId, ScheduleStatus, ScheduleType};
 use waymark::waymark_core::runloop::{RunLoop, RunLoopSupervisorConfig};
-use waymark::waymark_core::runner::RunnerState;
 use waymark::workers::{ActionCompletion, ActionRequest, BaseWorkerPool, WorkerPoolError};
+use waymark_backend_postgres::PostgresBackend;
+use waymark_backends_core::{BackendError, BackendResult};
+use waymark_core_backend::{
+    ActionDone, CoreBackend, GraphUpdate, InstanceDone, InstanceLockStatus, LockClaim,
+    QueuedInstance, QueuedInstanceBatch,
+};
 use waymark_dag::convert_to_dag;
+use waymark_runner_state::RunnerState;
+use waymark_scheduler_backend::SchedulerBackend as _;
+use waymark_scheduler_core::{CreateScheduleParams, ScheduleId, ScheduleStatus, ScheduleType};
+use waymark_workflow_registry_backend::{
+    WorkflowRegistration, WorkflowRegistryBackend, WorkflowVersion,
+};
 
 const DEFAULT_GRPC_ADDR: &str = "127.0.0.1:24117";
 
@@ -52,7 +56,7 @@ struct WorkflowStore {
 impl WorkflowStore {
     async fn connect(dsn: &str) -> Result<Self> {
         let pool = PgPool::connect(dsn).await?;
-        db::run_migrations(&pool).await?;
+        waymark_backend_postgres_migrations::run(&pool).await?;
         let backend = PostgresBackend::new(pool);
         Ok(Self { backend })
     }
diff --git a/crates/waymark/src/db.rs b/crates/waymark/src/db.rs
deleted file mode 100644
index f89f0e04..00000000
--- a/crates/waymark/src/db.rs
+++ /dev/null
@@ -1,14 +0,0 @@
-//! Database helpers shared across services.
-
-use sqlx::PgPool;
-
-use crate::backends::{BackendError, BackendResult};
-
-/// Run the embedded SQLx migrations.
-pub async fn run_migrations(pool: &PgPool) -> BackendResult<()> {
-    sqlx::migrate!()
-        .run(pool)
-        .await
-        .map_err(|err| BackendError::Message(err.to_string()))?;
-    Ok(())
-}
diff --git a/crates/waymark/src/garbage_collector/mod.rs b/crates/waymark/src/garbage_collector/mod.rs
index fe29bbc0..72ce00a1 100644
--- a/crates/waymark/src/garbage_collector/mod.rs
+++ b/crates/waymark/src/garbage_collector/mod.rs
@@ -2,4 +2,4 @@
 
 mod task;
 
-pub use task::{GarbageCollectorConfig, GarbageCollectorTask, spawn_garbage_collector};
+pub use task::{GarbageCollectorConfig, GarbageCollectorTask};
diff --git a/crates/waymark/src/garbage_collector/task.rs b/crates/waymark/src/garbage_collector/task.rs
index 39eaf803..e1f673ef 100644
--- a/crates/waymark/src/garbage_collector/task.rs
+++ b/crates/waymark/src/garbage_collector/task.rs
@@ -5,10 +5,8 @@
 use std::time::Duration;
 
 use chrono::Utc;
-use tokio::sync::watch;
 use tracing::{debug, error, info};
-
-use crate::backends::{GarbageCollectionResult, GarbageCollectorBackend};
+use waymark_garbage_collector_backend::{GarbageCollectionResult, GarbageCollectorBackend};
 
 /// Configuration for the garbage collector task.
 #[derive(Debug, Clone)]
@@ -33,29 +31,16 @@ impl Default for GarbageCollectorConfig {
 
 /// Background garbage collector task.
 pub struct GarbageCollectorTask<B> {
-    backend: B,
-    config: GarbageCollectorConfig,
-    shutdown_rx: watch::Receiver<bool>,
+    pub backend: B,
+    pub config: GarbageCollectorConfig,
 }
 
 impl<B> GarbageCollectorTask<B>
 where
     B: GarbageCollectorBackend + Clone + Send + Sync + 'static,
 {
-    pub fn new(
-        backend: B,
-        config: GarbageCollectorConfig,
-        shutdown_rx: watch::Receiver<bool>,
-    ) -> Self {
-        Self {
-            backend,
-            config,
-            shutdown_rx,
-        }
-    }
-
     /// Run the garbage collector loop.
-    pub async fn run(mut self) {
+    pub async fn run(self, shutdown: tokio_util::sync::WaitForCancellationFutureOwned) {
         info!(
             interval_ms = self.config.interval.as_millis(),
             batch_size = self.config.batch_size,
@@ -63,13 +48,13 @@ where
             "garbage collector task started"
         );
 
+        let mut shutdown = std::pin::pin!(shutdown);
+
         loop {
             tokio::select! {
-                _ = self.shutdown_rx.changed() => {
-                    if *self.shutdown_rx.borrow() {
-                        info!("garbage collector task shutting down");
-                        break;
-                    }
+                _ = &mut shutdown => {
+                    info!("garbage collector task shutting down");
+                    break;
                 }
                 _ = tokio::time::sleep(self.config.interval) => {
                     if let Err(err) = self.collect_until_drained().await {
@@ -126,20 +111,6 @@ where
     }
 }
 
-/// Convenience function to spawn a garbage collector task.
-pub fn spawn_garbage_collector<B>(
-    backend: B,
-    config: GarbageCollectorConfig,
-) -> (tokio::task::JoinHandle<()>, watch::Sender<bool>)
-where
-    B: GarbageCollectorBackend + Clone + Send + Sync + 'static,
-{
-    let (shutdown_tx, shutdown_rx) = watch::channel(false);
-    let task = GarbageCollectorTask::new(backend, config, shutdown_rx);
-    let handle = tokio::spawn(task.run());
-    (handle, shutdown_tx)
-}
-
 #[cfg(test)]
 mod tests {
     use std::collections::VecDeque;
@@ -148,9 +119,9 @@ mod tests {
 
     use chrono::{Duration as ChronoDuration, Utc};
     use tonic::async_trait;
+    use waymark_backends_core::BackendResult;
 
     use super::*;
-    use crate::backends::{BackendResult, GarbageCollectorBackend};
 
     #[derive(Clone)]
     struct StubGarbageCollectorBackend {
@@ -199,16 +170,14 @@ mod tests {
             observed_limits: Arc::new(Mutex::new(Vec::new())),
             observed_cutoffs: Arc::new(Mutex::new(Vec::new())),
         };
-        let (_shutdown_tx, shutdown_rx) = watch::channel(false);
-        let task = GarbageCollectorTask::new(
-            backend.clone(),
-            GarbageCollectorConfig {
+        let task = GarbageCollectorTask {
+            backend: backend.clone(),
+            config: GarbageCollectorConfig {
                 interval: Duration::from_secs(60),
                 batch_size: 2,
                 retention: Duration::from_secs(24 * 60 * 60),
             },
-            shutdown_rx,
-        );
+        };
 
         task.collect_until_drained()
             .await
@@ -229,16 +198,14 @@ mod tests {
             observed_limits: Arc::new(Mutex::new(Vec::new())),
             observed_cutoffs: Arc::new(Mutex::new(Vec::new())),
         };
-        let (_shutdown_tx, shutdown_rx) = watch::channel(false);
-        let task = GarbageCollectorTask::new(
-            backend.clone(),
-            GarbageCollectorConfig {
+        let task = GarbageCollectorTask {
+            backend: backend.clone(),
+            config: GarbageCollectorConfig {
                 interval: Duration::from_secs(60),
                 batch_size: 3,
                 retention: Duration::from_secs(24 * 60 * 60),
             },
-            shutdown_rx,
-        );
+        };
 
         let before = Utc::now();
         task.collect_until_drained()
diff --git a/crates/waymark/src/lib.rs b/crates/waymark/src/lib.rs
index b9193e66..568c1ba0 100644
--- a/crates/waymark/src/lib.rs
+++ b/crates/waymark/src/lib.rs
@@ -1,32 +1,22 @@
 //! Waymark - worker pool infrastructure plus the core IR/runtime port.
 
-pub mod backends;
 pub mod config;
-pub mod db;
 pub mod garbage_collector;
-pub mod integration_support;
 pub mod messages;
 pub mod observability;
 pub mod pool_status;
 pub mod scheduler;
 pub mod server_worker;
-#[cfg(test)]
-pub mod test_support;
 pub mod waymark_core;
 pub mod webapp;
 pub mod workers;
 
 // Worker infrastructure (preserved from the legacy Rust core).
-pub use garbage_collector::{
-    GarbageCollectorConfig, GarbageCollectorTask, spawn_garbage_collector,
-};
+pub use garbage_collector::{GarbageCollectorConfig, GarbageCollectorTask};
 pub use messages::{MessageError, ast as ir_ast, proto, workflow_argument_value_to_json};
 pub use observability::obs;
 pub use pool_status::{PoolTimeSeries, TimeSeriesEntry, TimeSeriesJsonEntry};
-pub use scheduler::{
-    CreateScheduleParams, ScheduleId, ScheduleType, SchedulerConfig, SchedulerTask,
-    WorkflowSchedule, spawn_scheduler,
-};
+pub use scheduler::{SchedulerConfig, SchedulerTask};
 pub use server_worker::{WorkerBridgeChannels, WorkerBridgeServer};
 pub use webapp::{WebappConfig, WebappServer};
 pub use workers::{
diff --git a/crates/waymark/src/observability.rs b/crates/waymark/src/observability.rs
index dbb8a7af..49c1700f 100644
--- a/crates/waymark/src/observability.rs
+++ b/crates/waymark/src/observability.rs
@@ -1,6 +1,6 @@
 //! Observability helpers for optional tracing instrumentation.
 
-pub use waymark_observability_macros::obs;
+pub use waymark_observability::obs;
 
 #[cfg(feature = "trace")]
 use std::sync::OnceLock;
diff --git a/crates/waymark/src/scheduler/mod.rs b/crates/waymark/src/scheduler/mod.rs
index 4c6e8c08..d14adfc4 100644
--- a/crates/waymark/src/scheduler/mod.rs
+++ b/crates/waymark/src/scheduler/mod.rs
@@ -6,9 +6,5 @@
 //! - Cron and interval utilities
 
 mod task;
-mod types;
-mod utils;
 
-pub use task::{DagResolver, SchedulerConfig, SchedulerTask, WorkflowDag, spawn_scheduler};
-pub use types::{CreateScheduleParams, ScheduleId, ScheduleStatus, ScheduleType, WorkflowSchedule};
-pub use utils::{apply_jitter, compute_next_run, next_cron_run, next_interval_run, validate_cron};
+pub use task::{DagResolver, SchedulerConfig, SchedulerTask, WorkflowDag};
diff --git a/crates/waymark/src/scheduler/task.rs b/crates/waymark/src/scheduler/task.rs
index 9ad0da0b..283745cc 100644
--- a/crates/waymark/src/scheduler/task.rs
+++ b/crates/waymark/src/scheduler/task.rs
@@ -7,12 +7,11 @@ use std::sync::Arc;
 use std::time::Duration;
 
 use serde_json::Value;
-use tokio::sync::watch;
 use tracing::{debug, error, info};
 use uuid::Uuid;
+use waymark_core_backend::QueuedInstance;
+use waymark_scheduler_core::{ScheduleId, WorkflowSchedule};
 
-use super::types::{ScheduleId, WorkflowSchedule};
-use crate::backends::{CoreBackend, QueuedInstance, SchedulerBackend};
 use crate::messages;
 use crate::messages::ast as ir;
 use waymark_dag::DAG;
@@ -45,48 +44,33 @@ impl Default for SchedulerConfig {
 
 /// Background scheduler task.
 pub struct SchedulerTask<B> {
-    backend: B,
-    config: SchedulerConfig,
-    shutdown_rx: watch::Receiver<bool>,
+    pub backend: B,
+    pub config: SchedulerConfig,
     /// Function to get the DAG for a workflow.
     /// This should look up the workflow definition and return its DAG.
-    dag_resolver: DagResolver,
+    pub dag_resolver: DagResolver,
 }
 
 impl<B> SchedulerTask<B>
 where
-    B: CoreBackend + SchedulerBackend + Clone + Send + Sync + 'static,
+    B: waymark_core_backend::CoreBackend + waymark_scheduler_backend::SchedulerBackend,
+    B: Clone + Send + Sync + 'static,
 {
-    /// Create a new scheduler task.
-    pub fn new(
-        backend: B,
-        config: SchedulerConfig,
-        shutdown_rx: watch::Receiver<bool>,
-        dag_resolver: DagResolver,
-    ) -> Self {
-        Self {
-            backend,
-            config,
-            shutdown_rx,
-            dag_resolver,
-        }
-    }
-
     /// Run the scheduler loop.
-    pub async fn run(mut self) {
+    pub async fn run(self, shutdown: tokio_util::sync::WaitForCancellationFutureOwned) {
         info!(
             poll_interval_ms = self.config.poll_interval.as_millis(),
             batch_size = self.config.batch_size,
             "scheduler task started"
         );
 
+        let mut shutdown = std::pin::pin!(shutdown);
+
         loop {
             tokio::select! {
-                _ = self.shutdown_rx.changed() => {
-                    if *self.shutdown_rx.borrow() {
-                        info!("scheduler task shutting down");
-                        break;
-                    }
+                _ = &mut shutdown => {
+                    info!("scheduler task shutting down");
+                    break;
                 }
                 _ = tokio::time::sleep(self.config.poll_interval) => {
                     if let Err(e) = self.poll_and_fire().await {
@@ -170,12 +154,8 @@ where
             .as_ref()
             .ok_or_else(|| "DAG has no entry node".to_string())?;
 
-        let mut state = crate::waymark_core::runner::RunnerState::new(
-            Some(Arc::clone(&dag)),
-            None,
-            None,
-            false,
-        );
+        let mut state =
+            waymark_runner_state::RunnerState::new(Some(Arc::clone(&dag)), None, None, false);
         if let Some(input_payload) = schedule.input_payload.as_deref() {
             let inputs = messages::workflow_arguments_to_json(input_payload)
                 .ok_or_else(|| "failed to decode schedule input payload".to_string())?;
@@ -287,21 +267,6 @@ fn literal_from_json_value(value: &Value) -> ir::Expr {
     }
 }
 
-/// Convenience function to spawn a scheduler task.
-pub fn spawn_scheduler<B>(
-    backend: B,
-    config: SchedulerConfig,
-    dag_resolver: DagResolver,
-) -> (tokio::task::JoinHandle<()>, watch::Sender<bool>)
-where
-    B: CoreBackend + SchedulerBackend + Clone + Send + Sync + 'static,
-{
-    let (shutdown_tx, shutdown_rx) = watch::channel(false);
-    let task = SchedulerTask::new(backend, config, shutdown_rx, dag_resolver);
-    let handle = tokio::spawn(task.run());
-    (handle, shutdown_tx)
-}
-
 #[cfg(test)]
 mod tests {
     use std::collections::VecDeque;
@@ -310,14 +275,16 @@ mod tests {
     use chrono::{Duration as ChronoDuration, Utc};
     use prost::Message;
     use serde_json::Value;
+    use waymark_backend_memory::MemoryBackend;
+    use waymark_core_backend::{CoreBackend, LockClaim};
+    use waymark_scheduler_backend::SchedulerBackend;
+    use waymark_scheduler_core::{CreateScheduleParams, ScheduleType};
 
     use super::*;
-    use crate::backends::{CoreBackend, LockClaim, MemoryBackend, SchedulerBackend};
     use crate::messages::proto;
-    use crate::scheduler::{CreateScheduleParams, ScheduleType};
-    use crate::waymark_core::ir_parser::parse_program;
-    use crate::waymark_core::runner::RunnerExecutor;
     use waymark_dag::convert_to_dag;
+    use waymark_ir_parser::parse_program;
+    use waymark_runner::RunnerExecutor;
 
     fn workflow_args_payload(key: &str, value: i64) -> Vec<u8> {
         proto::WorkflowArguments {
@@ -339,7 +306,6 @@ mod tests {
     async fn scheduler_fire_schedule_applies_input_payload_to_state() {
         let queue = Arc::new(Mutex::new(VecDeque::new()));
         let backend = MemoryBackend::with_queue(queue);
-        let (_shutdown_tx, shutdown_rx) = watch::channel(false);
 
         let source = r#"
 fn main(input: [number], output: [result]):
@@ -362,12 +328,11 @@ fn main(input: [number], output: [result]):
             }
         });
 
-        let scheduler = SchedulerTask::new(
-            backend.clone(),
-            SchedulerConfig::default(),
-            shutdown_rx,
+        let scheduler = SchedulerTask {
+            backend: backend.clone(),
+            config: SchedulerConfig::default(),
             dag_resolver,
-        );
+        };
         SchedulerBackend::upsert_schedule(
             &backend,
             &CreateScheduleParams {
@@ -408,11 +373,8 @@ fn main(input: [number], output: [result]):
         let state = queued.state.clone().expect("queued state");
         let mut executor =
             RunnerExecutor::new(Arc::clone(&dag), state, queued.action_results.clone(), None);
-        let replay = crate::waymark_core::runner::replay_variables(
-            executor.state(),
-            executor.action_results(),
-        )
-        .expect("replay inputs");
+        let replay = waymark_runner::replay_variables(executor.state(), executor.action_results())
+            .expect("replay inputs");
         assert_eq!(
             replay.variables.get("number"),
             Some(&Value::Number(7.into()))
diff --git a/crates/waymark/src/waymark_core/cli/benchmark.rs b/crates/waymark/src/waymark_core/cli/benchmark.rs
index 99558eea..241afdd1 100644
--- a/crates/waymark/src/waymark_core/cli/benchmark.rs
+++ b/crates/waymark/src/waymark_core/cli/benchmark.rs
@@ -12,12 +12,11 @@ use serde_json::Value;
 use sha2::{Digest, Sha256};
 use sqlx::PgPool;
 use uuid::Uuid;
+use waymark_backend_postgres::PostgresBackend;
+use waymark_core_backend::QueuedInstance;
+use waymark_integration_support::{LOCAL_POSTGRES_DSN, ensure_local_postgres};
+use waymark_workflow_registry_backend::{WorkflowRegistration, WorkflowRegistryBackend as _};
 
-use crate::backends::{
-    PostgresBackend, QueuedInstance, WorkflowRegistration, WorkflowRegistryBackend,
-};
-use crate::db;
-use crate::integration_support::{LOCAL_POSTGRES_DSN, ensure_local_postgres};
 use crate::messages::ast as ir;
 use crate::observability::obs;
 use crate::waymark_core::cli::smoke::{
@@ -25,9 +24,9 @@ use crate::waymark_core::cli::smoke::{
     build_try_except_program, build_while_loop_program, literal_from_value,
 };
 use crate::waymark_core::runloop::{RunLoop, RunLoopSupervisorConfig};
-use crate::waymark_core::runner::RunnerState;
 use crate::workers::{ActionCallable, InlineWorkerPool, WorkerPoolError};
 use waymark_dag::convert_to_dag;
+use waymark_runner_state::RunnerState;
 
 const DEFAULT_DSN: &str = LOCAL_POSTGRES_DSN;
 const DEFAULT_MAX_CONCURRENT_INSTANCES: usize = 500;
@@ -318,7 +317,9 @@ async fn run_benchmark(
     }
     let pool = PgPool::connect(dsn).await.expect("connect postgres");
     drop_benchmark_tables(&pool).await;
-    db::run_migrations(&pool).await.expect("run migrations");
+    waymark_backend_postgres_migrations::run(&pool)
+        .await
+        .expect("run migrations");
     let backend = PostgresBackend::new(pool);
     backend.clear_all().await.expect("clear all");
     let total = queue_benchmark_instances(&backend, &cases, count_per_case, batch_size).await;
diff --git a/crates/waymark/src/waymark_core/cli/smoke.rs b/crates/waymark/src/waymark_core/cli/smoke.rs
index 3625e952..bb5a49c3 100644
--- a/crates/waymark/src/waymark_core/cli/smoke.rs
+++ b/crates/waymark/src/waymark_core/cli/smoke.rs
@@ -11,18 +11,18 @@ use prost::Message;
 use serde_json::Value;
 use sha2::{Digest, Sha256};
 use uuid::Uuid;
+use waymark_backend_memory::MemoryBackend;
+use waymark_core_backend::QueuedInstance;
+use waymark_workflow_registry_backend::{WorkflowRegistration, WorkflowRegistryBackend as _};
 
-use crate::backends::{
-    MemoryBackend, QueuedInstance, WorkflowRegistration, WorkflowRegistryBackend,
-};
 use crate::messages::ast as ir;
 use crate::waymark_core::dag_viz::render_dag_image;
 use crate::waymark_core::ir_format::format_program;
-use crate::waymark_core::ir_parser::parse_program;
 use crate::waymark_core::runloop::{RunLoop, RunLoopSupervisorConfig};
-use crate::waymark_core::runner::RunnerState;
 use crate::workers::{PythonWorkerConfig, RemoteWorkerPool};
 use waymark_dag::convert_to_dag;
+use waymark_ir_parser::parse_program;
+use waymark_runner_state::RunnerState;
 
 #[derive(Parser, Debug)]
 #[command(name = "waymark-smoke", about = "Smoke check core-python components.")]
diff --git a/crates/waymark/src/waymark_core/ir_format.rs b/crates/waymark/src/waymark_core/ir_format.rs
index 57a9460a..45795f0e 100644
--- a/crates/waymark/src/waymark_core/ir_format.rs
+++ b/crates/waymark/src/waymark_core/ir_format.rs
@@ -569,7 +569,7 @@ pub fn format_program(program: &ir::Program) -> String {
 #[cfg(test)]
 mod tests {
     use super::{DEFAULT_INDENT, format_program};
-    use crate::waymark_core::ir_parser::IRParser;
+    use waymark_ir_parser::IRParser;
 
     #[test]
     fn test_format_program_happy_path() {
diff --git a/crates/waymark/src/waymark_core/lock.rs b/crates/waymark/src/waymark_core/lock.rs
index d270bc53..3f3acd5d 100644
--- a/crates/waymark/src/waymark_core/lock.rs
+++ b/crates/waymark/src/waymark_core/lock.rs
@@ -1,17 +1,14 @@
 //! Instance lock tracking and heartbeat maintenance.
 
 use std::collections::HashSet;
-use std::sync::atomic::{AtomicBool, Ordering};
 use std::sync::{Arc, Mutex};
 use std::time::Duration;
 
 use chrono::{Duration as ChronoDuration, Utc};
-use tokio::sync::Notify;
 use uuid::Uuid;
 
 use tracing::{debug, info, warn};
-
-use crate::backends::{CoreBackend, LockClaim};
+use waymark_core_backend::LockClaim;
 
 #[derive(Clone)]
 pub struct InstanceLockTracker {
@@ -62,21 +59,17 @@ impl InstanceLockTracker {
 }
 
 pub fn spawn_lock_heartbeat(
-    backend: Arc<dyn CoreBackend>,
+    backend: Arc<dyn waymark_core_backend::CoreBackend>,
     tracker: InstanceLockTracker,
     heartbeat_interval: Duration,
     lock_ttl: Duration,
-    stop: Arc<AtomicBool>,
-    stop_notify: Arc<Notify>,
+    stop: tokio_util::sync::WaitForCancellationFutureOwned,
 ) -> tokio::task::JoinHandle<()> {
     tokio::spawn(async move {
+        let mut stop = std::pin::pin!(stop);
         loop {
-            if stop.load(Ordering::SeqCst) {
-                info!("lock heartbeat stop flag set");
-                break;
-            }
             tokio::select! {
-                _ = stop_notify.notified() => {
+                _ = &mut stop => {
                     info!("lock heartbeat stop notified");
                     break;
                 }
diff --git a/crates/waymark/src/waymark_core/mod.rs b/crates/waymark/src/waymark_core/mod.rs
index 85f0c008..d5a4ec27 100644
--- a/crates/waymark/src/waymark_core/mod.rs
+++ b/crates/waymark/src/waymark_core/mod.rs
@@ -4,14 +4,10 @@ pub mod cli;
 pub mod commit_barrier;
 pub mod dag_viz;
 pub mod ir_format;
-pub mod ir_parser;
 pub mod lock;
 pub mod runloop;
-pub mod runner;
 
-pub use crate::backends::{InstanceDone, QueuedInstance};
 pub use crate::workers::{ActionCompletion, ActionRequest, BaseWorkerPool, InlineWorkerPool};
 pub use dag_viz::{build_dag_graph, render_dag_image};
 pub use ir_format::format_program;
 pub use runloop::RunLoop;
-pub use runner::RunnerState;
diff --git a/crates/waymark/src/waymark_core/runloop.rs b/crates/waymark/src/waymark_core/runloop.rs
index 9bb739d4..407e4a3b 100644
--- a/crates/waymark/src/waymark_core/runloop.rs
+++ b/crates/waymark/src/waymark_core/runloop.rs
@@ -3,7 +3,7 @@
 use std::collections::{HashMap, HashSet, VecDeque};
 use std::sync::{
     Arc,
-    atomic::{AtomicBool, AtomicUsize, Ordering},
+    atomic::{AtomicUsize, Ordering},
     mpsc as std_mpsc,
 };
 use std::thread;
@@ -12,27 +12,29 @@ use std::time::{Duration, Instant};
 use chrono::{DateTime, Utc};
 use prost::Message;
 use serde_json::Value;
-use tokio::sync::{Notify, mpsc, watch};
+use tokio::sync::mpsc;
 use tracing::{debug, error, info, warn};
 use uuid::Uuid;
-
-use crate::backends::{
-    ActionDone, BackendError, CoreBackend, GraphUpdate, InstanceDone, InstanceLockStatus,
-    LockClaim, QueuedInstance, QueuedInstanceBatch, WorkflowRegistryBackend,
+use waymark_backends_core::BackendError;
+use waymark_core_backend::{
+    ActionDone, GraphUpdate, InstanceDone, InstanceLockStatus, LockClaim, QueuedInstance,
+    QueuedInstanceBatch,
 };
+use waymark_workflow_registry_backend::WorkflowRegistryBackend;
+
 use crate::messages::ast as ir;
 use crate::observability::obs;
 use crate::waymark_core::commit_barrier::{CommitBarrier, DeferredInstanceEvent};
 use crate::waymark_core::lock::{InstanceLockTracker, spawn_lock_heartbeat};
-use crate::waymark_core::runner::synthetic_exceptions::{
+use crate::workers::{ActionCompletion, ActionRequest, BaseWorkerPool, WorkerPoolError};
+use waymark_dag::{DAG, DAGNode, OutputNode, ReturnNode, convert_to_dag};
+use waymark_runner::synthetic_exceptions::{
     SyntheticExceptionType, build_synthetic_exception_value,
 };
-use crate::waymark_core::runner::{
+use waymark_runner::{
     DurableUpdates, ExecutorStep, RunnerExecutor, RunnerExecutorError, SleepRequest,
     replay_variables,
 };
-use crate::workers::{ActionCompletion, ActionRequest, BaseWorkerPool, WorkerPoolError};
-use waymark_dag::{DAG, DAGNode, OutputNode, ReturnNode, convert_to_dag};
 
 /// Raised when the run loop cannot coordinate execution.
 #[derive(Debug, thiserror::Error)]
@@ -134,58 +136,33 @@ enum PersistAck {
     },
 }
 
-async fn send_instance_message_with_stop(
-    instance_tx: &mpsc::Sender<InstanceMessage>,
-    message: InstanceMessage,
-    stop_notify: &Notify,
+async fn send_with_stop<T>(
+    tx: &mpsc::Sender<T>,
+    item: T,
+    stop: tokio_util::sync::WaitForCancellationFuture<'_>,
+    kind: &'static str,
 ) -> bool {
-    let send_fut = instance_tx.send(message);
+    let send_fut = tx.send(item);
     tokio::pin!(send_fut);
-    let mut warned = false;
-    loop {
-        tokio::select! {
-            res = &mut send_fut => {
-                if res.is_err() {
-                    warn!("instance poller receiver dropped");
-                    return false;
-                }
-                return true;
-            }
-            _ = stop_notify.notified() => {
-                info!("instance poller stop notified during send");
-                return false;
-            }
-            _ = tokio::time::sleep(Duration::from_secs(2)), if !warned => {
-                warn!("instance poller send pending >2s");
-                warned = true;
-            }
-        }
-    }
-}
 
-async fn send_persist_command_with_stop(
-    persist_tx: &mpsc::Sender<PersistCommand>,
-    command: PersistCommand,
-    stop_notify: &Notify,
-) -> bool {
-    let send_fut = persist_tx.send(command);
-    tokio::pin!(send_fut);
+    let mut stop = std::pin::pin!(stop);
+
     let mut warned = false;
     loop {
         tokio::select! {
             res = &mut send_fut => {
                 if res.is_err() {
-                    warn!("persistence task receiver dropped");
+                    warn!(%kind, "receiver dropped");
                     return false;
                 }
                 return true;
             }
-            _ = stop_notify.notified() => {
-                info!("persist sender stop notified during send");
+            _ = &mut stop => {
+                info!(%kind, "sender stop notified during send");
                 return false;
             }
             _ = tokio::time::sleep(Duration::from_secs(2)), if !warned => {
-                warn!("persist command send pending >2s");
+                warn!(%kind, "send pending >2s");
                 warned = true;
             }
         }
@@ -376,7 +353,7 @@ impl ShardExecutor {
 
 fn run_executor_shard(
     shard_id: usize,
-    backend: Arc<dyn CoreBackend>,
+    backend: Arc<dyn waymark_core_backend::CoreBackend>,
     receiver: std_mpsc::Receiver<ShardCommand>,
     sender: mpsc::UnboundedSender<ShardEvent>,
 ) {
@@ -555,7 +532,7 @@ fn run_executor_shard(
 /// Run loop that fans out executor work across CPU-bound shard threads.
 pub struct RunLoop {
     worker_pool: Arc<dyn BaseWorkerPool>,
-    core_backend: Arc<dyn CoreBackend>,
+    core_backend: Arc<dyn waymark_core_backend::CoreBackend>,
     registry_backend: Arc<dyn WorkflowRegistryBackend>,
     workflow_cache: HashMap<Uuid, Arc<DAG>>,
     max_concurrent_instances: usize,
@@ -569,7 +546,8 @@ pub struct RunLoop {
     evict_sleep_threshold: Duration,
     skip_sleep: bool,
     active_instance_gauge: Option<Arc<AtomicUsize>>,
-    shutdown_rx: Option<watch::Receiver<bool>>,
+    shutdown_token: tokio_util::sync::CancellationToken,
+    exit_on_idle: bool,
 }
 
 #[derive(Clone, Debug)]
@@ -590,30 +568,37 @@ pub struct RunLoopSupervisorConfig {
 impl RunLoop {
     pub fn new(
         worker_pool: impl BaseWorkerPool + 'static,
-        backend: impl CoreBackend + WorkflowRegistryBackend + 'static,
+        backend: impl waymark_core_backend::CoreBackend + WorkflowRegistryBackend + 'static,
         config: RunLoopSupervisorConfig,
     ) -> Self {
-        Self::new_internal(worker_pool, backend, config, None)
+        Self::new_internal(
+            worker_pool,
+            backend,
+            config,
+            tokio_util::sync::CancellationToken::new(),
+            true,
+        )
     }
 
     pub fn new_with_shutdown(
         worker_pool: impl BaseWorkerPool + 'static,
-        backend: impl CoreBackend + WorkflowRegistryBackend + 'static,
+        backend: impl waymark_core_backend::CoreBackend + WorkflowRegistryBackend + 'static,
         config: RunLoopSupervisorConfig,
-        shutdown_rx: watch::Receiver<bool>,
+        shutdown_token: tokio_util::sync::CancellationToken,
     ) -> Self {
-        Self::new_internal(worker_pool, backend, config, Some(shutdown_rx))
+        Self::new_internal(worker_pool, backend, config, shutdown_token, false)
     }
 
     fn new_internal(
         worker_pool: impl BaseWorkerPool + 'static,
-        backend: impl CoreBackend + WorkflowRegistryBackend + 'static,
+        backend: impl waymark_core_backend::CoreBackend + WorkflowRegistryBackend + 'static,
         config: RunLoopSupervisorConfig,
-        shutdown_rx: Option<watch::Receiver<bool>>,
+        shutdown_token: tokio_util::sync::CancellationToken,
+        exit_on_idle: bool,
     ) -> Self {
         let max_concurrent_instances = std::cmp::max(1, config.max_concurrent_instances);
         let backend = Arc::new(backend);
-        let core_backend: Arc<dyn CoreBackend> = backend.clone();
+        let core_backend: Arc<dyn waymark_core_backend::CoreBackend> = backend.clone();
         let registry_backend: Arc<dyn WorkflowRegistryBackend> = backend;
         Self {
             worker_pool: Arc::new(worker_pool),
@@ -636,7 +621,8 @@ impl RunLoop {
             evict_sleep_threshold: config.evict_sleep_threshold,
             skip_sleep: config.skip_sleep,
             active_instance_gauge: config.active_instance_gauge.clone(),
-            shutdown_rx,
+            shutdown_token,
+            exit_on_idle,
         }
     }
 
@@ -891,30 +877,28 @@ impl RunLoop {
         let (completion_tx, mut completion_rx) = mpsc::channel::<Vec<ActionCompletion>>(32);
         let (instance_tx, mut instance_rx) = mpsc::channel::<InstanceMessage>(16);
         let (sleep_tx, mut sleep_rx) = mpsc::unbounded_channel::<SleepWake>();
-        let stop = Arc::new(AtomicBool::new(false));
-        let stop_notify = Arc::new(Notify::new());
+
         let lock_tracker = InstanceLockTracker::new(self.lock_uuid);
         let lock_handle = spawn_lock_heartbeat(
             self.core_backend.clone(),
             lock_tracker.clone(),
             self.lock_heartbeat,
             self.lock_ttl,
-            stop.clone(),
-            stop_notify.clone(),
+            self.shutdown_token.clone().cancelled_owned(),
         );
 
         let worker_pool = self.worker_pool.clone();
-        let completion_stop = stop.clone();
-        let completion_notify = stop_notify.clone();
+        let completion_shutdown_token = self.shutdown_token.clone();
         let completion_handle = tokio::spawn(async move {
+            let _completion_shutdown_guard = completion_shutdown_token.drop_guard_ref();
             loop {
-                if completion_stop.load(Ordering::SeqCst) {
+                if completion_shutdown_token.is_cancelled() {
                     info!("completion task stop flag set");
                     break;
                 }
                 debug!("completion task awaiting completions");
                 let completions = tokio::select! {
-                    _ = completion_notify.notified() => {
+                    _ = completion_shutdown_token.cancelled() => {
                         info!("completion task stop notified");
                         break;
                     }
@@ -930,35 +914,21 @@ impl RunLoop {
                     count = completions.len(),
                     "completion task sending completions"
                 );
-                let send_fut = completion_tx.send(completions);
-                tokio::pin!(send_fut);
-                let mut warned = false;
-                let mut stop_during_send = false;
-                let send_result = loop {
-                    tokio::select! {
-                        res = &mut send_fut => break Some(res),
-                        _ = completion_notify.notified() => {
-                            info!("completion task stop notified during send");
-                            stop_during_send = true;
-                            break None;
-                        }
-                        _ = tokio::time::sleep(Duration::from_secs(2)), if !warned => {
-                            warn!("completion task send pending >2s");
-                            warned = true;
-                        }
-                    }
-                };
-                if stop_during_send {
-                    break;
-                }
-                if send_result.is_none() || send_result.unwrap().is_err() {
-                    warn!("completion task receiver dropped");
+
+                if !send_with_stop(
+                    &completion_tx,
+                    completions,
+                    completion_shutdown_token.cancelled(),
+                    "completions",
+                )
+                .await
+                {
                     break;
                 }
+
                 debug!("completion task sent completions");
             }
             info!("completion task exiting");
-            completion_notify.notify_waiters();
         });
 
         let backend = self.core_backend.clone();
@@ -967,11 +937,11 @@ impl RunLoop {
         let lock_uuid = self.lock_uuid;
         let lock_ttl = self.lock_ttl;
         let instance_available_slots = Arc::clone(&available_instance_slots);
-        let instance_stop = stop.clone();
-        let instance_notify = stop_notify.clone();
+        let instance_shutdown_token = self.shutdown_token.clone();
         let instance_handle = tokio::spawn(async move {
+            let _instance_shutdown_guard = instance_shutdown_token.drop_guard_ref();
             loop {
-                if instance_stop.load(Ordering::SeqCst) {
+                if instance_shutdown_token.is_cancelled() {
                     info!("instance poller stop flag set");
                     break;
                 }
@@ -1005,7 +975,14 @@ impl RunLoop {
                     }
                     Err(err) => InstanceMessage::Error(err),
                 };
-                if !send_instance_message_with_stop(&instance_tx, message, &instance_notify).await {
+                if !send_with_stop(
+                    &instance_tx,
+                    message,
+                    instance_shutdown_token.cancelled(),
+                    "instance message",
+                )
+                .await
+                {
                     break;
                 }
                 if poll_interval > Duration::ZERO {
@@ -1015,7 +992,6 @@ impl RunLoop {
                 }
             }
             info!("instance poller exiting");
-            instance_notify.notify_waiters();
         });
 
         const PERSIST_COALESCE_WINDOW: Duration = Duration::from_millis(2);
@@ -1175,18 +1151,15 @@ impl RunLoop {
         let mut commit_barrier: CommitBarrier<ShardStep> = CommitBarrier::new();
         let mut instances_idle = false;
         let mut instances_done_pending: Vec<InstanceDone> = Vec::new();
-        let mut run_result = Ok(());
-        let mut shutdown_rx = self.shutdown_rx.clone();
+        let shutdown_token = self.shutdown_token.clone();
 
-        loop {
-            if let Some(rx) = shutdown_rx.as_ref()
-                && *rx.borrow()
-            {
+        let mut run_result = 'runloop: loop {
+            if shutdown_token.is_cancelled() {
                 info!("runloop exiting: shutdown requested");
-                break;
+                break 'runloop Ok(());
             }
 
-            if shutdown_rx.is_none()
+            if self.exit_on_idle
                 && instances_idle
                 && executor_shards.is_empty()
                 && sleeping_nodes.is_empty()
@@ -1196,18 +1169,13 @@ impl RunLoop {
                     blocked = blocked_until_by_instance.len(),
                     "runloop exiting: idle with no active executors"
                 );
-                break;
+                break 'runloop Ok(());
             }
 
-            let has_shutdown = shutdown_rx.is_some();
-            let shutdown_rx_fut = async { shutdown_rx.as_mut().unwrap().changed().await.is_ok() };
             let first_event = tokio::select! {
-                shutdown_signal = shutdown_rx_fut, if has_shutdown => {
-                    if !shutdown_signal || shutdown_rx.as_ref().is_some_and(|rx| *rx.borrow()) {
-                        info!("runloop exiting: shutdown requested");
-                        break;
-                    }
-                    None
+                _ = shutdown_token.cancelled() => {
+                    info!("runloop exiting: shutdown requested");
+                    break 'runloop Ok(());
                 }
                 Some(completions) = completion_rx.recv() => {
                     Some(CoordinatorEvent::Completions(completions))
@@ -1235,7 +1203,7 @@ impl RunLoop {
                 }
                 else => {
                     warn!("runloop exiting: event channels closed");
-                    break;
+                    break 'runloop Ok(());
                 },
             };
 
@@ -1267,8 +1235,7 @@ impl RunLoop {
                 }
                 CoordinatorEvent::Instance(InstanceMessage::Error(err)) => {
                     warn!(error = %err, "runloop exiting: instance poller backend error");
-                    run_result = Err(RunLoopError::Backend(err));
-                    break;
+                    break 'runloop Err(RunLoopError::Backend(err));
                 }
                 CoordinatorEvent::Shard(event) => match event {
                     ShardEvent::Step(step) => all_steps.push(step),
@@ -1308,15 +1275,11 @@ impl RunLoop {
                     }
                     InstanceMessage::Error(err) => {
                         warn!(error = %err, "runloop exiting: instance poller backend error");
-                        run_result = Err(RunLoopError::Backend(err));
-                        break;
+                        break 'runloop Err(RunLoopError::Backend(err));
                     }
                 }
             }
-            if run_result.is_err() {
-                warn!("runloop exiting: error after draining instance messages");
-                break;
-            }
+
             while let Ok(event) = event_rx.try_recv() {
                 match event {
                     ShardEvent::Step(step) => all_steps.push(step),
@@ -1334,10 +1297,7 @@ impl RunLoop {
                     }
                 }
             }
-            if run_result.is_err() {
-                warn!("runloop exiting: error after draining shard events");
-                break;
-            }
+
             while let Ok(wake) = sleep_rx.try_recv() {
                 all_wakes.push(wake);
             }
@@ -1418,8 +1378,7 @@ impl RunLoop {
                                     )
                                     .await
                             {
-                                run_result = Err(err);
-                                break;
+                                break 'runloop Err(err);
                             }
                             for step in batch.steps {
                                 if !batch.instance_ids.contains(&step.executor_id) {
@@ -1434,13 +1393,10 @@ impl RunLoop {
                                     continue;
                                 }
                                 if let Err(err) = self.apply_confirmed_step(step, &mut state) {
-                                    run_result = Err(err);
-                                    break;
+                                    break 'runloop Err(err);
                                 }
                             }
-                            if run_result.is_err() {
-                                break;
-                            }
+
                             for instance_id in batch.instance_ids {
                                 if evict_ids.contains(&instance_id) {
                                     state.commit_barrier.remove_instance(instance_id);
@@ -1457,14 +1413,10 @@ impl RunLoop {
                         }
                         PersistAck::StepsPersistFailed { batch_id, error } => {
                             warn!(batch_id, error = %error, "persist step batch failed");
-                            run_result = Err(error);
-                            break;
+                            break 'runloop Err(error);
                         }
                     }
                 }
-                if run_result.is_err() {
-                    break;
-                }
             }
 
             if !all_completions.is_empty() {
@@ -1587,8 +1539,7 @@ impl RunLoop {
             if had_instances {
                 instances_idle = false;
                 if let Err(err) = self.hydrate_instances(&mut all_instances).await {
-                    run_result = Err(err);
-                    break;
+                    break 'runloop Err(err);
                 }
                 debug!(count = all_instances.len(), "hydrated queued instances");
                 let mut by_shard: HashMap<usize, Vec<QueuedInstance>> = HashMap::new();
@@ -1680,7 +1631,7 @@ impl RunLoop {
                     .map(|update| update.instance_id)
                     .collect();
                 let batch_id = commit_barrier.register_batch(instance_ids.clone(), all_steps);
-                if !send_persist_command_with_stop(
+                if !send_with_stop(
                     &persist_tx,
                     PersistCommand {
                         batch_id,
@@ -1689,7 +1640,8 @@ impl RunLoop {
                         actions_done,
                         graph_updates,
                     },
-                    &stop_notify,
+                    shutdown_token.cancelled(),
+                    "persist command",
                 )
                 .await
                 {
@@ -1698,10 +1650,9 @@ impl RunLoop {
                             commit_barrier.remove_instance(instance_id);
                         }
                     }
-                    run_result = Err(RunLoopError::Message(
+                    break 'runloop Err(RunLoopError::Message(
                         "failed to submit persist batch to persistence task".to_string(),
                     ));
-                    break;
                 }
             }
 
@@ -1737,8 +1688,7 @@ impl RunLoop {
                         sleep_tx: &sleep_tx,
                     };
                     if let Err(err) = self.evict_instances(&evict_ids, &mut state).await {
-                        run_result = Err(err);
-                        break;
+                        break 'runloop Err(err);
                     }
                     for instance_id in evict_ids {
                         state.commit_barrier.remove_instance(instance_id);
@@ -1751,10 +1701,9 @@ impl RunLoop {
             if instances_done_pending.len() >= self.instance_done_batch_size
                 && let Err(err) = self.flush_instances_done(&mut instances_done_pending).await
             {
-                run_result = Err(err);
-                break;
+                break 'runloop Err(err);
             }
-        }
+        };
 
         info!(
             instances_idle,
@@ -1775,8 +1724,7 @@ impl RunLoop {
         }
         drop(persist_tx);
         let _ = persist_handle.await;
-        stop.store(true, Ordering::SeqCst);
-        stop_notify.notify_waiters();
+        shutdown_token.cancel();
         let _ = completion_handle.await;
         let _ = instance_handle.await;
         let _ = lock_handle.await;
@@ -1818,9 +1766,9 @@ pub async fn runloop_supervisor<B, W>(
     backend: B,
     worker_pool: W,
     config: RunLoopSupervisorConfig,
-    shutdown_rx: watch::Receiver<bool>,
+    shutdown_token: tokio_util::sync::CancellationToken,
 ) where
-    B: CoreBackend + WorkflowRegistryBackend + Clone + Send + Sync + 'static,
+    B: waymark_core_backend::CoreBackend + WorkflowRegistryBackend + Clone + Send + Sync + 'static,
     W: BaseWorkerPool + Clone + Send + Sync + 'static,
 {
     let mut backoff = Duration::from_millis(200);
@@ -1829,7 +1777,7 @@ pub async fn runloop_supervisor<B, W>(
     let poll_interval = config.poll_interval;
 
     loop {
-        if *shutdown_rx.borrow() {
+        if shutdown_token.is_cancelled() {
             break;
         }
 
@@ -1844,12 +1792,12 @@ pub async fn runloop_supervisor<B, W>(
             worker_pool.clone(),
             backend.clone(),
             config.clone(),
-            shutdown_rx.clone(),
+            shutdown_token.child_token(),
         );
 
         let result = runloop.run().await;
 
-        if *shutdown_rx.borrow() {
+        if shutdown_token.is_cancelled() {
             break;
         }
 
@@ -1983,1081 +1931,4 @@ fn build_instance_done(
 }
 
 #[cfg(test)]
-mod tests {
-    use super::*;
-    use std::collections::{HashMap, VecDeque};
-    use std::sync::{
-        Arc, Mutex,
-        atomic::{AtomicBool, AtomicUsize, Ordering as AtomicOrdering},
-    };
-    use std::time::Duration;
-
-    use chrono::Utc;
-    use prost::Message;
-    use sha2::{Digest, Sha256};
-    use tonic::async_trait;
-
-    use crate::backends::{
-        ActionAttemptStatus, BackendError, BackendResult, CoreBackend, GraphUpdate, InstanceDone,
-        InstanceLockStatus, LockClaim, MemoryBackend, QueuedInstanceBatch, WorkflowRegistration,
-        WorkflowRegistryBackend, WorkflowVersion,
-    };
-    use crate::messages::ast as ir;
-    use crate::waymark_core::ir_parser::parse_program;
-    use crate::waymark_core::runner::RunnerState;
-    use crate::waymark_core::runner::state::NodeStatus;
-    use crate::workers::ActionCallable;
-    use waymark_dag::convert_to_dag;
-
-    #[derive(Clone)]
-    struct FaultInjectingBackend {
-        inner: MemoryBackend,
-        fail_get_queued_instances_with_depth_limit: Arc<AtomicBool>,
-        get_queued_instances_calls: Arc<AtomicUsize>,
-    }
-
-    impl FaultInjectingBackend {
-        fn with_depth_limit_poll_failures(inner: MemoryBackend) -> Self {
-            Self {
-                inner,
-                fail_get_queued_instances_with_depth_limit: Arc::new(AtomicBool::new(true)),
-                get_queued_instances_calls: Arc::new(AtomicUsize::new(0)),
-            }
-        }
-
-        fn get_queued_instances_calls(&self) -> usize {
-            self.get_queued_instances_calls.load(AtomicOrdering::SeqCst)
-        }
-
-        fn queue_len(&self) -> usize {
-            self.inner
-                .instance_queue()
-                .as_ref()
-                .map(|queue| queue.lock().expect("queue poisoned").len())
-                .unwrap_or(0)
-        }
-
-        fn instances_done_len(&self) -> usize {
-            self.inner.instances_done().len()
-        }
-    }
-
-    #[async_trait]
-    impl CoreBackend for FaultInjectingBackend {
-        fn clone_box(&self) -> Box<dyn CoreBackend> {
-            Box::new(self.clone())
-        }
-
-        async fn save_graphs(
-            &self,
-            claim: LockClaim,
-            graphs: &[GraphUpdate],
-        ) -> BackendResult<Vec<InstanceLockStatus>> {
-            self.inner.save_graphs(claim, graphs).await
-        }
-
-        async fn save_actions_done(
-            &self,
-            actions: &[crate::backends::ActionDone],
-        ) -> BackendResult<()> {
-            self.inner.save_actions_done(actions).await
-        }
-
-        async fn save_instances_done(&self, instances: &[InstanceDone]) -> BackendResult<()> {
-            self.inner.save_instances_done(instances).await
-        }
-
-        async fn get_queued_instances(
-            &self,
-            size: usize,
-            claim: LockClaim,
-        ) -> BackendResult<QueuedInstanceBatch> {
-            self.get_queued_instances_calls
-                .fetch_add(1, AtomicOrdering::SeqCst);
-            if self
-                .fail_get_queued_instances_with_depth_limit
-                .load(AtomicOrdering::SeqCst)
-            {
-                return Err(BackendError::Message("depth limit exceeded".to_string()));
-            }
-            self.inner.get_queued_instances(size, claim).await
-        }
-
-        async fn queue_instances(
-            &self,
-            instances: &[crate::backends::QueuedInstance],
-        ) -> BackendResult<()> {
-            self.inner.queue_instances(instances).await
-        }
-
-        async fn refresh_instance_locks(
-            &self,
-            claim: LockClaim,
-            instance_ids: &[Uuid],
-        ) -> BackendResult<Vec<InstanceLockStatus>> {
-            self.inner.refresh_instance_locks(claim, instance_ids).await
-        }
-
-        async fn release_instance_locks(
-            &self,
-            lock_uuid: Uuid,
-            instance_ids: &[Uuid],
-        ) -> BackendResult<()> {
-            self.inner
-                .release_instance_locks(lock_uuid, instance_ids)
-                .await
-        }
-    }
-
-    #[async_trait]
-    impl WorkflowRegistryBackend for FaultInjectingBackend {
-        async fn upsert_workflow_version(
-            &self,
-            registration: &WorkflowRegistration,
-        ) -> BackendResult<Uuid> {
-            self.inner.upsert_workflow_version(registration).await
-        }
-
-        async fn get_workflow_versions(&self, ids: &[Uuid]) -> BackendResult<Vec<WorkflowVersion>> {
-            self.inner.get_workflow_versions(ids).await
-        }
-    }
-
-    fn default_test_config(lock_uuid: Uuid) -> RunLoopSupervisorConfig {
-        RunLoopSupervisorConfig {
-            max_concurrent_instances: 25,
-            executor_shards: 1,
-            instance_done_batch_size: None,
-            poll_interval: Duration::from_millis(10),
-            persistence_interval: Duration::from_millis(10),
-            lock_uuid,
-            lock_ttl: Duration::from_secs(15),
-            lock_heartbeat: Duration::from_secs(5),
-            evict_sleep_threshold: Duration::from_secs(10),
-            skip_sleep: false,
-            active_instance_gauge: None,
-        }
-    }
-
-    #[tokio::test]
-    async fn test_runloop_executes_actions() {
-        let source = r#"
-fn main(input: [x], output: [y]):
-    y = @tests.fixtures.test_actions.double(value=x)
-    return y
-"#;
-        let program = parse_program(source.trim()).expect("parse program");
-        let program_proto = program.encode_to_vec();
-        let ir_hash = format!("{:x}", Sha256::digest(&program_proto));
-        let dag = Arc::new(convert_to_dag(&program).expect("convert to dag"));
-
-        let mut state = RunnerState::new(Some(Arc::clone(&dag)), None, None, false);
-        let _ = state
-            .record_assignment(
-                vec!["x".to_string()],
-                &ir::Expr {
-                    kind: Some(ir::expr::Kind::Literal(ir::Literal {
-                        value: Some(ir::literal::Value::IntValue(4)),
-                    })),
-                    span: None,
-                },
-                None,
-                Some("input x = 4".to_string()),
-            )
-            .expect("record assignment");
-        let entry_node = dag
-            .entry_node
-            .as_ref()
-            .expect("DAG entry node not found")
-            .clone();
-        let entry_exec = state
-            .queue_template_node(&entry_node, None)
-            .expect("queue entry node");
-
-        let queue = Arc::new(Mutex::new(VecDeque::new()));
-        let backend = MemoryBackend::with_queue(queue.clone());
-        let workflow_version_id = backend
-            .upsert_workflow_version(&WorkflowRegistration {
-                workflow_name: "test".to_string(),
-                workflow_version: ir_hash.clone(),
-                ir_hash,
-                program_proto,
-                concurrent: false,
-            })
-            .await
-            .expect("register workflow version");
-
-        let mut actions: HashMap<String, ActionCallable> = HashMap::new();
-        actions.insert(
-            "double".to_string(),
-            Arc::new(|kwargs| {
-                Box::pin(async move {
-                    let value = kwargs
-                        .get("value")
-                        .and_then(|value| value.as_i64())
-                        .unwrap_or(0);
-                    Ok(Value::Number((value * 2).into()))
-                })
-            }),
-        );
-        let worker_pool = crate::workers::InlineWorkerPool::new(actions);
-
-        let mut runloop = RunLoop::new(
-            worker_pool,
-            backend.clone(),
-            RunLoopSupervisorConfig {
-                max_concurrent_instances: 25,
-                executor_shards: 1,
-                instance_done_batch_size: None,
-                poll_interval: Duration::from_secs_f64(0.0),
-                persistence_interval: Duration::from_secs_f64(0.1),
-                lock_uuid: Uuid::new_v4(),
-                lock_ttl: Duration::from_secs(15),
-                lock_heartbeat: Duration::from_secs(5),
-                evict_sleep_threshold: Duration::from_secs(10),
-                skip_sleep: false,
-                active_instance_gauge: None,
-            },
-        );
-        queue.lock().expect("queue lock").push_back(QueuedInstance {
-            workflow_version_id,
-            schedule_id: None,
-            dag: None,
-            entry_node: entry_exec.node_id,
-            state: Some(state),
-            action_results: HashMap::new(),
-            instance_id: Uuid::new_v4(),
-            scheduled_at: None,
-        });
-
-        runloop.run().await.expect("runloop");
-        let instances_done = backend.instances_done();
-        assert_eq!(instances_done.len(), 1);
-        let done = &instances_done[0];
-        let output = done.result.clone().expect("instance result");
-        let Value::Object(map) = output else {
-            panic!("expected output object");
-        };
-        assert_eq!(map.get("y"), Some(&Value::Number(8.into())));
-    }
-
-    #[tokio::test]
-    async fn test_runloop_times_out_action_and_persists_timestamps() {
-        let source = r#"
-fn main(input: [], output: [y]):
-    y = @tests.fixtures.test_actions.hang()[timeout: 1 s]
-    return y
-"#;
-        let program = parse_program(source.trim()).expect("parse program");
-        let program_proto = program.encode_to_vec();
-        let ir_hash = format!("{:x}", Sha256::digest(&program_proto));
-        let dag = Arc::new(convert_to_dag(&program).expect("convert to dag"));
-
-        let mut state = RunnerState::new(Some(Arc::clone(&dag)), None, None, false);
-        let entry_node = dag
-            .entry_node
-            .as_ref()
-            .expect("DAG entry node not found")
-            .clone();
-        let entry_exec = state
-            .queue_template_node(&entry_node, None)
-            .expect("queue entry node");
-
-        let queue = Arc::new(Mutex::new(VecDeque::new()));
-        let backend = MemoryBackend::with_queue(queue.clone());
-        let workflow_version_id = backend
-            .upsert_workflow_version(&WorkflowRegistration {
-                workflow_name: "test_timeout".to_string(),
-                workflow_version: ir_hash.clone(),
-                ir_hash,
-                program_proto,
-                concurrent: false,
-            })
-            .await
-            .expect("register workflow version");
-
-        let mut actions: HashMap<String, ActionCallable> = HashMap::new();
-        actions.insert(
-            "hang".to_string(),
-            Arc::new(|_kwargs| {
-                Box::pin(async move {
-                    tokio::time::sleep(Duration::from_secs(5)).await;
-                    Ok(Value::String("late".to_string()))
-                })
-            }),
-        );
-        let worker_pool = crate::workers::InlineWorkerPool::new(actions);
-
-        let mut runloop = RunLoop::new(
-            worker_pool,
-            backend.clone(),
-            RunLoopSupervisorConfig {
-                max_concurrent_instances: 25,
-                executor_shards: 1,
-                instance_done_batch_size: None,
-                poll_interval: Duration::from_secs_f64(0.0),
-                persistence_interval: Duration::from_secs_f64(0.05),
-                lock_uuid: Uuid::new_v4(),
-                lock_ttl: Duration::from_secs(15),
-                lock_heartbeat: Duration::from_secs(5),
-                evict_sleep_threshold: Duration::from_secs(10),
-                skip_sleep: false,
-                active_instance_gauge: None,
-            },
-        );
-        queue.lock().expect("queue lock").push_back(QueuedInstance {
-            workflow_version_id,
-            schedule_id: None,
-            dag: None,
-            entry_node: entry_exec.node_id,
-            state: Some(state),
-            action_results: HashMap::new(),
-            instance_id: Uuid::new_v4(),
-            scheduled_at: None,
-        });
-
-        runloop.run().await.expect("runloop");
-
-        let actions_done = backend.actions_done();
-        assert_eq!(actions_done.len(), 1);
-        let action_done = &actions_done[0];
-        assert_eq!(action_done.status, ActionAttemptStatus::TimedOut);
-        assert!(action_done.started_at.is_some());
-        assert!(action_done.completed_at.is_some());
-        assert!(action_done.duration_ms.is_some());
-
-        let execution_id = action_done.execution_id;
-        let graph_updates = backend.graph_updates();
-        let mut saw_running_snapshot = false;
-        let mut saw_failed_snapshot = false;
-        for update in graph_updates {
-            let Some(node) = update.nodes.get(&execution_id) else {
-                continue;
-            };
-            if node.status == NodeStatus::Running && node.started_at.is_some() {
-                saw_running_snapshot = true;
-            }
-            if node.status == NodeStatus::Failed
-                && node.started_at.is_some()
-                && node.completed_at.is_some()
-            {
-                saw_failed_snapshot = true;
-            }
-        }
-        assert!(saw_running_snapshot, "expected running graph snapshot");
-        assert!(saw_failed_snapshot, "expected failed graph snapshot");
-
-        let instances_done = backend.instances_done();
-        assert_eq!(instances_done.len(), 1);
-        assert!(instances_done[0].result.is_none());
-        let Value::Object(error_obj) = instances_done[0]
-            .error
-            .clone()
-            .expect("instance error payload")
-        else {
-            panic!("expected error payload object");
-        };
-        assert_eq!(
-            error_obj.get("type"),
-            Some(&Value::String("ActionTimeout".to_string()))
-        );
-    }
-
-    #[tokio::test]
-    async fn test_runloop_marks_instance_failed_on_executor_error() {
-        let source = r#"
-fn main(input: [x], output: [y]):
-    y = @tests.fixtures.test_actions.double(value=x)
-    return y
-"#;
-        let program = parse_program(source.trim()).expect("parse program");
-        let program_proto = program.encode_to_vec();
-        let ir_hash = format!("{:x}", Sha256::digest(&program_proto));
-        let dag = Arc::new(convert_to_dag(&program).expect("convert to dag"));
-
-        // Intentionally omit input assignment so action kwarg resolution fails at runtime.
-        let mut state = RunnerState::new(Some(Arc::clone(&dag)), None, None, false);
-        let entry_node = dag
-            .entry_node
-            .as_ref()
-            .expect("DAG entry node not found")
-            .clone();
-        let entry_exec = state
-            .queue_template_node(&entry_node, None)
-            .expect("queue entry node");
-
-        let queue = Arc::new(Mutex::new(VecDeque::new()));
-        let backend = MemoryBackend::with_queue(queue.clone());
-        let workflow_version_id = backend
-            .upsert_workflow_version(&WorkflowRegistration {
-                workflow_name: "test".to_string(),
-                workflow_version: ir_hash.clone(),
-                ir_hash,
-                program_proto,
-                concurrent: false,
-            })
-            .await
-            .expect("register workflow version");
-
-        let worker_pool = crate::workers::InlineWorkerPool::new(HashMap::new());
-        let mut runloop = RunLoop::new(
-            worker_pool,
-            backend.clone(),
-            RunLoopSupervisorConfig {
-                max_concurrent_instances: 25,
-                executor_shards: 1,
-                instance_done_batch_size: None,
-                poll_interval: Duration::from_secs_f64(0.0),
-                persistence_interval: Duration::from_secs_f64(0.1),
-                lock_uuid: Uuid::new_v4(),
-                lock_ttl: Duration::from_secs(15),
-                lock_heartbeat: Duration::from_secs(5),
-                evict_sleep_threshold: Duration::from_secs(10),
-                skip_sleep: false,
-                active_instance_gauge: None,
-            },
-        );
-        let instance_id = Uuid::new_v4();
-        queue.lock().expect("queue lock").push_back(QueuedInstance {
-            workflow_version_id,
-            schedule_id: None,
-            dag: None,
-            entry_node: entry_exec.node_id,
-            state: Some(state),
-            action_results: HashMap::new(),
-            instance_id,
-            scheduled_at: None,
-        });
-
-        runloop
-            .run()
-            .await
-            .expect("runloop should continue after instance failure");
-        let instances_done = backend.instances_done();
-        assert_eq!(instances_done.len(), 1);
-
-        let done = &instances_done[0];
-        assert_eq!(done.executor_id, instance_id);
-        assert!(done.result.is_none());
-        let error = done.error.as_ref().expect("instance error");
-        let Value::Object(error_obj) = error else {
-            panic!("expected error payload object");
-        };
-        assert_eq!(
-            error_obj.get("type"),
-            Some(&Value::String("ExecutionError".to_string()))
-        );
-        let message = error_obj
-            .get("message")
-            .and_then(Value::as_str)
-            .expect("error message");
-        assert!(message.contains("variable not found: x"));
-    }
-
-    #[tokio::test]
-    async fn test_runloop_executes_for_loop_action_assignments() {
-        let source = r#"
-fn main(input: [limit], output: [result]):
-    current = 0
-    iterations = 0
-    for _ in range(limit):
-        current = @tests.fixtures.test_actions.increment(value=current)
-        iterations = iterations + 1
-    result = @tests.fixtures.test_actions.pack(limit=limit, final=current, iterations=iterations)
-    return result
-"#;
-        let program = parse_program(source.trim()).expect("parse program");
-        let program_proto = program.encode_to_vec();
-        let ir_hash = format!("{:x}", Sha256::digest(&program_proto));
-        let dag = Arc::new(convert_to_dag(&program).expect("convert to dag"));
-
-        let mut state = RunnerState::new(Some(Arc::clone(&dag)), None, None, false);
-        let _ = state
-            .record_assignment(
-                vec!["limit".to_string()],
-                &ir::Expr {
-                    kind: Some(ir::expr::Kind::Literal(ir::Literal {
-                        value: Some(ir::literal::Value::IntValue(4)),
-                    })),
-                    span: None,
-                },
-                None,
-                Some("input limit = 4".to_string()),
-            )
-            .expect("record assignment");
-        let entry_node = dag
-            .entry_node
-            .as_ref()
-            .expect("DAG entry node not found")
-            .clone();
-        let entry_exec = state
-            .queue_template_node(&entry_node, None)
-            .expect("queue entry node");
-
-        let queue = Arc::new(Mutex::new(VecDeque::new()));
-        let backend = MemoryBackend::with_queue(queue.clone());
-        let workflow_version_id = backend
-            .upsert_workflow_version(&WorkflowRegistration {
-                workflow_name: "test_loop_actions".to_string(),
-                workflow_version: ir_hash.clone(),
-                ir_hash,
-                program_proto,
-                concurrent: false,
-            })
-            .await
-            .expect("register workflow version");
-
-        let mut actions: HashMap<String, ActionCallable> = HashMap::new();
-        let increment_inputs = Arc::new(Mutex::new(Vec::new()));
-        let increment_inputs_clone = Arc::clone(&increment_inputs);
-        actions.insert(
-            "increment".to_string(),
-            Arc::new(move |kwargs| {
-                let increment_inputs = Arc::clone(&increment_inputs_clone);
-                Box::pin(async move {
-                    let value = kwargs
-                        .get("value")
-                        .and_then(|value| value.as_i64())
-                        .unwrap_or(0);
-                    increment_inputs
-                        .lock()
-                        .expect("increment inputs lock")
-                        .push(value);
-                    Ok(Value::Number((value + 1).into()))
-                })
-            }),
-        );
-        actions.insert(
-            "pack".to_string(),
-            Arc::new(|kwargs| {
-                Box::pin(async move {
-                    let limit = kwargs.get("limit").cloned().unwrap_or(Value::Null);
-                    let final_value = kwargs.get("final").cloned().unwrap_or(Value::Null);
-                    let iterations = kwargs.get("iterations").cloned().unwrap_or(Value::Null);
-                    Ok(Value::Object(
-                        [
-                            ("limit".to_string(), limit),
-                            ("final".to_string(), final_value),
-                            ("iterations".to_string(), iterations),
-                        ]
-                        .into_iter()
-                        .collect(),
-                    ))
-                })
-            }),
-        );
-        let worker_pool = crate::workers::InlineWorkerPool::new(actions);
-
-        let mut runloop = RunLoop::new(
-            worker_pool,
-            backend.clone(),
-            RunLoopSupervisorConfig {
-                max_concurrent_instances: 25,
-                executor_shards: 1,
-                instance_done_batch_size: None,
-                poll_interval: Duration::from_secs_f64(0.0),
-                persistence_interval: Duration::from_secs_f64(0.1),
-                lock_uuid: Uuid::new_v4(),
-                lock_ttl: Duration::from_secs(15),
-                lock_heartbeat: Duration::from_secs(5),
-                evict_sleep_threshold: Duration::from_secs(10),
-                skip_sleep: false,
-                active_instance_gauge: None,
-            },
-        );
-        queue.lock().expect("queue lock").push_back(QueuedInstance {
-            workflow_version_id,
-            schedule_id: None,
-            dag: None,
-            entry_node: entry_exec.node_id,
-            state: Some(state),
-            action_results: HashMap::new(),
-            instance_id: Uuid::new_v4(),
-            scheduled_at: None,
-        });
-
-        runloop.run().await.expect("runloop");
-        let instances_done = backend.instances_done();
-        assert_eq!(instances_done.len(), 1);
-        let done = &instances_done[0];
-        let output = done.result.clone().expect("instance result");
-        let Value::Object(map) = output else {
-            panic!("expected output object");
-        };
-        let Value::Object(result_map) = map
-            .get("result")
-            .cloned()
-            .expect("result payload should include result")
-        else {
-            panic!("expected nested result object");
-        };
-        assert_eq!(
-            *increment_inputs.lock().expect("increment inputs lock"),
-            vec![0, 1, 2, 3]
-        );
-        assert_eq!(result_map.get("limit"), Some(&Value::Number(4.into())));
-        assert_eq!(result_map.get("final"), Some(&Value::Number(4.into())));
-        assert_eq!(result_map.get("iterations"), Some(&Value::Number(4.into())));
-    }
-
-    #[tokio::test]
-    async fn test_instance_poller_send_unblocks_on_stop_notification() {
-        let (instance_tx, mut instance_rx) = mpsc::channel::<InstanceMessage>(1);
-        instance_tx
-            .send(InstanceMessage::Batch {
-                instances: Vec::new(),
-            })
-            .await
-            .expect("seed channel");
-
-        let stop_notify = Arc::new(Notify::new());
-        let send_task = tokio::spawn({
-            let instance_tx = instance_tx.clone();
-            let stop_notify = Arc::clone(&stop_notify);
-            async move {
-                send_instance_message_with_stop(
-                    &instance_tx,
-                    InstanceMessage::Batch {
-                        instances: Vec::new(),
-                    },
-                    &stop_notify,
-                )
-                .await
-            }
-        });
-
-        tokio::time::sleep(Duration::from_millis(20)).await;
-        stop_notify.notify_waiters();
-        let sent = tokio::time::timeout(Duration::from_millis(300), send_task)
-            .await
-            .expect("send task should complete")
-            .expect("send task should not panic");
-        assert!(!sent, "send should abort when stop is notified");
-
-        let _ = instance_rx.recv().await;
-    }
-
-    #[tokio::test]
-    async fn test_instance_poller_send_succeeds_when_channel_has_capacity() {
-        let (instance_tx, mut instance_rx) = mpsc::channel::<InstanceMessage>(1);
-        let stop_notify = Notify::new();
-        let sent = send_instance_message_with_stop(
-            &instance_tx,
-            InstanceMessage::Batch {
-                instances: Vec::new(),
-            },
-            &stop_notify,
-        )
-        .await;
-        assert!(sent);
-
-        let received = instance_rx.recv().await.expect("queued message");
-        match received {
-            InstanceMessage::Batch { instances } => assert!(instances.is_empty()),
-            InstanceMessage::Error(err) => panic!("unexpected error message: {err}"),
-        }
-    }
-
-    #[tokio::test]
-    async fn test_runloop_supervisor_restarts_on_depth_limit_backend_errors() {
-        let queue = Arc::new(Mutex::new(VecDeque::new()));
-        let backend =
-            FaultInjectingBackend::with_depth_limit_poll_failures(MemoryBackend::with_queue(queue));
-        let worker_pool = crate::workers::InlineWorkerPool::new(HashMap::new());
-        let (shutdown_tx, shutdown_rx) = watch::channel(false);
-
-        let supervisor = tokio::spawn(runloop_supervisor(
-            backend.clone(),
-            worker_pool,
-            default_test_config(Uuid::new_v4()),
-            shutdown_rx,
-        ));
-
-        tokio::time::sleep(Duration::from_millis(750)).await;
-        shutdown_tx.send(true).expect("send shutdown");
-        tokio::time::timeout(Duration::from_secs(2), supervisor)
-            .await
-            .expect("supervisor should stop")
-            .expect("supervisor task should not panic");
-
-        assert!(
-            backend.get_queued_instances_calls() >= 2,
-            "expected multiple polling attempts while supervisor restarts"
-        );
-    }
-
-    #[tokio::test]
-    async fn test_runloop_supervisor_reproduces_no_progress_with_continued_queue_growth() {
-        let queue = Arc::new(Mutex::new(VecDeque::new()));
-        let backend =
-            FaultInjectingBackend::with_depth_limit_poll_failures(MemoryBackend::with_queue(queue));
-        let worker_pool = crate::workers::InlineWorkerPool::new(HashMap::new());
-        let (shutdown_tx, shutdown_rx) = watch::channel(false);
-
-        let supervisor = tokio::spawn(runloop_supervisor(
-            backend.clone(),
-            worker_pool,
-            default_test_config(Uuid::new_v4()),
-            shutdown_rx,
-        ));
-
-        for _ in 0..20 {
-            backend
-                .queue_instances(&[QueuedInstance {
-                    workflow_version_id: Uuid::new_v4(),
-                    schedule_id: None,
-                    dag: None,
-                    entry_node: Uuid::new_v4(),
-                    state: None,
-                    action_results: HashMap::new(),
-                    instance_id: Uuid::new_v4(),
-                    scheduled_at: None,
-                }])
-                .await
-                .expect("queue synthetic instance");
-        }
-
-        tokio::time::sleep(Duration::from_millis(500)).await;
-        shutdown_tx.send(true).expect("send shutdown");
-        tokio::time::timeout(Duration::from_secs(2), supervisor)
-            .await
-            .expect("supervisor should stop")
-            .expect("supervisor task should not panic");
-
-        assert!(
-            backend.get_queued_instances_calls() >= 1,
-            "expected polling attempts during stall simulation"
-        );
-        assert!(
-            backend.queue_len() >= 20,
-            "queued work should continue to grow when poller cannot read instances"
-        );
-        assert_eq!(
-            backend.instances_done_len(),
-            0,
-            "no instances should complete while poller is failing"
-        );
-    }
-
-    #[tokio::test]
-    async fn test_runloop_marks_instance_failed_when_rehydrated_state_is_missing_action_result() {
-        let source = r#"
-fn main(input: [x], output: [y]):
-    y = @tests.fixtures.test_actions.double(value=x)
-    return y
-"#;
-        let program = parse_program(source.trim()).expect("parse program");
-        let program_proto = program.encode_to_vec();
-        let ir_hash = format!("{:x}", Sha256::digest(&program_proto));
-        let dag = Arc::new(convert_to_dag(&program).expect("convert to dag"));
-
-        let mut state = RunnerState::new(Some(Arc::clone(&dag)), None, None, false);
-        let _ = state
-            .record_assignment(
-                vec!["x".to_string()],
-                &ir::Expr {
-                    kind: Some(ir::expr::Kind::Literal(ir::Literal {
-                        value: Some(ir::literal::Value::IntValue(4)),
-                    })),
-                    span: None,
-                },
-                None,
-                Some("input x = 4".to_string()),
-            )
-            .expect("record assignment");
-        let template_entry_node = dag
-            .entry_node
-            .as_ref()
-            .expect("DAG entry node not found")
-            .clone();
-        let entry_exec = state
-            .queue_template_node(&template_entry_node, None)
-            .expect("queue entry node");
-        let mut bootstrap_executor =
-            RunnerExecutor::new(Arc::clone(&dag), state, HashMap::new(), None);
-        let bootstrap_step = bootstrap_executor
-            .increment(&[entry_exec.node_id])
-            .expect("bootstrap increment should materialize action node");
-        let action_exec = bootstrap_step
-            .actions
-            .first()
-            .expect("bootstrap should queue one action call")
-            .clone();
-
-        // Simulate a reclaimed instance whose graph says the action execution node
-        // has finished, but action_results payload was lost.
-        bootstrap_executor
-            .state_mut()
-            .mark_completed(action_exec.node_id)
-            .expect("mark action completed");
-        bootstrap_executor.state_mut().ready_queue.clear();
-        assert!(
-            bootstrap_executor
-                .state()
-                .nodes
-                .get(&action_exec.node_id)
-                .is_some_and(|node| node.is_action_call() && node.status == NodeStatus::Completed),
-            "expected completed action execution node"
-        );
-        let state = bootstrap_executor.state().clone();
-
-        let queue = Arc::new(Mutex::new(VecDeque::new()));
-        let backend = MemoryBackend::with_queue(queue.clone());
-        let workflow_version_id = backend
-            .upsert_workflow_version(&WorkflowRegistration {
-                workflow_name: "test_missing_action_result".to_string(),
-                workflow_version: ir_hash.clone(),
-                ir_hash,
-                program_proto,
-                concurrent: false,
-            })
-            .await
-            .expect("register workflow version");
-
-        let worker_pool = crate::workers::InlineWorkerPool::new(HashMap::new());
-        let mut runloop = RunLoop::new(
-            worker_pool,
-            backend.clone(),
-            default_test_config(Uuid::new_v4()),
-        );
-        let instance_id = Uuid::new_v4();
-        queue.lock().expect("queue lock").push_back(QueuedInstance {
-            workflow_version_id,
-            schedule_id: None,
-            dag: None,
-            entry_node: action_exec.node_id,
-            state: Some(state),
-            action_results: HashMap::new(),
-            instance_id,
-            scheduled_at: None,
-        });
-
-        runloop
-            .run()
-            .await
-            .expect("runloop should continue after instance failure");
-        let instances_done = backend.instances_done();
-        assert_eq!(instances_done.len(), 1);
-        assert_eq!(instances_done[0].executor_id, instance_id);
-        let Value::Object(error_obj) = instances_done[0]
-            .error
-            .clone()
-            .expect("instance error payload")
-        else {
-            panic!("expected error payload object");
-        };
-        assert_eq!(
-            error_obj.get("type"),
-            Some(&Value::String("ExecutionError".to_string()))
-        );
-        let message = error_obj
-            .get("message")
-            .and_then(Value::as_str)
-            .expect("error message");
-        assert!(
-            message.contains("missing action result for"),
-            "expected missing action result error, got: {message}"
-        );
-    }
-
-    #[tokio::test]
-    async fn test_runloop_marks_instance_failed_with_dict_key_error() {
-        let source = r#"
-fn main(input: [], output: [result]):
-    payload = @tests.fixtures.test_actions.make_payload()
-    result = payload["missing"]
-    return result
-"#;
-        let program = parse_program(source.trim()).expect("parse program");
-        let program_proto = program.encode_to_vec();
-        let ir_hash = format!("{:x}", Sha256::digest(&program_proto));
-        let dag = Arc::new(convert_to_dag(&program).expect("convert to dag"));
-
-        let mut state = RunnerState::new(Some(Arc::clone(&dag)), None, None, false);
-        let entry_node = dag
-            .entry_node
-            .as_ref()
-            .expect("DAG entry node not found")
-            .clone();
-        let entry_exec = state
-            .queue_template_node(&entry_node, None)
-            .expect("queue entry node");
-
-        let queue = Arc::new(Mutex::new(VecDeque::new()));
-        let backend = MemoryBackend::with_queue(queue.clone());
-        let workflow_version_id = backend
-            .upsert_workflow_version(&WorkflowRegistration {
-                workflow_name: "test_dict_key_error".to_string(),
-                workflow_version: ir_hash.clone(),
-                ir_hash,
-                program_proto,
-                concurrent: false,
-            })
-            .await
-            .expect("register workflow version");
-
-        let mut actions: HashMap<String, ActionCallable> = HashMap::new();
-        actions.insert(
-            "make_payload".to_string(),
-            Arc::new(|_kwargs| {
-                Box::pin(async move {
-                    Ok(Value::Object(
-                        [("present".to_string(), Value::Number(1.into()))]
-                            .into_iter()
-                            .collect(),
-                    ))
-                })
-            }),
-        );
-        let worker_pool = crate::workers::InlineWorkerPool::new(actions);
-        let mut runloop = RunLoop::new(
-            worker_pool,
-            backend.clone(),
-            default_test_config(Uuid::new_v4()),
-        );
-        let instance_id = Uuid::new_v4();
-        queue.lock().expect("queue lock").push_back(QueuedInstance {
-            workflow_version_id,
-            schedule_id: None,
-            dag: None,
-            entry_node: entry_exec.node_id,
-            state: Some(state),
-            action_results: HashMap::new(),
-            instance_id,
-            scheduled_at: None,
-        });
-
-        runloop
-            .run()
-            .await
-            .expect("runloop should continue after instance failure");
-        let instances_done = backend.instances_done();
-        assert_eq!(instances_done.len(), 1);
-        assert_eq!(instances_done[0].executor_id, instance_id);
-        let Value::Object(error_obj) = instances_done[0]
-            .error
-            .clone()
-            .expect("instance error payload")
-        else {
-            panic!("expected error payload object");
-        };
-        let message = error_obj
-            .get("message")
-            .and_then(Value::as_str)
-            .expect("error message");
-        assert!(
-            message.contains("dict has no key"),
-            "expected dict key error, got: {message}"
-        );
-    }
-
-    #[tokio::test]
-    async fn test_runloop_marks_instance_failed_with_attribute_error() {
-        let source = r#"
-fn main(input: [], output: [result]):
-    payload = @tests.fixtures.test_actions.make_number()
-    result = payload.missing
-    return result
-"#;
-        let program = parse_program(source.trim()).expect("parse program");
-        let program_proto = program.encode_to_vec();
-        let ir_hash = format!("{:x}", Sha256::digest(&program_proto));
-        let dag = Arc::new(convert_to_dag(&program).expect("convert to dag"));
-
-        let mut state = RunnerState::new(Some(Arc::clone(&dag)), None, None, false);
-        let entry_node = dag
-            .entry_node
-            .as_ref()
-            .expect("DAG entry node not found")
-            .clone();
-        let entry_exec = state
-            .queue_template_node(&entry_node, None)
-            .expect("queue entry node");
-
-        let queue = Arc::new(Mutex::new(VecDeque::new()));
-        let backend = MemoryBackend::with_queue(queue.clone());
-        let workflow_version_id = backend
-            .upsert_workflow_version(&WorkflowRegistration {
-                workflow_name: "test_attribute_error".to_string(),
-                workflow_version: ir_hash.clone(),
-                ir_hash,
-                program_proto,
-                concurrent: false,
-            })
-            .await
-            .expect("register workflow version");
-
-        let mut actions: HashMap<String, ActionCallable> = HashMap::new();
-        actions.insert(
-            "make_number".to_string(),
-            Arc::new(|_kwargs| Box::pin(async move { Ok(Value::Number(7.into())) })),
-        );
-        let worker_pool = crate::workers::InlineWorkerPool::new(actions);
-        let mut runloop = RunLoop::new(
-            worker_pool,
-            backend.clone(),
-            default_test_config(Uuid::new_v4()),
-        );
-        let instance_id = Uuid::new_v4();
-        queue.lock().expect("queue lock").push_back(QueuedInstance {
-            workflow_version_id,
-            schedule_id: None,
-            dag: None,
-            entry_node: entry_exec.node_id,
-            state: Some(state),
-            action_results: HashMap::new(),
-            instance_id,
-            scheduled_at: None,
-        });
-
-        runloop
-            .run()
-            .await
-            .expect("runloop should continue after instance failure");
-        let instances_done = backend.instances_done();
-        assert_eq!(instances_done.len(), 1);
-        assert_eq!(instances_done[0].executor_id, instance_id);
-        let Value::Object(error_obj) = instances_done[0]
-            .error
-            .clone()
-            .expect("instance error payload")
-        else {
-            panic!("expected error payload object");
-        };
-        let message = error_obj
-            .get("message")
-            .and_then(Value::as_str)
-            .expect("error message");
-        assert!(
-            message.contains("attribute not found"),
-            "expected attribute error, got: {message}"
-        );
-    }
-
-    #[test]
-    fn test_lock_mismatches_ignores_expired_lock_with_matching_owner() {
-        let backend = MemoryBackend::new();
-        let worker_pool = crate::workers::InlineWorkerPool::new(HashMap::new());
-        let lock_uuid = Uuid::new_v4();
-        let runloop = RunLoop::new(worker_pool, backend, default_test_config(lock_uuid));
-
-        let instance_id = Uuid::new_v4();
-        let statuses = vec![InstanceLockStatus {
-            instance_id,
-            lock_uuid: Some(lock_uuid),
-            lock_expires_at: Some(Utc::now() - chrono::Duration::seconds(60)),
-        }];
-        assert!(
-            runloop.lock_mismatches(&statuses).is_empty(),
-            "matching lock UUID should not evict solely due to stale expiry"
-        );
-
-        let mismatched = vec![InstanceLockStatus {
-            instance_id,
-            lock_uuid: Some(Uuid::new_v4()),
-            lock_expires_at: Some(Utc::now() + chrono::Duration::seconds(60)),
-        }];
-        let evict_ids = runloop.lock_mismatches(&mismatched);
-        assert_eq!(evict_ids, HashSet::from([instance_id]));
-    }
-}
+mod tests;
diff --git a/crates/waymark/src/waymark_core/runloop/tests.rs b/crates/waymark/src/waymark_core/runloop/tests.rs
new file mode 100644
index 00000000..53d6c634
--- /dev/null
+++ b/crates/waymark/src/waymark_core/runloop/tests.rs
@@ -0,0 +1,964 @@
+use super::*;
+use std::collections::{HashMap, VecDeque};
+use std::sync::{Arc, Mutex};
+use std::time::Duration;
+
+use chrono::Utc;
+use prost::Message;
+use sha2::{Digest, Sha256};
+use waymark_backend_fault_injection::FaultInjectingBackend;
+use waymark_backend_memory::MemoryBackend;
+use waymark_core_backend::{ActionAttemptStatus, CoreBackend};
+use waymark_workflow_registry_backend::WorkflowRegistration;
+
+use crate::messages::ast as ir;
+use crate::workers::ActionCallable;
+
+use waymark_dag::convert_to_dag;
+use waymark_ir_parser::parse_program;
+use waymark_runner_state::NodeStatus;
+use waymark_runner_state::RunnerState;
+
+fn default_test_config(lock_uuid: Uuid) -> RunLoopSupervisorConfig {
+    RunLoopSupervisorConfig {
+        max_concurrent_instances: 25,
+        executor_shards: 1,
+        instance_done_batch_size: None,
+        poll_interval: Duration::from_millis(10),
+        persistence_interval: Duration::from_millis(10),
+        lock_uuid,
+        lock_ttl: Duration::from_secs(15),
+        lock_heartbeat: Duration::from_secs(5),
+        evict_sleep_threshold: Duration::from_secs(10),
+        skip_sleep: false,
+        active_instance_gauge: None,
+    }
+}
+
+#[tokio::test]
+async fn test_runloop_executes_actions() {
+    let source = r#"
+fn main(input: [x], output: [y]):
+    y = @tests.fixtures.test_actions.double(value=x)
+    return y
+"#;
+    let program = parse_program(source.trim()).expect("parse program");
+    let program_proto = program.encode_to_vec();
+    let ir_hash = format!("{:x}", Sha256::digest(&program_proto));
+    let dag = Arc::new(convert_to_dag(&program).expect("convert to dag"));
+
+    let mut state = RunnerState::new(Some(Arc::clone(&dag)), None, None, false);
+    let _ = state
+        .record_assignment(
+            vec!["x".to_string()],
+            &ir::Expr {
+                kind: Some(ir::expr::Kind::Literal(ir::Literal {
+                    value: Some(ir::literal::Value::IntValue(4)),
+                })),
+                span: None,
+            },
+            None,
+            Some("input x = 4".to_string()),
+        )
+        .expect("record assignment");
+    let entry_node = dag
+        .entry_node
+        .as_ref()
+        .expect("DAG entry node not found")
+        .clone();
+    let entry_exec = state
+        .queue_template_node(&entry_node, None)
+        .expect("queue entry node");
+
+    let queue = Arc::new(Mutex::new(VecDeque::new()));
+    let backend = MemoryBackend::with_queue(queue.clone());
+    let workflow_version_id = backend
+        .upsert_workflow_version(&WorkflowRegistration {
+            workflow_name: "test".to_string(),
+            workflow_version: ir_hash.clone(),
+            ir_hash,
+            program_proto,
+            concurrent: false,
+        })
+        .await
+        .expect("register workflow version");
+
+    let mut actions: HashMap<String, ActionCallable> = HashMap::new();
+    actions.insert(
+        "double".to_string(),
+        Arc::new(|kwargs| {
+            Box::pin(async move {
+                let value = kwargs
+                    .get("value")
+                    .and_then(|value| value.as_i64())
+                    .unwrap_or(0);
+                Ok(Value::Number((value * 2).into()))
+            })
+        }),
+    );
+    let worker_pool = crate::workers::InlineWorkerPool::new(actions);
+
+    let mut runloop = RunLoop::new(
+        worker_pool,
+        backend.clone(),
+        RunLoopSupervisorConfig {
+            max_concurrent_instances: 25,
+            executor_shards: 1,
+            instance_done_batch_size: None,
+            poll_interval: Duration::from_secs_f64(0.0),
+            persistence_interval: Duration::from_secs_f64(0.1),
+            lock_uuid: Uuid::new_v4(),
+            lock_ttl: Duration::from_secs(15),
+            lock_heartbeat: Duration::from_secs(5),
+            evict_sleep_threshold: Duration::from_secs(10),
+            skip_sleep: false,
+            active_instance_gauge: None,
+        },
+    );
+    queue.lock().expect("queue lock").push_back(QueuedInstance {
+        workflow_version_id,
+        schedule_id: None,
+        dag: None,
+        entry_node: entry_exec.node_id,
+        state: Some(state),
+        action_results: HashMap::new(),
+        instance_id: Uuid::new_v4(),
+        scheduled_at: None,
+    });
+
+    tracing::info!("1");
+
+    runloop.run().await.expect("runloop");
+
+    tracing::info!("1");
+
+    let instances_done = backend.instances_done();
+    assert_eq!(instances_done.len(), 1);
+    let done = &instances_done[0];
+    let output = done.result.clone().expect("instance result");
+    let Value::Object(map) = output else {
+        panic!("expected output object");
+    };
+    assert_eq!(map.get("y"), Some(&Value::Number(8.into())));
+}
+
+#[tokio::test]
+async fn test_runloop_times_out_action_and_persists_timestamps() {
+    let source = r#"
+fn main(input: [], output: [y]):
+    y = @tests.fixtures.test_actions.hang()[timeout: 1 s]
+    return y
+"#;
+    let program = parse_program(source.trim()).expect("parse program");
+    let program_proto = program.encode_to_vec();
+    let ir_hash = format!("{:x}", Sha256::digest(&program_proto));
+    let dag = Arc::new(convert_to_dag(&program).expect("convert to dag"));
+
+    let mut state = RunnerState::new(Some(Arc::clone(&dag)), None, None, false);
+    let entry_node = dag
+        .entry_node
+        .as_ref()
+        .expect("DAG entry node not found")
+        .clone();
+    let entry_exec = state
+        .queue_template_node(&entry_node, None)
+        .expect("queue entry node");
+
+    let queue = Arc::new(Mutex::new(VecDeque::new()));
+    let backend = MemoryBackend::with_queue(queue.clone());
+    let workflow_version_id = backend
+        .upsert_workflow_version(&WorkflowRegistration {
+            workflow_name: "test_timeout".to_string(),
+            workflow_version: ir_hash.clone(),
+            ir_hash,
+            program_proto,
+            concurrent: false,
+        })
+        .await
+        .expect("register workflow version");
+
+    let mut actions: HashMap<String, ActionCallable> = HashMap::new();
+    actions.insert(
+        "hang".to_string(),
+        Arc::new(|_kwargs| {
+            Box::pin(async move {
+                tokio::time::sleep(Duration::from_secs(5)).await;
+                Ok(Value::String("late".to_string()))
+            })
+        }),
+    );
+    let worker_pool = crate::workers::InlineWorkerPool::new(actions);
+
+    let mut runloop = RunLoop::new(
+        worker_pool,
+        backend.clone(),
+        RunLoopSupervisorConfig {
+            max_concurrent_instances: 25,
+            executor_shards: 1,
+            instance_done_batch_size: None,
+            poll_interval: Duration::from_secs_f64(0.0),
+            persistence_interval: Duration::from_secs_f64(0.05),
+            lock_uuid: Uuid::new_v4(),
+            lock_ttl: Duration::from_secs(15),
+            lock_heartbeat: Duration::from_secs(5),
+            evict_sleep_threshold: Duration::from_secs(10),
+            skip_sleep: false,
+            active_instance_gauge: None,
+        },
+    );
+    queue.lock().expect("queue lock").push_back(QueuedInstance {
+        workflow_version_id,
+        schedule_id: None,
+        dag: None,
+        entry_node: entry_exec.node_id,
+        state: Some(state),
+        action_results: HashMap::new(),
+        instance_id: Uuid::new_v4(),
+        scheduled_at: None,
+    });
+
+    runloop.run().await.expect("runloop");
+
+    let actions_done = backend.actions_done();
+    assert_eq!(actions_done.len(), 1);
+    let action_done = &actions_done[0];
+    assert_eq!(action_done.status, ActionAttemptStatus::TimedOut);
+    assert!(action_done.started_at.is_some());
+    assert!(action_done.completed_at.is_some());
+    assert!(action_done.duration_ms.is_some());
+
+    let execution_id = action_done.execution_id;
+    let graph_updates = backend.graph_updates();
+    let mut saw_running_snapshot = false;
+    let mut saw_failed_snapshot = false;
+    for update in graph_updates {
+        let Some(node) = update.nodes.get(&execution_id) else {
+            continue;
+        };
+        if node.status == NodeStatus::Running && node.started_at.is_some() {
+            saw_running_snapshot = true;
+        }
+        if node.status == NodeStatus::Failed
+            && node.started_at.is_some()
+            && node.completed_at.is_some()
+        {
+            saw_failed_snapshot = true;
+        }
+    }
+    assert!(saw_running_snapshot, "expected running graph snapshot");
+    assert!(saw_failed_snapshot, "expected failed graph snapshot");
+
+    let instances_done = backend.instances_done();
+    assert_eq!(instances_done.len(), 1);
+    assert!(instances_done[0].result.is_none());
+    let Value::Object(error_obj) = instances_done[0]
+        .error
+        .clone()
+        .expect("instance error payload")
+    else {
+        panic!("expected error payload object");
+    };
+    assert_eq!(
+        error_obj.get("type"),
+        Some(&Value::String("ActionTimeout".to_string()))
+    );
+}
+
+#[tokio::test]
+async fn test_runloop_marks_instance_failed_on_executor_error() {
+    let source = r#"
+fn main(input: [x], output: [y]):
+    y = @tests.fixtures.test_actions.double(value=x)
+    return y
+"#;
+    let program = parse_program(source.trim()).expect("parse program");
+    let program_proto = program.encode_to_vec();
+    let ir_hash = format!("{:x}", Sha256::digest(&program_proto));
+    let dag = Arc::new(convert_to_dag(&program).expect("convert to dag"));
+
+    // Intentionally omit input assignment so action kwarg resolution fails at runtime.
+    let mut state = RunnerState::new(Some(Arc::clone(&dag)), None, None, false);
+    let entry_node = dag
+        .entry_node
+        .as_ref()
+        .expect("DAG entry node not found")
+        .clone();
+    let entry_exec = state
+        .queue_template_node(&entry_node, None)
+        .expect("queue entry node");
+
+    let queue = Arc::new(Mutex::new(VecDeque::new()));
+    let backend = MemoryBackend::with_queue(queue.clone());
+    let workflow_version_id = backend
+        .upsert_workflow_version(&WorkflowRegistration {
+            workflow_name: "test".to_string(),
+            workflow_version: ir_hash.clone(),
+            ir_hash,
+            program_proto,
+            concurrent: false,
+        })
+        .await
+        .expect("register workflow version");
+
+    let worker_pool = crate::workers::InlineWorkerPool::new(HashMap::new());
+    let mut runloop = RunLoop::new(
+        worker_pool,
+        backend.clone(),
+        RunLoopSupervisorConfig {
+            max_concurrent_instances: 25,
+            executor_shards: 1,
+            instance_done_batch_size: None,
+            poll_interval: Duration::from_secs_f64(0.0),
+            persistence_interval: Duration::from_secs_f64(0.1),
+            lock_uuid: Uuid::new_v4(),
+            lock_ttl: Duration::from_secs(15),
+            lock_heartbeat: Duration::from_secs(5),
+            evict_sleep_threshold: Duration::from_secs(10),
+            skip_sleep: false,
+            active_instance_gauge: None,
+        },
+    );
+    let instance_id = Uuid::new_v4();
+    queue.lock().expect("queue lock").push_back(QueuedInstance {
+        workflow_version_id,
+        schedule_id: None,
+        dag: None,
+        entry_node: entry_exec.node_id,
+        state: Some(state),
+        action_results: HashMap::new(),
+        instance_id,
+        scheduled_at: None,
+    });
+
+    runloop
+        .run()
+        .await
+        .expect("runloop should continue after instance failure");
+    let instances_done = backend.instances_done();
+    assert_eq!(instances_done.len(), 1);
+
+    let done = &instances_done[0];
+    assert_eq!(done.executor_id, instance_id);
+    assert!(done.result.is_none());
+    let error = done.error.as_ref().expect("instance error");
+    let Value::Object(error_obj) = error else {
+        panic!("expected error payload object");
+    };
+    assert_eq!(
+        error_obj.get("type"),
+        Some(&Value::String("ExecutionError".to_string()))
+    );
+    let message = error_obj
+        .get("message")
+        .and_then(Value::as_str)
+        .expect("error message");
+    assert!(message.contains("variable not found: x"));
+}
+
+#[tokio::test]
+async fn test_runloop_executes_for_loop_action_assignments() {
+    let source = r#"
+fn main(input: [limit], output: [result]):
+    current = 0
+    iterations = 0
+    for _ in range(limit):
+        current = @tests.fixtures.test_actions.increment(value=current)
+        iterations = iterations + 1
+    result = @tests.fixtures.test_actions.pack(limit=limit, final=current, iterations=iterations)
+    return result
+"#;
+    let program = parse_program(source.trim()).expect("parse program");
+    let program_proto = program.encode_to_vec();
+    let ir_hash = format!("{:x}", Sha256::digest(&program_proto));
+    let dag = Arc::new(convert_to_dag(&program).expect("convert to dag"));
+
+    let mut state = RunnerState::new(Some(Arc::clone(&dag)), None, None, false);
+    let _ = state
+        .record_assignment(
+            vec!["limit".to_string()],
+            &ir::Expr {
+                kind: Some(ir::expr::Kind::Literal(ir::Literal {
+                    value: Some(ir::literal::Value::IntValue(4)),
+                })),
+                span: None,
+            },
+            None,
+            Some("input limit = 4".to_string()),
+        )
+        .expect("record assignment");
+    let entry_node = dag
+        .entry_node
+        .as_ref()
+        .expect("DAG entry node not found")
+        .clone();
+    let entry_exec = state
+        .queue_template_node(&entry_node, None)
+        .expect("queue entry node");
+
+    let queue = Arc::new(Mutex::new(VecDeque::new()));
+    let backend = MemoryBackend::with_queue(queue.clone());
+    let workflow_version_id = backend
+        .upsert_workflow_version(&WorkflowRegistration {
+            workflow_name: "test_loop_actions".to_string(),
+            workflow_version: ir_hash.clone(),
+            ir_hash,
+            program_proto,
+            concurrent: false,
+        })
+        .await
+        .expect("register workflow version");
+
+    let mut actions: HashMap<String, ActionCallable> = HashMap::new();
+    let increment_inputs = Arc::new(Mutex::new(Vec::new()));
+    let increment_inputs_clone = Arc::clone(&increment_inputs);
+    actions.insert(
+        "increment".to_string(),
+        Arc::new(move |kwargs| {
+            let increment_inputs = Arc::clone(&increment_inputs_clone);
+            Box::pin(async move {
+                let value = kwargs
+                    .get("value")
+                    .and_then(|value| value.as_i64())
+                    .unwrap_or(0);
+                increment_inputs
+                    .lock()
+                    .expect("increment inputs lock")
+                    .push(value);
+                Ok(Value::Number((value + 1).into()))
+            })
+        }),
+    );
+    actions.insert(
+        "pack".to_string(),
+        Arc::new(|kwargs| {
+            Box::pin(async move {
+                let limit = kwargs.get("limit").cloned().unwrap_or(Value::Null);
+                let final_value = kwargs.get("final").cloned().unwrap_or(Value::Null);
+                let iterations = kwargs.get("iterations").cloned().unwrap_or(Value::Null);
+                Ok(Value::Object(
+                    [
+                        ("limit".to_string(), limit),
+                        ("final".to_string(), final_value),
+                        ("iterations".to_string(), iterations),
+                    ]
+                    .into_iter()
+                    .collect(),
+                ))
+            })
+        }),
+    );
+    let worker_pool = crate::workers::InlineWorkerPool::new(actions);
+
+    let mut runloop = RunLoop::new(
+        worker_pool,
+        backend.clone(),
+        RunLoopSupervisorConfig {
+            max_concurrent_instances: 25,
+            executor_shards: 1,
+            instance_done_batch_size: None,
+            poll_interval: Duration::from_secs_f64(0.0),
+            persistence_interval: Duration::from_secs_f64(0.1),
+            lock_uuid: Uuid::new_v4(),
+            lock_ttl: Duration::from_secs(15),
+            lock_heartbeat: Duration::from_secs(5),
+            evict_sleep_threshold: Duration::from_secs(10),
+            skip_sleep: false,
+            active_instance_gauge: None,
+        },
+    );
+    queue.lock().expect("queue lock").push_back(QueuedInstance {
+        workflow_version_id,
+        schedule_id: None,
+        dag: None,
+        entry_node: entry_exec.node_id,
+        state: Some(state),
+        action_results: HashMap::new(),
+        instance_id: Uuid::new_v4(),
+        scheduled_at: None,
+    });
+
+    runloop.run().await.expect("runloop");
+    let instances_done = backend.instances_done();
+    assert_eq!(instances_done.len(), 1);
+    let done = &instances_done[0];
+    let output = done.result.clone().expect("instance result");
+    let Value::Object(map) = output else {
+        panic!("expected output object");
+    };
+    let Value::Object(result_map) = map
+        .get("result")
+        .cloned()
+        .expect("result payload should include result")
+    else {
+        panic!("expected nested result object");
+    };
+    assert_eq!(
+        *increment_inputs.lock().expect("increment inputs lock"),
+        vec![0, 1, 2, 3]
+    );
+    assert_eq!(result_map.get("limit"), Some(&Value::Number(4.into())));
+    assert_eq!(result_map.get("final"), Some(&Value::Number(4.into())));
+    assert_eq!(result_map.get("iterations"), Some(&Value::Number(4.into())));
+}
+
+#[tokio::test]
+async fn test_instance_poller_send_unblocks_on_stop_notification() { // send_with_stop must return false once the token is cancelled while the channel is full
+    let (instance_tx, mut instance_rx) = mpsc::channel::<InstanceMessage>(1); // capacity 1: the seed message below fills the channel
+    instance_tx
+        .send(InstanceMessage::Batch {
+            instances: Vec::new(),
+        })
+        .await
+        .expect("seed channel");
+
+    let shutdown_token = tokio_util::sync::CancellationToken::new();
+    let send_task = tokio::spawn({ // the second send runs on its own task so this test body can cancel it
+        let instance_tx = instance_tx.clone();
+        let shutdown_token = shutdown_token.clone();
+        async move {
+            send_with_stop(
+                &instance_tx,
+                InstanceMessage::Batch {
+                    instances: Vec::new(),
+                },
+                shutdown_token.cancelled(),
+                "instance message",
+            )
+            .await
+        }
+    });
+
+    tokio::time::sleep(Duration::from_millis(20)).await; // leave time for the spawned send to start before cancelling
+    shutdown_token.cancel();
+    let sent = tokio::time::timeout(Duration::from_millis(300), send_task) // bounded wait: fail instead of hanging if the send never returns
+        .await
+        .expect("send task should complete")
+        .expect("send task should not panic");
+    assert!(!sent, "send should abort when stop is notified");
+
+    let _ = instance_rx.recv().await; // receive one queued message; the result is deliberately ignored
+}
+
+#[tokio::test]
+async fn test_instance_poller_send_succeeds_when_channel_has_capacity() { // with free capacity and no cancellation, send_with_stop returns true and the message arrives
+    let (instance_tx, mut instance_rx) = mpsc::channel::<InstanceMessage>(1); // nothing is seeded, so the single slot is free
+    let shutdown_token = tokio_util::sync::CancellationToken::new(); // never cancelled in this test
+    let sent = send_with_stop(
+        &instance_tx,
+        InstanceMessage::Batch {
+            instances: Vec::new(),
+        },
+        shutdown_token.cancelled(),
+        "instance message",
+    )
+    .await;
+    assert!(sent);
+
+    let received = instance_rx.recv().await.expect("queued message");
+    match received { // the delivered message must be the empty batch that was sent
+        InstanceMessage::Batch { instances } => assert!(instances.is_empty()),
+        InstanceMessage::Error(err) => panic!("unexpected error message: {err}"),
+    }
+}
+
+#[tokio::test]
+async fn test_runloop_supervisor_restarts_on_depth_limit_backend_errors() { // supervisor must keep polling (>= 2 calls) against a fault-injecting backend and still stop on cancellation
+    let queue = Arc::new(Mutex::new(VecDeque::new()));
+    let backend =
+        FaultInjectingBackend::with_depth_limit_poll_failures(MemoryBackend::with_queue(queue)); // NOTE(review): presumably makes instance polling fail with a depth-limit error - confirm in FaultInjectingBackend
+    let worker_pool = crate::workers::InlineWorkerPool::new(HashMap::new()); // no actions registered
+    let shutdown_token = tokio_util::sync::CancellationToken::new();
+
+    let supervisor = tokio::spawn(runloop_supervisor(
+        backend.clone(),
+        worker_pool,
+        default_test_config(Uuid::new_v4()),
+        shutdown_token.clone(),
+    ));
+
+    tokio::time::sleep(Duration::from_millis(750)).await; // let the supervisor run for a while before requesting shutdown
+    shutdown_token.cancel();
+    tokio::time::timeout(Duration::from_secs(2), supervisor) // bounded wait: the supervisor task must finish within 2s of cancellation
+        .await
+        .expect("supervisor should stop")
+        .expect("supervisor task should not panic");
+
+    assert!(
+        backend.get_queued_instances_calls() >= 2, // more than one recorded poll call is taken as evidence of a restart
+        "expected multiple polling attempts while supervisor restarts"
+    );
+}
+
+#[tokio::test]
+async fn test_runloop_supervisor_reproduces_no_progress_with_continued_queue_growth() { // with the fault-injecting backend, 20 queued instances stay queued and none complete
+    let queue = Arc::new(Mutex::new(VecDeque::new()));
+    let backend =
+        FaultInjectingBackend::with_depth_limit_poll_failures(MemoryBackend::with_queue(queue)); // NOTE(review): presumably makes instance polling fail - confirm in FaultInjectingBackend
+    let worker_pool = crate::workers::InlineWorkerPool::new(HashMap::new()); // no actions registered
+    let shutdown_token = tokio_util::sync::CancellationToken::new();
+
+    let supervisor = tokio::spawn(runloop_supervisor(
+        backend.clone(),
+        worker_pool,
+        default_test_config(Uuid::new_v4()),
+        shutdown_token.clone(),
+    ));
+
+    for _ in 0..20 { // enqueue 20 synthetic instances, one queue_instances call each, while the supervisor is running
+        backend
+            .queue_instances(&[QueuedInstance {
+                workflow_version_id: Uuid::new_v4(), // random ids: no workflow version is registered in this test
+                schedule_id: None,
+                dag: None,
+                entry_node: Uuid::new_v4(),
+                state: None,
+                action_results: HashMap::new(),
+                instance_id: Uuid::new_v4(),
+                scheduled_at: None,
+            }])
+            .await
+            .expect("queue synthetic instance");
+    }
+
+    tokio::time::sleep(Duration::from_millis(500)).await; // let the supervisor attempt polling before requesting shutdown
+    shutdown_token.cancel();
+    tokio::time::timeout(Duration::from_secs(2), supervisor) // bounded wait: the supervisor task must finish within 2s of cancellation
+        .await
+        .expect("supervisor should stop")
+        .expect("supervisor task should not panic");
+
+    assert!(
+        backend.get_queued_instances_calls() >= 1,
+        "expected polling attempts during stall simulation"
+    );
+    assert!(
+        backend.queue_len() >= 20, // all 20 synthetic instances are still in the queue
+        "queued work should continue to grow when poller cannot read instances"
+    );
+    assert_eq!(
+        backend.instances_done_len(),
+        0,
+        "no instances should complete while poller is failing"
+    );
+}
+
+#[tokio::test]
+async fn test_runloop_marks_instance_failed_when_rehydrated_state_is_missing_action_result() { // a queued state with a Completed action node but empty action_results must end as an ExecutionError
+    let source = r#"
+fn main(input: [x], output: [y]):
+    y = @tests.fixtures.test_actions.double(value=x)
+    return y
+"#;
+    let program = parse_program(source.trim()).expect("parse program");
+    let program_proto = program.encode_to_vec();
+    let ir_hash = format!("{:x}", Sha256::digest(&program_proto)); // hex SHA-256 of the encoded program, reused below as the workflow version
+    let dag = Arc::new(convert_to_dag(&program).expect("convert to dag"));
+
+    let mut state = RunnerState::new(Some(Arc::clone(&dag)), None, None, false);
+    let _ = state // record the workflow input x = 4; the returned value is not needed
+        .record_assignment(
+            vec!["x".to_string()],
+            &ir::Expr {
+                kind: Some(ir::expr::Kind::Literal(ir::Literal {
+                    value: Some(ir::literal::Value::IntValue(4)),
+                })),
+                span: None,
+            },
+            None,
+            Some("input x = 4".to_string()),
+        )
+        .expect("record assignment");
+    let template_entry_node = dag
+        .entry_node
+        .as_ref()
+        .expect("DAG entry node not found")
+        .clone();
+    let entry_exec = state
+        .queue_template_node(&template_entry_node, None)
+        .expect("queue entry node");
+    let mut bootstrap_executor = RunnerExecutor::new(Arc::clone(&dag), state, HashMap::new(), None); // throwaway executor used only to build the state that gets queued later
+    let bootstrap_step = bootstrap_executor
+        .increment(&[entry_exec.node_id])
+        .expect("bootstrap increment should materialize action node");
+    let action_exec = bootstrap_step
+        .actions
+        .first()
+        .expect("bootstrap should queue one action call")
+        .clone();
+
+    // Simulate a reclaimed instance whose graph says the action execution node
+    // has finished, but action_results payload was lost.
+    bootstrap_executor
+        .state_mut()
+        .mark_completed(action_exec.node_id)
+        .expect("mark action completed");
+    bootstrap_executor.state_mut().ready_queue.clear(); // NOTE(review): presumably so the stored state carries no ready nodes - confirm
+    assert!( // sanity check on the fixture itself before it is handed to the run loop
+        bootstrap_executor
+            .state()
+            .nodes
+            .get(&action_exec.node_id)
+            .is_some_and(|node| node.is_action_call() && node.status == NodeStatus::Completed),
+        "expected completed action execution node"
+    );
+    let state = bootstrap_executor.state().clone(); // snapshot of the doctored state; shadows the earlier `state`
+
+    let queue = Arc::new(Mutex::new(VecDeque::new()));
+    let backend = MemoryBackend::with_queue(queue.clone());
+    let workflow_version_id = backend
+        .upsert_workflow_version(&WorkflowRegistration {
+            workflow_name: "test_missing_action_result".to_string(),
+            workflow_version: ir_hash.clone(),
+            ir_hash,
+            program_proto,
+            concurrent: false,
+        })
+        .await
+        .expect("register workflow version");
+
+    let worker_pool = crate::workers::InlineWorkerPool::new(HashMap::new()); // no actions registered
+    let mut runloop = RunLoop::new(
+        worker_pool,
+        backend.clone(),
+        default_test_config(Uuid::new_v4()),
+    );
+    let instance_id = Uuid::new_v4();
+    queue.lock().expect("queue lock").push_back(QueuedInstance {
+        workflow_version_id,
+        schedule_id: None,
+        dag: None,
+        entry_node: action_exec.node_id, // the action execution node id, not entry_exec.node_id
+        state: Some(state),
+        action_results: HashMap::new(), // deliberately empty: this is the lost payload being simulated
+        instance_id,
+        scheduled_at: None,
+    });
+
+    runloop
+        .run()
+        .await
+        .expect("runloop should continue after instance failure"); // run() itself must return Ok even though the instance fails
+    let instances_done = backend.instances_done();
+    assert_eq!(instances_done.len(), 1);
+    assert_eq!(instances_done[0].executor_id, instance_id);
+    let Value::Object(error_obj) = instances_done[0]
+        .error
+        .clone()
+        .expect("instance error payload")
+    else {
+        panic!("expected error payload object");
+    };
+    assert_eq!(
+        error_obj.get("type"),
+        Some(&Value::String("ExecutionError".to_string()))
+    );
+    let message = error_obj
+        .get("message")
+        .and_then(Value::as_str)
+        .expect("error message");
+    assert!(
+        message.contains("missing action result for"), // substring match only; the rest of the message is not pinned
+        "expected missing action result error, got: {message}"
+    );
+}
+
+#[tokio::test]
+async fn test_runloop_marks_instance_failed_with_dict_key_error() { // indexing an action's dict result with an absent key must finish the instance with an error payload
+    let source = r#"
+fn main(input: [], output: [result]):
+    payload = @tests.fixtures.test_actions.make_payload()
+    result = payload["missing"]
+    return result
+"#;
+    let program = parse_program(source.trim()).expect("parse program");
+    let program_proto = program.encode_to_vec();
+    let ir_hash = format!("{:x}", Sha256::digest(&program_proto)); // hex SHA-256 of the encoded program, reused below as the workflow version
+    let dag = Arc::new(convert_to_dag(&program).expect("convert to dag"));
+
+    let mut state = RunnerState::new(Some(Arc::clone(&dag)), None, None, false);
+    let entry_node = dag
+        .entry_node
+        .as_ref()
+        .expect("DAG entry node not found")
+        .clone();
+    let entry_exec = state
+        .queue_template_node(&entry_node, None)
+        .expect("queue entry node");
+
+    let queue = Arc::new(Mutex::new(VecDeque::new()));
+    let backend = MemoryBackend::with_queue(queue.clone());
+    let workflow_version_id = backend
+        .upsert_workflow_version(&WorkflowRegistration {
+            workflow_name: "test_dict_key_error".to_string(),
+            workflow_version: ir_hash.clone(),
+            ir_hash,
+            program_proto,
+            concurrent: false,
+        })
+        .await
+        .expect("register workflow version");
+
+    let mut actions: HashMap<String, ActionCallable> = HashMap::new();
+    actions.insert(
+        "make_payload".to_string(),
+        Arc::new(|_kwargs| { // stub action: always returns {"present": 1}, so the key "missing" is absent
+            Box::pin(async move {
+                Ok(Value::Object(
+                    [("present".to_string(), Value::Number(1.into()))]
+                        .into_iter()
+                        .collect(),
+                ))
+            })
+        }),
+    );
+    let worker_pool = crate::workers::InlineWorkerPool::new(actions);
+    let mut runloop = RunLoop::new(
+        worker_pool,
+        backend.clone(),
+        default_test_config(Uuid::new_v4()),
+    );
+    let instance_id = Uuid::new_v4();
+    queue.lock().expect("queue lock").push_back(QueuedInstance {
+        workflow_version_id,
+        schedule_id: None,
+        dag: None,
+        entry_node: entry_exec.node_id,
+        state: Some(state),
+        action_results: HashMap::new(),
+        instance_id,
+        scheduled_at: None,
+    });
+
+    runloop
+        .run()
+        .await
+        .expect("runloop should continue after instance failure"); // run() itself must return Ok even though the instance fails
+    let instances_done = backend.instances_done();
+    assert_eq!(instances_done.len(), 1);
+    assert_eq!(instances_done[0].executor_id, instance_id);
+    let Value::Object(error_obj) = instances_done[0]
+        .error
+        .clone()
+        .expect("instance error payload")
+    else {
+        panic!("expected error payload object");
+    };
+    let message = error_obj
+        .get("message")
+        .and_then(Value::as_str)
+        .expect("error message");
+    assert!(
+        message.contains("dict has no key"), // substring match only; the error "type" field is not checked here
+        "expected dict key error, got: {message}"
+    );
+}
+
+#[tokio::test]
+async fn test_runloop_marks_instance_failed_with_attribute_error() { // attribute access on an action's numeric result must finish the instance with an error payload
+    let source = r#"
+fn main(input: [], output: [result]):
+    payload = @tests.fixtures.test_actions.make_number()
+    result = payload.missing
+    return result
+"#;
+    let program = parse_program(source.trim()).expect("parse program");
+    let program_proto = program.encode_to_vec();
+    let ir_hash = format!("{:x}", Sha256::digest(&program_proto)); // hex SHA-256 of the encoded program, reused below as the workflow version
+    let dag = Arc::new(convert_to_dag(&program).expect("convert to dag"));
+
+    let mut state = RunnerState::new(Some(Arc::clone(&dag)), None, None, false);
+    let entry_node = dag
+        .entry_node
+        .as_ref()
+        .expect("DAG entry node not found")
+        .clone();
+    let entry_exec = state
+        .queue_template_node(&entry_node, None)
+        .expect("queue entry node");
+
+    let queue = Arc::new(Mutex::new(VecDeque::new()));
+    let backend = MemoryBackend::with_queue(queue.clone());
+    let workflow_version_id = backend
+        .upsert_workflow_version(&WorkflowRegistration {
+            workflow_name: "test_attribute_error".to_string(),
+            workflow_version: ir_hash.clone(),
+            ir_hash,
+            program_proto,
+            concurrent: false,
+        })
+        .await
+        .expect("register workflow version");
+
+    let mut actions: HashMap<String, ActionCallable> = HashMap::new();
+    actions.insert(
+        "make_number".to_string(),
+        Arc::new(|_kwargs| Box::pin(async move { Ok(Value::Number(7.into())) })), // stub action: always returns the number 7
+    );
+    let worker_pool = crate::workers::InlineWorkerPool::new(actions);
+    let mut runloop = RunLoop::new(
+        worker_pool,
+        backend.clone(),
+        default_test_config(Uuid::new_v4()),
+    );
+    let instance_id = Uuid::new_v4();
+    queue.lock().expect("queue lock").push_back(QueuedInstance {
+        workflow_version_id,
+        schedule_id: None,
+        dag: None,
+        entry_node: entry_exec.node_id,
+        state: Some(state),
+        action_results: HashMap::new(),
+        instance_id,
+        scheduled_at: None,
+    });
+
+    runloop
+        .run()
+        .await
+        .expect("runloop should continue after instance failure"); // run() itself must return Ok even though the instance fails
+    let instances_done = backend.instances_done();
+    assert_eq!(instances_done.len(), 1);
+    assert_eq!(instances_done[0].executor_id, instance_id);
+    let Value::Object(error_obj) = instances_done[0]
+        .error
+        .clone()
+        .expect("instance error payload")
+    else {
+        panic!("expected error payload object");
+    };
+    let message = error_obj
+        .get("message")
+        .and_then(Value::as_str)
+        .expect("error message");
+    assert!(
+        message.contains("attribute not found"), // substring match only; the error "type" field is not checked here
+        "expected attribute error, got: {message}"
+    );
+}
+
+#[test]
+fn test_lock_mismatches_ignores_expired_lock_with_matching_owner() { // lock_mismatches must flag a different lock UUID, not an expired lock that carries the matching UUID
+    let backend = MemoryBackend::new();
+    let worker_pool = crate::workers::InlineWorkerPool::new(HashMap::new());
+    let lock_uuid = Uuid::new_v4();
+    let runloop = RunLoop::new(worker_pool, backend, default_test_config(lock_uuid)); // NOTE(review): default_test_config presumably stores this value as the run loop's lock_uuid - confirm
+
+    let instance_id = Uuid::new_v4();
+    let statuses = vec![InstanceLockStatus { // case 1: same UUID as the config, expiry 60 seconds in the past
+        instance_id,
+        lock_uuid: Some(lock_uuid),
+        lock_expires_at: Some(Utc::now() - chrono::Duration::seconds(60)),
+    }];
+    assert!(
+        runloop.lock_mismatches(&statuses).is_empty(),
+        "matching lock UUID should not evict solely due to stale expiry"
+    );
+
+    let mismatched = vec![InstanceLockStatus { // case 2: a fresh random UUID, expiry 60 seconds in the future
+        instance_id,
+        lock_uuid: Some(Uuid::new_v4()),
+        lock_expires_at: Some(Utc::now() + chrono::Duration::seconds(60)),
+    }];
+    let evict_ids = runloop.lock_mismatches(&mismatched);
+    assert_eq!(evict_ids, HashSet::from([instance_id])); // exactly this instance is reported
+}
diff --git a/crates/waymark/src/webapp/server.rs b/crates/waymark/src/webapp/server.rs
index 43818ca3..9afda350 100644
--- a/crates/waymark/src/webapp/server.rs
+++ b/crates/waymark/src/webapp/server.rs
@@ -16,12 +16,15 @@ use tera::{Context as TeraContext, Tera};
 use tokio::net::TcpListener;
 use tracing::{error, info};
 use uuid::Uuid;
+use waymark_webapp_backend::WebappBackend;
+use waymark_webapp_core::WorkerStatus;
 
-use super::types::{
+use waymark_webapp_core::{
     ActionLogsResponse, FilterValuesResponse, HealthResponse, InstanceExportInfo, TimelineEntry,
-    WebappConfig, WorkflowInstanceExport, WorkflowRunDataResponse,
+    WorkflowInstanceExport, WorkflowRunDataResponse,
 };
-use crate::backends::WebappBackend;
+
+use crate::WebappConfig;
 
 // Embed templates at compile time
 const TEMPLATE_BASE: &str = include_str!("../../templates/base.html");
@@ -367,7 +370,7 @@ async fn get_action_logs(
     let logs: Vec<_> = timeline
         .into_iter()
         .filter(|e| e.action_id == action_id_str)
-        .map(|e| super::types::ActionLogEntry {
+        .map(|e| waymark_webapp_core::ActionLogEntry {
             action_id: e.action_id,
             action_name: e.action_name,
             module_name: e.module_name,
@@ -736,7 +739,7 @@ struct InvocationRow {
 
 fn render_invocations_page(
     templates: &Tera,
-    instances: &[super::types::InstanceSummary],
+    instances: &[waymark_webapp_core::InstanceSummary],
     current_page: i64,
     total_pages: i64,
     search_query: Option<String>,
@@ -812,8 +815,8 @@ struct GraphNode {
 
 fn render_instance_detail_page(
     templates: &Tera,
-    instance: &super::types::InstanceDetail,
-    graph: Option<super::types::ExecutionGraphView>,
+    instance: &waymark_webapp_core::InstanceDetail,
+    graph: Option<waymark_webapp_core::ExecutionGraphView>,
 ) -> String {
     let graph_data = graph
         .as_ref()
@@ -843,8 +846,8 @@ fn render_instance_detail_page(
     render_template(templates, "workflow_run.html", &context)
 }
 
-fn build_graph_data(graph: &super::types::ExecutionGraphView) -> GraphData {
-    let action_nodes: Vec<&super::types::ExecutionNodeView> = graph
+fn build_graph_data(graph: &waymark_webapp_core::ExecutionGraphView) -> GraphData {
+    let action_nodes: Vec<&waymark_webapp_core::ExecutionNodeView> = graph
         .nodes
         .iter()
         .filter(|node| is_action_node(&node.node_type))
@@ -1055,7 +1058,7 @@ struct ScheduleRow {
 
 fn render_schedules_page(
     templates: &Tera,
-    schedules: &[super::types::ScheduleSummary],
+    schedules: &[waymark_webapp_core::ScheduleSummary],
     current_page: i64,
     total_pages: i64,
     total_count: i64,
@@ -1136,8 +1139,8 @@ struct ScheduleInvocationRow {
 
 fn render_schedule_detail_page(
     templates: &Tera,
-    schedule: &super::types::ScheduleDetail,
-    invocations: &[super::types::ScheduleInvocationSummary],
+    schedule: &waymark_webapp_core::ScheduleDetail,
+    invocations: &[waymark_webapp_core::ScheduleInvocationSummary],
     current_page: i64,
     total_pages: i64,
 ) -> String {
@@ -1234,11 +1237,7 @@ struct WorkerInstanceRowView {
     updated_at: String,
 }
 
-fn render_workers_page(
-    templates: &Tera,
-    statuses: &[super::WorkerStatus],
-    window_minutes: i64,
-) -> String {
+fn render_workers_page(templates: &Tera, statuses: &[WorkerStatus], window_minutes: i64) -> String {
     use crate::pool_status::PoolTimeSeries;
 
     // Build action rows
@@ -1373,13 +1372,15 @@ mod tests {
     use sqlx::postgres::PgPoolOptions;
     use tower::util::ServiceExt;
     use uuid::Uuid;
+    use waymark_backend_memory::MemoryBackend;
+    use waymark_backend_postgres::PostgresBackend;
+    use waymark_webapp_backend::WebappBackend;
+    use waymark_worker_status_backend::{WorkerStatusBackend as _, WorkerStatusUpdate};
 
     use super::{WebappState, build_graph_data, build_router, init_templates};
-    use crate::backends::{
-        MemoryBackend, PostgresBackend, WebappBackend, WorkerStatusBackend, WorkerStatusUpdate,
-    };
-    use crate::test_support::postgres_setup;
-    use crate::webapp::{ExecutionEdgeView, ExecutionGraphView, ExecutionNodeView};
+
+    use waymark_test_support::postgres_setup;
+    use waymark_webapp_core::{ExecutionEdgeView, ExecutionGraphView, ExecutionNodeView};
 
     #[test]
     fn build_graph_data_projects_internal_nodes_to_action_dependencies() {
diff --git a/crates/waymark/src/webapp/types.rs b/crates/waymark/src/webapp/types.rs
index 7805c428..0b2ec6e8 100644
--- a/crates/waymark/src/webapp/types.rs
+++ b/crates/waymark/src/webapp/types.rs
@@ -1,8 +1,4 @@
-//! Shared types for the webapp.
-
-use chrono::{DateTime, Utc};
-use serde::{Deserialize, Serialize};
-use uuid::Uuid;
+//! Shared types for the webapp server.
 
 /// Configuration for the webapp server.
 #[derive(Debug, Clone)]
@@ -55,245 +51,3 @@ impl WebappConfig {
         format!("{}:{}", self.host, self.port)
     }
 }
-
-/// Instance status.
-#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
-#[serde(rename_all = "lowercase")]
-pub enum InstanceStatus {
-    Queued,
-    Running,
-    Completed,
-    Failed,
-}
-
-impl std::fmt::Display for InstanceStatus {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        match self {
-            Self::Queued => write!(f, "queued"),
-            Self::Running => write!(f, "running"),
-            Self::Completed => write!(f, "completed"),
-            Self::Failed => write!(f, "failed"),
-        }
-    }
-}
-
-/// Summary of a workflow instance for listing.
-#[derive(Debug, Clone, Serialize)]
-pub struct InstanceSummary {
-    pub id: Uuid,
-    pub entry_node: Uuid,
-    pub created_at: DateTime<Utc>,
-    pub status: InstanceStatus,
-    pub workflow_name: Option<String>,
-    pub input_preview: String,
-}
-
-/// Full details of a workflow instance.
-#[derive(Debug, Clone, Serialize)]
-pub struct InstanceDetail {
-    pub id: Uuid,
-    pub entry_node: Uuid,
-    pub created_at: DateTime<Utc>,
-    pub status: InstanceStatus,
-    pub workflow_name: Option<String>,
-    pub input_payload: String,
-    pub result_payload: String,
-    pub error_payload: Option<String>,
-}
-
-/// Node in the execution graph for display.
-#[derive(Debug, Clone, Serialize)]
-pub struct ExecutionNodeView {
-    pub id: String,
-    pub node_type: String,
-    pub label: String,
-    pub status: String,
-    pub action_name: Option<String>,
-    pub module_name: Option<String>,
-}
-
-/// Edge in the execution graph for display.
-#[derive(Debug, Clone, Serialize)]
-pub struct ExecutionEdgeView {
-    pub source: String,
-    pub target: String,
-    pub edge_type: String,
-}
-
-/// Execution graph data for rendering.
-#[derive(Debug, Clone, Serialize)]
-pub struct ExecutionGraphView {
-    pub nodes: Vec<ExecutionNodeView>,
-    pub edges: Vec<ExecutionEdgeView>,
-}
-
-/// Timeline entry for an action execution.
-#[derive(Debug, Clone, Serialize)]
-pub struct TimelineEntry {
-    pub action_id: String,
-    pub action_name: String,
-    pub module_name: Option<String>,
-    pub status: String,
-    pub attempt_number: i32,
-    pub dispatched_at: Option<String>,
-    pub completed_at: Option<String>,
-    pub duration_ms: Option<i64>,
-    pub request_preview: String,
-    pub response_preview: String,
-    pub error: Option<String>,
-}
-
-/// Action log entry with full details.
-#[derive(Debug, Clone, Serialize)]
-pub struct ActionLogEntry {
-    pub action_id: String,
-    pub action_name: String,
-    pub module_name: Option<String>,
-    pub status: String,
-    pub attempt_number: i32,
-    pub dispatched_at: Option<String>,
-    pub completed_at: Option<String>,
-    pub duration_ms: Option<i64>,
-    pub request: String,
-    pub response: String,
-    pub error: Option<String>,
-}
-
-/// Response for the workflow run data API.
-#[derive(Debug, Serialize)]
-pub struct WorkflowRunDataResponse {
-    pub nodes: Vec<ExecutionNodeView>,
-    pub timeline: Vec<TimelineEntry>,
-    pub page: i64,
-    pub per_page: i64,
-    pub total: i64,
-    pub has_more: bool,
-}
-
-/// Response for action logs API.
-#[derive(Debug, Serialize)]
-pub struct ActionLogsResponse {
-    pub logs: Vec<ActionLogEntry>,
-}
-
-/// Filter values response.
-#[derive(Debug, Serialize)]
-pub struct FilterValuesResponse {
-    pub values: Vec<String>,
-}
-
-/// Health check response.
-#[derive(Debug, Serialize)]
-pub struct HealthResponse {
-    pub status: &'static str,
-    pub service: &'static str,
-}
-
-/// Export format for a workflow instance.
-#[derive(Debug, Serialize)]
-pub struct WorkflowInstanceExport {
-    pub export_version: &'static str,
-    pub exported_at: String,
-    pub instance: InstanceExportInfo,
-    pub nodes: Vec<ExecutionNodeView>,
-    pub timeline: Vec<TimelineEntry>,
-}
-
-/// Full worker status for webapp display.
-#[derive(Debug, Clone)]
-pub struct WorkerStatus {
-    pub pool_id: Uuid,
-    pub active_workers: i32,
-    pub throughput_per_min: f64,
-    pub actions_per_sec: f64,
-    pub total_completed: i64,
-    pub last_action_at: Option<DateTime<Utc>>,
-    pub updated_at: DateTime<Utc>,
-    pub median_dequeue_ms: Option<i64>,
-    pub median_handling_ms: Option<i64>,
-    pub dispatch_queue_size: Option<i64>,
-    pub total_in_flight: Option<i64>,
-    pub median_instance_duration_secs: Option<f64>,
-    pub active_instance_count: i32,
-    pub total_instances_completed: i64,
-    pub instances_per_sec: f64,
-    pub instances_per_min: f64,
-    pub time_series: Option<Vec<u8>>,
-}
-
-/// Worker action stats row for display.
-#[derive(Debug, Clone)]
-pub struct WorkerActionRow {
-    pub pool_id: String,
-    pub active_workers: i64,
-    pub actions_per_sec: String,
-    pub throughput_per_min: i64,
-    pub total_completed: i64,
-    pub median_dequeue_ms: Option<i64>,
-    pub median_handling_ms: Option<i64>,
-    pub last_action_at: Option<String>,
-    pub updated_at: String,
-}
-
-/// Aggregate worker stats for overview cards.
-#[derive(Debug, Clone)]
-pub struct WorkerAggregateStats {
-    pub active_worker_count: i64,
-    pub actions_per_sec: String,
-    pub total_in_flight: i64,
-    pub total_queue_depth: i64,
-}
-
-/// Instance info for export.
-#[derive(Debug, Serialize)]
-pub struct InstanceExportInfo {
-    pub id: String,
-    pub status: String,
-    pub created_at: String,
-    pub input_payload: String,
-    pub result_payload: String,
-}
-
-/// Schedule summary for listing.
-#[derive(Debug, Clone, Serialize)]
-pub struct ScheduleSummary {
-    pub id: String,
-    pub workflow_name: String,
-    pub schedule_name: String,
-    pub schedule_type: String,
-    pub cron_expression: Option<String>,
-    pub interval_seconds: Option<i64>,
-    pub status: String,
-    pub next_run_at: Option<String>,
-    pub last_run_at: Option<String>,
-    pub created_at: String,
-}
-
-/// Full schedule details.
-#[derive(Debug, Clone, Serialize)]
-pub struct ScheduleDetail {
-    pub id: String,
-    pub workflow_name: String,
-    pub schedule_name: String,
-    pub schedule_type: String,
-    pub cron_expression: Option<String>,
-    pub interval_seconds: Option<i64>,
-    pub jitter_seconds: i64,
-    pub status: String,
-    pub next_run_at: Option<String>,
-    pub last_run_at: Option<String>,
-    pub last_instance_id: Option<String>,
-    pub created_at: String,
-    pub updated_at: String,
-    pub priority: i32,
-    pub allow_duplicate: bool,
-    pub input_payload: Option<String>,
-}
-
-/// Invocation summary row for schedule detail pages.
-#[derive(Debug, Clone, Serialize)]
-pub struct ScheduleInvocationSummary {
-    pub id: Uuid,
-    pub created_at: DateTime<Utc>,
-    pub status: InstanceStatus,
-}
diff --git a/crates/waymark/src/workers/status.rs b/crates/waymark/src/workers/status.rs
index dfd0a962..03fec671 100644
--- a/crates/waymark/src/workers/status.rs
+++ b/crates/waymark/src/workers/status.rs
@@ -7,11 +7,10 @@ use std::sync::{
 use std::time::Duration;
 
 use chrono::{DateTime, Utc};
-use tokio::sync::watch;
 use tracing::{info, warn};
 use uuid::Uuid;
+use waymark_worker_status_backend::{WorkerStatusBackend, WorkerStatusUpdate};
 
-use crate::backends::{WorkerStatusBackend, WorkerStatusUpdate};
 use crate::pool_status::{PoolTimeSeries, TimeSeriesEntry};
 
 #[derive(Debug, Clone)]
@@ -37,7 +36,7 @@ pub fn spawn_status_reporter<B, P>(
     worker_pool: P,
     active_instances: Arc<AtomicUsize>,
     interval: Duration,
-    mut shutdown_rx: watch::Receiver<bool>,
+    shutdown: tokio_util::sync::WaitForCancellationFutureOwned,
 ) -> tokio::task::JoinHandle<()>
 where
     B: WorkerStatusBackend + Send + Sync + 'static,
@@ -54,6 +53,8 @@ where
             "status reporter started"
         );
 
+        let mut shutdown = std::pin::pin!(shutdown);
+
         loop {
             tokio::select! {
                 _ = ticker.tick() => {
@@ -99,11 +100,9 @@ where
                         warn!(error = %err, "failed to update worker status");
                     }
                 }
-                _ = shutdown_rx.changed() => {
-                    if *shutdown_rx.borrow() {
-                        info!("status reporter shutting down");
-                        break;
-                    }
+                _ = &mut shutdown => {
+                    info!("status reporter shutting down");
+                    break;
                 }
             }
         }