diff --git a/Cargo.lock b/Cargo.lock index e57393b9..08e29a10 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1517,9 +1517,9 @@ checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273" [[package]] name = "metrics" -version = "0.24.2" +version = "0.24.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "25dea7ac8057892855ec285c440160265225438c3c45072613c25a4b26e98ef5" +checksum = "5d5312e9ba3771cfa961b585728215e3d972c950a3eed9252aa093d6301277e8" dependencies = [ "ahash", "portable-atomic", @@ -1847,9 +1847,9 @@ checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" [[package]] name = "portable-atomic" -version = "1.11.1" +version = "1.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f84267b20a16ea918e43c6a88433c2d54fa145c92a811b5b047ccbe153674483" +checksum = "c33a9471896f1c69cecef8d20cbe2f7accd12527ce60845ff44c153bb2a21b49" [[package]] name = "potential_utf" @@ -3366,9 +3366,123 @@ dependencies = [ "tracing-chrome", "tracing-subscriber", "uuid", + "waymark-backend-fault-injection", + "waymark-backend-memory", + "waymark-backend-postgres", + "waymark-backend-postgres-migrations", + "waymark-backends-core", + "waymark-core-backend", "waymark-dag", - "waymark-observability-macros", + "waymark-garbage-collector-backend", + "waymark-integration-support", + "waymark-ir-parser", + "waymark-observability", "waymark-proto", + "waymark-runner", + "waymark-runner-state", + "waymark-scheduler-backend", + "waymark-scheduler-core", + "waymark-test-support", + "waymark-webapp-backend", + "waymark-webapp-core", + "waymark-worker-status-backend", + "waymark-workflow-registry-backend", +] + +[[package]] +name = "waymark-backend-fault-injection" +version = "0.1.0" +dependencies = [ + "async-trait", + "uuid", + "waymark-backend-memory", + "waymark-backends-core", + "waymark-core-backend", + "waymark-workflow-registry-backend", +] + +[[package]] +name = "waymark-backend-memory" 
+version = "0.1.0" +dependencies = [ + "async-trait", + "chrono", + "rmp-serde", + "serde_json", + "uuid", + "waymark-backends-core", + "waymark-core-backend", + "waymark-garbage-collector-backend", + "waymark-scheduler-backend", + "waymark-scheduler-core", + "waymark-webapp-backend", + "waymark-webapp-core", + "waymark-worker-status-backend", + "waymark-workflow-registry-backend", +] + +[[package]] +name = "waymark-backend-postgres" +version = "0.1.0" +dependencies = [ + "async-trait", + "chrono", + "prost 0.12.6", + "rmp-serde", + "serde", + "serde_json", + "serial_test", + "sqlx", + "tokio", + "tracing", + "uuid", + "waymark-backend-postgres-migrations", + "waymark-backends-core", + "waymark-core-backend", + "waymark-dag", + "waymark-garbage-collector-backend", + "waymark-ir-parser", + "waymark-observability", + "waymark-proto", + "waymark-runner", + "waymark-runner-state", + "waymark-scheduler-backend", + "waymark-scheduler-core", + "waymark-test-support", + "waymark-webapp-backend", + "waymark-webapp-core", + "waymark-worker-status-backend", + "waymark-workflow-registry-backend", +] + +[[package]] +name = "waymark-backend-postgres-migrations" +version = "0.1.0" +dependencies = [ + "sqlx", +] + +[[package]] +name = "waymark-backends-core" +version = "0.1.0" +dependencies = [ + "serde_json", + "sqlx", + "thiserror", +] + +[[package]] +name = "waymark-core-backend" +version = "0.1.0" +dependencies = [ + "async-trait", + "chrono", + "serde", + "serde_json", + "uuid", + "waymark-backends-core", + "waymark-dag", + "waymark-runner-state", ] [[package]] @@ -3379,7 +3493,7 @@ dependencies = [ "serde", "thiserror", "uuid", - "waymark", + "waymark-ir-parser", "waymark-proto", ] @@ -3396,7 +3510,47 @@ dependencies = [ "tokio", "uuid", "waymark", + "waymark-backend-memory", + "waymark-core-backend", "waymark-dag", + "waymark-ir-parser", + "waymark-runner-state", + "waymark-workflow-registry-backend", +] + +[[package]] +name = "waymark-garbage-collector-backend" +version = 
"0.1.0" +dependencies = [ + "async-trait", + "chrono", + "waymark-backends-core", +] + +[[package]] +name = "waymark-integration-support" +version = "0.1.0" +dependencies = [ + "anyhow", + "sqlx", + "tokio", + "waymark-backend-postgres-migrations", +] + +[[package]] +name = "waymark-ir-parser" +version = "0.1.0" +dependencies = [ + "regex", + "waymark-proto", +] + +[[package]] +name = "waymark-observability" +version = "0.1.0" +dependencies = [ + "tracing", + "waymark-observability-macros", ] [[package]] @@ -3419,6 +3573,105 @@ dependencies = [ "tonic-build", ] +[[package]] +name = "waymark-runner" +version = "0.1.0" +dependencies = [ + "chrono", + "rustc-hash", + "serde_json", + "thiserror", + "tracing", + "uuid", + "waymark-backend-memory", + "waymark-core-backend", + "waymark-dag", + "waymark-ir-parser", + "waymark-observability", + "waymark-proto", + "waymark-runner-state", +] + +[[package]] +name = "waymark-runner-state" +version = "0.1.0" +dependencies = [ + "chrono", + "serde", + "serde_json", + "thiserror", + "uuid", + "waymark-dag", + "waymark-proto", +] + +[[package]] +name = "waymark-scheduler-backend" +version = "0.1.0" +dependencies = [ + "async-trait", + "uuid", + "waymark-backends-core", + "waymark-scheduler-core", +] + +[[package]] +name = "waymark-scheduler-core" +version = "0.1.0" +dependencies = [ + "chrono", + "cron", + "rand 0.8.5", + "serde", + "uuid", +] + +[[package]] +name = "waymark-test-support" +version = "0.1.0" +dependencies = [ + "sqlx", + "waymark-integration-support", +] + +[[package]] +name = "waymark-webapp-backend" +version = "0.1.0" +dependencies = [ + "async-trait", + "uuid", + "waymark-backends-core", + "waymark-webapp-core", +] + +[[package]] +name = "waymark-webapp-core" +version = "0.1.0" +dependencies = [ + "chrono", + "serde", + "uuid", +] + +[[package]] +name = "waymark-worker-status-backend" +version = "0.1.0" +dependencies = [ + "async-trait", + "chrono", + "uuid", + "waymark-backends-core", +] + +[[package]] +name = 
"waymark-workflow-registry-backend" +version = "0.1.0" +dependencies = [ + "async-trait", + "uuid", + "waymark-backends-core", +] + [[package]] name = "webpki-roots" version = "0.26.11" diff --git a/Cargo.toml b/Cargo.toml index ee36ff5d..4c881345 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -4,20 +4,49 @@ members = ["crates/*"] [workspace.dependencies] waymark = { path = "crates/waymark" } +waymark-backend-fault-injection = { path = "crates/backend-fault-injection" } +waymark-backend-memory = { path = "crates/backend-memory" } +waymark-backend-postgres = { path = "crates/backend-postgres" } +waymark-backend-postgres-migrations = { path = "crates/backend-postgres-migrations" } +waymark-backends-core = { path = "crates/backends-core" } +waymark-core-backend = { path = "crates/core-backend" } waymark-dag = { path = "crates/dag" } +waymark-garbage-collector-backend = { path = "crates/garbage-collector-backend" } +waymark-integration-support = { path = "crates/integration-support" } +waymark-ir-parser = { path = "crates/ir-parser" } +waymark-observability = { path = "crates/observability" } +waymark-observability-macros = { path = "crates/observability-macros" } waymark-proto = { path = "crates/proto" } +waymark-runner = { path = "crates/runner" } +waymark-runner-state = { path = "crates/runner-state" } +waymark-scheduler-backend = { path = "crates/scheduler-backend" } +waymark-scheduler-core = { path = "crates/scheduler-core" } +waymark-test-support = { path = "crates/test-support" } +waymark-webapp-backend = { path = "crates/webapp-backend" } +waymark-webapp-core = { path = "crates/webapp-core" } +waymark-worker-status-backend = { path = "crates/worker-status-backend" } +waymark-workflow-registry-backend = { path = "crates/workflow-registry-backend" } anyhow = "1" +async-trait = "0.1" +chrono = { version = "0.4", default-features = false } clap = "4.5" +cron = "0.12" proptest = "1.9" prost = "0.12" prost-types = "0.12" +rand = "0.8" +regex = "1" +rmp-serde = "1" 
rustc-hash = "2" serde = "1" serde_json = "1" +serial_test = "2" sha2 = "0.10" +sqlx = { version = "0.8", default-features = false } thiserror = "2" tokio = "1" tonic = "0.11" tonic-build = "0.11" +tracing = "0.1" uuid = "1" diff --git a/crates/backend-fault-injection/Cargo.toml b/crates/backend-fault-injection/Cargo.toml new file mode 100644 index 00000000..1b592ba1 --- /dev/null +++ b/crates/backend-fault-injection/Cargo.toml @@ -0,0 +1,12 @@ +[package] +name = "waymark-backend-fault-injection" +version = "0.1.0" +edition = "2024" + +[dependencies] +async-trait = { workspace = true } +uuid = { workspace = true } +waymark-backends-core = { workspace = true } +waymark-backend-memory = { workspace = true } +waymark-core-backend = { workspace = true } +waymark-workflow-registry-backend = { workspace = true } diff --git a/crates/backend-fault-injection/src/lib.rs b/crates/backend-fault-injection/src/lib.rs new file mode 100644 index 00000000..4a43d305 --- /dev/null +++ b/crates/backend-fault-injection/src/lib.rs @@ -0,0 +1,128 @@ +use std::sync::{ + Arc, + atomic::{AtomicBool, AtomicUsize, Ordering as AtomicOrdering}, +}; + +use uuid::Uuid; +use waymark_backend_memory::MemoryBackend; +use waymark_backends_core::{BackendError, BackendResult}; +use waymark_core_backend::{ + CoreBackend, GraphUpdate, InstanceDone, InstanceLockStatus, LockClaim, QueuedInstanceBatch, +}; +use waymark_workflow_registry_backend::{ + WorkflowRegistration, WorkflowRegistryBackend, WorkflowVersion, +}; + +#[derive(Clone)] +pub struct FaultInjectingBackend { + inner: MemoryBackend, + fail_get_queued_instances_with_depth_limit: Arc, + get_queued_instances_calls: Arc, +} + +impl FaultInjectingBackend { + pub fn with_depth_limit_poll_failures(inner: MemoryBackend) -> Self { + Self { + inner, + fail_get_queued_instances_with_depth_limit: Arc::new(AtomicBool::new(true)), + get_queued_instances_calls: Arc::new(AtomicUsize::new(0)), + } + } + + pub fn get_queued_instances_calls(&self) -> usize { + 
self.get_queued_instances_calls.load(AtomicOrdering::SeqCst) + } + + pub fn queue_len(&self) -> usize { + self.inner + .instance_queue() + .as_ref() + .map(|queue| queue.lock().expect("queue poisoned").len()) + .unwrap_or(0) + } + + pub fn instances_done_len(&self) -> usize { + self.inner.instances_done().len() + } +} + +#[async_trait::async_trait] +impl CoreBackend for FaultInjectingBackend { + fn clone_box(&self) -> Box { + Box::new(self.clone()) + } + + async fn save_graphs( + &self, + claim: LockClaim, + graphs: &[GraphUpdate], + ) -> BackendResult> { + self.inner.save_graphs(claim, graphs).await + } + + async fn save_actions_done( + &self, + actions: &[waymark_core_backend::ActionDone], + ) -> BackendResult<()> { + self.inner.save_actions_done(actions).await + } + + async fn save_instances_done(&self, instances: &[InstanceDone]) -> BackendResult<()> { + self.inner.save_instances_done(instances).await + } + + async fn get_queued_instances( + &self, + size: usize, + claim: LockClaim, + ) -> BackendResult { + self.get_queued_instances_calls + .fetch_add(1, AtomicOrdering::SeqCst); + if self + .fail_get_queued_instances_with_depth_limit + .load(AtomicOrdering::SeqCst) + { + return Err(BackendError::Message("depth limit exceeded".to_string())); + } + self.inner.get_queued_instances(size, claim).await + } + + async fn queue_instances( + &self, + instances: &[waymark_core_backend::QueuedInstance], + ) -> BackendResult<()> { + self.inner.queue_instances(instances).await + } + + async fn refresh_instance_locks( + &self, + claim: LockClaim, + instance_ids: &[Uuid], + ) -> BackendResult> { + self.inner.refresh_instance_locks(claim, instance_ids).await + } + + async fn release_instance_locks( + &self, + lock_uuid: Uuid, + instance_ids: &[Uuid], + ) -> BackendResult<()> { + self.inner + .release_instance_locks(lock_uuid, instance_ids) + .await + } +} + +#[async_trait::async_trait] +impl WorkflowRegistryBackend for FaultInjectingBackend { + async fn upsert_workflow_version( 
+ &self, + registration: &WorkflowRegistration, + ) -> BackendResult { + self.inner.upsert_workflow_version(registration).await + } + + async fn get_workflow_versions(&self, ids: &[Uuid]) -> BackendResult> { + self.inner.get_workflow_versions(ids).await + } +} diff --git a/crates/backend-memory/Cargo.toml b/crates/backend-memory/Cargo.toml new file mode 100644 index 00000000..203e0f35 --- /dev/null +++ b/crates/backend-memory/Cargo.toml @@ -0,0 +1,37 @@ +[package] +name = "waymark-backend-memory" +version = "0.1.0" +edition = "2024" + +[dependencies] +async-trait = { workspace = true } +chrono = { workspace = true } +rmp-serde = { workspace = true } +serde_json = { workspace = true } +uuid = { workspace = true } +waymark-backends-core = { workspace = true } +waymark-core-backend = { workspace = true } +waymark-garbage-collector-backend = { workspace = true, optional = true } +waymark-scheduler-backend = { workspace = true, optional = true } +waymark-scheduler-core = { workspace = true } +waymark-worker-status-backend = { workspace = true } +waymark-workflow-registry-backend = { workspace = true } +waymark-webapp-backend = { workspace = true, optional = true } +waymark-webapp-core = { workspace = true, optional = true } + +[features] +default = [ + "core-backend", + "worker-status-backend", + "workflow-registry-backend", + "scheduler-backend", + "garbage-collector-backend", + "webapp-backend", +] + +core-backend = [] +garbage-collector-backend = ["dep:waymark-garbage-collector-backend"] +scheduler-backend = ["dep:waymark-scheduler-backend"] +worker-status-backend = [] +workflow-registry-backend = [] +webapp-backend = ["dep:waymark-webapp-backend", "dep:waymark-webapp-core"] diff --git a/crates/backend-memory/src/core_backend.rs b/crates/backend-memory/src/core_backend.rs new file mode 100644 index 00000000..49a40330 --- /dev/null +++ b/crates/backend-memory/src/core_backend.rs @@ -0,0 +1,159 @@ +use chrono::Utc; +use uuid::Uuid; +use 
waymark_backends_core::{BackendError, BackendResult}; +use waymark_core_backend::{ + ActionDone, GraphUpdate, InstanceDone, InstanceLockStatus, LockClaim, QueuedInstance, + QueuedInstanceBatch, +}; + +#[async_trait::async_trait] +impl waymark_core_backend::CoreBackend for crate::MemoryBackend { + fn clone_box(&self) -> Box { + Box::new(self.clone()) + } + + async fn save_graphs( + &self, + claim: LockClaim, + graphs: &[GraphUpdate], + ) -> BackendResult> { + let mut stored = self.graph_updates.lock().expect("graph updates poisoned"); + stored.extend(graphs.iter().cloned()); + let mut guard = self.instance_locks.lock().expect("instance locks poisoned"); + let mut locks = Vec::with_capacity(graphs.len()); + for graph in graphs { + if let Some((Some(lock_uuid), lock_expires_at)) = guard.get_mut(&graph.instance_id) + && *lock_uuid == claim.lock_uuid + && lock_expires_at.is_none_or(|expires_at| expires_at < claim.lock_expires_at) + { + *lock_expires_at = Some(claim.lock_expires_at); + } + let (lock_uuid, lock_expires_at) = guard + .get(&graph.instance_id) + .cloned() + .unwrap_or((None, None)); + locks.push(InstanceLockStatus { + instance_id: graph.instance_id, + lock_uuid, + lock_expires_at, + }); + } + Ok(locks) + } + + async fn save_actions_done(&self, actions: &[ActionDone]) -> BackendResult<()> { + let mut stored = self.actions_done.lock().expect("actions done poisoned"); + stored.extend(actions.iter().cloned()); + Ok(()) + } + + async fn save_instances_done(&self, instances: &[InstanceDone]) -> BackendResult<()> { + let mut stored = self.instances_done.lock().expect("instances done poisoned"); + stored.extend(instances.iter().cloned()); + if !instances.is_empty() { + let mut locks = self.instance_locks.lock().expect("instance locks poisoned"); + for instance in instances { + locks.remove(&instance.executor_id); + } + } + Ok(()) + } + + async fn get_queued_instances( + &self, + size: usize, + claim: LockClaim, + ) -> BackendResult { + if size == 0 { + return 
Ok(QueuedInstanceBatch { + instances: Vec::new(), + }); + } + let queue = match &self.instance_queue { + Some(queue) => queue, + None => { + return Ok(QueuedInstanceBatch { + instances: Vec::new(), + }); + } + }; + let mut guard = queue.lock().expect("instance queue poisoned"); + let now = Utc::now(); + let mut instances = Vec::new(); + while instances.len() < size { + let Some(instance) = guard.front() else { + break; + }; + if let Some(scheduled_at) = instance.scheduled_at + && scheduled_at > now + { + break; + } + let instance = guard.pop_front().expect("instance queue empty"); + instances.push(instance); + } + if !instances.is_empty() { + let mut locks = self.instance_locks.lock().expect("instance locks poisoned"); + for instance in &instances { + locks.insert( + instance.instance_id, + (Some(claim.lock_uuid), Some(claim.lock_expires_at)), + ); + } + } + Ok(QueuedInstanceBatch { instances }) + } + + async fn queue_instances(&self, instances: &[QueuedInstance]) -> BackendResult<()> { + if instances.is_empty() { + return Ok(()); + } + let queue = self.instance_queue.as_ref().ok_or_else(|| { + BackendError::Message("memory backend missing instance queue".to_string()) + })?; + let mut guard = queue.lock().expect("instance queue poisoned"); + for instance in instances { + guard.push_back(instance.clone()); + } + Ok(()) + } + + async fn refresh_instance_locks( + &self, + claim: LockClaim, + instance_ids: &[Uuid], + ) -> BackendResult> { + let mut guard = self.instance_locks.lock().expect("instance locks poisoned"); + let mut locks = Vec::new(); + for instance_id in instance_ids { + let entry = guard + .entry(*instance_id) + .or_insert((Some(claim.lock_uuid), Some(claim.lock_expires_at))); + if entry.0 == Some(claim.lock_uuid) { + entry.1 = Some(claim.lock_expires_at); + } + locks.push(InstanceLockStatus { + instance_id: *instance_id, + lock_uuid: entry.0, + lock_expires_at: entry.1, + }); + } + Ok(locks) + } + + async fn release_instance_locks( + &self, + lock_uuid: 
Uuid, + instance_ids: &[Uuid], + ) -> BackendResult<()> { + let mut guard = self.instance_locks.lock().expect("instance locks poisoned"); + for instance_id in instance_ids { + if let Some((current_lock, _)) = guard.get(instance_id) + && *current_lock == Some(lock_uuid) + { + guard.remove(instance_id); + } + } + Ok(()) + } +} diff --git a/crates/backend-memory/src/garbage_collector_backend.rs b/crates/backend-memory/src/garbage_collector_backend.rs new file mode 100644 index 00000000..6a4cda66 --- /dev/null +++ b/crates/backend-memory/src/garbage_collector_backend.rs @@ -0,0 +1,14 @@ +use chrono::{DateTime, Utc}; +use waymark_backends_core::BackendResult; +use waymark_garbage_collector_backend::{GarbageCollectionResult, GarbageCollectorBackend}; + +#[async_trait::async_trait] +impl GarbageCollectorBackend for crate::MemoryBackend { + async fn collect_done_instances( + &self, + _older_than: DateTime, + _limit: usize, + ) -> BackendResult { + Ok(GarbageCollectionResult::default()) + } +} diff --git a/crates/backend-memory/src/lib.rs b/crates/backend-memory/src/lib.rs new file mode 100644 index 00000000..e2ef56e4 --- /dev/null +++ b/crates/backend-memory/src/lib.rs @@ -0,0 +1,111 @@ +//! In-memory backend that prints persistence operations. 
+ +#[cfg(feature = "core-backend")] +mod core_backend; + +#[cfg(feature = "garbage-collector-backend")] +mod garbage_collector_backend; + +#[cfg(feature = "scheduler-backend")] +mod scheduler_backend; + +#[cfg(feature = "webapp-backend")] +mod webapp_backend; + +#[cfg(feature = "worker-status-backend")] +mod worker_status_backend; + +#[cfg(feature = "workflow-registry-backend")] +mod workflow_registry_backend; + +use std::collections::{HashMap, VecDeque}; +use std::sync::{Arc, Mutex}; + +use chrono::{DateTime, Utc}; +use uuid::Uuid; + +use waymark_core_backend::{ActionDone, GraphUpdate, InstanceDone, QueuedInstance}; +use waymark_scheduler_core::{ScheduleId, WorkflowSchedule}; +use waymark_worker_status_backend::WorkerStatusUpdate; +use waymark_workflow_registry_backend::WorkflowRegistration; + +type WorkflowVersionKey = (String, String); +type WorkflowVersionValue = (Uuid, WorkflowRegistration); +type WorkflowVersionStore = HashMap; +type InstanceLockStore = HashMap, Option>)>; + +/// Backend that stores updates in memory for tests or local runs. 
+#[derive(Clone)] +pub struct MemoryBackend { + instance_queue: Option>>>, + graph_updates: Arc>>, + actions_done: Arc>>, + instances_done: Arc>>, + worker_status_updates: Arc>>, + #[cfg_attr(not(feature = "workflow-registry-backend"), allow(dead_code))] + workflow_versions: Arc>, + #[cfg_attr(not(feature = "scheduler-backend"), allow(dead_code))] + schedules: Arc>>, + #[cfg_attr(not(feature = "core-backend"), allow(dead_code))] + instance_locks: Arc>, +} + +impl Default for MemoryBackend { + fn default() -> Self { + Self { + instance_queue: None, + graph_updates: Arc::new(Mutex::new(Vec::new())), + actions_done: Arc::new(Mutex::new(Vec::new())), + instances_done: Arc::new(Mutex::new(Vec::new())), + worker_status_updates: Arc::new(Mutex::new(Vec::new())), + workflow_versions: Arc::new(Mutex::new(HashMap::new())), + schedules: Arc::new(Mutex::new(HashMap::new())), + instance_locks: Arc::new(Mutex::new(HashMap::new())), + } + } +} + +impl MemoryBackend { + pub fn new() -> Self { + Self::default() + } + + pub fn with_queue(queue: Arc>>) -> Self { + Self { + instance_queue: Some(queue), + ..Self::default() + } + } + + pub fn instance_queue(&self) -> Option>>> { + self.instance_queue.clone() + } + + pub fn graph_updates(&self) -> Vec { + self.graph_updates + .lock() + .expect("graph updates poisoned") + .clone() + } + + pub fn actions_done(&self) -> Vec { + self.actions_done + .lock() + .expect("actions done poisoned") + .clone() + } + + pub fn instances_done(&self) -> Vec { + self.instances_done + .lock() + .expect("instances done poisoned") + .clone() + } + + pub fn worker_status_updates(&self) -> Vec { + self.worker_status_updates + .lock() + .expect("worker status updates poisoned") + .clone() + } +} diff --git a/crates/backend-memory/src/scheduler_backend.rs b/crates/backend-memory/src/scheduler_backend.rs new file mode 100644 index 00000000..3764f489 --- /dev/null +++ b/crates/backend-memory/src/scheduler_backend.rs @@ -0,0 +1,209 @@ +use chrono::Utc; +use 
uuid::Uuid; +use waymark_backends_core::{BackendError, BackendResult}; +use waymark_scheduler_backend::SchedulerBackend; +use waymark_scheduler_core::{ + CreateScheduleParams, ScheduleId, ScheduleType, WorkflowSchedule, compute_next_run, +}; + +#[async_trait::async_trait] +impl SchedulerBackend for crate::MemoryBackend { + async fn upsert_schedule(&self, params: &CreateScheduleParams) -> BackendResult { + let mut guard = self.schedules.lock().expect("schedules poisoned"); + let existing_schedule = guard.iter().find_map(|(id, schedule)| { + if schedule.workflow_name == params.workflow_name + && schedule.schedule_name == params.schedule_name + { + Some((*id, schedule.clone())) + } else { + None + } + }); + let schedule_id = existing_schedule + .as_ref() + .map(|(id, _)| *id) + .unwrap_or_else(ScheduleId::new); + let now = Utc::now(); + let next_run_at = match existing_schedule + .as_ref() + .and_then(|(_, schedule)| schedule.next_run_at) + { + Some(next_run_at) => Some(next_run_at), + None => Some( + compute_next_run( + params.schedule_type, + params.cron_expression.as_deref(), + params.interval_seconds, + params.jitter_seconds, + None, + ) + .map_err(BackendError::Message)?, + ), + }; + let schedule = WorkflowSchedule { + id: schedule_id.0, + workflow_name: params.workflow_name.clone(), + schedule_name: params.schedule_name.clone(), + schedule_type: params.schedule_type.as_str().to_string(), + cron_expression: params.cron_expression.clone(), + interval_seconds: params.interval_seconds, + jitter_seconds: params.jitter_seconds, + input_payload: params.input_payload.clone(), + status: "active".to_string(), + next_run_at, + last_run_at: existing_schedule + .as_ref() + .and_then(|(_, schedule)| schedule.last_run_at), + last_instance_id: existing_schedule + .as_ref() + .and_then(|(_, schedule)| schedule.last_instance_id), + created_at: existing_schedule + .as_ref() + .map(|(_, schedule)| schedule.created_at) + .unwrap_or(now), + updated_at: now, + priority: 
params.priority, + allow_duplicate: params.allow_duplicate, + }; + guard.insert(schedule_id, schedule); + Ok(schedule_id) + } + + async fn get_schedule(&self, id: ScheduleId) -> BackendResult { + let guard = self.schedules.lock().expect("schedules poisoned"); + guard + .get(&id) + .cloned() + .ok_or_else(|| BackendError::Message(format!("schedule not found: {id}"))) + } + + async fn get_schedule_by_name( + &self, + workflow_name: &str, + schedule_name: &str, + ) -> BackendResult> { + let guard = self.schedules.lock().expect("schedules poisoned"); + Ok(guard + .values() + .find(|schedule| { + schedule.workflow_name == workflow_name + && schedule.schedule_name == schedule_name + && schedule.status != "deleted" + }) + .cloned()) + } + + async fn list_schedules( + &self, + limit: i64, + offset: i64, + ) -> BackendResult> { + let guard = self.schedules.lock().expect("schedules poisoned"); + let mut schedules: Vec<_> = guard + .values() + .filter(|schedule| schedule.status != "deleted") + .cloned() + .collect(); + schedules.sort_by(|a, b| { + (&a.workflow_name, &a.schedule_name).cmp(&(&b.workflow_name, &b.schedule_name)) + }); + let start = offset.max(0) as usize; + let end = start.saturating_add(limit.max(0) as usize); + Ok(schedules + .into_iter() + .skip(start) + .take(end - start) + .collect()) + } + + async fn count_schedules(&self) -> BackendResult { + let guard = self.schedules.lock().expect("schedules poisoned"); + Ok(guard + .values() + .filter(|schedule| schedule.status != "deleted") + .count() as i64) + } + + async fn update_schedule_status(&self, id: ScheduleId, status: &str) -> BackendResult { + let mut guard = self.schedules.lock().expect("schedules poisoned"); + if let Some(schedule) = guard.get_mut(&id) { + schedule.status = status.to_string(); + schedule.updated_at = Utc::now(); + Ok(true) + } else { + Ok(false) + } + } + + async fn delete_schedule(&self, id: ScheduleId) -> BackendResult { + SchedulerBackend::update_schedule_status(self, id, 
"deleted").await + } + + async fn find_due_schedules(&self, limit: i32) -> BackendResult> { + let guard = self.schedules.lock().expect("schedules poisoned"); + let now = Utc::now(); + let mut schedules: Vec<_> = guard + .values() + .filter(|schedule| { + schedule.status == "active" + && schedule + .next_run_at + .map(|next| next <= now) + .unwrap_or(false) + }) + .cloned() + .collect(); + schedules.sort_by_key(|schedule| schedule.next_run_at); + Ok(schedules.into_iter().take(limit as usize).collect()) + } + + async fn has_running_instance(&self, _schedule_id: ScheduleId) -> BackendResult { + Ok(false) + } + + async fn mark_schedule_executed( + &self, + schedule_id: ScheduleId, + instance_id: Uuid, + ) -> BackendResult<()> { + let mut guard = self.schedules.lock().expect("schedules poisoned"); + let schedule = guard + .get_mut(&schedule_id) + .ok_or_else(|| BackendError::Message(format!("schedule not found: {schedule_id}")))?; + let schedule_type = ScheduleType::parse(&schedule.schedule_type) + .ok_or_else(|| BackendError::Message("invalid schedule type".to_string()))?; + let next_run_at = compute_next_run( + schedule_type, + schedule.cron_expression.as_deref(), + schedule.interval_seconds, + schedule.jitter_seconds, + Some(Utc::now()), + ) + .map_err(BackendError::Message)?; + schedule.last_run_at = Some(Utc::now()); + schedule.last_instance_id = Some(instance_id); + schedule.next_run_at = Some(next_run_at); + schedule.updated_at = Utc::now(); + Ok(()) + } + + async fn skip_schedule_run(&self, schedule_id: ScheduleId) -> BackendResult<()> { + let mut guard = self.schedules.lock().expect("schedules poisoned"); + let schedule = guard + .get_mut(&schedule_id) + .ok_or_else(|| BackendError::Message(format!("schedule not found: {schedule_id}")))?; + let schedule_type = ScheduleType::parse(&schedule.schedule_type) + .ok_or_else(|| BackendError::Message("invalid schedule type".to_string()))?; + let next_run_at = compute_next_run( + schedule_type, + 
schedule.cron_expression.as_deref(), + schedule.interval_seconds, + schedule.jitter_seconds, + Some(Utc::now()), + ) + .map_err(BackendError::Message)?; + schedule.next_run_at = Some(next_run_at); + schedule.updated_at = Utc::now(); + Ok(()) + } +} diff --git a/crates/backend-memory/src/webapp_backend.rs b/crates/backend-memory/src/webapp_backend.rs new file mode 100644 index 00000000..5bcca7c1 --- /dev/null +++ b/crates/backend-memory/src/webapp_backend.rs @@ -0,0 +1,295 @@ +use std::collections::HashMap; + +use chrono::Utc; +use uuid::Uuid; +use waymark_backends_core::{BackendError, BackendResult}; +use waymark_webapp_backend::WebappBackend; +use waymark_webapp_core::{ + ExecutionGraphView, InstanceDetail, InstanceStatus, InstanceSummary, ScheduleDetail, + ScheduleInvocationSummary, ScheduleSummary, TimelineEntry, WorkerActionRow, + WorkerAggregateStats, WorkerStatus, +}; +use waymark_worker_status_backend::WorkerStatusUpdate; + +#[async_trait::async_trait] +impl WebappBackend for crate::MemoryBackend { + async fn count_instances(&self, _search: Option<&str>) -> BackendResult { + Ok(0) + } + + async fn list_instances( + &self, + _search: Option<&str>, + _limit: i64, + _offset: i64, + ) -> BackendResult> { + Ok(Vec::new()) + } + + async fn get_instance(&self, instance_id: Uuid) -> BackendResult { + Err(BackendError::Message(format!( + "instance not found: {instance_id}" + ))) + } + + async fn get_execution_graph( + &self, + _instance_id: Uuid, + ) -> BackendResult> { + Ok(None) + } + + async fn get_workflow_graph( + &self, + _instance_id: Uuid, + ) -> BackendResult> { + Ok(None) + } + + async fn get_action_results(&self, _instance_id: Uuid) -> BackendResult> { + Ok(Vec::new()) + } + + async fn get_distinct_workflows(&self) -> BackendResult> { + Ok(Vec::new()) + } + + async fn get_distinct_statuses(&self) -> BackendResult> { + Ok(vec![ + InstanceStatus::Queued.to_string(), + InstanceStatus::Running.to_string(), + InstanceStatus::Completed.to_string(), + 
InstanceStatus::Failed.to_string(), + ]) + } + + async fn count_schedules(&self) -> BackendResult { + let guard = self.schedules.lock().expect("schedules poisoned"); + Ok(guard + .values() + .filter(|schedule| schedule.status != "deleted") + .count() as i64) + } + + async fn list_schedules(&self, limit: i64, offset: i64) -> BackendResult> { + let guard = self.schedules.lock().expect("schedules poisoned"); + let mut schedules: Vec<_> = guard + .values() + .filter(|schedule| schedule.status != "deleted") + .cloned() + .collect(); + schedules.sort_by(|a, b| { + (&a.workflow_name, &a.schedule_name).cmp(&(&b.workflow_name, &b.schedule_name)) + }); + + let start = offset.max(0) as usize; + let page_limit = limit.max(0) as usize; + Ok(schedules + .into_iter() + .skip(start) + .take(page_limit) + .map(|schedule| ScheduleSummary { + id: schedule.id.to_string(), + workflow_name: schedule.workflow_name, + schedule_name: schedule.schedule_name, + schedule_type: schedule.schedule_type, + cron_expression: schedule.cron_expression, + interval_seconds: schedule.interval_seconds, + status: schedule.status, + next_run_at: schedule.next_run_at.map(|dt| dt.to_rfc3339()), + last_run_at: schedule.last_run_at.map(|dt| dt.to_rfc3339()), + created_at: schedule.created_at.to_rfc3339(), + }) + .collect()) + } + + async fn get_schedule(&self, schedule_id: Uuid) -> BackendResult { + let guard = self.schedules.lock().expect("schedules poisoned"); + let schedule = guard + .values() + .find(|schedule| schedule.id == schedule_id) + .cloned() + .ok_or_else(|| BackendError::Message(format!("schedule not found: {schedule_id}")))?; + + let input_payload = schedule.input_payload.as_ref().and_then(|bytes| { + rmp_serde::from_slice::(bytes) + .ok() + .and_then(|value| serde_json::to_string_pretty(&value).ok()) + }); + + Ok(ScheduleDetail { + id: schedule.id.to_string(), + workflow_name: schedule.workflow_name, + schedule_name: schedule.schedule_name, + schedule_type: schedule.schedule_type, + 
cron_expression: schedule.cron_expression, + interval_seconds: schedule.interval_seconds, + jitter_seconds: schedule.jitter_seconds, + status: schedule.status, + next_run_at: schedule.next_run_at.map(|dt| dt.to_rfc3339()), + last_run_at: schedule.last_run_at.map(|dt| dt.to_rfc3339()), + last_instance_id: schedule.last_instance_id.map(|id| id.to_string()), + created_at: schedule.created_at.to_rfc3339(), + updated_at: schedule.updated_at.to_rfc3339(), + priority: schedule.priority, + allow_duplicate: schedule.allow_duplicate, + input_payload, + }) + } + + async fn count_schedule_invocations(&self, _schedule_id: Uuid) -> BackendResult { + Ok(0) + } + + async fn list_schedule_invocations( + &self, + _schedule_id: Uuid, + _limit: i64, + _offset: i64, + ) -> BackendResult> { + Ok(Vec::new()) + } + + async fn update_schedule_status(&self, schedule_id: Uuid, status: &str) -> BackendResult { + let mut guard = self.schedules.lock().expect("schedules poisoned"); + let Some(schedule) = guard + .values_mut() + .find(|schedule| schedule.id == schedule_id) + else { + return Ok(false); + }; + schedule.status = status.to_string(); + schedule.updated_at = Utc::now(); + Ok(true) + } + + async fn get_distinct_schedule_statuses(&self) -> BackendResult> { + Ok(vec!["active".to_string(), "paused".to_string()]) + } + + async fn get_distinct_schedule_types(&self) -> BackendResult> { + Ok(vec!["cron".to_string(), "interval".to_string()]) + } + + async fn get_worker_action_stats( + &self, + _window_minutes: i64, + ) -> BackendResult> { + let statuses = latest_worker_statuses( + &self + .worker_status_updates + .lock() + .expect("worker status updates poisoned"), + ); + + Ok(statuses + .into_iter() + .map(|status| WorkerActionRow { + pool_id: status.pool_id.to_string(), + active_workers: status.active_workers as i64, + actions_per_sec: format!("{:.1}", status.actions_per_sec), + throughput_per_min: status.throughput_per_min as i64, + total_completed: status.total_completed, + 
median_dequeue_ms: status.median_dequeue_ms, + median_handling_ms: status.median_handling_ms, + last_action_at: status.last_action_at.map(|dt| dt.to_rfc3339()), + updated_at: status.updated_at.to_rfc3339(), + }) + .collect()) + } + + async fn get_worker_aggregate_stats( + &self, + _window_minutes: i64, + ) -> BackendResult { + let statuses = latest_worker_statuses( + &self + .worker_status_updates + .lock() + .expect("worker status updates poisoned"), + ); + + let active_worker_count = statuses + .iter() + .map(|status| status.active_workers as i64) + .sum(); + let total_in_flight = statuses + .iter() + .filter_map(|status| status.total_in_flight) + .sum(); + let total_queue_depth = statuses + .iter() + .filter_map(|status| status.dispatch_queue_size) + .sum(); + let actions_per_sec = statuses + .iter() + .map(|status| status.actions_per_sec) + .sum::(); + + Ok(WorkerAggregateStats { + active_worker_count, + actions_per_sec: format!("{:.1}", actions_per_sec), + total_in_flight, + total_queue_depth, + }) + } + + async fn worker_status_table_exists(&self) -> bool { + !self + .worker_status_updates + .lock() + .expect("worker status updates poisoned") + .is_empty() + } + + async fn schedules_table_exists(&self) -> bool { + !self + .schedules + .lock() + .expect("schedules poisoned") + .is_empty() + } + + async fn get_worker_statuses(&self, _window_minutes: i64) -> BackendResult> { + Ok(latest_worker_statuses( + &self + .worker_status_updates + .lock() + .expect("worker status updates poisoned"), + )) + } +} + +fn latest_worker_statuses(updates: &[WorkerStatusUpdate]) -> Vec { + let mut by_pool: HashMap = HashMap::new(); + for update in updates { + by_pool.insert(update.pool_id, update.clone()); + } + + let now = Utc::now(); + let mut statuses: Vec<_> = by_pool + .into_values() + .map(|status| WorkerStatus { + pool_id: status.pool_id, + active_workers: status.active_workers, + throughput_per_min: status.throughput_per_min, + actions_per_sec: status.actions_per_sec, + 
total_completed: status.total_completed, + last_action_at: status.last_action_at, + updated_at: now, + median_dequeue_ms: status.median_dequeue_ms, + median_handling_ms: status.median_handling_ms, + dispatch_queue_size: Some(status.dispatch_queue_size), + total_in_flight: Some(status.total_in_flight), + median_instance_duration_secs: status.median_instance_duration_secs, + active_instance_count: status.active_instance_count, + total_instances_completed: status.total_instances_completed, + instances_per_sec: status.instances_per_sec, + instances_per_min: status.instances_per_min, + time_series: status.time_series, + }) + .collect(); + + statuses.sort_by(|left, right| right.actions_per_sec.total_cmp(&left.actions_per_sec)); + statuses +} diff --git a/crates/backend-memory/src/worker_status_backend.rs b/crates/backend-memory/src/worker_status_backend.rs new file mode 100644 index 00000000..dbca9794 --- /dev/null +++ b/crates/backend-memory/src/worker_status_backend.rs @@ -0,0 +1,13 @@ +use waymark_worker_status_backend::{BackendResult, WorkerStatusBackend, WorkerStatusUpdate}; + +#[async_trait::async_trait] +impl WorkerStatusBackend for crate::MemoryBackend { + async fn upsert_worker_status(&self, status: &WorkerStatusUpdate) -> BackendResult<()> { + let mut stored = self + .worker_status_updates + .lock() + .expect("worker status updates poisoned"); + stored.push(status.clone()); + Ok(()) + } +} diff --git a/crates/backend-memory/src/workflow_registry_backend.rs b/crates/backend-memory/src/workflow_registry_backend.rs new file mode 100644 index 00000000..e820b5a9 --- /dev/null +++ b/crates/backend-memory/src/workflow_registry_backend.rs @@ -0,0 +1,58 @@ +use uuid::Uuid; +use waymark_workflow_registry_backend::{ + BackendError, BackendResult, WorkflowRegistration, WorkflowRegistryBackend, WorkflowVersion, +}; + +#[async_trait::async_trait] +impl WorkflowRegistryBackend for crate::MemoryBackend { + async fn upsert_workflow_version( + &self, + registration: 
&WorkflowRegistration, + ) -> BackendResult { + let mut guard = self + .workflow_versions + .lock() + .expect("workflow versions poisoned"); + let key = ( + registration.workflow_name.clone(), + registration.workflow_version.clone(), + ); + if let Some((id, existing)) = guard.get(&key) { + if existing.ir_hash != registration.ir_hash { + return Err(BackendError::Message(format!( + "workflow version already exists with different IR hash: {}@{}", + registration.workflow_name, registration.workflow_version + ))); + } + return Ok(*id); + } + + let id = Uuid::new_v4(); + guard.insert(key, (id, registration.clone())); + Ok(id) + } + + async fn get_workflow_versions(&self, ids: &[Uuid]) -> BackendResult> { + if ids.is_empty() { + return Ok(Vec::new()); + } + let guard = self + .workflow_versions + .lock() + .expect("workflow versions poisoned"); + let mut versions = Vec::new(); + for (id, registration) in guard.values() { + if ids.contains(id) { + versions.push(WorkflowVersion { + id: *id, + workflow_name: registration.workflow_name.clone(), + workflow_version: registration.workflow_version.clone(), + ir_hash: registration.ir_hash.clone(), + program_proto: registration.program_proto.clone(), + concurrent: registration.concurrent, + }); + } + } + Ok(versions) + } +} diff --git a/crates/backend-postgres-migrations/Cargo.toml b/crates/backend-postgres-migrations/Cargo.toml new file mode 100644 index 00000000..f84ad14c --- /dev/null +++ b/crates/backend-postgres-migrations/Cargo.toml @@ -0,0 +1,7 @@ +[package] +name = "waymark-backend-postgres-migrations" +version = "0.1.0" +edition = "2024" + +[dependencies] +sqlx = { workspace = true, features = ["postgres", "macros", "migrate"] } diff --git a/crates/backend-postgres-migrations/build.rs b/crates/backend-postgres-migrations/build.rs new file mode 100644 index 00000000..3a8149ef --- /dev/null +++ b/crates/backend-postgres-migrations/build.rs @@ -0,0 +1,3 @@ +fn main() { + println!("cargo:rerun-if-changed=migrations"); +} diff 
--git a/crates/waymark/migrations/0001_init.sql b/crates/backend-postgres-migrations/migrations/0001_init.sql similarity index 100% rename from crates/waymark/migrations/0001_init.sql rename to crates/backend-postgres-migrations/migrations/0001_init.sql diff --git a/crates/waymark/migrations/0002_runner_actions_done_execution_id.sql b/crates/backend-postgres-migrations/migrations/0002_runner_actions_done_execution_id.sql similarity index 100% rename from crates/waymark/migrations/0002_runner_actions_done_execution_id.sql rename to crates/backend-postgres-migrations/migrations/0002_runner_actions_done_execution_id.sql diff --git a/crates/waymark/migrations/0003_instance_locks.sql b/crates/backend-postgres-migrations/migrations/0003_instance_locks.sql similarity index 100% rename from crates/waymark/migrations/0003_instance_locks.sql rename to crates/backend-postgres-migrations/migrations/0003_instance_locks.sql diff --git a/crates/waymark/migrations/0004_workflow_versions.sql b/crates/backend-postgres-migrations/migrations/0004_workflow_versions.sql similarity index 100% rename from crates/waymark/migrations/0004_workflow_versions.sql rename to crates/backend-postgres-migrations/migrations/0004_workflow_versions.sql diff --git a/crates/waymark/migrations/0005_runner_instances_workflow_version_id.sql b/crates/backend-postgres-migrations/migrations/0005_runner_instances_workflow_version_id.sql similarity index 100% rename from crates/waymark/migrations/0005_runner_instances_workflow_version_id.sql rename to crates/backend-postgres-migrations/migrations/0005_runner_instances_workflow_version_id.sql diff --git a/crates/waymark/migrations/0006_drop_unused_runner_tables.sql b/crates/backend-postgres-migrations/migrations/0006_drop_unused_runner_tables.sql similarity index 100% rename from crates/waymark/migrations/0006_drop_unused_runner_tables.sql rename to crates/backend-postgres-migrations/migrations/0006_drop_unused_runner_tables.sql diff --git 
a/crates/waymark/migrations/0007_runner_instances_schedule_id.sql b/crates/backend-postgres-migrations/migrations/0007_runner_instances_schedule_id.sql similarity index 100% rename from crates/waymark/migrations/0007_runner_instances_schedule_id.sql rename to crates/backend-postgres-migrations/migrations/0007_runner_instances_schedule_id.sql diff --git a/crates/waymark/migrations/0008_runner_actions_done_timing.sql b/crates/backend-postgres-migrations/migrations/0008_runner_actions_done_timing.sql similarity index 100% rename from crates/waymark/migrations/0008_runner_actions_done_timing.sql rename to crates/backend-postgres-migrations/migrations/0008_runner_actions_done_timing.sql diff --git a/crates/waymark/migrations/0009_instance_search_columns.sql b/crates/backend-postgres-migrations/migrations/0009_instance_search_columns.sql similarity index 100% rename from crates/waymark/migrations/0009_instance_search_columns.sql rename to crates/backend-postgres-migrations/migrations/0009_instance_search_columns.sql diff --git a/crates/backend-postgres-migrations/src/lib.rs b/crates/backend-postgres-migrations/src/lib.rs new file mode 100644 index 00000000..82495aeb --- /dev/null +++ b/crates/backend-postgres-migrations/src/lib.rs @@ -0,0 +1,8 @@ +//! Migrations for the postgres backend. + +use sqlx::PgPool; + +/// Run the embedded SQLx migrations. 
+pub async fn run(pool: &PgPool) -> Result<(), sqlx::migrate::MigrateError> { + sqlx::migrate!().run(pool).await +} diff --git a/crates/backend-postgres/Cargo.toml b/crates/backend-postgres/Cargo.toml new file mode 100644 index 00000000..f61f582f --- /dev/null +++ b/crates/backend-postgres/Cargo.toml @@ -0,0 +1,39 @@ +[package] +name = "waymark-backend-postgres" +version = "0.1.0" +edition = "2024" + +[dependencies] +async-trait = { workspace = true } +chrono = { workspace = true } +rmp-serde = { workspace = true } +serde = { workspace = true, features = ["derive"] } +serde_json = { workspace = true } +sqlx = { workspace = true, features = ["uuid", "chrono"] } +tokio = { workspace = true, features = ["macros"] } +tracing = { workspace = true } +uuid = { workspace = true } +waymark-backend-postgres-migrations = { workspace = true } +waymark-backends-core = { workspace = true } +waymark-core-backend = { workspace = true } +waymark-dag = { workspace = true } +waymark-proto = { workspace = true } +waymark-garbage-collector-backend = { workspace = true } +waymark-observability = { workspace = true } +waymark-runner = { workspace = true } +waymark-runner-state = { workspace = true } +waymark-scheduler-backend = { workspace = true } +waymark-scheduler-core = { workspace = true } +waymark-webapp-backend = { workspace = true } +waymark-webapp-core = { workspace = true } +waymark-worker-status-backend = { workspace = true } +waymark-workflow-registry-backend = { workspace = true } +prost = { workspace = true } + +[dev-dependencies] +serial_test = { workspace = true } +waymark-test-support = { workspace = true } +waymark-ir-parser = { workspace = true } + +[features] +trace = [] diff --git a/crates/waymark/src/backends/postgres/core.rs b/crates/backend-postgres/src/core.rs similarity index 98% rename from crates/waymark/src/backends/postgres/core.rs rename to crates/backend-postgres/src/core.rs index c827b4c0..bc46747e 100644 --- a/crates/waymark/src/backends/postgres/core.rs 
+++ b/crates/backend-postgres/src/core.rs @@ -4,18 +4,19 @@ use std::time::Duration as StdDuration; use chrono::{DateTime, Utc}; use sqlx::{Postgres, QueryBuilder, Row}; -use tonic::async_trait; use tracing::warn; use uuid::Uuid; +use waymark_garbage_collector_backend::{GarbageCollectionResult, GarbageCollectorBackend}; +use waymark_scheduler_backend::{BackendError, BackendResult}; +use waymark_worker_status_backend::{WorkerStatusBackend, WorkerStatusUpdate}; use super::PostgresBackend; -use crate::backends::base::{ - ActionDone, BackendError, BackendResult, CoreBackend, GarbageCollectionResult, - GarbageCollectorBackend, GraphUpdate, InstanceDone, InstanceLockStatus, LockClaim, - QueuedInstance, QueuedInstanceBatch, WorkerStatusBackend, WorkerStatusUpdate, +use waymark_core_backend::{ + ActionDone, GraphUpdate, InstanceDone, InstanceLockStatus, LockClaim, QueuedInstance, + QueuedInstanceBatch, }; -use crate::observability::obs; -use crate::waymark_core::runner::state::RunnerState; +use waymark_observability::obs; +use waymark_runner_state::RunnerState; const INSTANCE_STATUS_QUEUED: &str = "queued"; const INSTANCE_STATUS_RUNNING: &str = "running"; @@ -59,7 +60,7 @@ fn is_transient_sqlstate(code: &str) -> bool { fn is_transient_backend_error(err: &BackendError) -> bool { match err { - BackendError::Sqlx(sqlx::Error::Database(db_err)) => { + BackendError::Inner(sqlx::Error::Database(db_err)) => { db_err.code().as_deref().is_some_and(is_transient_sqlstate) } // Fallback for cases where sqlstate is not preserved in wrapping. 
@@ -842,16 +843,16 @@ impl PostgresBackend { } } -#[async_trait] -impl CoreBackend for PostgresBackend { - fn clone_box(&self) -> Box { +#[async_trait::async_trait] +impl waymark_core_backend::CoreBackend for PostgresBackend { + fn clone_box(&self) -> Box { Box::new(self.clone()) } async fn save_graphs( &self, - claim: LockClaim, - graphs: &[GraphUpdate], + claim: waymark_core_backend::LockClaim, + graphs: &[waymark_core_backend::GraphUpdate], ) -> BackendResult> { self.save_graphs_impl(claim, graphs).await } @@ -935,7 +936,10 @@ impl CoreBackend for PostgresBackend { Ok(()) } - async fn queue_instances(&self, instances: &[QueuedInstance]) -> BackendResult<()> { + async fn queue_instances( + &self, + instances: &[waymark_core_backend::QueuedInstance], + ) -> BackendResult<()> { PostgresBackend::queue_instances(self, instances).await } } @@ -988,7 +992,7 @@ impl PostgresBackend { } } -#[async_trait] +#[async_trait::async_trait] impl GarbageCollectorBackend for PostgresBackend { async fn collect_done_instances( &self, @@ -999,7 +1003,7 @@ impl GarbageCollectorBackend for PostgresBackend { } } -#[async_trait] +#[async_trait::async_trait] impl WorkerStatusBackend for PostgresBackend { async fn upsert_worker_status(&self, status: &WorkerStatusUpdate) -> BackendResult<()> { PostgresBackend::upsert_worker_status(self, status).await @@ -1017,14 +1021,13 @@ mod tests { use serial_test::serial; use sqlx::Row; use uuid::Uuid; + use waymark_core_backend::{ActionAttemptStatus, CoreBackend}; use super::super::test_helpers::setup_backend; use super::*; - use crate::backends::{ - ActionAttemptStatus, CoreBackend, GarbageCollectorBackend, WorkerStatusBackend, - }; - use crate::waymark_core::runner::state::{ActionCallSpec, ExecutionNode, NodeStatus}; + use waymark_dag::EdgeType; + use waymark_runner_state::{ActionCallSpec, ExecutionNode, NodeStatus}; fn sample_runner_state() -> RunnerState { RunnerState::new(None, None, None, false) @@ -1343,13 +1346,11 @@ mod tests { let graph = 
GraphUpdate { instance_id, nodes, - edges: std::collections::HashSet::from([ - crate::waymark_core::runner::state::ExecutionEdge { - source: execution_id, - target: execution_id, - edge_type: EdgeType::StateMachine, - }, - ]), + edges: std::collections::HashSet::from([waymark_runner_state::ExecutionEdge { + source: execution_id, + target: execution_id, + edge_type: EdgeType::StateMachine, + }]), }; let extended_claim = LockClaim { lock_uuid: claim.lock_uuid, diff --git a/crates/waymark/src/backends/postgres/mod.rs b/crates/backend-postgres/src/lib.rs similarity index 86% rename from crates/waymark/src/backends/postgres/mod.rs rename to crates/backend-postgres/src/lib.rs index 4bec275d..2b4e3821 100644 --- a/crates/waymark/src/backends/postgres/mod.rs +++ b/crates/backend-postgres/src/lib.rs @@ -11,11 +11,8 @@ use std::collections::HashMap; use std::sync::{Arc, Mutex}; use sqlx::PgPool; - -use crate::db; -use crate::observability::obs; - -use super::base::{BackendError, BackendResult}; +use waymark_backends_core::{BackendError, BackendResult}; +use waymark_observability::obs; /// Persist runner state and action results in Postgres. 
#[derive(Clone)] @@ -37,7 +34,9 @@ impl PostgresBackend { #[obs] pub async fn connect(dsn: &str) -> BackendResult { let pool = PgPool::connect(dsn).await?; - db::run_migrations(&pool).await?; + waymark_backend_postgres_migrations::run(&pool) + .await + .map_err(|err| BackendError::Message(err.to_string()))?; Ok(Self::new(pool)) } @@ -86,12 +85,12 @@ impl PostgresBackend { .clone() } - pub(super) fn count_query(counts: &Arc>>, label: &str) { + pub(crate) fn count_query(counts: &Arc>>, label: &str) { let mut guard = counts.lock().expect("query counts poisoned"); *guard.entry(label.to_string()).or_insert(0) += 1; } - pub(super) fn count_batch_size( + pub(crate) fn count_batch_size( counts: &Arc>>>, label: &str, size: usize, @@ -104,11 +103,11 @@ impl PostgresBackend { *entry.entry(size).or_insert(0) += 1; } - pub(super) fn serialize(value: &T) -> Result, BackendError> { + pub(crate) fn serialize(value: &T) -> Result, BackendError> { rmp_serde::to_vec_named(value).map_err(|e| BackendError::Message(e.to_string())) } - pub(super) fn deserialize( + pub(crate) fn deserialize( payload: &[u8], ) -> Result { rmp_serde::from_slice(payload).map_err(|e| BackendError::Message(e.to_string())) diff --git a/crates/waymark/src/backends/postgres/registry.rs b/crates/backend-postgres/src/registry.rs similarity index 94% rename from crates/waymark/src/backends/postgres/registry.rs rename to crates/backend-postgres/src/registry.rs index c8fb5a68..94fc1e2c 100644 --- a/crates/waymark/src/backends/postgres/registry.rs +++ b/crates/backend-postgres/src/registry.rs @@ -1,13 +1,13 @@ use sqlx::Row; -use tonic::async_trait; use uuid::Uuid; +use waymark_backends_core::{BackendError, BackendResult}; +use waymark_workflow_registry_backend::{ + WorkflowRegistration, WorkflowRegistryBackend, WorkflowVersion, +}; use super::PostgresBackend; -use crate::backends::base::{ - BackendError, BackendResult, WorkflowRegistration, WorkflowRegistryBackend, WorkflowVersion, -}; -#[async_trait] 
+#[async_trait::async_trait] impl WorkflowRegistryBackend for PostgresBackend { async fn upsert_workflow_version( &self, @@ -95,7 +95,7 @@ mod tests { use serial_test::serial; use super::super::test_helpers::setup_backend; - use crate::backends::{WorkflowRegistration, WorkflowRegistryBackend}; + use waymark_workflow_registry_backend::{WorkflowRegistration, WorkflowRegistryBackend}; fn sample_registration(version: &str) -> WorkflowRegistration { WorkflowRegistration { diff --git a/crates/waymark/src/backends/postgres/scheduler.rs b/crates/backend-postgres/src/scheduler.rs similarity index 97% rename from crates/waymark/src/backends/postgres/scheduler.rs rename to crates/backend-postgres/src/scheduler.rs index 5eb00735..e47f2114 100644 --- a/crates/waymark/src/backends/postgres/scheduler.rs +++ b/crates/backend-postgres/src/scheduler.rs @@ -1,15 +1,14 @@ use chrono::{DateTime, Utc}; use sqlx::Row; -use tonic::async_trait; use uuid::Uuid; +use waymark_backends_core::{BackendError, BackendResult}; +use waymark_scheduler_backend::SchedulerBackend; -use super::PostgresBackend; -use crate::backends::base::{BackendError, BackendResult, SchedulerBackend}; -use crate::scheduler::compute_next_run; -use crate::scheduler::{CreateScheduleParams, ScheduleId, ScheduleType, WorkflowSchedule}; +use waymark_scheduler_core::compute_next_run; +use waymark_scheduler_core::{CreateScheduleParams, ScheduleId, ScheduleType, WorkflowSchedule}; -#[async_trait] -impl SchedulerBackend for PostgresBackend { +#[async_trait::async_trait] +impl SchedulerBackend for crate::PostgresBackend { async fn upsert_schedule(&self, params: &CreateScheduleParams) -> BackendResult { let next_run_at = compute_next_run( params.schedule_type, @@ -307,10 +306,12 @@ mod tests { use sqlx::Row; use uuid::Uuid; + use crate::PostgresBackend; + use super::super::test_helpers::setup_backend; use super::*; - use crate::backends::SchedulerBackend; - use crate::scheduler::CreateScheduleParams; + use 
waymark_scheduler_backend::SchedulerBackend; + use waymark_scheduler_core::CreateScheduleParams; fn sample_params(schedule_name: &str) -> CreateScheduleParams { CreateScheduleParams { diff --git a/crates/waymark/src/backends/postgres/test_helpers.rs b/crates/backend-postgres/src/test_helpers.rs similarity index 93% rename from crates/waymark/src/backends/postgres/test_helpers.rs rename to crates/backend-postgres/src/test_helpers.rs index dd03cd7f..addb1ad4 100644 --- a/crates/waymark/src/backends/postgres/test_helpers.rs +++ b/crates/backend-postgres/src/test_helpers.rs @@ -1,7 +1,7 @@ use sqlx::PgPool; use super::PostgresBackend; -use crate::test_support::postgres_setup; +use waymark_test_support::postgres_setup; pub(super) async fn setup_backend() -> PostgresBackend { let pool = postgres_setup().await; diff --git a/crates/waymark/src/backends/postgres/webapp.rs b/crates/backend-postgres/src/webapp.rs similarity index 98% rename from crates/waymark/src/backends/postgres/webapp.rs rename to crates/backend-postgres/src/webapp.rs index 8f6b932f..e3f50ced 100644 --- a/crates/waymark/src/backends/postgres/webapp.rs +++ b/crates/backend-postgres/src/webapp.rs @@ -4,20 +4,22 @@ use chrono::{DateTime, Utc}; use prost::Message; use serde_json::Value; use sqlx::{Postgres, QueryBuilder, Row}; -use tonic::async_trait; + use uuid::Uuid; -use super::PostgresBackend; -use crate::backends::base::{BackendError, BackendResult, GraphUpdate, WebappBackend}; -use crate::messages::ast as ir; -use crate::waymark_core::runner::state::{ActionCallSpec, ExecutionNode, NodeStatus}; -use crate::waymark_core::runner::{RunnerState, ValueExpr, format_value, replay_action_kwargs}; -use crate::webapp::{ +use waymark_backends_core::{BackendError, BackendResult}; +use waymark_core_backend::GraphUpdate; +use waymark_dag::{DAGNode, EdgeType, convert_to_dag}; +use waymark_proto::ast as ir; +use waymark_runner::replay_action_kwargs; +use waymark_runner_state::{ + ActionCallSpec, ExecutionNode, 
NodeStatus, RunnerState, format_value, value_visitor::ValueExpr, +}; +use waymark_webapp_core::{ ExecutionEdgeView, ExecutionGraphView, ExecutionNodeView, InstanceDetail, InstanceStatus, InstanceSummary, ScheduleDetail, ScheduleInvocationSummary, ScheduleSummary, TimelineEntry, WorkerActionRow, WorkerAggregateStats, WorkerStatus, }; -use waymark_dag::{DAGNode, EdgeType, convert_to_dag}; const INSTANCE_STATUS_FALLBACK_SQL: &str = r#" CASE @@ -263,8 +265,8 @@ fn parse_instance_status(status: &str) -> Option { } } -#[async_trait] -impl WebappBackend for PostgresBackend { +#[async_trait::async_trait] +impl waymark_webapp_backend::WebappBackend for crate::PostgresBackend { async fn count_instances(&self, search: Option<&str>) -> BackendResult { let mut builder: QueryBuilder = QueryBuilder::new( r#" @@ -1407,20 +1409,23 @@ mod tests { use prost::Message; use serial_test::serial; use uuid::Uuid; + use waymark_scheduler_backend::SchedulerBackend; + use waymark_webapp_backend::WebappBackend; + use waymark_worker_status_backend::{WorkerStatusBackend, WorkerStatusUpdate}; + use waymark_workflow_registry_backend::{WorkflowRegistration, WorkflowRegistryBackend}; + + use crate::PostgresBackend; use super::super::test_helpers::setup_backend; use super::*; - use crate::backends::{ - SchedulerBackend, WebappBackend, WorkerStatusBackend, WorkerStatusUpdate, - WorkflowRegistration, WorkflowRegistryBackend, - }; - use crate::scheduler::{CreateScheduleParams, ScheduleType}; - use crate::waymark_core::ir_parser::parse_program; - use crate::waymark_core::runner::ValueExpr; - use crate::waymark_core::runner::state::{ + + use waymark_dag::EdgeType; + use waymark_ir_parser::parse_program; + use waymark_runner_state::{ ActionCallSpec, ExecutionEdge, ExecutionNode, LiteralValue, NodeStatus, + value_visitor::ValueExpr, }; - use waymark_dag::EdgeType; + use waymark_scheduler_core::{CreateScheduleParams, ScheduleType}; #[test] fn format_extracted_inputs_happy_path() { diff --git 
a/crates/backends-core/Cargo.toml b/crates/backends-core/Cargo.toml new file mode 100644 index 00000000..194062c0 --- /dev/null +++ b/crates/backends-core/Cargo.toml @@ -0,0 +1,15 @@ +[package] +name = "waymark-backends-core" +version = "0.1.0" +edition = "2024" + +[dependencies] +serde_json = { workspace = true } +thiserror = { workspace = true } +sqlx = { workspace = true, optional = true } + +[features] +default = ["sqlx-error"] + +# TODO: this has to be abstracted away since not all backends will use sqlx. +sqlx-error = ["dep:sqlx"] diff --git a/crates/backends-core/src/lib.rs b/crates/backends-core/src/lib.rs new file mode 100644 index 00000000..ee49d385 --- /dev/null +++ b/crates/backends-core/src/lib.rs @@ -0,0 +1,36 @@ +//! Core primitives for various waymark subsystem backends. + +/// The common backend error. +/// +/// TODO: move away from a shared notion of backend error to use concrete error +/// type per-operation (rather than per-subsystem or per-crate). +#[derive(Debug, thiserror::Error)] +pub enum BackendError { + #[error("{0}")] + Message(String), + + #[error(transparent)] + Inner(Inner), + + #[error(transparent)] + Serialization(serde_json::Error), +} + +#[cfg(feature = "sqlx-error")] +pub type InnerError = sqlx::Error; + +#[cfg(not(feature = "sqlx-error"))] +pub type InnerError = (); + +/// Utility type alias for backend results. +/// +/// TODO: move away from the single-`Result` type aliases as we want to vary +/// errors per-call. 
+pub type BackendResult = Result>; + +#[cfg(feature = "sqlx-error")] +impl From for BackendError { + fn from(value: sqlx::Error) -> Self { + Self::Inner(value) + } +} diff --git a/crates/core-backend/Cargo.toml b/crates/core-backend/Cargo.toml new file mode 100644 index 00000000..da2aa394 --- /dev/null +++ b/crates/core-backend/Cargo.toml @@ -0,0 +1,14 @@ +[package] +name = "waymark-core-backend" +version = "0.1.0" +edition = "2024" + +[dependencies] +async-trait = { workspace = true } +uuid = { workspace = true } +serde = { workspace = true, features = ["derive"] } +serde_json = { workspace = true } +chrono = { workspace = true } +waymark-runner-state = { workspace = true } +waymark-dag = { workspace = true } +waymark-backends-core = { workspace = true } diff --git a/crates/core-backend/src/data.rs b/crates/core-backend/src/data.rs new file mode 100644 index 00000000..d9320e71 --- /dev/null +++ b/crates/core-backend/src/data.rs @@ -0,0 +1,150 @@ +// The models that we use for our backends are similar to the ones that we +// have specified in our database/Postgres backend, but not 1:1. It's better for +// us to internally convert within the given backend + +use std::{ + collections::{HashMap, HashSet}, + sync::Arc, +}; + +use chrono::{DateTime, Utc}; +use serde::{Deserialize, Serialize}; +use uuid::Uuid; +use waymark_dag::DAG; +use waymark_runner_state::{ExecutionEdge, ExecutionNode, NodeStatus, RunnerState}; + +#[derive(Clone, Debug, Serialize, Deserialize)] +/// Queued instance payload for the run loop. 
+pub struct QueuedInstance { + pub workflow_version_id: Uuid, + #[serde(default)] + pub schedule_id: Option, + #[serde(skip, default)] + pub dag: Option>, + pub entry_node: Uuid, + pub state: Option, + #[serde( + default = "default_action_results", + deserialize_with = "deserialize_action_results" + )] + pub action_results: HashMap, + #[serde(default = "default_instance_id")] + pub instance_id: Uuid, + #[serde(default)] + pub scheduled_at: Option>, +} + +#[derive(Clone, Debug)] +/// Result payload for queued instance polling. +pub struct QueuedInstanceBatch { + pub instances: Vec, +} + +#[derive(Clone, Debug)] +/// Lock claim settings for owned instances. +pub struct LockClaim { + pub lock_uuid: Uuid, + pub lock_expires_at: DateTime, +} + +#[derive(Clone, Debug)] +/// Current lock status for an instance. +pub struct InstanceLockStatus { + pub instance_id: Uuid, + pub lock_uuid: Option, + pub lock_expires_at: Option>, +} + +#[derive(Clone, Debug, Serialize, Deserialize)] +/// Completed instance payload with result or exception. +pub struct InstanceDone { + pub executor_id: Uuid, + pub entry_node: Uuid, + pub result: Option, + pub error: Option, +} + +#[derive(Clone, Debug, Serialize, Deserialize)] +/// Batch payload representing an updated execution graph snapshot. +/// +/// This intentionally stores only runtime nodes and edges (no DAG template or +/// derived caches) so persistence stays lightweight. 
+pub struct GraphUpdate { + pub instance_id: Uuid, + pub nodes: HashMap, + pub edges: HashSet, +} + +impl GraphUpdate { + pub fn from_state(instance_id: Uuid, state: &RunnerState) -> Self { + Self { + instance_id, + nodes: state.nodes.clone(), + edges: state.edges.clone(), + } + } + + pub fn next_scheduled_at(&self) -> DateTime { + let mut next: Option> = None; + for node in self.nodes.values() { + if matches!(node.status, NodeStatus::Completed | NodeStatus::Failed) { + continue; + } + if let Some(scheduled_at) = node.scheduled_at { + next = Some(match next { + Some(existing) => existing.min(scheduled_at), + None => scheduled_at, + }); + } + } + next.unwrap_or_else(Utc::now) + } +} + +#[derive(Clone, Debug, Serialize, Deserialize)] +/// Batch payload representing a finished action attempt (success or failure). +pub struct ActionDone { + pub execution_id: Uuid, + pub attempt: i32, + pub status: ActionAttemptStatus, + pub started_at: Option>, + pub completed_at: Option>, + pub duration_ms: Option, + pub result: serde_json::Value, +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum ActionAttemptStatus { + Completed, + Failed, + TimedOut, +} + +impl std::fmt::Display for ActionAttemptStatus { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::Completed => write!(f, "completed"), + Self::Failed => write!(f, "failed"), + Self::TimedOut => write!(f, "timed_out"), + } + } +} + +fn default_instance_id() -> Uuid { + Uuid::new_v4() +} + +fn default_action_results() -> HashMap { + HashMap::new() +} + +fn deserialize_action_results<'de, D>( + deserializer: D, +) -> Result, D::Error> +where + D: serde::Deserializer<'de>, +{ + let value = Option::>::deserialize(deserializer)?; + Ok(value.unwrap_or_default()) +} diff --git a/crates/core-backend/src/lib.rs b/crates/core-backend/src/lib.rs new file mode 100644 index 00000000..5e876b29 --- /dev/null +++ 
b/crates/core-backend/src/lib.rs @@ -0,0 +1,58 @@ +//! Core backend traits for waymark. + +mod data; + +use uuid::Uuid; + +use waymark_backends_core::BackendResult; + +pub use self::data::*; + +/// Abstract persistence backend for runner state. +#[async_trait::async_trait] +pub trait CoreBackend: Send + Sync { + fn clone_box(&self) -> Box; + + /// Persist updated execution graphs. + async fn save_graphs( + &self, + claim: LockClaim, + graphs: &[GraphUpdate], + ) -> BackendResult>; + + /// Persist finished action attempts (success or failure). + async fn save_actions_done(&self, actions: &[ActionDone]) -> BackendResult<()>; + + /// Return up to size queued instances without blocking. + async fn get_queued_instances( + &self, + size: usize, + claim: LockClaim, + ) -> BackendResult; + + /// Refresh lock expiry for owned instances. + async fn refresh_instance_locks( + &self, + claim: LockClaim, + instance_ids: &[Uuid], + ) -> BackendResult>; + + /// Release instance locks when evicting from memory. + async fn release_instance_locks( + &self, + lock_uuid: Uuid, + instance_ids: &[Uuid], + ) -> BackendResult<()>; + + /// Persist completed workflow instances. + async fn save_instances_done(&self, instances: &[InstanceDone]) -> BackendResult<()>; + + /// Insert queued instances for run-loop consumption. 
+ async fn queue_instances(&self, instances: &[QueuedInstance]) -> BackendResult<()>; +} + +impl Clone for Box { + fn clone(&self) -> Self { + self.clone_box() + } +} diff --git a/crates/dag/Cargo.toml b/crates/dag/Cargo.toml index 54abfd13..a8bda894 100644 --- a/crates/dag/Cargo.toml +++ b/crates/dag/Cargo.toml @@ -11,4 +11,4 @@ uuid = { workspace = true, features = ["serde", "v4"] } waymark-proto = { workspace = true, features = ["serde"] } [dev-dependencies] -waymark = { workspace = true } +waymark-ir-parser = { workspace = true } diff --git a/crates/dag/src/builder/test_helpers.rs b/crates/dag/src/builder/test_helpers.rs index ed33ca00..0c9811c1 100644 --- a/crates/dag/src/builder/test_helpers.rs +++ b/crates/dag/src/builder/test_helpers.rs @@ -1,5 +1,5 @@ use crate::{DAG, DAGConverter, convert_to_dag}; -use waymark::waymark_core::ir_parser::parse_program; +use waymark_ir_parser::parse_program; use waymark_proto::ast as ir; pub(super) fn dedent(source: &str) -> String { diff --git a/crates/dag/src/validate.rs b/crates/dag/src/validate.rs index b6aa67e1..0e48504e 100644 --- a/crates/dag/src/validate.rs +++ b/crates/dag/src/validate.rs @@ -361,7 +361,7 @@ fn collect_expr_variables(expr: &ir::Expr, vars: &mut HashSet) { mod tests { use super::validate_dag; use crate::convert_to_dag; - use waymark::waymark_core::ir_parser::parse_program; + use waymark_ir_parser::parse_program; #[test] fn validate_dag_rejects_unresolved_variable_reference() { diff --git a/crates/fuzzer/Cargo.toml b/crates/fuzzer/Cargo.toml index e31f9970..8c7c039d 100644 --- a/crates/fuzzer/Cargo.toml +++ b/crates/fuzzer/Cargo.toml @@ -14,3 +14,8 @@ uuid = { workspace = true, features = ["serde", "v4"] } tokio = { workspace = true } waymark = { workspace = true } waymark-dag = { workspace = true } +waymark-ir-parser = { workspace = true } +waymark-runner-state = { workspace = true } +waymark-backend-memory = { workspace = true } +waymark-core-backend = { workspace = true } 
+waymark-workflow-registry-backend = { workspace = true } diff --git a/crates/fuzzer/src/harness.rs b/crates/fuzzer/src/harness.rs index 242d2924..2bec4043 100644 --- a/crates/fuzzer/src/harness.rs +++ b/crates/fuzzer/src/harness.rs @@ -9,17 +9,17 @@ use prost::Message; use serde_json::Value; use sha2::{Digest, Sha256}; use uuid::Uuid; +use waymark_backend_memory::MemoryBackend; +use waymark_core_backend::QueuedInstance; +use waymark_workflow_registry_backend::{WorkflowRegistration, WorkflowRegistryBackend as _}; use super::generator::GeneratedCase; -use waymark::backends::{ - MemoryBackend, QueuedInstance, WorkflowRegistration, WorkflowRegistryBackend, -}; use waymark::messages::ast as ir; -use waymark::waymark_core::ir_parser::parse_program; use waymark::waymark_core::runloop::{RunLoop, RunLoopSupervisorConfig}; -use waymark::waymark_core::runner::RunnerState; use waymark::workers::{ActionCallable, InlineWorkerPool, WorkerPoolError}; use waymark_dag::convert_to_dag; +use waymark_ir_parser::parse_program; +use waymark_runner_state::RunnerState; pub async fn run_case(case_index: usize, case: &GeneratedCase) -> Result<()> { let program = parse_program(case.source.trim()).map_err(|err| { diff --git a/crates/garbage-collector-backend/Cargo.toml b/crates/garbage-collector-backend/Cargo.toml new file mode 100644 index 00000000..e1e4f300 --- /dev/null +++ b/crates/garbage-collector-backend/Cargo.toml @@ -0,0 +1,9 @@ +[package] +name = "waymark-garbage-collector-backend" +version = "0.1.0" +edition = "2024" + +[dependencies] +async-trait = { workspace = true } +chrono = { workspace = true } +waymark-backends-core = { workspace = true } diff --git a/crates/garbage-collector-backend/src/lib.rs b/crates/garbage-collector-backend/src/lib.rs new file mode 100644 index 00000000..af8badff --- /dev/null +++ b/crates/garbage-collector-backend/src/lib.rs @@ -0,0 +1,20 @@ +use chrono::{DateTime, Utc}; + +use waymark_backends_core::BackendResult; + +#[derive(Clone, Copy, Debug, 
Default)] +/// Summary of a garbage collection sweep. +pub struct GarbageCollectionResult { + pub deleted_instances: usize, + pub deleted_actions: usize, +} + +/// Backend capability for deleting old finished workflow data. +#[async_trait::async_trait] +pub trait GarbageCollectorBackend: Send + Sync { + async fn collect_done_instances( + &self, + older_than: DateTime, + limit: usize, + ) -> BackendResult; +} diff --git a/crates/integration-support/Cargo.toml b/crates/integration-support/Cargo.toml new file mode 100644 index 00000000..1e24644c --- /dev/null +++ b/crates/integration-support/Cargo.toml @@ -0,0 +1,10 @@ +[package] +name = "waymark-integration-support" +version = "0.1.0" +edition = "2024" + +[dependencies] +anyhow = { workspace = true } +sqlx = { workspace = true } +tokio = { workspace = true, features = ["process", "time", "sync"] } +waymark-backend-postgres-migrations = { workspace = true } diff --git a/crates/waymark/src/integration_support/mod.rs b/crates/integration-support/src/lib.rs similarity index 100% rename from crates/waymark/src/integration_support/mod.rs rename to crates/integration-support/src/lib.rs diff --git a/crates/waymark/src/integration_support/postgres.rs b/crates/integration-support/src/postgres.rs similarity index 96% rename from crates/waymark/src/integration_support/postgres.rs rename to crates/integration-support/src/postgres.rs index 5a876f2d..59c9caec 100644 --- a/crates/waymark/src/integration_support/postgres.rs +++ b/crates/integration-support/src/postgres.rs @@ -8,8 +8,6 @@ use sqlx::{PgPool, postgres::PgPoolOptions}; use tokio::process::Command; use tokio::sync::OnceCell; -use crate::db; - pub const LOCAL_POSTGRES_DSN: &str = "postgresql://waymark:waymark@127.0.0.1:5433/waymark"; const READY_TIMEOUT: Duration = Duration::from_secs(45); @@ -41,7 +39,7 @@ pub async fn connect_pool(dsn: &str) -> Result { async fn ensure_local_postgres_impl() -> Result<()> { if let Ok(pool) = connect_pool(LOCAL_POSTGRES_DSN).await { - 
db::run_migrations(&pool) + waymark_backend_postgres_migrations::run(&pool) .await .context("run migrations for existing local postgres")?; pool.close().await; @@ -50,7 +48,7 @@ async fn ensure_local_postgres_impl() -> Result<()> { run_compose_up().await?; let pool = wait_for_postgres(LOCAL_POSTGRES_DSN).await?; - db::run_migrations(&pool) + waymark_backend_postgres_migrations::run(&pool) .await .context("run migrations for local postgres")?; pool.close().await; diff --git a/crates/ir-parser/Cargo.toml b/crates/ir-parser/Cargo.toml new file mode 100644 index 00000000..fee9b094 --- /dev/null +++ b/crates/ir-parser/Cargo.toml @@ -0,0 +1,8 @@ +[package] +name = "waymark-ir-parser" +version = "0.1.0" +edition = "2024" + +[dependencies] +waymark-proto = { workspace = true } +regex = { workspace = true } diff --git a/crates/waymark/src/waymark_core/ir_parser.rs b/crates/ir-parser/src/lib.rs similarity index 99% rename from crates/waymark/src/waymark_core/ir_parser.rs rename to crates/ir-parser/src/lib.rs index a3f2bbf7..1af43324 100644 --- a/crates/waymark/src/waymark_core/ir_parser.rs +++ b/crates/ir-parser/src/lib.rs @@ -2,7 +2,7 @@ use std::fmt; -use crate::messages::ast as ir; +use waymark_proto::ast as ir; /// Raised when parsing the IR source representation fails. 
#[derive(Debug, Clone)] diff --git a/crates/observability-macros/src/lib.rs b/crates/observability-macros/src/lib.rs index 9fc1df7b..e10c19ec 100644 --- a/crates/observability-macros/src/lib.rs +++ b/crates/observability-macros/src/lib.rs @@ -6,10 +6,10 @@ use syn::{ItemFn, parse_macro_input}; pub fn obs(args: TokenStream, input: TokenStream) -> TokenStream { let mut item = parse_macro_input!(input as ItemFn); let attr = if args.is_empty() { - syn::parse_quote!(#[cfg_attr(feature = "trace", tracing::instrument(skip_all))]) + syn::parse_quote!(#[cfg_attr(feature = "trace", ::waymark_observability::__inner::tracing::instrument(skip_all))]) } else { let args = proc_macro2::TokenStream::from(args); - syn::parse_quote!(#[cfg_attr(feature = "trace", tracing::instrument(#args))]) + syn::parse_quote!(#[cfg_attr(feature = "trace", ::waymark_observability::__inner::tracing::instrument(#args))]) }; item.attrs.push(attr); TokenStream::from(quote!(#item)) diff --git a/crates/observability/Cargo.toml b/crates/observability/Cargo.toml new file mode 100644 index 00000000..bc27b66c --- /dev/null +++ b/crates/observability/Cargo.toml @@ -0,0 +1,8 @@ +[package] +name = "waymark-observability" +version = "0.1.0" +edition = "2024" + +[dependencies] +waymark-observability-macros = { workspace = true } +tracing = { workspace = true } diff --git a/crates/observability/src/lib.rs b/crates/observability/src/lib.rs new file mode 100644 index 00000000..d2fa50f6 --- /dev/null +++ b/crates/observability/src/lib.rs @@ -0,0 +1,8 @@ +pub use waymark_observability_macros::obs; + +#[doc(hidden)] +pub mod __inner { + pub mod tracing { + pub use tracing::instrument; + } +} diff --git a/crates/proto/Cargo.toml b/crates/proto/Cargo.toml index 277e7df3..79ea21bf 100644 --- a/crates/proto/Cargo.toml +++ b/crates/proto/Cargo.toml @@ -3,6 +3,9 @@ name = "waymark-proto" version = "0.1.0" edition = "2024" +[package.metadata.cargo-shear] +ignored = ["prost"] + [dependencies] prost = "0.12" prost-types = "0.12" 
diff --git a/crates/runner-state/Cargo.toml b/crates/runner-state/Cargo.toml new file mode 100644 index 00000000..6a64d994 --- /dev/null +++ b/crates/runner-state/Cargo.toml @@ -0,0 +1,16 @@ +[package] +name = "waymark-runner-state" +version = "0.1.0" +edition = "2024" + +[dependencies] +chrono = { workspace = true, features = ["serde", "clock"] } +serde = { workspace = true, features = ["derive"] } +serde_json = { workspace = true } +thiserror = { workspace = true } +uuid = { workspace = true } +waymark-dag = { workspace = true } +waymark-proto = { workspace = true } + +[features] +trace = [] diff --git a/crates/runner-state/src/lib.rs b/crates/runner-state/src/lib.rs new file mode 100644 index 00000000..5c7ae36b --- /dev/null +++ b/crates/runner-state/src/lib.rs @@ -0,0 +1,5 @@ +mod state; +mod util; +pub mod value_visitor; + +pub use self::state::*; diff --git a/crates/waymark/src/waymark_core/runner/state.rs b/crates/runner-state/src/state.rs similarity index 99% rename from crates/waymark/src/waymark_core/runner/state.rs rename to crates/runner-state/src/state.rs index dd9e68be..da418624 100644 --- a/crates/waymark/src/waymark_core/runner/state.rs +++ b/crates/runner-state/src/state.rs @@ -8,15 +8,13 @@ use chrono::{DateTime, Utc}; use serde::{Deserialize, Serialize}; use uuid::Uuid; -use crate::messages::ast as ir; -use crate::waymark_core::runner::expression_evaluator::is_truthy; -use crate::waymark_core::runner::value_visitor::{ - ValueExpr, collect_value_sources, resolve_value_tree, -}; +use crate::util::is_truthy; +use crate::value_visitor::{ValueExpr, collect_value_sources, resolve_value_tree}; use waymark_dag::{ ActionCallNode, AggregatorNode, AssignmentNode, DAG, DAGNode, EdgeType, FnCallNode, JoinNode, ReturnNode, SleepNode, }; +use waymark_proto::ast as ir; /// Raised when the runner state cannot be updated safely. 
#[derive(Debug, thiserror::Error)] @@ -335,11 +333,13 @@ impl RunnerState { state } - pub(crate) fn set_link_queued_nodes(&mut self, value: bool) { + /// TODO: make this `pub(crate)` again + pub fn set_link_queued_nodes(&mut self, value: bool) { self.link_queued_nodes = value; } - pub(crate) fn latest_assignment(&self, name: &str) -> Option { + /// TODO: make this `pub(crate)` again + pub fn latest_assignment(&self, name: &str) -> Option { self.latest_assignments.get(name).copied() } @@ -915,7 +915,9 @@ impl RunnerState { /// Example IR: /// - a, b = @pair() /// This yields ActionResultValue(node_id, result_index=0/1) for a and b. - pub(crate) fn assign_action_results( + /// + /// TODO: make this `pub(crate)` again + pub fn assign_action_results( &mut self, node: &ExecutionNode, action_name: &str, @@ -1038,7 +1040,7 @@ impl RunnerState { /// - xs = [1] /// - ys = xs + [2] /// Materialization turns ys into ListValue([1, 2]) rather than keeping xs. - pub(crate) fn materialize_value(&self, value: ValueExpr) -> ValueExpr { + pub fn materialize_value(&self, value: ValueExpr) -> ValueExpr { let resolved = resolve_value_tree(&value, &|name, seen| { self.resolve_variable_value(name, seen) }); @@ -1111,7 +1113,8 @@ impl RunnerState { assigned } - pub(crate) fn mark_latest_assignments( + /// TODO: make this `pub(crate)` again + pub fn mark_latest_assignments( &mut self, node_id: Uuid, assignments: &HashMap, @@ -1129,7 +1132,9 @@ impl RunnerState { /// Example IR: /// - total = @sum(values) /// A data-flow edge is added from the values assignment node to the action. 
- pub(crate) fn record_data_flow_from_value(&mut self, node_id: Uuid, value: &ValueExpr) { + /// + /// TODO: make this `pub(crate)` again + pub fn record_data_flow_from_value(&mut self, node_id: Uuid, value: &ValueExpr) { let source_ids = collect_value_sources(value, &|name| self.latest_assignments.get(name).copied()); self.record_data_flow_edges(node_id, &source_ids); @@ -1790,7 +1795,7 @@ fn format_literal(value: &serde_json::Value) -> String { /// /// Example IR: /// - Literal(int_value=3) -> 3 -pub(crate) fn literal_value(lit: &ir::Literal) -> serde_json::Value { +pub fn literal_value(lit: &ir::Literal) -> serde_json::Value { match lit.value.as_ref() { Some(ir::literal::Value::IntValue(value)) => serde_json::Value::Number((*value).into()), Some(ir::literal::Value::FloatValue(value)) => serde_json::Number::from_f64(*value) @@ -1892,8 +1897,8 @@ impl fmt::Display for NodeStatus { #[cfg(test)] mod tests { use super::*; - use crate::messages::ast as ir; use serde_json::Value; + use waymark_proto::ast as ir; fn action_plus_two_expr() -> ir::Expr { ir::Expr { diff --git a/crates/runner-state/src/util.rs b/crates/runner-state/src/util.rs new file mode 100644 index 00000000..20768070 --- /dev/null +++ b/crates/runner-state/src/util.rs @@ -0,0 +1,12 @@ +pub(crate) fn is_truthy(value: &serde_json::Value) -> bool { + match value { + serde_json::Value::Null => false, + serde_json::Value::Bool(value) => *value, + serde_json::Value::Number(number) => { + number.as_f64().map(|value| value != 0.0).unwrap_or(false) + } + serde_json::Value::String(value) => !value.is_empty(), + serde_json::Value::Array(values) => !values.is_empty(), + serde_json::Value::Object(map) => !map.is_empty(), + } +} diff --git a/crates/waymark/src/waymark_core/runner/value_visitor.rs b/crates/runner-state/src/value_visitor.rs similarity index 99% rename from crates/waymark/src/waymark_core/runner/value_visitor.rs rename to crates/runner-state/src/value_visitor.rs index 82f02db1..fbc7736a 100644 --- 
a/crates/waymark/src/waymark_core/runner/value_visitor.rs +++ b/crates/runner-state/src/value_visitor.rs @@ -362,7 +362,7 @@ mod tests { use uuid::Uuid; use super::*; - use crate::messages::ast as ir; + use waymark_proto::ast as ir; fn literal_int(value: i64) -> ValueExpr { ValueExpr::Literal(LiteralValue { diff --git a/crates/runner/Cargo.toml b/crates/runner/Cargo.toml new file mode 100644 index 00000000..115de256 --- /dev/null +++ b/crates/runner/Cargo.toml @@ -0,0 +1,24 @@ +[package] +name = "waymark-runner" +version = "0.1.0" +edition = "2024" + +[dependencies] +chrono = { workspace = true, features = ["serde"] } +rustc-hash = { workspace = true } +serde_json = { workspace = true } +thiserror = { workspace = true } +uuid = { workspace = true } +waymark-dag = { workspace = true } +waymark-proto = { workspace = true } +waymark-observability = { workspace = true } +waymark-runner-state = { workspace = true } +waymark-core-backend = { workspace = true } +tracing = { workspace = true } + +[dev-dependencies] +waymark-ir-parser = { workspace = true } +waymark-backend-memory = { workspace = true } + +[features] +trace = [] diff --git a/crates/waymark/src/waymark_core/runner/executor.rs b/crates/runner/src/executor.rs similarity index 98% rename from crates/waymark/src/waymark_core/runner/executor.rs rename to crates/runner/src/executor.rs index e0572688..bae9a9c2 100644 --- a/crates/waymark/src/waymark_core/runner/executor.rs +++ b/crates/runner/src/executor.rs @@ -10,24 +10,20 @@ use rustc_hash::FxHashMap; use serde_json::Value; use uuid::Uuid; -use crate::backends::{ActionAttemptStatus, ActionDone, CoreBackend, GraphUpdate}; -use crate::messages::ast as ir; -use crate::observability::obs; -use crate::waymark_core::runner::expression_evaluator::is_exception_value; -use crate::waymark_core::runner::retry::{ - RetryDecision, RetryPolicyEvaluator, timeout_seconds_from_policies, +use crate::expression_evaluator::is_exception_value; +use crate::retry::{RetryDecision, 
RetryPolicyEvaluator, timeout_seconds_from_policies}; +use crate::synthetic_exceptions::{SyntheticExceptionType, build_synthetic_exception_value}; +use waymark_core_backend::{ActionAttemptStatus, ActionDone, CoreBackend, GraphUpdate}; +use waymark_dag::{ + ActionCallNode, AggregatorNode, DAG, DAGEdge, DagEdgeIndex, EXCEPTION_SCOPE_VAR, EdgeType, }; -use crate::waymark_core::runner::state::{ +use waymark_observability::obs; +use waymark_proto::ast as ir; +use waymark_runner_state::value_visitor::ValueExpr; +use waymark_runner_state::{ ActionCallSpec, ExecutionEdge, ExecutionNode, ExecutionNodeType, IndexValue, ListValue, LiteralValue, NodeStatus, QueueNodeParams, RunnerState, RunnerStateError, }; -use crate::waymark_core::runner::synthetic_exceptions::{ - SyntheticExceptionType, build_synthetic_exception_value, -}; -use crate::waymark_core::runner::value_visitor::ValueExpr; -use waymark_dag::{ - ActionCallNode, AggregatorNode, DAG, DAGEdge, DagEdgeIndex, EXCEPTION_SCOPE_VAR, EdgeType, -}; /// Raised when the runner executor cannot advance safely. 
#[derive(Debug, thiserror::Error)] @@ -1505,16 +1501,14 @@ mod tests { use std::collections::{HashMap, HashSet}; use std::sync::Arc; - use crate::backends::MemoryBackend; - use crate::messages::ast as ir; - use crate::waymark_core::ir_parser::parse_program; - use crate::waymark_core::runner::state::{ - ExecutionEdge, ExecutionNode, NodeStatus, RunnerState, - }; + use waymark_backend_memory::MemoryBackend; use waymark_dag::{ ActionCallNode, ActionCallParams, AggregatorNode, AssignmentNode, DAG, DAGEdge, convert_to_dag, }; + use waymark_ir_parser::parse_program; + use waymark_proto::ast as ir; + use waymark_runner_state::{ExecutionEdge, ExecutionNode, NodeStatus, RunnerState}; fn variable(name: &str) -> ir::Expr { ir::Expr { @@ -1992,16 +1986,12 @@ mod tests { rehydrated.state().ready_queue.is_empty() ); - let replay_canonical = crate::waymark_core::runner::replay_variables( - canonical.state(), - canonical.action_results(), - ) - .expect("replay canonical"); - let replay_rehydrated = crate::waymark_core::runner::replay_variables( - rehydrated.state(), - rehydrated.action_results(), - ) - .expect("replay rehydrated"); + let replay_canonical = + crate::replay_variables(canonical.state(), canonical.action_results()) + .expect("replay canonical"); + let replay_rehydrated = + crate::replay_variables(rehydrated.state(), rehydrated.action_results()) + .expect("replay rehydrated"); let mut assignment_counts: HashMap = HashMap::new(); for node in canonical.state().nodes.values() { @@ -2712,21 +2702,15 @@ fn main(input: [], output: [done]): let mut executor = RunnerExecutor::new(dag.clone(), state, action_results, None); executor.increment(&[exec1.node_id]).expect("increment"); - let orig_replay = crate::waymark_core::runner::replay_variables( - executor.state(), - executor.action_results(), - ) - .expect("replay"); + let orig_replay = + crate::replay_variables(executor.state(), executor.action_results()).expect("replay"); let (nodes_snap, edges_snap, results_snap) = 
snapshot_state(executor.state(), executor.action_results()); let rehydrated = create_rehydrated_executor(&dag, nodes_snap, edges_snap, results_snap); - let rehy_replay = crate::waymark_core::runner::replay_variables( - rehydrated.state(), - rehydrated.action_results(), - ) - .expect("replay"); + let rehy_replay = crate::replay_variables(rehydrated.state(), rehydrated.action_results()) + .expect("replay"); assert_eq!(orig_replay.variables, rehy_replay.variables); assert_eq!( rehy_replay.variables.get("doubled"), diff --git a/crates/waymark/src/waymark_core/runner/expression_evaluator.rs b/crates/runner/src/expression_evaluator.rs similarity index 98% rename from crates/waymark/src/waymark_core/runner/expression_evaluator.rs rename to crates/runner/src/expression_evaluator.rs index 96d908e2..dac989a9 100644 --- a/crates/waymark/src/waymark_core/runner/expression_evaluator.rs +++ b/crates/runner/src/expression_evaluator.rs @@ -5,15 +5,15 @@ use std::rc::Rc; use serde_json::Value; use uuid::Uuid; -use crate::messages::ast as ir; -use crate::observability::obs; -use crate::waymark_core::runner::state::{ +use waymark_dag::{DAGEdge, EdgeType}; +use waymark_observability::obs; +use waymark_proto::ast as ir; +use waymark_runner_state::{ ActionCallSpec, ActionResultValue, BinaryOpValue, DictEntryValue, DictValue, DotValue, FunctionCallValue, IndexValue, ListValue, LiteralValue, UnaryOpValue, VariableValue, literal_value, + value_visitor::{ValueExpr, ValueExprEvaluator}, }; -use crate::waymark_core::runner::value_visitor::{ValueExpr, ValueExprEvaluator}; -use waymark_dag::{DAGEdge, EdgeType}; use super::{RunnerExecutor, RunnerExecutorError}; @@ -674,15 +674,13 @@ mod tests { use uuid::Uuid; use super::*; - use crate::messages::ast as ir; - use crate::waymark_core::ir_parser::IRParser; - use crate::waymark_core::runner::RunnerState; - use crate::waymark_core::runner::state::{ + use waymark_dag::{DAG, DAGEdge}; + use waymark_ir_parser::IRParser; + use waymark_proto::ast as ir; 
+ use waymark_runner_state::{ ActionCallSpec, ActionResultValue, BinaryOpValue, FunctionCallValue, LiteralValue, - VariableValue, + RunnerState, VariableValue, value_visitor::ValueExpr, }; - use crate::waymark_core::runner::value_visitor::ValueExpr; - use waymark_dag::{DAG, DAGEdge}; fn parse_expr(source: &str) -> ir::Expr { IRParser::new(" ") @@ -801,7 +799,7 @@ mod tests { #[test] fn test_evaluate_value_expr_happy_path() { let executor = executor_with_assignment("x", literal_int(3)); - let expr = ValueExpr::BinaryOp(crate::waymark_core::runner::state::BinaryOpValue { + let expr = ValueExpr::BinaryOp(waymark_runner_state::BinaryOpValue { left: Box::new(ValueExpr::Variable(VariableValue { name: "x".to_string(), })), diff --git a/crates/waymark/src/waymark_core/runner/mod.rs b/crates/runner/src/lib.rs similarity index 53% rename from crates/waymark/src/waymark_core/runner/mod.rs rename to crates/runner/src/lib.rs index 4e7a491d..684a49a0 100644 --- a/crates/waymark/src/waymark_core/runner/mod.rs +++ b/crates/runner/src/lib.rs @@ -4,16 +4,11 @@ pub mod executor; pub mod expression_evaluator; pub mod replay; pub(crate) mod retry; -pub mod state; -pub(crate) mod synthetic_exceptions; -pub mod value_visitor; + +/// TODO: make `pub(crate)` +pub mod synthetic_exceptions; pub use executor::{ DurableUpdates, ExecutorStep, RunnerExecutor, RunnerExecutorError, SleepRequest, }; pub use replay::{ReplayError, ReplayResult, replay_action_kwargs, replay_variables}; -pub use state::{ - ActionCallSpec, ActionResultValue, ExecutionEdge, ExecutionNode, NodeStatus, RunnerState, - RunnerStateError, format_value, -}; -pub use value_visitor::ValueExpr; diff --git a/crates/waymark/src/waymark_core/runner/replay.rs b/crates/runner/src/replay.rs similarity index 98% rename from crates/waymark/src/waymark_core/runner/replay.rs rename to crates/runner/src/replay.rs index 246caace..ffb413a1 100644 --- a/crates/waymark/src/waymark_core/runner/replay.rs +++ b/crates/runner/src/replay.rs @@ -7,14 
+7,16 @@ use std::rc::Rc; use serde_json::Value; use uuid::Uuid; -use crate::messages::ast as ir; -use crate::waymark_core::runner::expression_evaluator::{ +use crate::expression_evaluator::{ add_values, compare_values, int_value, is_exception_value, is_truthy, len_of_value, numeric_op, range_from_args, value_in, }; -use crate::waymark_core::runner::state::{ActionResultValue, FunctionCallValue, RunnerState}; -use crate::waymark_core::runner::value_visitor::{ValueExpr, ValueExprEvaluator}; use waymark_dag::{EXCEPTION_SCOPE_VAR, EdgeType}; +use waymark_proto::ast as ir; +use waymark_runner_state::{ + ActionResultValue, FunctionCallValue, RunnerState, + value_visitor::{ValueExpr, ValueExprEvaluator}, +}; /// Raised when replay cannot reconstruct variable values. #[derive(Debug, thiserror::Error)] @@ -515,9 +517,8 @@ pub fn replay_action_kwargs( #[cfg(test)] mod tests { use super::*; - use crate::messages::ast as ir; - use crate::waymark_core::runner::state::{RunnerState, VariableValue}; - use crate::waymark_core::runner::value_visitor::ValueExpr; + use waymark_proto::ast as ir; + use waymark_runner_state::{RunnerState, VariableValue, value_visitor::ValueExpr}; fn action_plus_two_expr() -> ir::Expr { ir::Expr { diff --git a/crates/waymark/src/waymark_core/runner/retry.rs b/crates/runner/src/retry.rs similarity index 99% rename from crates/waymark/src/waymark_core/runner/retry.rs rename to crates/runner/src/retry.rs index e6fb4d70..a24f7a2d 100644 --- a/crates/waymark/src/waymark_core/runner/retry.rs +++ b/crates/runner/src/retry.rs @@ -1,6 +1,6 @@ //! Retry/timeout policy helpers shared by runner components. 
-use crate::messages::ast as ir; +use waymark_proto::ast as ir; #[derive(Clone, Debug)] pub(crate) struct RetryDecision { diff --git a/crates/waymark/src/waymark_core/runner/synthetic_exceptions.rs b/crates/runner/src/synthetic_exceptions.rs similarity index 91% rename from crates/waymark/src/waymark_core/runner/synthetic_exceptions.rs rename to crates/runner/src/synthetic_exceptions.rs index df89b71f..5bd2be0d 100644 --- a/crates/waymark/src/waymark_core/runner/synthetic_exceptions.rs +++ b/crates/runner/src/synthetic_exceptions.rs @@ -3,13 +3,13 @@ use serde_json::Value; #[derive(Clone, Copy, Debug, PartialEq, Eq)] -pub(crate) enum SyntheticExceptionType { +pub enum SyntheticExceptionType { ExecutorResume, ActionTimeout, } impl SyntheticExceptionType { - pub(crate) fn as_type_str(self) -> &'static str { + pub fn as_type_str(self) -> &'static str { match self { Self::ExecutorResume => "ExecutorResume", Self::ActionTimeout => "ActionTimeout", @@ -24,7 +24,7 @@ impl SyntheticExceptionType { } } - pub(crate) fn from_value(value: &Value) -> Option { + pub fn from_value(value: &Value) -> Option { let Value::Object(map) = value else { return None; }; @@ -34,7 +34,7 @@ impl SyntheticExceptionType { } } -pub(crate) fn build_synthetic_exception_value( +pub fn build_synthetic_exception_value( exception_type: SyntheticExceptionType, message: impl Into, fields: Vec<(String, Value)>, diff --git a/crates/scheduler-backend/Cargo.toml b/crates/scheduler-backend/Cargo.toml new file mode 100644 index 00000000..6af1c2bb --- /dev/null +++ b/crates/scheduler-backend/Cargo.toml @@ -0,0 +1,10 @@ +[package] +name = "waymark-scheduler-backend" +version = "0.1.0" +edition = "2024" + +[dependencies] +async-trait = { workspace = true } +uuid = { workspace = true } +waymark-backends-core = { workspace = true } +waymark-scheduler-core = { workspace = true } diff --git a/crates/scheduler-backend/src/lib.rs b/crates/scheduler-backend/src/lib.rs new file mode 100644 index 00000000..613cc4ac --- 
/dev/null +++ b/crates/scheduler-backend/src/lib.rs @@ -0,0 +1,29 @@ +use uuid::Uuid; + +pub use waymark_backends_core::{BackendError, BackendResult}; +use waymark_scheduler_core::{CreateScheduleParams, ScheduleId, WorkflowSchedule}; + +/// Backend capability for workflow schedule persistence. +#[async_trait::async_trait] +pub trait SchedulerBackend: Send + Sync { + async fn upsert_schedule(&self, params: &CreateScheduleParams) -> BackendResult; + async fn get_schedule(&self, id: ScheduleId) -> BackendResult; + async fn get_schedule_by_name( + &self, + workflow_name: &str, + schedule_name: &str, + ) -> BackendResult>; + async fn list_schedules(&self, limit: i64, offset: i64) + -> BackendResult>; + async fn count_schedules(&self) -> BackendResult; + async fn update_schedule_status(&self, id: ScheduleId, status: &str) -> BackendResult; + async fn delete_schedule(&self, id: ScheduleId) -> BackendResult; + async fn find_due_schedules(&self, limit: i32) -> BackendResult>; + async fn has_running_instance(&self, schedule_id: ScheduleId) -> BackendResult; + async fn mark_schedule_executed( + &self, + schedule_id: ScheduleId, + instance_id: Uuid, + ) -> BackendResult<()>; + async fn skip_schedule_run(&self, schedule_id: ScheduleId) -> BackendResult<()>; +} diff --git a/crates/scheduler-core/Cargo.toml b/crates/scheduler-core/Cargo.toml new file mode 100644 index 00000000..9659e878 --- /dev/null +++ b/crates/scheduler-core/Cargo.toml @@ -0,0 +1,14 @@ +[package] +name = "waymark-scheduler-core" +version = "0.1.0" +edition = "2024" + +[dependencies] +uuid = { workspace = true, features = ["serde", "v4"] } +chrono = { workspace = true, features = ["serde"] } +serde = { workspace = true, features = ["derive"] } +cron = { workspace = true } +rand = { workspace = true } + +[dev-dependencies] +chrono = { workspace = true, features = ["clock"] } diff --git a/crates/scheduler-core/src/lib.rs b/crates/scheduler-core/src/lib.rs new file mode 100644 index 00000000..02d2783b --- 
/dev/null +++ b/crates/scheduler-core/src/lib.rs @@ -0,0 +1,6 @@ +mod types; +mod utils; + +pub use self::types::*; + +pub use self::utils::*; diff --git a/crates/waymark/src/scheduler/types.rs b/crates/scheduler-core/src/types.rs similarity index 100% rename from crates/waymark/src/scheduler/types.rs rename to crates/scheduler-core/src/types.rs diff --git a/crates/waymark/src/scheduler/utils.rs b/crates/scheduler-core/src/utils.rs similarity index 100% rename from crates/waymark/src/scheduler/utils.rs rename to crates/scheduler-core/src/utils.rs diff --git a/crates/test-support/Cargo.toml b/crates/test-support/Cargo.toml new file mode 100644 index 00000000..829d2395 --- /dev/null +++ b/crates/test-support/Cargo.toml @@ -0,0 +1,8 @@ +[package] +name = "waymark-test-support" +version = "0.1.0" +edition = "2024" + +[dependencies] +sqlx = { workspace = true } +waymark-integration-support = { workspace = true } diff --git a/crates/waymark/src/test_support/mod.rs b/crates/test-support/src/lib.rs similarity index 100% rename from crates/waymark/src/test_support/mod.rs rename to crates/test-support/src/lib.rs diff --git a/crates/waymark/src/test_support/postgres.rs b/crates/test-support/src/postgres.rs similarity index 83% rename from crates/waymark/src/test_support/postgres.rs rename to crates/test-support/src/postgres.rs index 1fb9e50f..e76bf812 100644 --- a/crates/waymark/src/test_support/postgres.rs +++ b/crates/test-support/src/postgres.rs @@ -2,7 +2,7 @@ use sqlx::PgPool; -use crate::integration_support::{LOCAL_POSTGRES_DSN, connect_pool, ensure_local_postgres}; +use waymark_integration_support::{LOCAL_POSTGRES_DSN, connect_pool, ensure_local_postgres}; /// Ensure test Postgres is available and migrated, then return a pooled connection. 
pub async fn postgres_setup() -> PgPool { diff --git a/crates/waymark/Cargo.toml b/crates/waymark/Cargo.toml index d04b394a..91b817bf 100644 --- a/crates/waymark/Cargo.toml +++ b/crates/waymark/Cargo.toml @@ -17,8 +17,25 @@ name = "smoke" path = "src/bin/smoke.rs" [dependencies] -waymark-proto = { workspace = true, features = ["serde", "client", "server"] } +waymark-core-backend = { workspace = true } waymark-dag = { workspace = true } +waymark-ir-parser = { workspace = true } +waymark-observability = { workspace = true } +waymark-proto = { workspace = true, features = ["serde", "client", "server"] } +waymark-runner = { workspace = true } +waymark-runner-state = { workspace = true } +waymark-webapp-backend = { workspace = true } +waymark-webapp-core = { workspace = true } +waymark-garbage-collector-backend = { workspace = true } +waymark-scheduler-backend = { workspace = true } +waymark-scheduler-core = { workspace = true } +waymark-backends-core = { workspace = true } +waymark-integration-support = { workspace = true } +waymark-backend-postgres = { workspace = true } +waymark-backend-postgres-migrations = { workspace = true } +waymark-workflow-registry-backend = { workspace = true } +waymark-worker-status-backend = { workspace = true } +waymark-backend-memory = { workspace = true } anyhow = "1" axum = "0.8" @@ -50,7 +67,6 @@ tracing-subscriber = { version = "0.3", features = ["env-filter"] } tracing-chrome = "0.7" metrics = "0.24" regex = "1" -waymark-observability-macros = { path = "../observability-macros" } console-subscriber = { version = "0.5", optional = true } [features] @@ -58,6 +74,10 @@ trace = [] observability = ["trace", "dep:console-subscriber"] [dev-dependencies] +waymark-backend-fault-injection = { workspace = true } +waymark-backend-memory = { workspace = true } +waymark-test-support = { workspace = true } + serial_test = "2" tower = { version = "0.5", features = ["util"] } http-body-util = "0.1" diff --git a/crates/waymark/src/backends/base.rs 
b/crates/waymark/src/backends/base.rs deleted file mode 100644 index 92c17a3f..00000000 --- a/crates/waymark/src/backends/base.rs +++ /dev/null @@ -1,366 +0,0 @@ -//! Backend interfaces for persisting runner state and action results. - -use std::collections::{HashMap, HashSet}; -use std::sync::Arc; - -use chrono::{DateTime, Utc}; -use serde::{Deserialize, Deserializer, Serialize}; -use serde_json::Value; -use tonic::async_trait; -use uuid::Uuid; - -use crate::scheduler::{CreateScheduleParams, ScheduleId, WorkflowSchedule}; -use crate::waymark_core::runner::state::{ExecutionEdge, ExecutionNode, NodeStatus, RunnerState}; -use crate::webapp::{ - ExecutionGraphView, InstanceDetail, InstanceSummary, ScheduleDetail, ScheduleInvocationSummary, - ScheduleSummary, TimelineEntry, WorkerActionRow, WorkerAggregateStats, WorkerStatus, -}; -use waymark_dag::DAG; - -#[derive(Debug, thiserror::Error)] -pub enum BackendError { - #[error("{0}")] - Message(String), - #[error(transparent)] - Sqlx(#[from] sqlx::Error), - #[error(transparent)] - Serialization(#[from] serde_json::Error), -} - -pub type BackendResult = Result; - -fn default_instance_id() -> Uuid { - Uuid::new_v4() -} - -fn default_action_results() -> HashMap { - HashMap::new() -} - -fn deserialize_action_results<'de, D>(deserializer: D) -> Result, D::Error> -where - D: Deserializer<'de>, -{ - let value = Option::>::deserialize(deserializer)?; - Ok(value.unwrap_or_default()) -} - -// The models that we use for our backends are similar to the ones that we -// have specified in our database/Postgres backend, but not 1:1. It's better for -// us to internally convert within the given backend - -#[derive(Clone, Debug, Serialize, Deserialize)] -/// Queued instance payload for the run loop. 
-pub struct QueuedInstance { - pub workflow_version_id: Uuid, - #[serde(default)] - pub schedule_id: Option, - #[serde(skip, default)] - pub dag: Option>, - pub entry_node: Uuid, - pub state: Option, - #[serde( - default = "default_action_results", - deserialize_with = "deserialize_action_results" - )] - pub action_results: HashMap, - #[serde(default = "default_instance_id")] - pub instance_id: Uuid, - #[serde(default)] - pub scheduled_at: Option>, -} - -#[derive(Clone, Debug)] -/// Result payload for queued instance polling. -pub struct QueuedInstanceBatch { - pub instances: Vec, -} - -#[derive(Clone, Debug)] -/// Lock claim settings for owned instances. -pub struct LockClaim { - pub lock_uuid: Uuid, - pub lock_expires_at: DateTime, -} - -#[derive(Clone, Debug)] -/// Current lock status for an instance. -pub struct InstanceLockStatus { - pub instance_id: Uuid, - pub lock_uuid: Option, - pub lock_expires_at: Option>, -} - -#[derive(Clone, Debug, Serialize, Deserialize)] -/// Completed instance payload with result or exception. -pub struct InstanceDone { - pub executor_id: Uuid, - pub entry_node: Uuid, - pub result: Option, - pub error: Option, -} - -#[derive(Clone, Debug, Serialize, Deserialize)] -/// Batch payload representing an updated execution graph snapshot. -/// -/// This intentionally stores only runtime nodes and edges (no DAG template or -/// derived caches) so persistence stays lightweight. 
-pub struct GraphUpdate { - pub instance_id: Uuid, - pub nodes: HashMap, - pub edges: HashSet, -} - -impl GraphUpdate { - pub fn from_state(instance_id: Uuid, state: &RunnerState) -> Self { - Self { - instance_id, - nodes: state.nodes.clone(), - edges: state.edges.clone(), - } - } - - pub fn next_scheduled_at(&self) -> DateTime { - let mut next: Option> = None; - for node in self.nodes.values() { - if matches!(node.status, NodeStatus::Completed | NodeStatus::Failed) { - continue; - } - if let Some(scheduled_at) = node.scheduled_at { - next = Some(match next { - Some(existing) => existing.min(scheduled_at), - None => scheduled_at, - }); - } - } - next.unwrap_or_else(Utc::now) - } -} - -#[derive(Clone, Debug, Serialize, Deserialize)] -/// Batch payload representing a finished action attempt (success or failure). -pub struct ActionDone { - pub execution_id: Uuid, - pub attempt: i32, - pub status: ActionAttemptStatus, - pub started_at: Option>, - pub completed_at: Option>, - pub duration_ms: Option, - pub result: Value, -} - -#[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)] -#[serde(rename_all = "snake_case")] -pub enum ActionAttemptStatus { - Completed, - Failed, - TimedOut, -} - -impl std::fmt::Display for ActionAttemptStatus { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - Self::Completed => write!(f, "completed"), - Self::Failed => write!(f, "failed"), - Self::TimedOut => write!(f, "timed_out"), - } - } -} - -/// Worker status update for persistence. 
-#[derive(Clone, Debug)] -pub struct WorkerStatusUpdate { - pub pool_id: Uuid, - pub throughput_per_min: f64, - pub total_completed: i64, - pub last_action_at: Option>, - pub median_dequeue_ms: Option, - pub median_handling_ms: Option, - pub dispatch_queue_size: i64, - pub total_in_flight: i64, - pub active_workers: i32, - pub actions_per_sec: f64, - pub median_instance_duration_secs: Option, - pub active_instance_count: i32, - pub total_instances_completed: i64, - pub instances_per_sec: f64, - pub instances_per_min: f64, - pub time_series: Option>, -} - -/// Backend capability for recording worker status metrics. -#[async_trait] -pub trait WorkerStatusBackend: Send + Sync { - async fn upsert_worker_status(&self, status: &WorkerStatusUpdate) -> BackendResult<()>; -} - -/// Abstract persistence backend for runner state. -#[async_trait] -pub trait CoreBackend: Send + Sync { - fn clone_box(&self) -> Box; - - /// Persist updated execution graphs. - async fn save_graphs( - &self, - claim: LockClaim, - graphs: &[GraphUpdate], - ) -> BackendResult>; - - /// Persist finished action attempts (success or failure). - async fn save_actions_done(&self, actions: &[ActionDone]) -> BackendResult<()>; - - /// Return up to size queued instances without blocking. - async fn get_queued_instances( - &self, - size: usize, - claim: LockClaim, - ) -> BackendResult; - - /// Refresh lock expiry for owned instances. - async fn refresh_instance_locks( - &self, - claim: LockClaim, - instance_ids: &[Uuid], - ) -> BackendResult>; - - /// Release instance locks when evicting from memory. - async fn release_instance_locks( - &self, - lock_uuid: Uuid, - instance_ids: &[Uuid], - ) -> BackendResult<()>; - - /// Persist completed workflow instances. - async fn save_instances_done(&self, instances: &[InstanceDone]) -> BackendResult<()>; - - /// Insert queued instances for run-loop consumption. 
- async fn queue_instances(&self, instances: &[QueuedInstance]) -> BackendResult<()>; -} - -/// Registration payload for storing workflow DAG metadata. -#[derive(Clone, Debug)] -pub struct WorkflowRegistration { - pub workflow_name: String, - pub workflow_version: String, - pub ir_hash: String, - pub program_proto: Vec, - pub concurrent: bool, -} - -#[derive(Clone, Debug)] -/// Stored workflow version metadata and IR payload. -pub struct WorkflowVersion { - pub id: Uuid, - pub workflow_name: String, - pub workflow_version: String, - pub ir_hash: String, - pub program_proto: Vec, - pub concurrent: bool, -} - -/// Backend capability for registering workflow DAGs. -#[async_trait] -pub trait WorkflowRegistryBackend: Send + Sync { - async fn upsert_workflow_version( - &self, - registration: &WorkflowRegistration, - ) -> BackendResult; - - async fn get_workflow_versions(&self, ids: &[Uuid]) -> BackendResult>; -} - -/// Backend capability for workflow schedule persistence. -#[async_trait] -pub trait SchedulerBackend: Send + Sync { - async fn upsert_schedule(&self, params: &CreateScheduleParams) -> BackendResult; - async fn get_schedule(&self, id: ScheduleId) -> BackendResult; - async fn get_schedule_by_name( - &self, - workflow_name: &str, - schedule_name: &str, - ) -> BackendResult>; - async fn list_schedules(&self, limit: i64, offset: i64) - -> BackendResult>; - async fn count_schedules(&self) -> BackendResult; - async fn update_schedule_status(&self, id: ScheduleId, status: &str) -> BackendResult; - async fn delete_schedule(&self, id: ScheduleId) -> BackendResult; - async fn find_due_schedules(&self, limit: i32) -> BackendResult>; - async fn has_running_instance(&self, schedule_id: ScheduleId) -> BackendResult; - async fn mark_schedule_executed( - &self, - schedule_id: ScheduleId, - instance_id: Uuid, - ) -> BackendResult<()>; - async fn skip_schedule_run(&self, schedule_id: ScheduleId) -> BackendResult<()>; -} - -#[derive(Clone, Copy, Debug, Default)] -/// Summary of 
a garbage collection sweep. -pub struct GarbageCollectionResult { - pub deleted_instances: usize, - pub deleted_actions: usize, -} - -/// Backend capability for deleting old finished workflow data. -#[async_trait] -pub trait GarbageCollectorBackend: Send + Sync { - async fn collect_done_instances( - &self, - older_than: DateTime, - limit: usize, - ) -> BackendResult; -} - -/// Backend capability for webapp-specific queries. -#[async_trait] -pub trait WebappBackend: Send + Sync { - async fn count_instances(&self, search: Option<&str>) -> BackendResult; - async fn list_instances( - &self, - search: Option<&str>, - limit: i64, - offset: i64, - ) -> BackendResult>; - async fn get_instance(&self, instance_id: Uuid) -> BackendResult; - async fn get_execution_graph( - &self, - instance_id: Uuid, - ) -> BackendResult>; - async fn get_workflow_graph( - &self, - instance_id: Uuid, - ) -> BackendResult>; - async fn get_action_results(&self, instance_id: Uuid) -> BackendResult>; - async fn get_distinct_workflows(&self) -> BackendResult>; - async fn get_distinct_statuses(&self) -> BackendResult>; - async fn count_schedules(&self) -> BackendResult; - async fn list_schedules(&self, limit: i64, offset: i64) -> BackendResult>; - async fn get_schedule(&self, schedule_id: Uuid) -> BackendResult; - async fn count_schedule_invocations(&self, schedule_id: Uuid) -> BackendResult; - async fn list_schedule_invocations( - &self, - schedule_id: Uuid, - limit: i64, - offset: i64, - ) -> BackendResult>; - async fn update_schedule_status(&self, schedule_id: Uuid, status: &str) -> BackendResult; - async fn get_distinct_schedule_statuses(&self) -> BackendResult>; - async fn get_distinct_schedule_types(&self) -> BackendResult>; - async fn get_worker_action_stats( - &self, - window_minutes: i64, - ) -> BackendResult>; - async fn get_worker_aggregate_stats( - &self, - window_minutes: i64, - ) -> BackendResult; - async fn worker_status_table_exists(&self) -> bool; - async fn 
schedules_table_exists(&self) -> bool; - async fn get_worker_statuses(&self, window_minutes: i64) -> BackendResult>; -} - -impl Clone for Box { - fn clone(&self) -> Self { - self.clone_box() - } -} diff --git a/crates/waymark/src/backends/memory.rs b/crates/waymark/src/backends/memory.rs deleted file mode 100644 index c49bc6e0..00000000 --- a/crates/waymark/src/backends/memory.rs +++ /dev/null @@ -1,814 +0,0 @@ -//! In-memory backend that prints persistence operations. - -use std::collections::{HashMap, VecDeque}; -use std::sync::{Arc, Mutex}; - -use chrono::{DateTime, Utc}; -use uuid::Uuid; - -use super::base::{ - ActionDone, BackendError, BackendResult, CoreBackend, GarbageCollectionResult, - GarbageCollectorBackend, GraphUpdate, InstanceDone, InstanceLockStatus, LockClaim, - QueuedInstance, QueuedInstanceBatch, SchedulerBackend, WebappBackend, WorkerStatusBackend, - WorkerStatusUpdate, WorkflowRegistration, WorkflowRegistryBackend, WorkflowVersion, -}; -use crate::scheduler::compute_next_run; -use crate::scheduler::{CreateScheduleParams, ScheduleId, ScheduleType, WorkflowSchedule}; -use crate::webapp::{ - ExecutionGraphView, InstanceDetail, InstanceStatus, InstanceSummary, ScheduleDetail, - ScheduleInvocationSummary, ScheduleSummary, TimelineEntry, WorkerActionRow, - WorkerAggregateStats, WorkerStatus, -}; -use tonic::async_trait; - -type WorkflowVersionKey = (String, String); -type WorkflowVersionValue = (Uuid, WorkflowRegistration); -type WorkflowVersionStore = HashMap; -type InstanceLockStore = HashMap, Option>)>; - -/// Backend that stores updates in memory for tests or local runs. 
-#[derive(Clone)] -pub struct MemoryBackend { - instance_queue: Option>>>, - graph_updates: Arc>>, - actions_done: Arc>>, - instances_done: Arc>>, - worker_status_updates: Arc>>, - workflow_versions: Arc>, - schedules: Arc>>, - instance_locks: Arc>, -} - -impl Default for MemoryBackend { - fn default() -> Self { - Self { - instance_queue: None, - graph_updates: Arc::new(Mutex::new(Vec::new())), - actions_done: Arc::new(Mutex::new(Vec::new())), - instances_done: Arc::new(Mutex::new(Vec::new())), - worker_status_updates: Arc::new(Mutex::new(Vec::new())), - workflow_versions: Arc::new(Mutex::new(HashMap::new())), - schedules: Arc::new(Mutex::new(HashMap::new())), - instance_locks: Arc::new(Mutex::new(HashMap::new())), - } - } -} - -impl MemoryBackend { - pub fn new() -> Self { - Self::default() - } - - pub fn with_queue(queue: Arc>>) -> Self { - Self { - instance_queue: Some(queue), - ..Self::default() - } - } - - pub fn instance_queue(&self) -> Option>>> { - self.instance_queue.clone() - } - - pub fn graph_updates(&self) -> Vec { - self.graph_updates - .lock() - .expect("graph updates poisoned") - .clone() - } - - pub fn actions_done(&self) -> Vec { - self.actions_done - .lock() - .expect("actions done poisoned") - .clone() - } - - pub fn instances_done(&self) -> Vec { - self.instances_done - .lock() - .expect("instances done poisoned") - .clone() - } - - pub fn worker_status_updates(&self) -> Vec { - self.worker_status_updates - .lock() - .expect("worker status updates poisoned") - .clone() - } -} - -#[async_trait] -impl CoreBackend for MemoryBackend { - fn clone_box(&self) -> Box { - Box::new(self.clone()) - } - - async fn save_graphs( - &self, - claim: LockClaim, - graphs: &[GraphUpdate], - ) -> BackendResult> { - let mut stored = self.graph_updates.lock().expect("graph updates poisoned"); - stored.extend(graphs.iter().cloned()); - let mut guard = self.instance_locks.lock().expect("instance locks poisoned"); - let mut locks = Vec::with_capacity(graphs.len()); - 
for graph in graphs { - if let Some((Some(lock_uuid), lock_expires_at)) = guard.get_mut(&graph.instance_id) - && *lock_uuid == claim.lock_uuid - && lock_expires_at.is_none_or(|expires_at| expires_at < claim.lock_expires_at) - { - *lock_expires_at = Some(claim.lock_expires_at); - } - let (lock_uuid, lock_expires_at) = guard - .get(&graph.instance_id) - .cloned() - .unwrap_or((None, None)); - locks.push(InstanceLockStatus { - instance_id: graph.instance_id, - lock_uuid, - lock_expires_at, - }); - } - Ok(locks) - } - - async fn save_actions_done(&self, actions: &[ActionDone]) -> BackendResult<()> { - let mut stored = self.actions_done.lock().expect("actions done poisoned"); - stored.extend(actions.iter().cloned()); - Ok(()) - } - - async fn save_instances_done(&self, instances: &[InstanceDone]) -> BackendResult<()> { - let mut stored = self.instances_done.lock().expect("instances done poisoned"); - stored.extend(instances.iter().cloned()); - if !instances.is_empty() { - let mut locks = self.instance_locks.lock().expect("instance locks poisoned"); - for instance in instances { - locks.remove(&instance.executor_id); - } - } - Ok(()) - } - - async fn get_queued_instances( - &self, - size: usize, - claim: LockClaim, - ) -> BackendResult { - if size == 0 { - return Ok(QueuedInstanceBatch { - instances: Vec::new(), - }); - } - let queue = match &self.instance_queue { - Some(queue) => queue, - None => { - return Ok(QueuedInstanceBatch { - instances: Vec::new(), - }); - } - }; - let mut guard = queue.lock().expect("instance queue poisoned"); - let now = Utc::now(); - let mut instances = Vec::new(); - while instances.len() < size { - let Some(instance) = guard.front() else { - break; - }; - if let Some(scheduled_at) = instance.scheduled_at - && scheduled_at > now - { - break; - } - let instance = guard.pop_front().expect("instance queue empty"); - instances.push(instance); - } - if !instances.is_empty() { - let mut locks = self.instance_locks.lock().expect("instance locks 
poisoned"); - for instance in &instances { - locks.insert( - instance.instance_id, - (Some(claim.lock_uuid), Some(claim.lock_expires_at)), - ); - } - } - Ok(QueuedInstanceBatch { instances }) - } - - async fn queue_instances(&self, instances: &[QueuedInstance]) -> BackendResult<()> { - if instances.is_empty() { - return Ok(()); - } - let queue = self.instance_queue.as_ref().ok_or_else(|| { - BackendError::Message("memory backend missing instance queue".to_string()) - })?; - let mut guard = queue.lock().expect("instance queue poisoned"); - for instance in instances { - guard.push_back(instance.clone()); - } - Ok(()) - } - - async fn refresh_instance_locks( - &self, - claim: LockClaim, - instance_ids: &[Uuid], - ) -> BackendResult> { - let mut guard = self.instance_locks.lock().expect("instance locks poisoned"); - let mut locks = Vec::new(); - for instance_id in instance_ids { - let entry = guard - .entry(*instance_id) - .or_insert((Some(claim.lock_uuid), Some(claim.lock_expires_at))); - if entry.0 == Some(claim.lock_uuid) { - entry.1 = Some(claim.lock_expires_at); - } - locks.push(InstanceLockStatus { - instance_id: *instance_id, - lock_uuid: entry.0, - lock_expires_at: entry.1, - }); - } - Ok(locks) - } - - async fn release_instance_locks( - &self, - lock_uuid: Uuid, - instance_ids: &[Uuid], - ) -> BackendResult<()> { - let mut guard = self.instance_locks.lock().expect("instance locks poisoned"); - for instance_id in instance_ids { - if let Some((current_lock, _)) = guard.get(instance_id) - && *current_lock == Some(lock_uuid) - { - guard.remove(instance_id); - } - } - Ok(()) - } -} - -#[async_trait] -impl WorkerStatusBackend for MemoryBackend { - async fn upsert_worker_status(&self, status: &WorkerStatusUpdate) -> BackendResult<()> { - let mut stored = self - .worker_status_updates - .lock() - .expect("worker status updates poisoned"); - stored.push(status.clone()); - Ok(()) - } -} - -#[async_trait] -impl WorkflowRegistryBackend for MemoryBackend { - async fn 
upsert_workflow_version( - &self, - registration: &WorkflowRegistration, - ) -> BackendResult { - let mut guard = self - .workflow_versions - .lock() - .expect("workflow versions poisoned"); - let key = ( - registration.workflow_name.clone(), - registration.workflow_version.clone(), - ); - if let Some((id, existing)) = guard.get(&key) { - if existing.ir_hash != registration.ir_hash { - return Err(BackendError::Message(format!( - "workflow version already exists with different IR hash: {}@{}", - registration.workflow_name, registration.workflow_version - ))); - } - return Ok(*id); - } - - let id = Uuid::new_v4(); - guard.insert(key, (id, registration.clone())); - Ok(id) - } - - async fn get_workflow_versions(&self, ids: &[Uuid]) -> BackendResult> { - if ids.is_empty() { - return Ok(Vec::new()); - } - let guard = self - .workflow_versions - .lock() - .expect("workflow versions poisoned"); - let mut versions = Vec::new(); - for (id, registration) in guard.values() { - if ids.contains(id) { - versions.push(WorkflowVersion { - id: *id, - workflow_name: registration.workflow_name.clone(), - workflow_version: registration.workflow_version.clone(), - ir_hash: registration.ir_hash.clone(), - program_proto: registration.program_proto.clone(), - concurrent: registration.concurrent, - }); - } - } - Ok(versions) - } -} - -#[async_trait] -impl SchedulerBackend for MemoryBackend { - async fn upsert_schedule(&self, params: &CreateScheduleParams) -> BackendResult { - let mut guard = self.schedules.lock().expect("schedules poisoned"); - let existing_schedule = guard.iter().find_map(|(id, schedule)| { - if schedule.workflow_name == params.workflow_name - && schedule.schedule_name == params.schedule_name - { - Some((*id, schedule.clone())) - } else { - None - } - }); - let schedule_id = existing_schedule - .as_ref() - .map(|(id, _)| *id) - .unwrap_or_else(ScheduleId::new); - let now = Utc::now(); - let next_run_at = match existing_schedule - .as_ref() - .and_then(|(_, schedule)| 
schedule.next_run_at) - { - Some(next_run_at) => Some(next_run_at), - None => Some( - compute_next_run( - params.schedule_type, - params.cron_expression.as_deref(), - params.interval_seconds, - params.jitter_seconds, - None, - ) - .map_err(BackendError::Message)?, - ), - }; - let schedule = WorkflowSchedule { - id: schedule_id.0, - workflow_name: params.workflow_name.clone(), - schedule_name: params.schedule_name.clone(), - schedule_type: params.schedule_type.as_str().to_string(), - cron_expression: params.cron_expression.clone(), - interval_seconds: params.interval_seconds, - jitter_seconds: params.jitter_seconds, - input_payload: params.input_payload.clone(), - status: "active".to_string(), - next_run_at, - last_run_at: existing_schedule - .as_ref() - .and_then(|(_, schedule)| schedule.last_run_at), - last_instance_id: existing_schedule - .as_ref() - .and_then(|(_, schedule)| schedule.last_instance_id), - created_at: existing_schedule - .as_ref() - .map(|(_, schedule)| schedule.created_at) - .unwrap_or(now), - updated_at: now, - priority: params.priority, - allow_duplicate: params.allow_duplicate, - }; - guard.insert(schedule_id, schedule); - Ok(schedule_id) - } - - async fn get_schedule(&self, id: ScheduleId) -> BackendResult { - let guard = self.schedules.lock().expect("schedules poisoned"); - guard - .get(&id) - .cloned() - .ok_or_else(|| BackendError::Message(format!("schedule not found: {id}"))) - } - - async fn get_schedule_by_name( - &self, - workflow_name: &str, - schedule_name: &str, - ) -> BackendResult> { - let guard = self.schedules.lock().expect("schedules poisoned"); - Ok(guard - .values() - .find(|schedule| { - schedule.workflow_name == workflow_name - && schedule.schedule_name == schedule_name - && schedule.status != "deleted" - }) - .cloned()) - } - - async fn list_schedules( - &self, - limit: i64, - offset: i64, - ) -> BackendResult> { - let guard = self.schedules.lock().expect("schedules poisoned"); - let mut schedules: Vec<_> = guard - 
.values() - .filter(|schedule| schedule.status != "deleted") - .cloned() - .collect(); - schedules.sort_by(|a, b| { - (&a.workflow_name, &a.schedule_name).cmp(&(&b.workflow_name, &b.schedule_name)) - }); - let start = offset.max(0) as usize; - let end = start.saturating_add(limit.max(0) as usize); - Ok(schedules - .into_iter() - .skip(start) - .take(end - start) - .collect()) - } - - async fn count_schedules(&self) -> BackendResult { - let guard = self.schedules.lock().expect("schedules poisoned"); - Ok(guard - .values() - .filter(|schedule| schedule.status != "deleted") - .count() as i64) - } - - async fn update_schedule_status(&self, id: ScheduleId, status: &str) -> BackendResult { - let mut guard = self.schedules.lock().expect("schedules poisoned"); - if let Some(schedule) = guard.get_mut(&id) { - schedule.status = status.to_string(); - schedule.updated_at = Utc::now(); - Ok(true) - } else { - Ok(false) - } - } - - async fn delete_schedule(&self, id: ScheduleId) -> BackendResult { - SchedulerBackend::update_schedule_status(self, id, "deleted").await - } - - async fn find_due_schedules(&self, limit: i32) -> BackendResult> { - let guard = self.schedules.lock().expect("schedules poisoned"); - let now = Utc::now(); - let mut schedules: Vec<_> = guard - .values() - .filter(|schedule| { - schedule.status == "active" - && schedule - .next_run_at - .map(|next| next <= now) - .unwrap_or(false) - }) - .cloned() - .collect(); - schedules.sort_by_key(|schedule| schedule.next_run_at); - Ok(schedules.into_iter().take(limit as usize).collect()) - } - - async fn has_running_instance(&self, _schedule_id: ScheduleId) -> BackendResult { - Ok(false) - } - - async fn mark_schedule_executed( - &self, - schedule_id: ScheduleId, - instance_id: Uuid, - ) -> BackendResult<()> { - let mut guard = self.schedules.lock().expect("schedules poisoned"); - let schedule = guard - .get_mut(&schedule_id) - .ok_or_else(|| BackendError::Message(format!("schedule not found: {schedule_id}")))?; - let 
schedule_type = ScheduleType::parse(&schedule.schedule_type) - .ok_or_else(|| BackendError::Message("invalid schedule type".to_string()))?; - let next_run_at = compute_next_run( - schedule_type, - schedule.cron_expression.as_deref(), - schedule.interval_seconds, - schedule.jitter_seconds, - Some(Utc::now()), - ) - .map_err(BackendError::Message)?; - schedule.last_run_at = Some(Utc::now()); - schedule.last_instance_id = Some(instance_id); - schedule.next_run_at = Some(next_run_at); - schedule.updated_at = Utc::now(); - Ok(()) - } - - async fn skip_schedule_run(&self, schedule_id: ScheduleId) -> BackendResult<()> { - let mut guard = self.schedules.lock().expect("schedules poisoned"); - let schedule = guard - .get_mut(&schedule_id) - .ok_or_else(|| BackendError::Message(format!("schedule not found: {schedule_id}")))?; - let schedule_type = ScheduleType::parse(&schedule.schedule_type) - .ok_or_else(|| BackendError::Message("invalid schedule type".to_string()))?; - let next_run_at = compute_next_run( - schedule_type, - schedule.cron_expression.as_deref(), - schedule.interval_seconds, - schedule.jitter_seconds, - Some(Utc::now()), - ) - .map_err(BackendError::Message)?; - schedule.next_run_at = Some(next_run_at); - schedule.updated_at = Utc::now(); - Ok(()) - } -} - -#[async_trait] -impl GarbageCollectorBackend for MemoryBackend { - async fn collect_done_instances( - &self, - _older_than: DateTime, - _limit: usize, - ) -> BackendResult { - Ok(GarbageCollectionResult::default()) - } -} - -#[async_trait] -impl WebappBackend for MemoryBackend { - async fn count_instances(&self, _search: Option<&str>) -> BackendResult { - Ok(0) - } - - async fn list_instances( - &self, - _search: Option<&str>, - _limit: i64, - _offset: i64, - ) -> BackendResult> { - Ok(Vec::new()) - } - - async fn get_instance(&self, instance_id: Uuid) -> BackendResult { - Err(BackendError::Message(format!( - "instance not found: {instance_id}" - ))) - } - - async fn get_execution_graph( - &self, - 
_instance_id: Uuid, - ) -> BackendResult> { - Ok(None) - } - - async fn get_workflow_graph( - &self, - _instance_id: Uuid, - ) -> BackendResult> { - Ok(None) - } - - async fn get_action_results(&self, _instance_id: Uuid) -> BackendResult> { - Ok(Vec::new()) - } - - async fn get_distinct_workflows(&self) -> BackendResult> { - Ok(Vec::new()) - } - - async fn get_distinct_statuses(&self) -> BackendResult> { - Ok(vec![ - InstanceStatus::Queued.to_string(), - InstanceStatus::Running.to_string(), - InstanceStatus::Completed.to_string(), - InstanceStatus::Failed.to_string(), - ]) - } - - async fn count_schedules(&self) -> BackendResult { - let guard = self.schedules.lock().expect("schedules poisoned"); - Ok(guard - .values() - .filter(|schedule| schedule.status != "deleted") - .count() as i64) - } - - async fn list_schedules(&self, limit: i64, offset: i64) -> BackendResult> { - let guard = self.schedules.lock().expect("schedules poisoned"); - let mut schedules: Vec<_> = guard - .values() - .filter(|schedule| schedule.status != "deleted") - .cloned() - .collect(); - schedules.sort_by(|a, b| { - (&a.workflow_name, &a.schedule_name).cmp(&(&b.workflow_name, &b.schedule_name)) - }); - - let start = offset.max(0) as usize; - let page_limit = limit.max(0) as usize; - Ok(schedules - .into_iter() - .skip(start) - .take(page_limit) - .map(|schedule| ScheduleSummary { - id: schedule.id.to_string(), - workflow_name: schedule.workflow_name, - schedule_name: schedule.schedule_name, - schedule_type: schedule.schedule_type, - cron_expression: schedule.cron_expression, - interval_seconds: schedule.interval_seconds, - status: schedule.status, - next_run_at: schedule.next_run_at.map(|dt| dt.to_rfc3339()), - last_run_at: schedule.last_run_at.map(|dt| dt.to_rfc3339()), - created_at: schedule.created_at.to_rfc3339(), - }) - .collect()) - } - - async fn get_schedule(&self, schedule_id: Uuid) -> BackendResult { - let guard = self.schedules.lock().expect("schedules poisoned"); - let schedule = 
guard - .values() - .find(|schedule| schedule.id == schedule_id) - .cloned() - .ok_or_else(|| BackendError::Message(format!("schedule not found: {schedule_id}")))?; - - let input_payload = schedule.input_payload.as_ref().and_then(|bytes| { - rmp_serde::from_slice::(bytes) - .ok() - .and_then(|value| serde_json::to_string_pretty(&value).ok()) - }); - - Ok(ScheduleDetail { - id: schedule.id.to_string(), - workflow_name: schedule.workflow_name, - schedule_name: schedule.schedule_name, - schedule_type: schedule.schedule_type, - cron_expression: schedule.cron_expression, - interval_seconds: schedule.interval_seconds, - jitter_seconds: schedule.jitter_seconds, - status: schedule.status, - next_run_at: schedule.next_run_at.map(|dt| dt.to_rfc3339()), - last_run_at: schedule.last_run_at.map(|dt| dt.to_rfc3339()), - last_instance_id: schedule.last_instance_id.map(|id| id.to_string()), - created_at: schedule.created_at.to_rfc3339(), - updated_at: schedule.updated_at.to_rfc3339(), - priority: schedule.priority, - allow_duplicate: schedule.allow_duplicate, - input_payload, - }) - } - - async fn count_schedule_invocations(&self, _schedule_id: Uuid) -> BackendResult { - Ok(0) - } - - async fn list_schedule_invocations( - &self, - _schedule_id: Uuid, - _limit: i64, - _offset: i64, - ) -> BackendResult> { - Ok(Vec::new()) - } - - async fn update_schedule_status(&self, schedule_id: Uuid, status: &str) -> BackendResult { - let mut guard = self.schedules.lock().expect("schedules poisoned"); - let Some(schedule) = guard - .values_mut() - .find(|schedule| schedule.id == schedule_id) - else { - return Ok(false); - }; - schedule.status = status.to_string(); - schedule.updated_at = Utc::now(); - Ok(true) - } - - async fn get_distinct_schedule_statuses(&self) -> BackendResult> { - Ok(vec!["active".to_string(), "paused".to_string()]) - } - - async fn get_distinct_schedule_types(&self) -> BackendResult> { - Ok(vec!["cron".to_string(), "interval".to_string()]) - } - - async fn 
get_worker_action_stats( - &self, - _window_minutes: i64, - ) -> BackendResult> { - let statuses = latest_worker_statuses( - &self - .worker_status_updates - .lock() - .expect("worker status updates poisoned"), - ); - - Ok(statuses - .into_iter() - .map(|status| WorkerActionRow { - pool_id: status.pool_id.to_string(), - active_workers: status.active_workers as i64, - actions_per_sec: format!("{:.1}", status.actions_per_sec), - throughput_per_min: status.throughput_per_min as i64, - total_completed: status.total_completed, - median_dequeue_ms: status.median_dequeue_ms, - median_handling_ms: status.median_handling_ms, - last_action_at: status.last_action_at.map(|dt| dt.to_rfc3339()), - updated_at: status.updated_at.to_rfc3339(), - }) - .collect()) - } - - async fn get_worker_aggregate_stats( - &self, - _window_minutes: i64, - ) -> BackendResult { - let statuses = latest_worker_statuses( - &self - .worker_status_updates - .lock() - .expect("worker status updates poisoned"), - ); - - let active_worker_count = statuses - .iter() - .map(|status| status.active_workers as i64) - .sum(); - let total_in_flight = statuses - .iter() - .filter_map(|status| status.total_in_flight) - .sum(); - let total_queue_depth = statuses - .iter() - .filter_map(|status| status.dispatch_queue_size) - .sum(); - let actions_per_sec = statuses - .iter() - .map(|status| status.actions_per_sec) - .sum::(); - - Ok(WorkerAggregateStats { - active_worker_count, - actions_per_sec: format!("{:.1}", actions_per_sec), - total_in_flight, - total_queue_depth, - }) - } - - async fn worker_status_table_exists(&self) -> bool { - !self - .worker_status_updates - .lock() - .expect("worker status updates poisoned") - .is_empty() - } - - async fn schedules_table_exists(&self) -> bool { - !self - .schedules - .lock() - .expect("schedules poisoned") - .is_empty() - } - - async fn get_worker_statuses(&self, _window_minutes: i64) -> BackendResult> { - Ok(latest_worker_statuses( - &self - .worker_status_updates - 
.lock() - .expect("worker status updates poisoned"), - )) - } -} - -fn latest_worker_statuses(updates: &[WorkerStatusUpdate]) -> Vec { - let mut by_pool: HashMap = HashMap::new(); - for update in updates { - by_pool.insert(update.pool_id, update.clone()); - } - - let now = Utc::now(); - let mut statuses: Vec<_> = by_pool - .into_values() - .map(|status| WorkerStatus { - pool_id: status.pool_id, - active_workers: status.active_workers, - throughput_per_min: status.throughput_per_min, - actions_per_sec: status.actions_per_sec, - total_completed: status.total_completed, - last_action_at: status.last_action_at, - updated_at: now, - median_dequeue_ms: status.median_dequeue_ms, - median_handling_ms: status.median_handling_ms, - dispatch_queue_size: Some(status.dispatch_queue_size), - total_in_flight: Some(status.total_in_flight), - median_instance_duration_secs: status.median_instance_duration_secs, - active_instance_count: status.active_instance_count, - total_instances_completed: status.total_instances_completed, - instances_per_sec: status.instances_per_sec, - instances_per_min: status.instances_per_min, - time_series: status.time_series, - }) - .collect(); - - statuses.sort_by(|left, right| right.actions_per_sec.total_cmp(&left.actions_per_sec)); - statuses -} diff --git a/crates/waymark/src/backends/mod.rs b/crates/waymark/src/backends/mod.rs deleted file mode 100644 index 7fbd84ad..00000000 --- a/crates/waymark/src/backends/mod.rs +++ /dev/null @@ -1,15 +0,0 @@ -//! Backend implementations for runner persistence. 
- -mod base; -mod memory; -mod postgres; - -pub use base::{ - ActionAttemptStatus, ActionDone, BackendError, BackendResult, CoreBackend, - GarbageCollectionResult, GarbageCollectorBackend, GraphUpdate, InstanceDone, - InstanceLockStatus, LockClaim, QueuedInstance, QueuedInstanceBatch, SchedulerBackend, - WebappBackend, WorkerStatusBackend, WorkerStatusUpdate, WorkflowRegistration, - WorkflowRegistryBackend, WorkflowVersion, -}; -pub use memory::MemoryBackend; -pub use postgres::PostgresBackend; diff --git a/crates/waymark/src/bin/integration_test.rs b/crates/waymark/src/bin/integration_test.rs index 35f86fdf..e7faa3ae 100644 --- a/crates/waymark/src/bin/integration_test.rs +++ b/crates/waymark/src/bin/integration_test.rs @@ -19,17 +19,16 @@ use serde_json::Value; use sqlx::Row; use uuid::Uuid; -use waymark::backends::{ - CoreBackend, MemoryBackend, PostgresBackend, QueuedInstance, WorkflowRegistration, - WorkflowRegistryBackend, -}; -use waymark::db; -use waymark::integration_support::{LOCAL_POSTGRES_DSN, connect_pool, ensure_local_postgres}; use waymark::messages::ast as ir; use waymark::waymark_core::runloop::{RunLoop, RunLoopSupervisorConfig}; -use waymark::waymark_core::runner::RunnerState; use waymark::workers::{PythonWorkerConfig, RemoteWorkerPool}; +use waymark_backend_memory::MemoryBackend; +use waymark_backend_postgres::PostgresBackend; +use waymark_core_backend::{CoreBackend, QueuedInstance}; use waymark_dag::{DAG, convert_to_dag}; +use waymark_integration_support::{LOCAL_POSTGRES_DSN, connect_pool, ensure_local_postgres}; +use waymark_runner_state::RunnerState; +use waymark_workflow_registry_backend::{WorkflowRegistration, WorkflowRegistryBackend}; #[derive(Parser, Debug)] #[command(name = "integration_test")] @@ -452,7 +451,7 @@ async fn connect_postgres_backend() -> Result { let pool = connect_pool(&dsn) .await .with_context(|| format!("connect postgres backend: {dsn}"))?; - db::run_migrations(&pool) + waymark_backend_postgres_migrations::run(&pool) 
.await .context("run postgres migrations for integration runner")?; Ok(PostgresBackend::new(pool)) diff --git a/crates/waymark/src/bin/soak-harness.rs b/crates/waymark/src/bin/soak-harness.rs index 3503fe94..2bccbb4e 100644 --- a/crates/waymark/src/bin/soak-harness.rs +++ b/crates/waymark/src/bin/soak-harness.rs @@ -29,14 +29,13 @@ use tokio::process::{Child, Command}; use tracing::{error, info, warn}; use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt}; use uuid::Uuid; -use waymark::backends::{ - PostgresBackend, QueuedInstance, WorkflowRegistration, WorkflowRegistryBackend, -}; -use waymark::db; use waymark::messages::ast as ir; -use waymark::waymark_core::ir_parser::parse_program; -use waymark::waymark_core::runner::RunnerState; +use waymark_backend_postgres::PostgresBackend; +use waymark_core_backend::QueuedInstance; use waymark_dag::{DAG, convert_to_dag}; +use waymark_ir_parser::parse_program; +use waymark_runner_state::RunnerState; +use waymark_workflow_registry_backend::{WorkflowRegistration, WorkflowRegistryBackend as _}; const DEFAULT_DSN: &str = "postgresql://waymark:waymark@127.0.0.1:5433/waymark"; const DEFAULT_WORKFLOW_NAME: &str = "waymark_soak_timeout_mix_v1"; @@ -287,7 +286,7 @@ async fn main() -> Result<()> { } let pool = wait_for_database(&args.dsn, DB_READY_TIMEOUT).await?; - db::run_migrations(&pool) + waymark_backend_postgres_migrations::run(&pool) .await .context("run migrations before soak")?; diff --git a/crates/waymark/src/bin/start-workers.rs b/crates/waymark/src/bin/start-workers.rs index 2bf9733d..8aa1c493 100644 --- a/crates/waymark/src/bin/start-workers.rs +++ b/crates/waymark/src/bin/start-workers.rs @@ -43,13 +43,12 @@ use tracing::{error, info, warn}; use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt}; use uuid::Uuid; -use waymark::backends::PostgresBackend; use waymark::config::WorkerConfig; -use waymark::db; use waymark::messages::ast as ir; use waymark::scheduler::{DagResolver, 
WorkflowDag}; use waymark::waymark_core::runloop::{RunLoopSupervisorConfig, runloop_supervisor}; use waymark::{PythonWorkerConfig, RemoteWorkerPool, WebappServer, spawn_status_reporter}; +use waymark_backend_postgres::PostgresBackend; use waymark_dag::convert_to_dag; #[tokio::main] @@ -87,7 +86,7 @@ async fn main() -> Result<()> { // Initialize the database and backend. let pool = PgPool::connect(&config.database_url).await?; - db::run_migrations(&pool).await?; + waymark_backend_postgres_migrations::run(&pool).await?; let backend = PostgresBackend::new(pool); // Start the worker pool (bridge + python workers). diff --git a/crates/waymark/src/bin/waymark-bridge.rs b/crates/waymark/src/bin/waymark-bridge.rs index 1bc6ac18..878e6a6e 100644 --- a/crates/waymark/src/bin/waymark-bridge.rs +++ b/crates/waymark/src/bin/waymark-bridge.rs @@ -29,18 +29,22 @@ use tracing::{debug, info}; use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt}; use uuid::Uuid; -use waymark::backends::{ - ActionDone, BackendError, BackendResult, CoreBackend, GraphUpdate, InstanceDone, - InstanceLockStatus, LockClaim, PostgresBackend, QueuedInstance, QueuedInstanceBatch, - SchedulerBackend, WorkflowRegistration, WorkflowRegistryBackend, WorkflowVersion, -}; -use waymark::db; use waymark::messages::{self, ast as ir, proto}; -use waymark::scheduler::{CreateScheduleParams, ScheduleId, ScheduleStatus, ScheduleType}; use waymark::waymark_core::runloop::{RunLoop, RunLoopSupervisorConfig}; -use waymark::waymark_core::runner::RunnerState; use waymark::workers::{ActionCompletion, ActionRequest, BaseWorkerPool, WorkerPoolError}; +use waymark_backend_postgres::PostgresBackend; +use waymark_backends_core::{BackendError, BackendResult}; +use waymark_core_backend::{ + ActionDone, CoreBackend, GraphUpdate, InstanceDone, InstanceLockStatus, LockClaim, + QueuedInstance, QueuedInstanceBatch, +}; use waymark_dag::convert_to_dag; +use waymark_runner_state::RunnerState; +use 
waymark_scheduler_backend::SchedulerBackend as _; +use waymark_scheduler_core::{CreateScheduleParams, ScheduleId, ScheduleStatus, ScheduleType}; +use waymark_workflow_registry_backend::{ + WorkflowRegistration, WorkflowRegistryBackend, WorkflowVersion, +}; const DEFAULT_GRPC_ADDR: &str = "127.0.0.1:24117"; @@ -52,7 +56,7 @@ struct WorkflowStore { impl WorkflowStore { async fn connect(dsn: &str) -> Result { let pool = PgPool::connect(dsn).await?; - db::run_migrations(&pool).await?; + waymark_backend_postgres_migrations::run(&pool).await?; let backend = PostgresBackend::new(pool); Ok(Self { backend }) } diff --git a/crates/waymark/src/db.rs b/crates/waymark/src/db.rs deleted file mode 100644 index f89f0e04..00000000 --- a/crates/waymark/src/db.rs +++ /dev/null @@ -1,14 +0,0 @@ -//! Database helpers shared across services. - -use sqlx::PgPool; - -use crate::backends::{BackendError, BackendResult}; - -/// Run the embedded SQLx migrations. -pub async fn run_migrations(pool: &PgPool) -> BackendResult<()> { - sqlx::migrate!() - .run(pool) - .await - .map_err(|err| BackendError::Message(err.to_string()))?; - Ok(()) -} diff --git a/crates/waymark/src/garbage_collector/task.rs b/crates/waymark/src/garbage_collector/task.rs index a96a280d..e1f673ef 100644 --- a/crates/waymark/src/garbage_collector/task.rs +++ b/crates/waymark/src/garbage_collector/task.rs @@ -6,8 +6,7 @@ use std::time::Duration; use chrono::Utc; use tracing::{debug, error, info}; - -use crate::backends::{GarbageCollectionResult, GarbageCollectorBackend}; +use waymark_garbage_collector_backend::{GarbageCollectionResult, GarbageCollectorBackend}; /// Configuration for the garbage collector task. 
#[derive(Debug, Clone)] @@ -120,9 +119,9 @@ mod tests { use chrono::{Duration as ChronoDuration, Utc}; use tonic::async_trait; + use waymark_backends_core::BackendResult; use super::*; - use crate::backends::{BackendResult, GarbageCollectorBackend}; #[derive(Clone)] struct StubGarbageCollectorBackend { diff --git a/crates/waymark/src/lib.rs b/crates/waymark/src/lib.rs index 66945900..568c1ba0 100644 --- a/crates/waymark/src/lib.rs +++ b/crates/waymark/src/lib.rs @@ -1,17 +1,12 @@ //! Waymark - worker pool infrastructure plus the core IR/runtime port. -pub mod backends; pub mod config; -pub mod db; pub mod garbage_collector; -pub mod integration_support; pub mod messages; pub mod observability; pub mod pool_status; pub mod scheduler; pub mod server_worker; -#[cfg(test)] -pub mod test_support; pub mod waymark_core; pub mod webapp; pub mod workers; @@ -21,10 +16,7 @@ pub use garbage_collector::{GarbageCollectorConfig, GarbageCollectorTask}; pub use messages::{MessageError, ast as ir_ast, proto, workflow_argument_value_to_json}; pub use observability::obs; pub use pool_status::{PoolTimeSeries, TimeSeriesEntry, TimeSeriesJsonEntry}; -pub use scheduler::{ - CreateScheduleParams, ScheduleId, ScheduleType, SchedulerConfig, SchedulerTask, - WorkflowSchedule, -}; +pub use scheduler::{SchedulerConfig, SchedulerTask}; pub use server_worker::{WorkerBridgeChannels, WorkerBridgeServer}; pub use webapp::{WebappConfig, WebappServer}; pub use workers::{ diff --git a/crates/waymark/src/observability.rs b/crates/waymark/src/observability.rs index dbb8a7af..49c1700f 100644 --- a/crates/waymark/src/observability.rs +++ b/crates/waymark/src/observability.rs @@ -1,6 +1,6 @@ //! Observability helpers for optional tracing instrumentation. 
-pub use waymark_observability_macros::obs; +pub use waymark_observability::obs; #[cfg(feature = "trace")] use std::sync::OnceLock; diff --git a/crates/waymark/src/scheduler/mod.rs b/crates/waymark/src/scheduler/mod.rs index d1ba2abb..d14adfc4 100644 --- a/crates/waymark/src/scheduler/mod.rs +++ b/crates/waymark/src/scheduler/mod.rs @@ -6,9 +6,5 @@ //! - Cron and interval utilities mod task; -mod types; -mod utils; pub use task::{DagResolver, SchedulerConfig, SchedulerTask, WorkflowDag}; -pub use types::{CreateScheduleParams, ScheduleId, ScheduleStatus, ScheduleType, WorkflowSchedule}; -pub use utils::{apply_jitter, compute_next_run, next_cron_run, next_interval_run, validate_cron}; diff --git a/crates/waymark/src/scheduler/task.rs b/crates/waymark/src/scheduler/task.rs index 9f9550cd..283745cc 100644 --- a/crates/waymark/src/scheduler/task.rs +++ b/crates/waymark/src/scheduler/task.rs @@ -9,9 +9,9 @@ use std::time::Duration; use serde_json::Value; use tracing::{debug, error, info}; use uuid::Uuid; +use waymark_core_backend::QueuedInstance; +use waymark_scheduler_core::{ScheduleId, WorkflowSchedule}; -use super::types::{ScheduleId, WorkflowSchedule}; -use crate::backends::{CoreBackend, QueuedInstance, SchedulerBackend}; use crate::messages; use crate::messages::ast as ir; use waymark_dag::DAG; @@ -53,7 +53,8 @@ pub struct SchedulerTask { impl SchedulerTask where - B: CoreBackend + SchedulerBackend + Clone + Send + Sync + 'static, + B: waymark_core_backend::CoreBackend + waymark_scheduler_backend::SchedulerBackend, + B: Clone + Send + Sync + 'static, { /// Run the scheduler loop. 
pub async fn run(self, shutdown: tokio_util::sync::WaitForCancellationFutureOwned) { @@ -153,12 +154,8 @@ where .as_ref() .ok_or_else(|| "DAG has no entry node".to_string())?; - let mut state = crate::waymark_core::runner::RunnerState::new( - Some(Arc::clone(&dag)), - None, - None, - false, - ); + let mut state = + waymark_runner_state::RunnerState::new(Some(Arc::clone(&dag)), None, None, false); if let Some(input_payload) = schedule.input_payload.as_deref() { let inputs = messages::workflow_arguments_to_json(input_payload) .ok_or_else(|| "failed to decode schedule input payload".to_string())?; @@ -278,14 +275,16 @@ mod tests { use chrono::{Duration as ChronoDuration, Utc}; use prost::Message; use serde_json::Value; + use waymark_backend_memory::MemoryBackend; + use waymark_core_backend::{CoreBackend, LockClaim}; + use waymark_scheduler_backend::SchedulerBackend; + use waymark_scheduler_core::{CreateScheduleParams, ScheduleType}; use super::*; - use crate::backends::{CoreBackend, LockClaim, MemoryBackend, SchedulerBackend}; use crate::messages::proto; - use crate::scheduler::{CreateScheduleParams, ScheduleType}; - use crate::waymark_core::ir_parser::parse_program; - use crate::waymark_core::runner::RunnerExecutor; use waymark_dag::convert_to_dag; + use waymark_ir_parser::parse_program; + use waymark_runner::RunnerExecutor; fn workflow_args_payload(key: &str, value: i64) -> Vec { proto::WorkflowArguments { @@ -374,11 +373,8 @@ fn main(input: [number], output: [result]): let state = queued.state.clone().expect("queued state"); let mut executor = RunnerExecutor::new(Arc::clone(&dag), state, queued.action_results.clone(), None); - let replay = crate::waymark_core::runner::replay_variables( - executor.state(), - executor.action_results(), - ) - .expect("replay inputs"); + let replay = waymark_runner::replay_variables(executor.state(), executor.action_results()) + .expect("replay inputs"); assert_eq!( replay.variables.get("number"), Some(&Value::Number(7.into())) diff 
--git a/crates/waymark/src/waymark_core/cli/benchmark.rs b/crates/waymark/src/waymark_core/cli/benchmark.rs index 99558eea..241afdd1 100644 --- a/crates/waymark/src/waymark_core/cli/benchmark.rs +++ b/crates/waymark/src/waymark_core/cli/benchmark.rs @@ -12,12 +12,11 @@ use serde_json::Value; use sha2::{Digest, Sha256}; use sqlx::PgPool; use uuid::Uuid; +use waymark_backend_postgres::PostgresBackend; +use waymark_core_backend::QueuedInstance; +use waymark_integration_support::{LOCAL_POSTGRES_DSN, ensure_local_postgres}; +use waymark_workflow_registry_backend::{WorkflowRegistration, WorkflowRegistryBackend as _}; -use crate::backends::{ - PostgresBackend, QueuedInstance, WorkflowRegistration, WorkflowRegistryBackend, -}; -use crate::db; -use crate::integration_support::{LOCAL_POSTGRES_DSN, ensure_local_postgres}; use crate::messages::ast as ir; use crate::observability::obs; use crate::waymark_core::cli::smoke::{ @@ -25,9 +24,9 @@ use crate::waymark_core::cli::smoke::{ build_try_except_program, build_while_loop_program, literal_from_value, }; use crate::waymark_core::runloop::{RunLoop, RunLoopSupervisorConfig}; -use crate::waymark_core::runner::RunnerState; use crate::workers::{ActionCallable, InlineWorkerPool, WorkerPoolError}; use waymark_dag::convert_to_dag; +use waymark_runner_state::RunnerState; const DEFAULT_DSN: &str = LOCAL_POSTGRES_DSN; const DEFAULT_MAX_CONCURRENT_INSTANCES: usize = 500; @@ -318,7 +317,9 @@ async fn run_benchmark( } let pool = PgPool::connect(dsn).await.expect("connect postgres"); drop_benchmark_tables(&pool).await; - db::run_migrations(&pool).await.expect("run migrations"); + waymark_backend_postgres_migrations::run(&pool) + .await + .expect("run migrations"); let backend = PostgresBackend::new(pool); backend.clear_all().await.expect("clear all"); let total = queue_benchmark_instances(&backend, &cases, count_per_case, batch_size).await; diff --git a/crates/waymark/src/waymark_core/cli/smoke.rs b/crates/waymark/src/waymark_core/cli/smoke.rs 
index 3625e952..bb5a49c3 100644 --- a/crates/waymark/src/waymark_core/cli/smoke.rs +++ b/crates/waymark/src/waymark_core/cli/smoke.rs @@ -11,18 +11,18 @@ use prost::Message; use serde_json::Value; use sha2::{Digest, Sha256}; use uuid::Uuid; +use waymark_backend_memory::MemoryBackend; +use waymark_core_backend::QueuedInstance; +use waymark_workflow_registry_backend::{WorkflowRegistration, WorkflowRegistryBackend as _}; -use crate::backends::{ - MemoryBackend, QueuedInstance, WorkflowRegistration, WorkflowRegistryBackend, -}; use crate::messages::ast as ir; use crate::waymark_core::dag_viz::render_dag_image; use crate::waymark_core::ir_format::format_program; -use crate::waymark_core::ir_parser::parse_program; use crate::waymark_core::runloop::{RunLoop, RunLoopSupervisorConfig}; -use crate::waymark_core::runner::RunnerState; use crate::workers::{PythonWorkerConfig, RemoteWorkerPool}; use waymark_dag::convert_to_dag; +use waymark_ir_parser::parse_program; +use waymark_runner_state::RunnerState; #[derive(Parser, Debug)] #[command(name = "waymark-smoke", about = "Smoke check core-python components.")] diff --git a/crates/waymark/src/waymark_core/ir_format.rs b/crates/waymark/src/waymark_core/ir_format.rs index 57a9460a..45795f0e 100644 --- a/crates/waymark/src/waymark_core/ir_format.rs +++ b/crates/waymark/src/waymark_core/ir_format.rs @@ -569,7 +569,7 @@ pub fn format_program(program: &ir::Program) -> String { #[cfg(test)] mod tests { use super::{DEFAULT_INDENT, format_program}; - use crate::waymark_core::ir_parser::IRParser; + use waymark_ir_parser::IRParser; #[test] fn test_format_program_happy_path() { diff --git a/crates/waymark/src/waymark_core/lock.rs b/crates/waymark/src/waymark_core/lock.rs index 6838407d..3f3acd5d 100644 --- a/crates/waymark/src/waymark_core/lock.rs +++ b/crates/waymark/src/waymark_core/lock.rs @@ -8,8 +8,7 @@ use chrono::{Duration as ChronoDuration, Utc}; use uuid::Uuid; use tracing::{debug, info, warn}; - -use crate::backends::{CoreBackend, 
LockClaim}; +use waymark_core_backend::LockClaim; #[derive(Clone)] pub struct InstanceLockTracker { @@ -60,7 +59,7 @@ impl InstanceLockTracker { } pub fn spawn_lock_heartbeat( - backend: Arc, + backend: Arc, tracker: InstanceLockTracker, heartbeat_interval: Duration, lock_ttl: Duration, diff --git a/crates/waymark/src/waymark_core/mod.rs b/crates/waymark/src/waymark_core/mod.rs index 85f0c008..d5a4ec27 100644 --- a/crates/waymark/src/waymark_core/mod.rs +++ b/crates/waymark/src/waymark_core/mod.rs @@ -4,14 +4,10 @@ pub mod cli; pub mod commit_barrier; pub mod dag_viz; pub mod ir_format; -pub mod ir_parser; pub mod lock; pub mod runloop; -pub mod runner; -pub use crate::backends::{InstanceDone, QueuedInstance}; pub use crate::workers::{ActionCompletion, ActionRequest, BaseWorkerPool, InlineWorkerPool}; pub use dag_viz::{build_dag_graph, render_dag_image}; pub use ir_format::format_program; pub use runloop::RunLoop; -pub use runner::RunnerState; diff --git a/crates/waymark/src/waymark_core/runloop.rs b/crates/waymark/src/waymark_core/runloop.rs index 6257bda0..407e4a3b 100644 --- a/crates/waymark/src/waymark_core/runloop.rs +++ b/crates/waymark/src/waymark_core/runloop.rs @@ -15,24 +15,26 @@ use serde_json::Value; use tokio::sync::mpsc; use tracing::{debug, error, info, warn}; use uuid::Uuid; - -use crate::backends::{ - ActionDone, BackendError, CoreBackend, GraphUpdate, InstanceDone, InstanceLockStatus, - LockClaim, QueuedInstance, QueuedInstanceBatch, WorkflowRegistryBackend, +use waymark_backends_core::BackendError; +use waymark_core_backend::{ + ActionDone, GraphUpdate, InstanceDone, InstanceLockStatus, LockClaim, QueuedInstance, + QueuedInstanceBatch, }; +use waymark_workflow_registry_backend::WorkflowRegistryBackend; + use crate::messages::ast as ir; use crate::observability::obs; use crate::waymark_core::commit_barrier::{CommitBarrier, DeferredInstanceEvent}; use crate::waymark_core::lock::{InstanceLockTracker, spawn_lock_heartbeat}; -use 
crate::waymark_core::runner::synthetic_exceptions::{ +use crate::workers::{ActionCompletion, ActionRequest, BaseWorkerPool, WorkerPoolError}; +use waymark_dag::{DAG, DAGNode, OutputNode, ReturnNode, convert_to_dag}; +use waymark_runner::synthetic_exceptions::{ SyntheticExceptionType, build_synthetic_exception_value, }; -use crate::waymark_core::runner::{ +use waymark_runner::{ DurableUpdates, ExecutorStep, RunnerExecutor, RunnerExecutorError, SleepRequest, replay_variables, }; -use crate::workers::{ActionCompletion, ActionRequest, BaseWorkerPool, WorkerPoolError}; -use waymark_dag::{DAG, DAGNode, OutputNode, ReturnNode, convert_to_dag}; /// Raised when the run loop cannot coordinate execution. #[derive(Debug, thiserror::Error)] @@ -351,7 +353,7 @@ impl ShardExecutor { fn run_executor_shard( shard_id: usize, - backend: Arc, + backend: Arc, receiver: std_mpsc::Receiver, sender: mpsc::UnboundedSender, ) { @@ -530,7 +532,7 @@ fn run_executor_shard( /// Run loop that fans out executor work across CPU-bound shard threads. 
pub struct RunLoop { worker_pool: Arc, - core_backend: Arc, + core_backend: Arc, registry_backend: Arc, workflow_cache: HashMap>, max_concurrent_instances: usize, @@ -566,7 +568,7 @@ pub struct RunLoopSupervisorConfig { impl RunLoop { pub fn new( worker_pool: impl BaseWorkerPool + 'static, - backend: impl CoreBackend + WorkflowRegistryBackend + 'static, + backend: impl waymark_core_backend::CoreBackend + WorkflowRegistryBackend + 'static, config: RunLoopSupervisorConfig, ) -> Self { Self::new_internal( @@ -580,7 +582,7 @@ impl RunLoop { pub fn new_with_shutdown( worker_pool: impl BaseWorkerPool + 'static, - backend: impl CoreBackend + WorkflowRegistryBackend + 'static, + backend: impl waymark_core_backend::CoreBackend + WorkflowRegistryBackend + 'static, config: RunLoopSupervisorConfig, shutdown_token: tokio_util::sync::CancellationToken, ) -> Self { @@ -589,14 +591,14 @@ impl RunLoop { fn new_internal( worker_pool: impl BaseWorkerPool + 'static, - backend: impl CoreBackend + WorkflowRegistryBackend + 'static, + backend: impl waymark_core_backend::CoreBackend + WorkflowRegistryBackend + 'static, config: RunLoopSupervisorConfig, shutdown_token: tokio_util::sync::CancellationToken, exit_on_idle: bool, ) -> Self { let max_concurrent_instances = std::cmp::max(1, config.max_concurrent_instances); let backend = Arc::new(backend); - let core_backend: Arc = backend.clone(); + let core_backend: Arc = backend.clone(); let registry_backend: Arc = backend; Self { worker_pool: Arc::new(worker_pool), @@ -1766,7 +1768,7 @@ pub async fn runloop_supervisor( config: RunLoopSupervisorConfig, shutdown_token: tokio_util::sync::CancellationToken, ) where - B: CoreBackend + WorkflowRegistryBackend + Clone + Send + Sync + 'static, + B: waymark_core_backend::CoreBackend + WorkflowRegistryBackend + Clone + Send + Sync + 'static, W: BaseWorkerPool + Clone + Send + Sync + 'static, { let mut backoff = Duration::from_millis(200); diff --git a/crates/waymark/src/waymark_core/runloop/tests.rs 
b/crates/waymark/src/waymark_core/runloop/tests.rs index 24b0ea34..53d6c634 100644 --- a/crates/waymark/src/waymark_core/runloop/tests.rs +++ b/crates/waymark/src/waymark_core/runloop/tests.rs @@ -1,141 +1,23 @@ use super::*; use std::collections::{HashMap, VecDeque}; -use std::sync::{ - Arc, Mutex, - atomic::{AtomicBool, AtomicUsize, Ordering as AtomicOrdering}, -}; +use std::sync::{Arc, Mutex}; use std::time::Duration; use chrono::Utc; use prost::Message; use sha2::{Digest, Sha256}; -use tonic::async_trait; +use waymark_backend_fault_injection::FaultInjectingBackend; +use waymark_backend_memory::MemoryBackend; +use waymark_core_backend::{ActionAttemptStatus, CoreBackend}; +use waymark_workflow_registry_backend::WorkflowRegistration; -use crate::backends::{ - ActionAttemptStatus, BackendError, BackendResult, CoreBackend, GraphUpdate, InstanceDone, - InstanceLockStatus, LockClaim, MemoryBackend, QueuedInstanceBatch, WorkflowRegistration, - WorkflowRegistryBackend, WorkflowVersion, -}; use crate::messages::ast as ir; -use crate::waymark_core::ir_parser::parse_program; -use crate::waymark_core::runner::RunnerState; -use crate::waymark_core::runner::state::NodeStatus; use crate::workers::ActionCallable; -use waymark_dag::convert_to_dag; - -#[derive(Clone)] -struct FaultInjectingBackend { - inner: MemoryBackend, - fail_get_queued_instances_with_depth_limit: Arc, - get_queued_instances_calls: Arc, -} - -impl FaultInjectingBackend { - fn with_depth_limit_poll_failures(inner: MemoryBackend) -> Self { - Self { - inner, - fail_get_queued_instances_with_depth_limit: Arc::new(AtomicBool::new(true)), - get_queued_instances_calls: Arc::new(AtomicUsize::new(0)), - } - } - - fn get_queued_instances_calls(&self) -> usize { - self.get_queued_instances_calls.load(AtomicOrdering::SeqCst) - } - - fn queue_len(&self) -> usize { - self.inner - .instance_queue() - .as_ref() - .map(|queue| queue.lock().expect("queue poisoned").len()) - .unwrap_or(0) - } - - fn instances_done_len(&self) -> 
usize { - self.inner.instances_done().len() - } -} - -#[async_trait] -impl CoreBackend for FaultInjectingBackend { - fn clone_box(&self) -> Box { - Box::new(self.clone()) - } - - async fn save_graphs( - &self, - claim: LockClaim, - graphs: &[GraphUpdate], - ) -> BackendResult> { - self.inner.save_graphs(claim, graphs).await - } - - async fn save_actions_done( - &self, - actions: &[crate::backends::ActionDone], - ) -> BackendResult<()> { - self.inner.save_actions_done(actions).await - } - async fn save_instances_done(&self, instances: &[InstanceDone]) -> BackendResult<()> { - self.inner.save_instances_done(instances).await - } - - async fn get_queued_instances( - &self, - size: usize, - claim: LockClaim, - ) -> BackendResult { - self.get_queued_instances_calls - .fetch_add(1, AtomicOrdering::SeqCst); - if self - .fail_get_queued_instances_with_depth_limit - .load(AtomicOrdering::SeqCst) - { - return Err(BackendError::Message("depth limit exceeded".to_string())); - } - self.inner.get_queued_instances(size, claim).await - } - - async fn queue_instances( - &self, - instances: &[crate::backends::QueuedInstance], - ) -> BackendResult<()> { - self.inner.queue_instances(instances).await - } - - async fn refresh_instance_locks( - &self, - claim: LockClaim, - instance_ids: &[Uuid], - ) -> BackendResult> { - self.inner.refresh_instance_locks(claim, instance_ids).await - } - - async fn release_instance_locks( - &self, - lock_uuid: Uuid, - instance_ids: &[Uuid], - ) -> BackendResult<()> { - self.inner - .release_instance_locks(lock_uuid, instance_ids) - .await - } -} - -#[async_trait] -impl WorkflowRegistryBackend for FaultInjectingBackend { - async fn upsert_workflow_version( - &self, - registration: &WorkflowRegistration, - ) -> BackendResult { - self.inner.upsert_workflow_version(registration).await - } - - async fn get_workflow_versions(&self, ids: &[Uuid]) -> BackendResult> { - self.inner.get_workflow_versions(ids).await - } -} +use waymark_dag::convert_to_dag; +use 
waymark_ir_parser::parse_program; +use waymark_runner_state::NodeStatus; +use waymark_runner_state::RunnerState; fn default_test_config(lock_uuid: Uuid) -> RunLoopSupervisorConfig { RunLoopSupervisorConfig { diff --git a/crates/waymark/src/webapp/server.rs b/crates/waymark/src/webapp/server.rs index 43818ca3..9afda350 100644 --- a/crates/waymark/src/webapp/server.rs +++ b/crates/waymark/src/webapp/server.rs @@ -16,12 +16,15 @@ use tera::{Context as TeraContext, Tera}; use tokio::net::TcpListener; use tracing::{error, info}; use uuid::Uuid; +use waymark_webapp_backend::WebappBackend; +use waymark_webapp_core::WorkerStatus; -use super::types::{ +use waymark_webapp_core::{ ActionLogsResponse, FilterValuesResponse, HealthResponse, InstanceExportInfo, TimelineEntry, - WebappConfig, WorkflowInstanceExport, WorkflowRunDataResponse, + WorkflowInstanceExport, WorkflowRunDataResponse, }; -use crate::backends::WebappBackend; + +use crate::WebappConfig; // Embed templates at compile time const TEMPLATE_BASE: &str = include_str!("../../templates/base.html"); @@ -367,7 +370,7 @@ async fn get_action_logs( let logs: Vec<_> = timeline .into_iter() .filter(|e| e.action_id == action_id_str) - .map(|e| super::types::ActionLogEntry { + .map(|e| waymark_webapp_core::ActionLogEntry { action_id: e.action_id, action_name: e.action_name, module_name: e.module_name, @@ -736,7 +739,7 @@ struct InvocationRow { fn render_invocations_page( templates: &Tera, - instances: &[super::types::InstanceSummary], + instances: &[waymark_webapp_core::InstanceSummary], current_page: i64, total_pages: i64, search_query: Option, @@ -812,8 +815,8 @@ struct GraphNode { fn render_instance_detail_page( templates: &Tera, - instance: &super::types::InstanceDetail, - graph: Option, + instance: &waymark_webapp_core::InstanceDetail, + graph: Option, ) -> String { let graph_data = graph .as_ref() @@ -843,8 +846,8 @@ fn render_instance_detail_page( render_template(templates, "workflow_run.html", &context) } -fn 
build_graph_data(graph: &super::types::ExecutionGraphView) -> GraphData { - let action_nodes: Vec<&super::types::ExecutionNodeView> = graph +fn build_graph_data(graph: &waymark_webapp_core::ExecutionGraphView) -> GraphData { + let action_nodes: Vec<&waymark_webapp_core::ExecutionNodeView> = graph .nodes .iter() .filter(|node| is_action_node(&node.node_type)) @@ -1055,7 +1058,7 @@ struct ScheduleRow { fn render_schedules_page( templates: &Tera, - schedules: &[super::types::ScheduleSummary], + schedules: &[waymark_webapp_core::ScheduleSummary], current_page: i64, total_pages: i64, total_count: i64, @@ -1136,8 +1139,8 @@ struct ScheduleInvocationRow { fn render_schedule_detail_page( templates: &Tera, - schedule: &super::types::ScheduleDetail, - invocations: &[super::types::ScheduleInvocationSummary], + schedule: &waymark_webapp_core::ScheduleDetail, + invocations: &[waymark_webapp_core::ScheduleInvocationSummary], current_page: i64, total_pages: i64, ) -> String { @@ -1234,11 +1237,7 @@ struct WorkerInstanceRowView { updated_at: String, } -fn render_workers_page( - templates: &Tera, - statuses: &[super::WorkerStatus], - window_minutes: i64, -) -> String { +fn render_workers_page(templates: &Tera, statuses: &[WorkerStatus], window_minutes: i64) -> String { use crate::pool_status::PoolTimeSeries; // Build action rows @@ -1373,13 +1372,15 @@ mod tests { use sqlx::postgres::PgPoolOptions; use tower::util::ServiceExt; use uuid::Uuid; + use waymark_backend_memory::MemoryBackend; + use waymark_backend_postgres::PostgresBackend; + use waymark_webapp_backend::WebappBackend; + use waymark_worker_status_backend::{WorkerStatusBackend as _, WorkerStatusUpdate}; use super::{WebappState, build_graph_data, build_router, init_templates}; - use crate::backends::{ - MemoryBackend, PostgresBackend, WebappBackend, WorkerStatusBackend, WorkerStatusUpdate, - }; - use crate::test_support::postgres_setup; - use crate::webapp::{ExecutionEdgeView, ExecutionGraphView, ExecutionNodeView}; + + use 
waymark_test_support::postgres_setup; + use waymark_webapp_core::{ExecutionEdgeView, ExecutionGraphView, ExecutionNodeView}; #[test] fn build_graph_data_projects_internal_nodes_to_action_dependencies() { diff --git a/crates/waymark/src/webapp/types.rs b/crates/waymark/src/webapp/types.rs index 7805c428..0b2ec6e8 100644 --- a/crates/waymark/src/webapp/types.rs +++ b/crates/waymark/src/webapp/types.rs @@ -1,8 +1,4 @@ -//! Shared types for the webapp. - -use chrono::{DateTime, Utc}; -use serde::{Deserialize, Serialize}; -use uuid::Uuid; +//! Shared types for the webapp server. /// Configuration for the webapp server. #[derive(Debug, Clone)] @@ -55,245 +51,3 @@ impl WebappConfig { format!("{}:{}", self.host, self.port) } } - -/// Instance status. -#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] -#[serde(rename_all = "lowercase")] -pub enum InstanceStatus { - Queued, - Running, - Completed, - Failed, -} - -impl std::fmt::Display for InstanceStatus { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - Self::Queued => write!(f, "queued"), - Self::Running => write!(f, "running"), - Self::Completed => write!(f, "completed"), - Self::Failed => write!(f, "failed"), - } - } -} - -/// Summary of a workflow instance for listing. -#[derive(Debug, Clone, Serialize)] -pub struct InstanceSummary { - pub id: Uuid, - pub entry_node: Uuid, - pub created_at: DateTime, - pub status: InstanceStatus, - pub workflow_name: Option, - pub input_preview: String, -} - -/// Full details of a workflow instance. -#[derive(Debug, Clone, Serialize)] -pub struct InstanceDetail { - pub id: Uuid, - pub entry_node: Uuid, - pub created_at: DateTime, - pub status: InstanceStatus, - pub workflow_name: Option, - pub input_payload: String, - pub result_payload: String, - pub error_payload: Option, -} - -/// Node in the execution graph for display. 
-#[derive(Debug, Clone, Serialize)] -pub struct ExecutionNodeView { - pub id: String, - pub node_type: String, - pub label: String, - pub status: String, - pub action_name: Option, - pub module_name: Option, -} - -/// Edge in the execution graph for display. -#[derive(Debug, Clone, Serialize)] -pub struct ExecutionEdgeView { - pub source: String, - pub target: String, - pub edge_type: String, -} - -/// Execution graph data for rendering. -#[derive(Debug, Clone, Serialize)] -pub struct ExecutionGraphView { - pub nodes: Vec, - pub edges: Vec, -} - -/// Timeline entry for an action execution. -#[derive(Debug, Clone, Serialize)] -pub struct TimelineEntry { - pub action_id: String, - pub action_name: String, - pub module_name: Option, - pub status: String, - pub attempt_number: i32, - pub dispatched_at: Option, - pub completed_at: Option, - pub duration_ms: Option, - pub request_preview: String, - pub response_preview: String, - pub error: Option, -} - -/// Action log entry with full details. -#[derive(Debug, Clone, Serialize)] -pub struct ActionLogEntry { - pub action_id: String, - pub action_name: String, - pub module_name: Option, - pub status: String, - pub attempt_number: i32, - pub dispatched_at: Option, - pub completed_at: Option, - pub duration_ms: Option, - pub request: String, - pub response: String, - pub error: Option, -} - -/// Response for the workflow run data API. -#[derive(Debug, Serialize)] -pub struct WorkflowRunDataResponse { - pub nodes: Vec, - pub timeline: Vec, - pub page: i64, - pub per_page: i64, - pub total: i64, - pub has_more: bool, -} - -/// Response for action logs API. -#[derive(Debug, Serialize)] -pub struct ActionLogsResponse { - pub logs: Vec, -} - -/// Filter values response. -#[derive(Debug, Serialize)] -pub struct FilterValuesResponse { - pub values: Vec, -} - -/// Health check response. 
-#[derive(Debug, Serialize)] -pub struct HealthResponse { - pub status: &'static str, - pub service: &'static str, -} - -/// Export format for a workflow instance. -#[derive(Debug, Serialize)] -pub struct WorkflowInstanceExport { - pub export_version: &'static str, - pub exported_at: String, - pub instance: InstanceExportInfo, - pub nodes: Vec, - pub timeline: Vec, -} - -/// Full worker status for webapp display. -#[derive(Debug, Clone)] -pub struct WorkerStatus { - pub pool_id: Uuid, - pub active_workers: i32, - pub throughput_per_min: f64, - pub actions_per_sec: f64, - pub total_completed: i64, - pub last_action_at: Option>, - pub updated_at: DateTime, - pub median_dequeue_ms: Option, - pub median_handling_ms: Option, - pub dispatch_queue_size: Option, - pub total_in_flight: Option, - pub median_instance_duration_secs: Option, - pub active_instance_count: i32, - pub total_instances_completed: i64, - pub instances_per_sec: f64, - pub instances_per_min: f64, - pub time_series: Option>, -} - -/// Worker action stats row for display. -#[derive(Debug, Clone)] -pub struct WorkerActionRow { - pub pool_id: String, - pub active_workers: i64, - pub actions_per_sec: String, - pub throughput_per_min: i64, - pub total_completed: i64, - pub median_dequeue_ms: Option, - pub median_handling_ms: Option, - pub last_action_at: Option, - pub updated_at: String, -} - -/// Aggregate worker stats for overview cards. -#[derive(Debug, Clone)] -pub struct WorkerAggregateStats { - pub active_worker_count: i64, - pub actions_per_sec: String, - pub total_in_flight: i64, - pub total_queue_depth: i64, -} - -/// Instance info for export. -#[derive(Debug, Serialize)] -pub struct InstanceExportInfo { - pub id: String, - pub status: String, - pub created_at: String, - pub input_payload: String, - pub result_payload: String, -} - -/// Schedule summary for listing. 
-#[derive(Debug, Clone, Serialize)] -pub struct ScheduleSummary { - pub id: String, - pub workflow_name: String, - pub schedule_name: String, - pub schedule_type: String, - pub cron_expression: Option, - pub interval_seconds: Option, - pub status: String, - pub next_run_at: Option, - pub last_run_at: Option, - pub created_at: String, -} - -/// Full schedule details. -#[derive(Debug, Clone, Serialize)] -pub struct ScheduleDetail { - pub id: String, - pub workflow_name: String, - pub schedule_name: String, - pub schedule_type: String, - pub cron_expression: Option, - pub interval_seconds: Option, - pub jitter_seconds: i64, - pub status: String, - pub next_run_at: Option, - pub last_run_at: Option, - pub last_instance_id: Option, - pub created_at: String, - pub updated_at: String, - pub priority: i32, - pub allow_duplicate: bool, - pub input_payload: Option, -} - -/// Invocation summary row for schedule detail pages. -#[derive(Debug, Clone, Serialize)] -pub struct ScheduleInvocationSummary { - pub id: Uuid, - pub created_at: DateTime, - pub status: InstanceStatus, -} diff --git a/crates/waymark/src/workers/status.rs b/crates/waymark/src/workers/status.rs index c9428602..03fec671 100644 --- a/crates/waymark/src/workers/status.rs +++ b/crates/waymark/src/workers/status.rs @@ -9,8 +9,8 @@ use std::time::Duration; use chrono::{DateTime, Utc}; use tracing::{info, warn}; use uuid::Uuid; +use waymark_worker_status_backend::{WorkerStatusBackend, WorkerStatusUpdate}; -use crate::backends::{WorkerStatusBackend, WorkerStatusUpdate}; use crate::pool_status::{PoolTimeSeries, TimeSeriesEntry}; #[derive(Debug, Clone)] diff --git a/crates/webapp-backend/Cargo.toml b/crates/webapp-backend/Cargo.toml new file mode 100644 index 00000000..735810b3 --- /dev/null +++ b/crates/webapp-backend/Cargo.toml @@ -0,0 +1,10 @@ +[package] +name = "waymark-webapp-backend" +version = "0.1.0" +edition = "2024" + +[dependencies] +async-trait = { workspace = true } +uuid = { workspace = true } 
+waymark-backends-core = { workspace = true } +waymark-webapp-core = { workspace = true } diff --git a/crates/webapp-backend/src/lib.rs b/crates/webapp-backend/src/lib.rs new file mode 100644 index 00000000..bc8f365c --- /dev/null +++ b/crates/webapp-backend/src/lib.rs @@ -0,0 +1,54 @@ +use uuid::Uuid; +use waymark_backends_core::BackendResult; +use waymark_webapp_core::{ + ExecutionGraphView, InstanceDetail, InstanceSummary, ScheduleDetail, ScheduleInvocationSummary, + ScheduleSummary, TimelineEntry, WorkerActionRow, WorkerAggregateStats, WorkerStatus, +}; + +/// Backend capability for webapp-specific queries. +#[async_trait::async_trait] +pub trait WebappBackend: Send + Sync { + async fn count_instances(&self, search: Option<&str>) -> BackendResult; + async fn list_instances( + &self, + search: Option<&str>, + limit: i64, + offset: i64, + ) -> BackendResult>; + async fn get_instance(&self, instance_id: Uuid) -> BackendResult; + async fn get_execution_graph( + &self, + instance_id: Uuid, + ) -> BackendResult>; + async fn get_workflow_graph( + &self, + instance_id: Uuid, + ) -> BackendResult>; + async fn get_action_results(&self, instance_id: Uuid) -> BackendResult>; + async fn get_distinct_workflows(&self) -> BackendResult>; + async fn get_distinct_statuses(&self) -> BackendResult>; + async fn count_schedules(&self) -> BackendResult; + async fn list_schedules(&self, limit: i64, offset: i64) -> BackendResult>; + async fn get_schedule(&self, schedule_id: Uuid) -> BackendResult; + async fn count_schedule_invocations(&self, schedule_id: Uuid) -> BackendResult; + async fn list_schedule_invocations( + &self, + schedule_id: Uuid, + limit: i64, + offset: i64, + ) -> BackendResult>; + async fn update_schedule_status(&self, schedule_id: Uuid, status: &str) -> BackendResult; + async fn get_distinct_schedule_statuses(&self) -> BackendResult>; + async fn get_distinct_schedule_types(&self) -> BackendResult>; + async fn get_worker_action_stats( + &self, + window_minutes: i64, + 
) -> BackendResult>; + async fn get_worker_aggregate_stats( + &self, + window_minutes: i64, + ) -> BackendResult; + async fn worker_status_table_exists(&self) -> bool; + async fn schedules_table_exists(&self) -> bool; + async fn get_worker_statuses(&self, window_minutes: i64) -> BackendResult>; +} diff --git a/crates/webapp-core/Cargo.toml b/crates/webapp-core/Cargo.toml new file mode 100644 index 00000000..2b51dc6d --- /dev/null +++ b/crates/webapp-core/Cargo.toml @@ -0,0 +1,9 @@ +[package] +name = "waymark-webapp-core" +version = "0.1.0" +edition = "2024" + +[dependencies] +uuid = { workspace = true, features = ["serde"] } +chrono = { workspace = true, features = ["serde"] } +serde = { workspace = true, features = ["derive"] } diff --git a/crates/webapp-core/src/lib.rs b/crates/webapp-core/src/lib.rs new file mode 100644 index 00000000..61a4a453 --- /dev/null +++ b/crates/webapp-core/src/lib.rs @@ -0,0 +1,247 @@ +//! Shared types for the webapp. + +use chrono::{DateTime, Utc}; +use serde::{Deserialize, Serialize}; +use uuid::Uuid; + +/// Instance status. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "lowercase")] +pub enum InstanceStatus { + Queued, + Running, + Completed, + Failed, +} + +impl std::fmt::Display for InstanceStatus { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::Queued => write!(f, "queued"), + Self::Running => write!(f, "running"), + Self::Completed => write!(f, "completed"), + Self::Failed => write!(f, "failed"), + } + } +} + +/// Summary of a workflow instance for listing. +#[derive(Debug, Clone, Serialize)] +pub struct InstanceSummary { + pub id: Uuid, + pub entry_node: Uuid, + pub created_at: DateTime, + pub status: InstanceStatus, + pub workflow_name: Option, + pub input_preview: String, +} + +/// Full details of a workflow instance. 
+#[derive(Debug, Clone, Serialize)] +pub struct InstanceDetail { + pub id: Uuid, + pub entry_node: Uuid, + pub created_at: DateTime, + pub status: InstanceStatus, + pub workflow_name: Option, + pub input_payload: String, + pub result_payload: String, + pub error_payload: Option, +} + +/// Node in the execution graph for display. +#[derive(Debug, Clone, Serialize)] +pub struct ExecutionNodeView { + pub id: String, + pub node_type: String, + pub label: String, + pub status: String, + pub action_name: Option, + pub module_name: Option, +} + +/// Edge in the execution graph for display. +#[derive(Debug, Clone, Serialize)] +pub struct ExecutionEdgeView { + pub source: String, + pub target: String, + pub edge_type: String, +} + +/// Execution graph data for rendering. +#[derive(Debug, Clone, Serialize)] +pub struct ExecutionGraphView { + pub nodes: Vec, + pub edges: Vec, +} + +/// Timeline entry for an action execution. +#[derive(Debug, Clone, Serialize)] +pub struct TimelineEntry { + pub action_id: String, + pub action_name: String, + pub module_name: Option, + pub status: String, + pub attempt_number: i32, + pub dispatched_at: Option, + pub completed_at: Option, + pub duration_ms: Option, + pub request_preview: String, + pub response_preview: String, + pub error: Option, +} + +/// Action log entry with full details. +#[derive(Debug, Clone, Serialize)] +pub struct ActionLogEntry { + pub action_id: String, + pub action_name: String, + pub module_name: Option, + pub status: String, + pub attempt_number: i32, + pub dispatched_at: Option, + pub completed_at: Option, + pub duration_ms: Option, + pub request: String, + pub response: String, + pub error: Option, +} + +/// Response for the workflow run data API. +#[derive(Debug, Serialize)] +pub struct WorkflowRunDataResponse { + pub nodes: Vec, + pub timeline: Vec, + pub page: i64, + pub per_page: i64, + pub total: i64, + pub has_more: bool, +} + +/// Response for action logs API. 
+#[derive(Debug, Serialize)] +pub struct ActionLogsResponse { + pub logs: Vec, +} + +/// Filter values response. +#[derive(Debug, Serialize)] +pub struct FilterValuesResponse { + pub values: Vec, +} + +/// Health check response. +#[derive(Debug, Serialize)] +pub struct HealthResponse { + pub status: &'static str, + pub service: &'static str, +} + +/// Export format for a workflow instance. +#[derive(Debug, Serialize)] +pub struct WorkflowInstanceExport { + pub export_version: &'static str, + pub exported_at: String, + pub instance: InstanceExportInfo, + pub nodes: Vec, + pub timeline: Vec, +} + +/// Full worker status for webapp display. +#[derive(Debug, Clone)] +pub struct WorkerStatus { + pub pool_id: Uuid, + pub active_workers: i32, + pub throughput_per_min: f64, + pub actions_per_sec: f64, + pub total_completed: i64, + pub last_action_at: Option>, + pub updated_at: DateTime, + pub median_dequeue_ms: Option, + pub median_handling_ms: Option, + pub dispatch_queue_size: Option, + pub total_in_flight: Option, + pub median_instance_duration_secs: Option, + pub active_instance_count: i32, + pub total_instances_completed: i64, + pub instances_per_sec: f64, + pub instances_per_min: f64, + pub time_series: Option>, +} + +/// Worker action stats row for display. +#[derive(Debug, Clone)] +pub struct WorkerActionRow { + pub pool_id: String, + pub active_workers: i64, + pub actions_per_sec: String, + pub throughput_per_min: i64, + pub total_completed: i64, + pub median_dequeue_ms: Option, + pub median_handling_ms: Option, + pub last_action_at: Option, + pub updated_at: String, +} + +/// Aggregate worker stats for overview cards. +#[derive(Debug, Clone)] +pub struct WorkerAggregateStats { + pub active_worker_count: i64, + pub actions_per_sec: String, + pub total_in_flight: i64, + pub total_queue_depth: i64, +} + +/// Instance info for export. 
+#[derive(Debug, Serialize)] +pub struct InstanceExportInfo { + pub id: String, + pub status: String, + pub created_at: String, + pub input_payload: String, + pub result_payload: String, +} + +/// Schedule summary for listing. +#[derive(Debug, Clone, Serialize)] +pub struct ScheduleSummary { + pub id: String, + pub workflow_name: String, + pub schedule_name: String, + pub schedule_type: String, + pub cron_expression: Option, + pub interval_seconds: Option, + pub status: String, + pub next_run_at: Option, + pub last_run_at: Option, + pub created_at: String, +} + +/// Full schedule details. +#[derive(Debug, Clone, Serialize)] +pub struct ScheduleDetail { + pub id: String, + pub workflow_name: String, + pub schedule_name: String, + pub schedule_type: String, + pub cron_expression: Option, + pub interval_seconds: Option, + pub jitter_seconds: i64, + pub status: String, + pub next_run_at: Option, + pub last_run_at: Option, + pub last_instance_id: Option, + pub created_at: String, + pub updated_at: String, + pub priority: i32, + pub allow_duplicate: bool, + pub input_payload: Option, +} + +/// Invocation summary row for schedule detail pages. +#[derive(Debug, Clone, Serialize)] +pub struct ScheduleInvocationSummary { + pub id: Uuid, + pub created_at: DateTime, + pub status: InstanceStatus, +} diff --git a/crates/worker-status-backend/Cargo.toml b/crates/worker-status-backend/Cargo.toml new file mode 100644 index 00000000..ff50466a --- /dev/null +++ b/crates/worker-status-backend/Cargo.toml @@ -0,0 +1,10 @@ +[package] +name = "waymark-worker-status-backend" +version = "0.1.0" +edition = "2024" + +[dependencies] +async-trait = { workspace = true } +uuid = { workspace = true } +chrono = { workspace = true } +waymark-backends-core = { workspace = true } diff --git a/crates/worker-status-backend/src/lib.rs b/crates/worker-status-backend/src/lib.rs new file mode 100644 index 00000000..bc23eb4e --- /dev/null +++ b/crates/worker-status-backend/src/lib.rs @@ -0,0 +1,32 @@ +//! 
Worker status backend. + +use uuid::Uuid; + +pub use waymark_backends_core::{BackendError, BackendResult}; + +/// Worker status update for persistence. +#[derive(Clone, Debug)] +pub struct WorkerStatusUpdate { + pub pool_id: Uuid, + pub throughput_per_min: f64, + pub total_completed: i64, + pub last_action_at: Option>, + pub median_dequeue_ms: Option, + pub median_handling_ms: Option, + pub dispatch_queue_size: i64, + pub total_in_flight: i64, + pub active_workers: i32, + pub actions_per_sec: f64, + pub median_instance_duration_secs: Option, + pub active_instance_count: i32, + pub total_instances_completed: i64, + pub instances_per_sec: f64, + pub instances_per_min: f64, + pub time_series: Option>, +} + +/// Backend capability for recording worker status metrics. +#[async_trait::async_trait] +pub trait WorkerStatusBackend: Send + Sync { + async fn upsert_worker_status(&self, status: &WorkerStatusUpdate) -> BackendResult<()>; +} diff --git a/crates/workflow-registry-backend/Cargo.toml b/crates/workflow-registry-backend/Cargo.toml new file mode 100644 index 00000000..2dc85a4d --- /dev/null +++ b/crates/workflow-registry-backend/Cargo.toml @@ -0,0 +1,9 @@ +[package] +name = "waymark-workflow-registry-backend" +version = "0.1.0" +edition = "2024" + +[dependencies] +async-trait = { workspace = true } +uuid = { workspace = true } +waymark-backends-core = { workspace = true } diff --git a/crates/workflow-registry-backend/src/lib.rs b/crates/workflow-registry-backend/src/lib.rs new file mode 100644 index 00000000..041c8482 --- /dev/null +++ b/crates/workflow-registry-backend/src/lib.rs @@ -0,0 +1,35 @@ +use uuid::Uuid; + +pub use waymark_backends_core::{BackendError, BackendResult}; + +/// Registration payload for storing workflow DAG metadata. 
+#[derive(Clone, Debug)] +pub struct WorkflowRegistration { + pub workflow_name: String, + pub workflow_version: String, + pub ir_hash: String, + pub program_proto: Vec, + pub concurrent: bool, +} + +#[derive(Clone, Debug)] +/// Stored workflow version metadata and IR payload. +pub struct WorkflowVersion { + pub id: Uuid, + pub workflow_name: String, + pub workflow_version: String, + pub ir_hash: String, + pub program_proto: Vec, + pub concurrent: bool, +} + +/// Backend capability for registering workflow DAGs. +#[async_trait::async_trait] +pub trait WorkflowRegistryBackend: Send + Sync { + async fn upsert_workflow_version( + &self, + registration: &WorkflowRegistration, + ) -> BackendResult; + + async fn get_workflow_versions(&self, ids: &[Uuid]) -> BackendResult>; +}