diff --git a/Cargo.lock b/Cargo.lock index c5272da93..c8e223ff9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -17,6 +17,7 @@ name = "agent-os-execution" version = "0.1.0" dependencies = [ "agent-os-bridge", + "serde", "serde_json", "tempfile", "wat", diff --git a/crates/execution/Cargo.toml b/crates/execution/Cargo.toml index f314b1c51..9a3c6c120 100644 --- a/crates/execution/Cargo.toml +++ b/crates/execution/Cargo.toml @@ -7,6 +7,7 @@ description = "Native execution plane scaffold for Agent OS" [dependencies] agent-os-bridge = { path = "../bridge" } +serde = { version = "1.0", features = ["derive"] } serde_json = "1" [dev-dependencies] diff --git a/crates/execution/src/benchmark.rs b/crates/execution/src/benchmark.rs index b7d5abe67..f51f68a86 100644 --- a/crates/execution/src/benchmark.rs +++ b/crates/execution/src/benchmark.rs @@ -2,6 +2,7 @@ use crate::{ CreateJavascriptContextRequest, JavascriptExecutionEngine, JavascriptExecutionError, StartJavascriptExecutionRequest, }; +use serde::{Deserialize, Serialize}; use std::collections::BTreeMap; use std::env; use std::fmt; @@ -9,12 +10,18 @@ use std::fmt::Write as _; use std::fs; use std::path::{Path, PathBuf}; use std::process::Command; -use std::time::{Instant, SystemTime, UNIX_EPOCH}; +use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH}; const BENCHMARK_MARKER_PREFIX: &str = "__AGENT_OS_BENCH__:"; const LOCAL_GRAPH_MODULE_COUNT: usize = 24; - -#[derive(Debug, Clone, PartialEq, Eq)] +const BENCHMARK_ARTIFACT_VERSION: u32 = 5; +const BENCHMARK_ARTIFACT_DIR: &str = "target/benchmark-reports/node-import-bench"; +const BENCHMARK_RUN_STATE_FILE: &str = "run-state.json"; +const TRANSPORT_RTT_CHANNEL: &str = "execution-stdio-echo"; +const TRANSPORT_RTT_PAYLOAD_BYTES: [usize; 3] = [32, 4 * 1024, 64 * 1024]; +const TRANSPORT_POLL_TIMEOUT: Duration = Duration::from_secs(5); + +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] pub struct JavascriptBenchmarkConfig { pub iterations: usize, pub 
warmup_iterations: usize, @@ -29,7 +36,7 @@ impl Default for JavascriptBenchmarkConfig { } } -#[derive(Debug, Clone, PartialEq, Eq)] +#[derive(Debug, Clone, PartialEq, Eq, Serialize)] pub struct BenchmarkHost { pub node_binary: String, pub node_version: String, @@ -38,18 +45,63 @@ pub struct BenchmarkHost { pub logical_cpus: usize, } -#[derive(Debug, Clone, PartialEq)] +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct BenchmarkScenarioPhases { + pub context_setup_ms: T, + pub startup_ms: T, + #[serde(skip_serializing_if = "Option::is_none", default)] + pub guest_execution_ms: Option, + pub completion_ms: T, +} + +#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize)] pub struct BenchmarkStats { pub mean_ms: f64, pub p50_ms: f64, pub p95_ms: f64, pub min_ms: f64, pub max_ms: f64, + pub stddev_ms: f64, +} + +#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize)] +pub struct BenchmarkDistributionStats { + pub mean: f64, + pub p50: f64, + pub p95: f64, + pub min: f64, + pub max: f64, + pub stddev: f64, +} + +#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize)] +pub struct BenchmarkResourceUsage { + #[serde(skip_serializing_if = "Option::is_none", default)] + pub rss_bytes: Option, + #[serde(skip_serializing_if = "Option::is_none", default)] + pub heap_used_bytes: Option, + #[serde(skip_serializing_if = "Option::is_none", default)] + pub cpu_user_us: Option, + #[serde(skip_serializing_if = "Option::is_none", default)] + pub cpu_system_us: Option, + #[serde(skip_serializing_if = "Option::is_none", default)] + pub cpu_total_us: Option, +} + +#[derive(Debug, Clone, PartialEq, Serialize)] +pub struct BenchmarkTransportRttReport { + pub channel: &'static str, + pub payload_bytes: usize, + pub samples_ms: Vec, + pub stats: BenchmarkStats, } -#[derive(Debug, Clone, PartialEq)] +#[derive(Debug, Clone, PartialEq, Serialize)] pub struct BenchmarkScenarioReport { pub id: &'static str, + pub workload: &'static str, + 
pub runtime: &'static str, + pub mode: &'static str, pub description: &'static str, pub fixture: &'static str, pub compile_cache: &'static str, @@ -59,19 +111,86 @@ pub struct BenchmarkScenarioReport { pub guest_import_stats: Option, pub startup_overhead_samples_ms: Option>, pub startup_overhead_stats: Option, + pub phase_samples_ms: BenchmarkScenarioPhases>, + pub phase_stats: BenchmarkScenarioPhases, + #[serde(skip_serializing_if = "Option::is_none", default)] + pub resource_usage_samples: Option>>, + #[serde(skip_serializing_if = "Option::is_none", default)] + pub resource_usage_stats: Option>, } -#[derive(Debug, Clone, PartialEq)] +#[derive(Debug, Clone, PartialEq, Serialize)] pub struct JavascriptBenchmarkReport { pub generated_at_unix_ms: u128, pub config: JavascriptBenchmarkConfig, pub host: BenchmarkHost, pub repo_root: PathBuf, + pub transport_rtt: Vec, pub scenarios: Vec, } +#[derive(Debug, Clone, PartialEq, Serialize)] +pub struct BenchmarkComparison { + pub baseline: BenchmarkComparisonBaseline, + pub summary: BenchmarkComparisonSummary, + pub scenario_deltas: Vec, + pub scenarios_missing_from_baseline: Vec, + pub baseline_only_scenarios: Vec, +} + +#[derive(Debug, Clone, PartialEq, Serialize)] +pub struct BenchmarkComparisonBaseline { + pub artifact_version: u32, + pub generated_at_unix_ms: u128, + pub path: PathBuf, +} + +#[derive(Debug, Clone, PartialEq, Serialize)] +pub struct BenchmarkComparisonSummary { + pub compared_scenario_count: usize, + #[serde(skip_serializing_if = "Option::is_none")] + pub largest_wall_improvement: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub largest_wall_regression: Option, +} + +#[derive(Debug, Clone, PartialEq, Serialize)] +pub struct BenchmarkDeltaHighlight { + pub id: String, + pub delta_ms: f64, + pub delta_pct: f64, +} + +#[derive(Debug, Clone, PartialEq, Serialize)] +pub struct BenchmarkScenarioDelta { + pub id: String, + pub description: String, + pub wall_mean_ms: BenchmarkMetricDelta, + 
#[serde(skip_serializing_if = "Option::is_none")] + pub guest_import_mean_ms: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub startup_overhead_mean_ms: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub phase_mean_ms: Option>, +} + +#[derive(Debug, Clone, PartialEq, Serialize)] +pub struct BenchmarkMetricDelta { + pub baseline_ms: f64, + pub current_ms: f64, + pub delta_ms: f64, + pub delta_pct: f64, +} + impl JavascriptBenchmarkReport { pub fn render_markdown(&self) -> String { + self.render_markdown_with_comparison(None) + } + + pub fn render_markdown_with_comparison( + &self, + comparison: Option<&BenchmarkComparison>, + ) -> String { let mut markdown = String::new(); let _ = writeln!(&mut markdown, "# Agent OS Node Import Benchmark"); let _ = writeln!(&mut markdown); @@ -103,22 +222,60 @@ impl JavascriptBenchmarkReport { self.config.iterations, self.config.warmup_iterations ); let _ = writeln!(&mut markdown); + let _ = writeln!(&mut markdown, "## Transport RTT"); + let _ = writeln!(&mut markdown); + let _ = writeln!( + &mut markdown, + "| Channel | Payload (bytes) | Mean RTT (ms) | P50 | P95 |" + ); + let _ = writeln!(&mut markdown, "| --- | ---: | ---: | ---: | ---: |"); + + for transport in &self.transport_rtt { + let _ = writeln!( + &mut markdown, + "| `{}` | {} | {} | {} | {} |", + transport.channel, + transport.payload_bytes, + format_ms(transport.stats.mean_ms), + format_ms(transport.stats.p50_ms), + format_ms(transport.stats.p95_ms), + ); + } + + let _ = writeln!(&mut markdown, "## Control Matrix"); + let _ = writeln!(&mut markdown); + + for row in self.control_matrix() { + let _ = writeln!( + &mut markdown, + "- Workload `{}`: runtimes {}, modes {}, scenarios {}", + row.workload, + format_label_list(&row.runtimes), + format_label_list(&row.modes), + format_label_list(&row.scenario_ids), + ); + } + + let _ = writeln!(&mut markdown); + let _ = writeln!(&mut markdown, "## Scenario Summary"); + let _ = writeln!(&mut 
markdown); let _ = writeln!( &mut markdown, - "| Scenario | Fixture | Cache | Mean wall (ms) | P50 | P95 | Mean import (ms) | Mean startup overhead (ms) |" + "| Scenario | Workload | Runtime | Mode | Fixture | Cache | Mean wall (ms) | Mean context (ms) | Mean startup (ms) | Mean guest exec (ms) | Mean completion (ms) | Mean startup overhead (ms) |" ); let _ = writeln!( &mut markdown, - "| --- | --- | --- | ---: | ---: | ---: | ---: | ---: |" + "| --- | --- | --- | --- | --- | --- | ---: | ---: | ---: | ---: | ---: | ---: |" ); for scenario in &self.scenarios { - let import_mean = scenario - .guest_import_stats + let guest_execution_mean = scenario + .phase_stats + .guest_execution_ms .as_ref() .map(|stats| format_ms(stats.mean_ms)) .unwrap_or_else(|| String::from("n/a")); - let startup_mean = scenario + let startup_overhead_mean = scenario .startup_overhead_stats .as_ref() .map(|stats| format_ms(stats.mean_ms)) @@ -126,19 +283,106 @@ impl JavascriptBenchmarkReport { let _ = writeln!( &mut markdown, - "| `{}` | {} | {} | {} | {} | {} | {} | {} |", + "| `{}` | `{}` | `{}` | `{}` | {} | {} | {} | {} | {} | {} | {} | {} |", scenario.id, + scenario.workload, + scenario.runtime, + scenario.mode, scenario.fixture, scenario.compile_cache, format_ms(scenario.wall_stats.mean_ms), + format_ms(scenario.phase_stats.context_setup_ms.mean_ms), + format_ms(scenario.phase_stats.startup_ms.mean_ms), + guest_execution_mean, + format_ms(scenario.phase_stats.completion_ms.mean_ms), + startup_overhead_mean, + ); + } + + let _ = writeln!(&mut markdown); + let _ = writeln!(&mut markdown, "## Stability And Resource Summary"); + let _ = writeln!(&mut markdown); + let _ = writeln!( + &mut markdown, + "| Scenario | Wall P50 (ms) | Wall min-max (ms) | Wall stddev (ms) | Mean RSS (MiB) | Mean heap (MiB) | Mean total CPU (ms) |" + ); + let _ = writeln!( + &mut markdown, + "| --- | ---: | --- | ---: | ---: | ---: | ---: |" + ); + + for scenario in &self.scenarios { + let _ = writeln!( + &mut 
markdown, + "| `{}` | {} | {}-{} | {} | {} | {} | {} |", + scenario.id, format_ms(scenario.wall_stats.p50_ms), - format_ms(scenario.wall_stats.p95_ms), - import_mean, - startup_mean, + format_ms(scenario.wall_stats.min_ms), + format_ms(scenario.wall_stats.max_ms), + format_ms(scenario.wall_stats.stddev_ms), + scenario + .resource_usage_stats + .as_ref() + .and_then(|stats| stats.rss_bytes.as_ref()) + .map(|stats| format_mib(bytes_to_mib(stats.mean))) + .unwrap_or_else(|| String::from("n/a")), + scenario + .resource_usage_stats + .as_ref() + .and_then(|stats| stats.heap_used_bytes.as_ref()) + .map(|stats| format_mib(bytes_to_mib(stats.mean))) + .unwrap_or_else(|| String::from("n/a")), + scenario + .resource_usage_stats + .as_ref() + .and_then(|stats| stats.cpu_total_us.as_ref()) + .map(|stats| format_ms(micros_to_ms(stats.mean))) + .unwrap_or_else(|| String::from("n/a")), ); } let _ = writeln!(&mut markdown); + let _ = writeln!(&mut markdown, "## Ranked Hotspots"); + let _ = writeln!(&mut markdown); + + for ranking in self.hotspot_rankings() { + let _ = writeln!( + &mut markdown, + "### {} (`{}`, `{}`)", + ranking.label, ranking.dimension, ranking.unit + ); + let _ = writeln!(&mut markdown); + let _ = writeln!( + &mut markdown, + "| Rank | Scenario | Workload | Runtime | Mode | Value |" + ); + let _ = writeln!(&mut markdown, "| ---: | --- | --- | --- | --- | ---: |"); + + for scenario in &ranking.ranked_scenarios { + let _ = writeln!( + &mut markdown, + "| {} | `{}` | `{}` | `{}` | `{}` | {} |", + scenario.rank, + scenario.id, + scenario.workload, + scenario.runtime, + scenario.mode, + format_hotspot_value(ranking.unit, scenario.value), + ); + } + + if !ranking.scenarios_without_metric.is_empty() { + let _ = writeln!(&mut markdown); + let _ = writeln!( + &mut markdown, + "Missing metric for: {}", + format_string_label_list(&ranking.scenarios_without_metric), + ); + } + + let _ = writeln!(&mut markdown); + } + let _ = writeln!(&mut markdown, "## Hotspot Guidance"); 
let _ = writeln!(&mut markdown); @@ -146,12 +390,112 @@ impl JavascriptBenchmarkReport { let _ = writeln!(&mut markdown, "- {line}"); } + if let Some(comparison) = comparison { + let _ = writeln!(&mut markdown); + let _ = writeln!(&mut markdown, "## Baseline Comparison"); + let _ = writeln!(&mut markdown); + let _ = writeln!( + &mut markdown, + "- Baseline artifact: `{}`", + comparison.baseline.path.display() + ); + let _ = writeln!( + &mut markdown, + "- Baseline generated at unix ms: `{}`", + comparison.baseline.generated_at_unix_ms + ); + let _ = writeln!( + &mut markdown, + "- Compared scenarios: `{}`", + comparison.summary.compared_scenario_count + ); + if let Some(improvement) = &comparison.summary.largest_wall_improvement { + let _ = writeln!( + &mut markdown, + "- Largest wall-time improvement: `{}` at {} ({})", + improvement.id, + format_delta_ms(improvement.delta_ms), + format_delta_pct(improvement.delta_pct), + ); + } + if let Some(regression) = &comparison.summary.largest_wall_regression { + let _ = writeln!( + &mut markdown, + "- Largest wall-time regression: `{}` at {} ({})", + regression.id, + format_delta_ms(regression.delta_ms), + format_delta_pct(regression.delta_pct), + ); + } + if !comparison.scenarios_missing_from_baseline.is_empty() { + let _ = writeln!( + &mut markdown, + "- Scenarios missing from baseline: {}", + comparison.scenarios_missing_from_baseline.join(", ") + ); + } + if !comparison.baseline_only_scenarios.is_empty() { + let _ = writeln!( + &mut markdown, + "- Baseline-only scenarios: {}", + comparison.baseline_only_scenarios.join(", ") + ); + } + let _ = writeln!(&mut markdown); + let _ = writeln!( + &mut markdown, + "| Scenario | Wall delta (ms) | Wall delta % | Import delta (ms) | Startup delta (ms) | Context delta (ms) | Completion delta (ms) |" + ); + let _ = writeln!( + &mut markdown, + "| --- | ---: | ---: | ---: | ---: | ---: | ---: |" + ); + + for scenario in &comparison.scenario_deltas { + let import_delta = scenario + 
.guest_import_mean_ms + .as_ref() + .map(|delta| format_delta_ms(delta.delta_ms)) + .unwrap_or_else(|| String::from("n/a")); + let startup_delta = scenario + .startup_overhead_mean_ms + .as_ref() + .map(|delta| format_delta_ms(delta.delta_ms)) + .unwrap_or_else(|| String::from("n/a")); + let context_delta = scenario + .phase_mean_ms + .as_ref() + .map(|delta| format_delta_ms(delta.context_setup_ms.delta_ms)) + .unwrap_or_else(|| String::from("n/a")); + let completion_delta = scenario + .phase_mean_ms + .as_ref() + .map(|delta| format_delta_ms(delta.completion_ms.delta_ms)) + .unwrap_or_else(|| String::from("n/a")); + + let _ = writeln!( + &mut markdown, + "| `{}` | {} | {} | {} | {} | {} | {} |", + scenario.id, + format_delta_ms(scenario.wall_mean_ms.delta_ms), + format_delta_pct(scenario.wall_mean_ms.delta_pct), + import_delta, + startup_delta, + context_delta, + completion_delta, + ); + } + } + let _ = writeln!(&mut markdown); let _ = writeln!(&mut markdown, "## Raw Samples"); let _ = writeln!(&mut markdown); for scenario in &self.scenarios { let _ = writeln!(&mut markdown, "### `{}`", scenario.id); + let _ = writeln!(&mut markdown, "- Workload: `{}`", scenario.workload); + let _ = writeln!(&mut markdown, "- Runtime: `{}`", scenario.runtime); + let _ = writeln!(&mut markdown, "- Mode: `{}`", scenario.mode); let _ = writeln!(&mut markdown, "- Description: {}", scenario.description); let _ = writeln!( &mut markdown, @@ -172,16 +516,113 @@ impl JavascriptBenchmarkReport { format_sample_list(samples) ); } + let _ = writeln!( + &mut markdown, + "- Context setup samples (ms): {}", + format_sample_list(&scenario.phase_samples_ms.context_setup_ms) + ); + let _ = writeln!( + &mut markdown, + "- Startup samples (ms): {}", + format_sample_list(&scenario.phase_samples_ms.startup_ms) + ); + if let Some(samples) = &scenario.phase_samples_ms.guest_execution_ms { + let _ = writeln!( + &mut markdown, + "- Guest execution samples (ms): {}", + format_sample_list(samples) + ); + } + 
let _ = writeln!( + &mut markdown, + "- Completion samples (ms): {}", + format_sample_list(&scenario.phase_samples_ms.completion_ms) + ); + if let Some(samples) = &scenario.resource_usage_samples { + if let Some(rss_samples) = &samples.rss_bytes { + let _ = writeln!( + &mut markdown, + "- RSS samples (MiB): {}", + format_scaled_sample_list(rss_samples, bytes_to_mib) + ); + } + if let Some(heap_samples) = &samples.heap_used_bytes { + let _ = writeln!( + &mut markdown, + "- Heap samples (MiB): {}", + format_scaled_sample_list(heap_samples, bytes_to_mib) + ); + } + if let Some(cpu_samples) = &samples.cpu_total_us { + let _ = writeln!( + &mut markdown, + "- Total CPU samples (ms): {}", + format_scaled_sample_list(cpu_samples, micros_to_ms) + ); + } + } let _ = writeln!(&mut markdown); } markdown } + pub fn render_json(&self) -> Result { + self.render_json_with_comparison(None) + } + + pub fn render_json_with_comparison( + &self, + comparison: Option<&BenchmarkComparison>, + ) -> Result { + serde_json::to_string_pretty(&self.json_artifact(comparison)) + } + + pub fn write_artifacts( + &self, + output_dir: &Path, + ) -> Result { + self.write_artifacts_with_comparison(output_dir, None) + } + + pub fn write_artifacts_with_comparison( + &self, + output_dir: &Path, + comparison: Option<&BenchmarkComparison>, + ) -> Result { + fs::create_dir_all(output_dir)?; + + let markdown_path = output_dir.join("report.md"); + let json_path = output_dir.join("report.json"); + write_string_atomic( + &markdown_path, + &self.render_markdown_with_comparison(comparison), + )?; + write_string_atomic(&json_path, &self.render_json_with_comparison(comparison)?)?; + + Ok(JavascriptBenchmarkArtifactPaths { + markdown_path, + json_path, + }) + } + + pub fn compare_to_baseline_path( + &self, + baseline_path: &Path, + ) -> Result { + let baseline = load_benchmark_artifact(baseline_path)?; + Ok(BenchmarkComparison::from_reports( + self, + baseline_path, + &baseline, + )) + } + fn guidance_lines(&self) 
-> Vec { let isolate = self.scenario("isolate-startup"); let cold_local = self.scenario("cold-local-import"); let warm_local = self.scenario("warm-local-import"); + let prewarmed_local = self.scenario("prewarmed-local-import"); let builtin = self.scenario("builtin-import"); let large = self.scenario("large-package-import"); @@ -190,7 +631,10 @@ impl JavascriptBenchmarkReport { if let ( Some(cold_import), Some(warm_import), - Some(warm_startup), + Some(warm_context), + Some(warm_startup_phase), + Some(warm_completion), + Some(warm_startup_overhead), Some(warm_wall), Some(isolate_wall), ) = ( @@ -200,6 +644,9 @@ impl JavascriptBenchmarkReport { warm_local .and_then(|scenario| scenario.guest_import_stats.as_ref()) .map(|stats| stats.mean_ms), + warm_local.map(|scenario| scenario.phase_stats.context_setup_ms.mean_ms), + warm_local.map(|scenario| scenario.phase_stats.startup_ms.mean_ms), + warm_local.map(|scenario| scenario.phase_stats.completion_ms.mean_ms), warm_local .and_then(|scenario| scenario.startup_overhead_stats.as_ref()) .map(|stats| stats.mean_ms), @@ -211,15 +658,45 @@ impl JavascriptBenchmarkReport { format_ms(cold_import), format_ms(warm_import), percentage_reduction(cold_import, warm_import), - format_ms(warm_startup), + format_ms(warm_startup_overhead), format_ms(isolate_wall), )); if warm_wall > 0.0 { guidance.push(format!( "Warm local imports still spend {:.1}% of wall time in process startup, wrapper evaluation, and stdio handling instead of guest import work. Optimizations that only touch module compilation will not remove that floor.", - percentage_share(warm_startup, warm_wall), + percentage_share(warm_startup_overhead, warm_wall), )); } + let warm_guest = warm_local + .and_then(|scenario| scenario.phase_stats.guest_execution_ms.as_ref()) + .map(|stats| stats.mean_ms) + .unwrap_or(0.0); + guidance.push(format!( + "The warm path phase split is {} context setup, {} runtime startup, {} guest execution, and {} completion/stdio work. 
Future attribution can now separate bootstrap wins from pure transport/collection wins instead of treating them as one startup bucket.", + format_ms(warm_context), + format_ms(warm_startup_phase), + format_ms(warm_guest), + format_ms(warm_completion), + )); + } + + if let (Some(warm_startup_overhead), Some(prewarmed_startup_overhead), Some(isolate_wall)) = ( + warm_local + .and_then(|scenario| scenario.startup_overhead_stats.as_ref()) + .map(|stats| stats.mean_ms), + prewarmed_local + .and_then(|scenario| scenario.startup_overhead_stats.as_ref()) + .map(|stats| stats.mean_ms), + isolate.map(|scenario| scenario.wall_stats.mean_ms), + ) { + guidance.push(format!( + "Keeping the current import-cache materialization and builtin/polyfill prewarm alive inside one execution engine cuts warm local startup overhead from {} to {} ({:.1}% faster). The remaining {} of non-import work is the post-prewarm floor that broader warm-pool/snapshot work would still need to attack above the `{}` empty-isolate baseline.", + format_ms(warm_startup_overhead), + format_ms(prewarmed_startup_overhead), + percentage_reduction(warm_startup_overhead, prewarmed_startup_overhead), + format_ms(prewarmed_startup_overhead), + format_ms(isolate_wall), + )); } if let (Some(builtin_import), Some(large_import)) = ( @@ -238,6 +715,59 @@ impl JavascriptBenchmarkReport { )); } + if let (Some(smallest), Some(largest)) = + (self.transport_rtt.first(), self.transport_rtt.last()) + { + guidance.push(format!( + "Execution-transport RTT over the stdio bridge rises from {} at {} bytes to {} at {} bytes. 
That gives later work a direct transport floor to compare against the larger startup and import phases.", + format_ms(smallest.stats.mean_ms), + smallest.payload_bytes, + format_ms(largest.stats.mean_ms), + largest.payload_bytes, + )); + } + + if let Some(noisiest) = self.scenarios.iter().max_by(|lhs, rhs| { + lhs.wall_stats + .stddev_ms + .total_cmp(&rhs.wall_stats.stddev_ms) + }) { + guidance.push(format!( + "Wall-time noise is now surfaced directly in the same artifact set: `{}` currently shows the largest spread at {} stddev over a {}-{} wall range, so future deltas on that path should be judged against stability as well as mean time.", + noisiest.id, + format_ms(noisiest.wall_stats.stddev_ms), + format_ms(noisiest.wall_stats.min_ms), + format_ms(noisiest.wall_stats.max_ms), + )); + } + + if let Some(heaviest) = self.scenarios.iter().max_by(|lhs, rhs| { + lhs.resource_usage_stats + .as_ref() + .and_then(|stats| stats.rss_bytes.as_ref()) + .map(|stats| stats.mean) + .unwrap_or(f64::NEG_INFINITY) + .total_cmp( + &rhs.resource_usage_stats + .as_ref() + .and_then(|stats| stats.rss_bytes.as_ref()) + .map(|stats| stats.mean) + .unwrap_or(f64::NEG_INFINITY), + ) + }) { + if let Some(rss_mean) = heaviest + .resource_usage_stats + .as_ref() + .and_then(|stats| stats.rss_bytes.as_ref()) + { + guidance.push(format!( + "Per-scenario resource reporting is now attached to the benchmark rows themselves: `{}` currently has the highest mean RSS at {} MiB, so import-path changes can now be judged for memory regressions without a separate memory-only pass.", + heaviest.id, + format_mib(bytes_to_mib(rss_mean.mean)), + )); + } + } + guidance.push(String::from( "No new PRD stories were added from this run. 
The measured hotspots already map cleanly onto existing follow-ons: `ARC-021C` for safe resolution and metadata caches, `ARC-021D` for builtin/polyfill prewarm, and `ARC-022` for broader warm-pool and timing-mitigation execution work.", )); @@ -248,84 +778,703 @@ impl JavascriptBenchmarkReport { fn scenario(&self, id: &str) -> Option<&BenchmarkScenarioReport> { self.scenarios.iter().find(|scenario| scenario.id == id) } -} - -#[derive(Debug)] -pub enum JavascriptBenchmarkError { - InvalidConfig(&'static str), - InvalidWorkspaceRoot(PathBuf), - Io(std::io::Error), - Utf8(std::string::FromUtf8Error), - Execution(JavascriptExecutionError), - NodeVersion(std::io::Error), - MissingBenchmarkMetric(&'static str), - InvalidBenchmarkMetric { - scenario: &'static str, - raw_value: String, - }, - NonZeroExit { - scenario: &'static str, - exit_code: i32, - stderr: String, - }, -} -impl fmt::Display for JavascriptBenchmarkError { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - Self::InvalidConfig(message) => write!(f, "invalid benchmark config: {message}"), - Self::InvalidWorkspaceRoot(path) => { - write!( - f, - "failed to resolve workspace root from execution crate path: {}", - path.display() - ) - } - Self::Io(err) => write!(f, "benchmark I/O failure: {err}"), - Self::Utf8(err) => write!(f, "benchmark output was not valid UTF-8: {err}"), - Self::Execution(err) => write!(f, "benchmark execution failed: {err}"), - Self::NodeVersion(err) => write!(f, "failed to query node version: {err}"), - Self::MissingBenchmarkMetric(scenario) => { - write!( - f, - "benchmark scenario `{scenario}` did not emit a metric marker" - ) - } - Self::InvalidBenchmarkMetric { - scenario, - raw_value, - } => write!( - f, - "benchmark scenario `{scenario}` emitted an invalid metric: {raw_value}" - ), - Self::NonZeroExit { - scenario, - exit_code, - stderr, - } => write!( - f, - "benchmark scenario `{scenario}` exited with code {exit_code}: {stderr}" + fn json_artifact<'a>( 
+ &'a self, + comparison: Option<&'a BenchmarkComparison>, + ) -> JavascriptBenchmarkArtifact<'a> { + JavascriptBenchmarkArtifact { + artifact_version: BENCHMARK_ARTIFACT_VERSION, + generated_at_unix_ms: self.generated_at_unix_ms, + command: format!( + "cargo run -p agent-os-execution --bin node-import-bench -- --iterations {} --warmup-iterations {}", + self.config.iterations, self.config.warmup_iterations ), + config: &self.config, + host: &self.host, + repo_root: &self.repo_root, + summary: self.summary(), + comparison, + transport_rtt: self + .transport_rtt + .iter() + .map(|transport| BenchmarkTransportRttArtifact { + channel: transport.channel, + payload_bytes: transport.payload_bytes, + samples_ms: &transport.samples_ms, + stats: &transport.stats, + }) + .collect(), + scenarios: self + .scenarios + .iter() + .map(|scenario| BenchmarkScenarioArtifact { + id: scenario.id, + workload: scenario.workload, + runtime: scenario.runtime, + mode: scenario.mode, + description: scenario.description, + fixture: scenario.fixture, + compile_cache: scenario.compile_cache, + wall_samples_ms: &scenario.wall_samples_ms, + wall_stats: &scenario.wall_stats, + guest_import_samples_ms: scenario.guest_import_samples_ms.as_deref(), + guest_import_stats: scenario.guest_import_stats.as_ref(), + startup_overhead_samples_ms: scenario.startup_overhead_samples_ms.as_deref(), + startup_overhead_stats: scenario.startup_overhead_stats.as_ref(), + mean_startup_share_pct: scenario.mean_startup_share_pct(), + phase_samples_ms: &scenario.phase_samples_ms, + phase_stats: &scenario.phase_stats, + resource_usage_samples: scenario.resource_usage_samples.as_ref(), + resource_usage_stats: scenario.resource_usage_stats.as_ref(), + }) + .collect(), } } -} - -impl std::error::Error for JavascriptBenchmarkError {} -impl From for JavascriptBenchmarkError { - fn from(err: std::io::Error) -> Self { - Self::Io(err) + fn summary(&self) -> BenchmarkSummaryArtifact<'_> { + BenchmarkSummaryArtifact { + 
scenario_count: self.scenarios.len(), + recorded_samples_per_scenario: self.config.iterations, + warmup_iterations: self.config.warmup_iterations, + control_matrix: self.control_matrix(), + slowest_wall_scenario: self.slowest_scenario_by(|scenario| scenario.wall_stats.mean_ms), + slowest_guest_import_scenario: self.slowest_scenario_by(|scenario| { + scenario + .guest_import_stats + .as_ref() + .map(|stats| stats.mean_ms) + .unwrap_or(f64::NEG_INFINITY) + }), + highest_startup_share_scenario: self.scenarios.iter().max_by(|lhs, rhs| { + lhs.mean_startup_share_pct() + .unwrap_or(f64::NEG_INFINITY) + .total_cmp(&rhs.mean_startup_share_pct().unwrap_or(f64::NEG_INFINITY)) + }), + hotspot_rankings: self.hotspot_rankings(), + guidance_lines: self.guidance_lines(), + } } -} -impl From for JavascriptBenchmarkError { - fn from(err: std::string::FromUtf8Error) -> Self { - Self::Utf8(err) - } -} + fn control_matrix(&self) -> Vec> { + let mut rows = Vec::new(); + let mut row_indexes = BTreeMap::new(); -impl From for JavascriptBenchmarkError { - fn from(err: JavascriptExecutionError) -> Self { + for scenario in &self.scenarios { + let row_index = *row_indexes.entry(scenario.workload).or_insert_with(|| { + rows.push(BenchmarkControlMatrixArtifact { + workload: scenario.workload, + runtimes: Vec::new(), + modes: Vec::new(), + scenario_ids: Vec::new(), + }); + rows.len() - 1 + }); + let row = &mut rows[row_index]; + push_unique_label(&mut row.runtimes, scenario.runtime); + push_unique_label(&mut row.modes, scenario.mode); + row.scenario_ids.push(scenario.id); + } + + rows + } + + fn slowest_scenario_by( + &self, + value: impl Fn(&BenchmarkScenarioReport) -> f64, + ) -> Option<&BenchmarkScenarioReport> { + self.scenarios + .iter() + .max_by(|lhs, rhs| value(lhs).total_cmp(&value(rhs))) + } + + fn hotspot_rankings(&self) -> Vec> { + HOTSPOT_METRICS + .iter() + .map(|metric| { + let mut ranked_scenarios = self + .scenarios + .iter() + .filter_map(|scenario| { + 
(metric.value)(scenario).map(|value| BenchmarkHotspotScenarioArtifact { + rank: 0, + id: scenario.id, + workload: scenario.workload, + runtime: scenario.runtime, + mode: scenario.mode, + value, + }) + }) + .collect::>(); + ranked_scenarios.sort_by(|lhs, rhs| { + rhs.value + .total_cmp(&lhs.value) + .then_with(|| lhs.id.cmp(rhs.id)) + }); + for (index, scenario) in ranked_scenarios.iter_mut().enumerate() { + scenario.rank = index + 1; + } + + BenchmarkHotspotRankingArtifact { + metric: metric.metric, + label: metric.label, + dimension: metric.dimension, + unit: metric.unit, + ranked_scenarios, + scenarios_without_metric: self + .scenarios + .iter() + .filter(|scenario| (metric.value)(scenario).is_none()) + .map(|scenario| scenario.id) + .collect(), + } + }) + .collect() + } +} + +impl BenchmarkScenarioReport { + fn mean_startup_share_pct(&self) -> Option { + let startup_mean = self.startup_overhead_stats.as_ref()?.mean_ms; + let wall_mean = self.wall_stats.mean_ms; + if wall_mean <= 0.0 { + Some(0.0) + } else { + Some((startup_mean / wall_mean) * 100.0) + } + } + + fn wall_range_ms(&self) -> f64 { + self.wall_stats.max_ms - self.wall_stats.min_ms + } +} + +impl BenchmarkResourceUsage> { + fn push_sample(&mut self, sample: &BenchmarkResourceUsage) { + push_optional_sample(&mut self.rss_bytes, sample.rss_bytes); + push_optional_sample(&mut self.heap_used_bytes, sample.heap_used_bytes); + push_optional_sample(&mut self.cpu_user_us, sample.cpu_user_us); + push_optional_sample(&mut self.cpu_system_us, sample.cpu_system_us); + push_optional_sample(&mut self.cpu_total_us, sample.cpu_total_us); + } + + fn into_populated(self) -> Option { + (!self.is_empty()).then_some(self) + } +} + +impl BenchmarkResourceUsage { + fn is_empty(&self) -> bool { + self.rss_bytes.is_none() + && self.heap_used_bytes.is_none() + && self.cpu_user_us.is_none() + && self.cpu_system_us.is_none() + && self.cpu_total_us.is_none() + } +} + +impl BenchmarkComparison { + fn from_reports( + current: 
&JavascriptBenchmarkReport, + baseline_path: &Path, + baseline: &StoredBenchmarkArtifact, + ) -> Self { + let baseline_path = + fs::canonicalize(baseline_path).unwrap_or_else(|_| baseline_path.to_path_buf()); + let baseline_by_id = baseline + .scenarios + .iter() + .map(|scenario| (scenario.id.as_str(), scenario)) + .collect::>(); + + let mut scenario_deltas = Vec::new(); + let mut scenarios_missing_from_baseline = Vec::new(); + + for scenario in ¤t.scenarios { + if let Some(baseline_scenario) = baseline_by_id.get(scenario.id) { + scenario_deltas.push(BenchmarkScenarioDelta { + id: scenario.id.to_owned(), + description: scenario.description.to_owned(), + wall_mean_ms: BenchmarkMetricDelta::from_means( + baseline_scenario.wall_stats.mean_ms, + scenario.wall_stats.mean_ms, + ), + guest_import_mean_ms: match ( + baseline_scenario.guest_import_stats.as_ref(), + scenario.guest_import_stats.as_ref(), + ) { + (Some(baseline_stats), Some(current_stats)) => { + Some(BenchmarkMetricDelta::from_means( + baseline_stats.mean_ms, + current_stats.mean_ms, + )) + } + _ => None, + }, + startup_overhead_mean_ms: match ( + baseline_scenario.startup_overhead_stats.as_ref(), + scenario.startup_overhead_stats.as_ref(), + ) { + (Some(baseline_stats), Some(current_stats)) => { + Some(BenchmarkMetricDelta::from_means( + baseline_stats.mean_ms, + current_stats.mean_ms, + )) + } + _ => None, + }, + phase_mean_ms: match ( + baseline_scenario.phase_stats.as_ref(), + Some(&scenario.phase_stats), + ) { + (Some(baseline_phase), Some(current_phase)) => { + Some(BenchmarkScenarioPhases { + context_setup_ms: BenchmarkMetricDelta::from_means( + baseline_phase.context_setup_ms.mean_ms, + current_phase.context_setup_ms.mean_ms, + ), + startup_ms: BenchmarkMetricDelta::from_means( + baseline_phase.startup_ms.mean_ms, + current_phase.startup_ms.mean_ms, + ), + guest_execution_ms: match ( + baseline_phase.guest_execution_ms.as_ref(), + current_phase.guest_execution_ms.as_ref(), + ) { + 
(Some(baseline_stats), Some(current_stats)) => { + Some(BenchmarkMetricDelta::from_means( + baseline_stats.mean_ms, + current_stats.mean_ms, + )) + } + _ => None, + }, + completion_ms: BenchmarkMetricDelta::from_means( + baseline_phase.completion_ms.mean_ms, + current_phase.completion_ms.mean_ms, + ), + }) + } + _ => None, + }, + }); + } else { + scenarios_missing_from_baseline.push(scenario.id.to_owned()); + } + } + + let current_ids = current + .scenarios + .iter() + .map(|scenario| (scenario.id, ())) + .collect::>(); + let baseline_only_scenarios = baseline + .scenarios + .iter() + .filter_map(|scenario| { + (!current_ids.contains_key(scenario.id.as_str())).then(|| scenario.id.clone()) + }) + .collect::>(); + + let largest_wall_improvement = scenario_deltas + .iter() + .filter(|scenario| scenario.wall_mean_ms.delta_ms < 0.0) + .min_by(|lhs, rhs| { + lhs.wall_mean_ms + .delta_ms + .total_cmp(&rhs.wall_mean_ms.delta_ms) + }) + .map(BenchmarkDeltaHighlight::from_wall_delta); + let largest_wall_regression = scenario_deltas + .iter() + .filter(|scenario| scenario.wall_mean_ms.delta_ms > 0.0) + .max_by(|lhs, rhs| { + lhs.wall_mean_ms + .delta_ms + .total_cmp(&rhs.wall_mean_ms.delta_ms) + }) + .map(BenchmarkDeltaHighlight::from_wall_delta); + + Self { + baseline: BenchmarkComparisonBaseline { + artifact_version: baseline.artifact_version, + generated_at_unix_ms: baseline.generated_at_unix_ms, + path: baseline_path, + }, + summary: BenchmarkComparisonSummary { + compared_scenario_count: scenario_deltas.len(), + largest_wall_improvement, + largest_wall_regression, + }, + scenario_deltas, + scenarios_missing_from_baseline, + baseline_only_scenarios, + } + } +} + +impl BenchmarkDeltaHighlight { + fn from_wall_delta(delta: &BenchmarkScenarioDelta) -> Self { + Self { + id: delta.id.clone(), + delta_ms: delta.wall_mean_ms.delta_ms, + delta_pct: delta.wall_mean_ms.delta_pct, + } + } +} + +impl BenchmarkMetricDelta { + fn from_means(baseline_ms: f64, current_ms: f64) -> Self { 
+ let delta_ms = current_ms - baseline_ms; + let delta_pct = if baseline_ms <= 0.0 { + 0.0 + } else { + (delta_ms / baseline_ms) * 100.0 + }; + + Self { + baseline_ms, + current_ms, + delta_ms, + delta_pct, + } + } +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct JavascriptBenchmarkArtifactPaths { + pub markdown_path: PathBuf, + pub json_path: PathBuf, +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct JavascriptBenchmarkRunOutput { + pub artifact_paths: JavascriptBenchmarkArtifactPaths, + pub resumed_stage_count: usize, +} + +#[derive(Debug, Serialize)] +struct JavascriptBenchmarkArtifact<'a> { + artifact_version: u32, + generated_at_unix_ms: u128, + command: String, + config: &'a JavascriptBenchmarkConfig, + host: &'a BenchmarkHost, + repo_root: &'a Path, + summary: BenchmarkSummaryArtifact<'a>, + #[serde(skip_serializing_if = "Option::is_none")] + comparison: Option<&'a BenchmarkComparison>, + transport_rtt: Vec>, + scenarios: Vec>, +} + +#[derive(Debug, Serialize)] +struct BenchmarkSummaryArtifact<'a> { + scenario_count: usize, + recorded_samples_per_scenario: usize, + warmup_iterations: usize, + control_matrix: Vec>, + #[serde(skip_serializing_if = "Option::is_none")] + slowest_wall_scenario: Option<&'a BenchmarkScenarioReport>, + #[serde(skip_serializing_if = "Option::is_none")] + slowest_guest_import_scenario: Option<&'a BenchmarkScenarioReport>, + #[serde(skip_serializing_if = "Option::is_none")] + highest_startup_share_scenario: Option<&'a BenchmarkScenarioReport>, + hotspot_rankings: Vec>, + guidance_lines: Vec, +} + +#[derive(Debug, Serialize)] +struct BenchmarkScenarioArtifact<'a> { + id: &'static str, + workload: &'static str, + runtime: &'static str, + mode: &'static str, + description: &'static str, + fixture: &'static str, + compile_cache: &'static str, + wall_samples_ms: &'a [f64], + wall_stats: &'a BenchmarkStats, + #[serde(skip_serializing_if = "Option::is_none")] + guest_import_samples_ms: Option<&'a [f64]>, + 
#[serde(skip_serializing_if = "Option::is_none")] + guest_import_stats: Option<&'a BenchmarkStats>, + #[serde(skip_serializing_if = "Option::is_none")] + startup_overhead_samples_ms: Option<&'a [f64]>, + #[serde(skip_serializing_if = "Option::is_none")] + startup_overhead_stats: Option<&'a BenchmarkStats>, + #[serde(skip_serializing_if = "Option::is_none")] + mean_startup_share_pct: Option, + phase_samples_ms: &'a BenchmarkScenarioPhases>, + phase_stats: &'a BenchmarkScenarioPhases, + #[serde(skip_serializing_if = "Option::is_none")] + resource_usage_samples: Option<&'a BenchmarkResourceUsage>>, + #[serde(skip_serializing_if = "Option::is_none")] + resource_usage_stats: Option<&'a BenchmarkResourceUsage>, +} + +#[derive(Debug, Serialize)] +struct BenchmarkControlMatrixArtifact<'a> { + workload: &'a str, + runtimes: Vec<&'a str>, + modes: Vec<&'a str>, + scenario_ids: Vec<&'a str>, +} + +#[derive(Debug, Serialize)] +struct BenchmarkTransportRttArtifact<'a> { + channel: &'static str, + payload_bytes: usize, + samples_ms: &'a [f64], + stats: &'a BenchmarkStats, +} + +#[derive(Debug, Serialize)] +struct BenchmarkHotspotRankingArtifact<'a> { + metric: &'static str, + label: &'static str, + dimension: &'static str, + unit: &'static str, + ranked_scenarios: Vec>, + #[serde(skip_serializing_if = "Vec::is_empty")] + scenarios_without_metric: Vec<&'a str>, +} + +#[derive(Debug, Serialize)] +struct BenchmarkHotspotScenarioArtifact<'a> { + rank: usize, + id: &'a str, + workload: &'a str, + runtime: &'a str, + mode: &'a str, + value: f64, +} + +struct HotspotMetricDefinition { + metric: &'static str, + label: &'static str, + dimension: &'static str, + unit: &'static str, + value: fn(&BenchmarkScenarioReport) -> Option, +} + +const HOTSPOT_METRICS: [HotspotMetricDefinition; 13] = [ + HotspotMetricDefinition { + metric: "wall_mean_ms", + label: "Wall Time", + dimension: "time", + unit: "ms", + value: hotspot_wall_mean_ms, + }, + HotspotMetricDefinition { + metric: 
"wall_stddev_ms", + label: "Wall Time Stddev", + dimension: "stability", + unit: "ms", + value: hotspot_wall_stddev_ms, + }, + HotspotMetricDefinition { + metric: "wall_range_ms", + label: "Wall Time Range", + dimension: "stability", + unit: "ms", + value: hotspot_wall_range_ms, + }, + HotspotMetricDefinition { + metric: "guest_import_mean_ms", + label: "Guest Import Time", + dimension: "time", + unit: "ms", + value: hotspot_guest_import_mean_ms, + }, + HotspotMetricDefinition { + metric: "startup_overhead_mean_ms", + label: "Startup Overhead", + dimension: "time", + unit: "ms", + value: hotspot_startup_overhead_mean_ms, + }, + HotspotMetricDefinition { + metric: "context_setup_mean_ms", + label: "Context Setup Phase", + dimension: "time", + unit: "ms", + value: hotspot_context_setup_mean_ms, + }, + HotspotMetricDefinition { + metric: "startup_phase_mean_ms", + label: "Runtime Startup Phase", + dimension: "time", + unit: "ms", + value: hotspot_startup_phase_mean_ms, + }, + HotspotMetricDefinition { + metric: "guest_execution_mean_ms", + label: "Guest Execution Phase", + dimension: "time", + unit: "ms", + value: hotspot_guest_execution_mean_ms, + }, + HotspotMetricDefinition { + metric: "completion_mean_ms", + label: "Completion/Stdio Phase", + dimension: "time", + unit: "ms", + value: hotspot_completion_mean_ms, + }, + HotspotMetricDefinition { + metric: "startup_share_pct", + label: "Startup Share Of Wall", + dimension: "share", + unit: "pct", + value: hotspot_startup_share_pct, + }, + HotspotMetricDefinition { + metric: "rss_mean_mib", + label: "RSS", + dimension: "memory", + unit: "MiB", + value: hotspot_rss_mean_mib, + }, + HotspotMetricDefinition { + metric: "heap_mean_mib", + label: "Heap Used", + dimension: "memory", + unit: "MiB", + value: hotspot_heap_mean_mib, + }, + HotspotMetricDefinition { + metric: "cpu_total_mean_ms", + label: "Total CPU", + dimension: "cpu", + unit: "ms", + value: hotspot_total_cpu_mean_ms, + }, +]; + +#[derive(Debug)] +pub enum 
JavascriptBenchmarkError { + InvalidConfig(&'static str), + InvalidWorkspaceRoot(PathBuf), + InvalidBaselineReport { + path: PathBuf, + message: String, + }, + Io(std::io::Error), + Utf8(std::string::FromUtf8Error), + Execution(JavascriptExecutionError), + NodeVersion(std::io::Error), + MissingBenchmarkMetric(&'static str), + InvalidBenchmarkMetric { + scenario: &'static str, + raw_value: String, + }, + TransportProbeTimeout { + payload_bytes: usize, + }, + TransportProbeExited { + exit_code: i32, + stderr: String, + }, + InvalidTransportProbeResponse { + payload_bytes: usize, + expected: String, + actual: String, + }, + NonZeroExit { + scenario: &'static str, + exit_code: i32, + stderr: String, + }, +} + +impl fmt::Display for JavascriptBenchmarkError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::InvalidConfig(message) => write!(f, "invalid benchmark config: {message}"), + Self::InvalidWorkspaceRoot(path) => { + write!( + f, + "failed to resolve workspace root from execution crate path: {}", + path.display() + ) + } + Self::InvalidBaselineReport { path, message } => { + write!( + f, + "failed to parse benchmark baseline artifact {}: {message}", + path.display() + ) + } + Self::Io(err) => write!(f, "benchmark I/O failure: {err}"), + Self::Utf8(err) => write!(f, "benchmark output was not valid UTF-8: {err}"), + Self::Execution(err) => write!(f, "benchmark execution failed: {err}"), + Self::NodeVersion(err) => write!(f, "failed to query node version: {err}"), + Self::MissingBenchmarkMetric(scenario) => { + write!( + f, + "benchmark scenario `{scenario}` did not emit a metric marker" + ) + } + Self::InvalidBenchmarkMetric { + scenario, + raw_value, + } => write!( + f, + "benchmark scenario `{scenario}` emitted an invalid metric: {raw_value}" + ), + Self::TransportProbeTimeout { payload_bytes } => { + write!( + f, + "transport probe timed out waiting for {payload_bytes}-byte round-trip" + ) + } + Self::TransportProbeExited { 
exit_code, stderr } => write!( + f, + "transport probe exited with code {exit_code}: {stderr}" + ), + Self::InvalidTransportProbeResponse { + payload_bytes, + expected, + actual, + } => write!( + f, + "transport probe returned unexpected payload for {payload_bytes}-byte round-trip: expected {expected:?}, got {actual:?}" + ), + Self::NonZeroExit { + scenario, + exit_code, + stderr, + } => write!( + f, + "benchmark scenario `{scenario}` exited with code {exit_code}: {stderr}" + ), + } + } +} + +impl std::error::Error for JavascriptBenchmarkError {} + +impl From for JavascriptBenchmarkError { + fn from(err: std::io::Error) -> Self { + Self::Io(err) + } +} + +impl From for JavascriptBenchmarkError { + fn from(err: std::string::FromUtf8Error) -> Self { + Self::Utf8(err) + } +} + +impl From for JavascriptBenchmarkError { + fn from(err: serde_json::Error) -> Self { + Self::Io(std::io::Error::new(std::io::ErrorKind::InvalidData, err)) + } +} + +impl From for JavascriptBenchmarkError { + fn from(err: JavascriptExecutionError) -> Self { Self::Execution(err) } } @@ -342,6 +1491,7 @@ pub fn run_javascript_benchmarks( let repo_root = workspace_root()?; let host = benchmark_host()?; let workspace = BenchmarkWorkspace::create(&repo_root)?; + let transport_rtt = measure_transport_rtt(&workspace, config)?; let mut scenarios = Vec::new(); @@ -357,18 +1507,100 @@ pub fn run_javascript_benchmarks( config: config.clone(), host, repo_root, + transport_rtt, scenarios, }) } -#[derive(Debug)] +fn benchmark_artifact_dir(repo_root: &Path) -> PathBuf { + repo_root.join(BENCHMARK_ARTIFACT_DIR) +} + +fn benchmark_run_state_path(artifact_dir: &Path) -> PathBuf { + artifact_dir.join(BENCHMARK_RUN_STATE_FILE) +} + +fn load_benchmark_run_state( + state_path: &Path, + config: &JavascriptBenchmarkConfig, + host: &BenchmarkHost, + repo_root: &Path, + definitions: &[ScenarioDefinition], +) -> Result { + match fs::read_to_string(state_path) { + Ok(raw) => match serde_json::from_str::(&raw) { + Ok(state) 
if state.is_compatible(config, host, repo_root) => { + Ok(state.sanitized(definitions)) + } + Ok(_) | Err(_) => Ok(StoredBenchmarkRunState::new(config, host, repo_root)), + }, + Err(err) if err.kind() == std::io::ErrorKind::NotFound => { + Ok(StoredBenchmarkRunState::new(config, host, repo_root)) + } + Err(err) => Err(JavascriptBenchmarkError::Io(err)), + } +} + +fn persist_benchmark_run_state( + state_path: &Path, + state: &StoredBenchmarkRunState, +) -> Result<(), JavascriptBenchmarkError> { + write_string_atomic(state_path, &serde_json::to_string_pretty(state)?) +} + +fn write_string_atomic(path: &Path, contents: &str) -> Result<(), JavascriptBenchmarkError> { + if let Some(parent) = path.parent() { + fs::create_dir_all(parent)?; + } + + let temp_path = path.with_file_name(format!( + ".{}.tmp-{}-{}", + path.file_name() + .and_then(|name| name.to_str()) + .unwrap_or("artifact"), + std::process::id(), + SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap_or_default() + .as_nanos() + )); + fs::write(&temp_path, contents)?; + if let Err(err) = fs::rename(&temp_path, path) { + let _ = fs::remove_file(&temp_path); + return Err(JavascriptBenchmarkError::Io(err)); + } + + Ok(()) +} + +fn remove_file_if_exists(path: &Path) -> Result<(), JavascriptBenchmarkError> { + match fs::remove_file(path) { + Ok(()) => Ok(()), + Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(()), + Err(err) => Err(JavascriptBenchmarkError::Io(err)), + } +} + +fn current_unix_ms() -> u128 { + SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap_or_default() + .as_millis() +} + +#[derive(Debug, Clone, Copy)] struct ScenarioDefinition { id: &'static str, + workload: &'static str, + runtime: ScenarioRuntime, + mode: ScenarioMode, description: &'static str, fixture: &'static str, entrypoint: &'static str, compile_cache: CompileCacheStrategy, + engine_reuse: EngineReuseStrategy, expect_import_metric: bool, + env: ScenarioEnvironment, } #[derive(Debug, Clone, Copy, PartialEq, Eq)] 
@@ -386,15 +1618,139 @@ impl CompileCacheStrategy { } } +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum EngineReuseStrategy { + FreshPerSample, + SharedAcrossScenario, + SharedContextAcrossScenario, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum ScenarioEnvironment { + None, + ProjectedWorkspaceNodeModules, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum ScenarioRuntime { + NativeExecution, + HostNode, +} + +impl ScenarioRuntime { + fn label(self) -> &'static str { + match self { + Self::NativeExecution => "native-execution", + Self::HostNode => "host-node", + } + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum ScenarioMode { + BaselineControl, + TrueColdStart, + NewSessionReplay, + SameSessionReplay, + SameEngineReplay, + HostControl, +} + +impl ScenarioMode { + fn label(self) -> &'static str { + match self { + Self::BaselineControl => "baseline-control", + Self::TrueColdStart => "true-cold-start", + Self::NewSessionReplay => "new-session-replay", + Self::SameSessionReplay => "same-session-replay", + Self::SameEngineReplay => "same-engine-replay", + Self::HostControl => "host-control", + } + } +} + #[derive(Debug)] struct SampleMeasurement { wall_ms: f64, guest_import_ms: Option, + context_setup_ms: f64, + startup_ms: f64, + completion_ms: f64, + resource_usage: Option>, } #[derive(Debug)] struct BenchmarkWorkspace { root: PathBuf, + repo_root: PathBuf, +} + +#[derive(Debug, Deserialize)] +struct StoredBenchmarkArtifact { + artifact_version: u32, + generated_at_unix_ms: u128, + scenarios: Vec, +} + +#[derive(Debug, Deserialize)] +struct StoredBenchmarkScenario { + id: String, + wall_stats: BenchmarkStats, + #[serde(default)] + guest_import_stats: Option, + #[serde(default)] + startup_overhead_stats: Option, + #[serde(default)] + phase_stats: Option>, +} + +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +struct StoredBenchmarkRunHost { + node_binary: String, + node_version: String, + os: String, + arch: String, + 
logical_cpus: usize, +} + +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +struct StoredBenchmarkRunState { + artifact_version: u32, + config: JavascriptBenchmarkConfig, + host: StoredBenchmarkRunHost, + repo_root: PathBuf, + #[serde(default)] + transport_rtt: Option>, + #[serde(default)] + scenarios: Vec, +} + +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +struct StoredBenchmarkTransportRttReport { + payload_bytes: usize, + samples_ms: Vec, + stats: BenchmarkStats, +} + +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +struct StoredBenchmarkScenarioReport { + id: String, + wall_samples_ms: Vec, + wall_stats: BenchmarkStats, + #[serde(default)] + guest_import_samples_ms: Option>, + #[serde(default)] + guest_import_stats: Option, + #[serde(default)] + startup_overhead_samples_ms: Option>, + #[serde(default)] + startup_overhead_stats: Option, + phase_samples_ms: BenchmarkScenarioPhases>, + phase_stats: BenchmarkScenarioPhases, + #[serde(default)] + resource_usage_samples: Option>>, + #[serde(default)] + resource_usage_stats: Option>, } impl BenchmarkWorkspace { @@ -409,7 +1765,10 @@ impl BenchmarkWorkspace { )); fs::create_dir_all(&root)?; write_benchmark_workspace(&root)?; - Ok(Self { root }) + Ok(Self { + root, + repo_root: repo_root.to_path_buf(), + }) } } @@ -419,52 +1778,575 @@ impl Drop for BenchmarkWorkspace { } } -fn benchmark_scenarios() -> [ScenarioDefinition; 5] { +impl StoredBenchmarkRunHost { + fn from_host(host: &BenchmarkHost) -> Self { + Self { + node_binary: host.node_binary.clone(), + node_version: host.node_version.clone(), + os: host.os.to_owned(), + arch: host.arch.to_owned(), + logical_cpus: host.logical_cpus, + } + } + + fn matches_host(&self, host: &BenchmarkHost) -> bool { + self.node_binary == host.node_binary + && self.node_version == host.node_version + && self.os == host.os + && self.arch == host.arch + && self.logical_cpus == host.logical_cpus + } +} + +impl StoredBenchmarkRunState { + fn new(config: 
&JavascriptBenchmarkConfig, host: &BenchmarkHost, repo_root: &Path) -> Self { + Self { + artifact_version: BENCHMARK_ARTIFACT_VERSION, + config: config.clone(), + host: StoredBenchmarkRunHost::from_host(host), + repo_root: repo_root.to_path_buf(), + transport_rtt: None, + scenarios: Vec::new(), + } + } + + fn is_compatible( + &self, + config: &JavascriptBenchmarkConfig, + host: &BenchmarkHost, + repo_root: &Path, + ) -> bool { + self.artifact_version == BENCHMARK_ARTIFACT_VERSION + && self.config == *config + && self.host.matches_host(host) + && self.repo_root == repo_root + } + + fn sanitized(mut self, definitions: &[ScenarioDefinition]) -> Self { + if let Some(transport_rtt) = &self.transport_rtt { + let payloads = transport_rtt + .iter() + .map(|report| report.payload_bytes) + .collect::>(); + if payloads != TRANSPORT_RTT_PAYLOAD_BYTES { + self.transport_rtt = None; + } + } + + let mut scenarios_by_id = self + .scenarios + .into_iter() + .map(|scenario| (scenario.id.clone(), scenario)) + .collect::>(); + self.scenarios = definitions + .iter() + .filter_map(|definition| scenarios_by_id.remove(definition.id)) + .collect(); + self + } + + fn resumed_stage_count(&self, definitions: &[ScenarioDefinition]) -> usize { + usize::from(self.transport_rtt.is_some()) + + definitions + .iter() + .filter(|definition| self.has_scenario(definition.id)) + .count() + } + + fn has_scenario(&self, id: &str) -> bool { + self.scenarios.iter().any(|scenario| scenario.id == id) + } + + fn record_transport_rtt(&mut self, transport_rtt: &[BenchmarkTransportRttReport]) { + self.transport_rtt = Some( + transport_rtt + .iter() + .map(StoredBenchmarkTransportRttReport::from_report) + .collect(), + ); + } + + fn record_scenario(&mut self, scenario: &BenchmarkScenarioReport) { + self.scenarios.retain(|stored| stored.id != scenario.id); + self.scenarios + .push(StoredBenchmarkScenarioReport::from_report(scenario)); + } + + fn to_report( + &self, + config: &JavascriptBenchmarkConfig, + host: 
&BenchmarkHost, + repo_root: &Path, + definitions: &[ScenarioDefinition], + ) -> JavascriptBenchmarkReport { + let scenarios_by_id = self + .scenarios + .iter() + .map(|scenario| (scenario.id.as_str(), scenario)) + .collect::>(); + + JavascriptBenchmarkReport { + generated_at_unix_ms: current_unix_ms(), + config: config.clone(), + host: host.clone(), + repo_root: repo_root.to_path_buf(), + transport_rtt: self + .transport_rtt + .clone() + .unwrap_or_default() + .into_iter() + .map(StoredBenchmarkTransportRttReport::into_report) + .collect(), + scenarios: definitions + .iter() + .filter_map(|definition| { + scenarios_by_id + .get(definition.id) + .map(|scenario| scenario.to_report(*definition)) + }) + .collect(), + } + } +} + +impl StoredBenchmarkTransportRttReport { + fn from_report(report: &BenchmarkTransportRttReport) -> Self { + Self { + payload_bytes: report.payload_bytes, + samples_ms: report.samples_ms.clone(), + stats: report.stats.clone(), + } + } + + fn into_report(self) -> BenchmarkTransportRttReport { + BenchmarkTransportRttReport { + channel: TRANSPORT_RTT_CHANNEL, + payload_bytes: self.payload_bytes, + samples_ms: self.samples_ms, + stats: self.stats, + } + } +} + +impl StoredBenchmarkScenarioReport { + fn from_report(report: &BenchmarkScenarioReport) -> Self { + Self { + id: report.id.to_owned(), + wall_samples_ms: report.wall_samples_ms.clone(), + wall_stats: report.wall_stats.clone(), + guest_import_samples_ms: report.guest_import_samples_ms.clone(), + guest_import_stats: report.guest_import_stats.clone(), + startup_overhead_samples_ms: report.startup_overhead_samples_ms.clone(), + startup_overhead_stats: report.startup_overhead_stats.clone(), + phase_samples_ms: report.phase_samples_ms.clone(), + phase_stats: report.phase_stats.clone(), + resource_usage_samples: report.resource_usage_samples.clone(), + resource_usage_stats: report.resource_usage_stats.clone(), + } + } + + fn to_report(&self, definition: ScenarioDefinition) -> 
BenchmarkScenarioReport { + BenchmarkScenarioReport { + id: definition.id, + workload: definition.workload, + runtime: definition.runtime.label(), + mode: definition.mode.label(), + description: definition.description, + fixture: definition.fixture, + compile_cache: definition.compile_cache.label(), + wall_samples_ms: self.wall_samples_ms.clone(), + wall_stats: self.wall_stats.clone(), + guest_import_samples_ms: self.guest_import_samples_ms.clone(), + guest_import_stats: self.guest_import_stats.clone(), + startup_overhead_samples_ms: self.startup_overhead_samples_ms.clone(), + startup_overhead_stats: self.startup_overhead_stats.clone(), + phase_samples_ms: self.phase_samples_ms.clone(), + phase_stats: self.phase_stats.clone(), + resource_usage_samples: self.resource_usage_samples.clone(), + resource_usage_stats: self.resource_usage_stats.clone(), + } + } +} + +pub fn run_javascript_benchmarks_with_recovery( + config: &JavascriptBenchmarkConfig, + baseline_path: Option<&Path>, +) -> Result { + if config.iterations == 0 { + return Err(JavascriptBenchmarkError::InvalidConfig( + "iterations must be greater than zero", + )); + } + + let repo_root = workspace_root()?; + let host = benchmark_host()?; + let artifact_dir = benchmark_artifact_dir(&repo_root); + let workspace = BenchmarkWorkspace::create(&repo_root)?; + let (report, resumed_stage_count, state_path) = orchestrate_javascript_benchmark_report( + config, + &repo_root, + &host, + &artifact_dir, + || measure_transport_rtt(&workspace, config), + |scenario| run_scenario(&workspace, config, scenario), + )?; + let comparison = baseline_path + .map(|path| report.compare_to_baseline_path(path)) + .transpose()?; + let artifact_paths = + report.write_artifacts_with_comparison(&artifact_dir, comparison.as_ref())?; + remove_file_if_exists(&state_path)?; + + Ok(JavascriptBenchmarkRunOutput { + artifact_paths, + resumed_stage_count, + }) +} + +fn orchestrate_javascript_benchmark_report( + config: &JavascriptBenchmarkConfig, + 
repo_root: &Path, + host: &BenchmarkHost, + artifact_dir: &Path, + mut measure_transport: MeasureTransport, + mut run_scenario: RunScenario, +) -> Result<(JavascriptBenchmarkReport, usize, PathBuf), JavascriptBenchmarkError> +where + MeasureTransport: FnMut() -> Result, JavascriptBenchmarkError>, + RunScenario: + FnMut(ScenarioDefinition) -> Result, +{ + if config.iterations == 0 { + return Err(JavascriptBenchmarkError::InvalidConfig( + "iterations must be greater than zero", + )); + } + + fs::create_dir_all(artifact_dir)?; + + let definitions = benchmark_scenarios(); + let state_path = benchmark_run_state_path(artifact_dir); + let mut state = load_benchmark_run_state(&state_path, config, host, repo_root, &definitions)?; + let resumed_stage_count = state.resumed_stage_count(&definitions); + + if state.transport_rtt.is_none() { + let transport_rtt = measure_transport()?; + state.record_transport_rtt(&transport_rtt); + persist_benchmark_run_state(&state_path, &state)?; + } + + for definition in definitions { + if state.has_scenario(definition.id) { + continue; + } + + let scenario = run_scenario(definition)?; + state.record_scenario(&scenario); + persist_benchmark_run_state(&state_path, &state)?; + } + + Ok(( + state.to_report(config, host, repo_root, &benchmark_scenarios()), + resumed_stage_count, + state_path, + )) +} + +fn benchmark_scenarios() -> [ScenarioDefinition; 21] { [ ScenarioDefinition { id: "isolate-startup", + workload: "startup-floor", + runtime: ScenarioRuntime::NativeExecution, + mode: ScenarioMode::BaselineControl, description: "Minimal guest with no extra imports. 
Measures the current startup floor for create-context plus node process bootstrap.", fixture: "empty entrypoint", entrypoint: "./bench/isolate-startup.mjs", compile_cache: CompileCacheStrategy::Disabled, + engine_reuse: EngineReuseStrategy::FreshPerSample, + expect_import_metric: false, + env: ScenarioEnvironment::None, + }, + ScenarioDefinition { + id: "prewarmed-isolate-startup", + workload: "startup-floor", + runtime: ScenarioRuntime::NativeExecution, + mode: ScenarioMode::SameEngineReplay, + description: + "Minimal guest after a priming pass while one execution engine keeps materialized assets and builtin/polyfill prewarm state alive, isolating the hot startup floor from import work.", + fixture: "empty entrypoint", + entrypoint: "./bench/isolate-startup.mjs", + compile_cache: CompileCacheStrategy::Primed, + engine_reuse: EngineReuseStrategy::SharedAcrossScenario, expect_import_metric: false, + env: ScenarioEnvironment::None, }, ScenarioDefinition { id: "cold-local-import", + workload: "local-import", + runtime: ScenarioRuntime::NativeExecution, + mode: ScenarioMode::TrueColdStart, description: "Cold import of a repo-local ESM graph that simulates layered application modules without compile-cache reuse.", fixture: "24-module local ESM graph", entrypoint: "./bench/cold-local-import.mjs", compile_cache: CompileCacheStrategy::Disabled, + engine_reuse: EngineReuseStrategy::FreshPerSample, expect_import_metric: true, + env: ScenarioEnvironment::None, }, ScenarioDefinition { id: "warm-local-import", + workload: "local-import", + runtime: ScenarioRuntime::NativeExecution, + mode: ScenarioMode::NewSessionReplay, description: "Warm import of the same local ESM graph after a compile-cache priming pass in an earlier isolate.", fixture: "24-module local ESM graph", entrypoint: "./bench/warm-local-import.mjs", compile_cache: CompileCacheStrategy::Primed, + engine_reuse: EngineReuseStrategy::FreshPerSample, expect_import_metric: true, + env: ScenarioEnvironment::None, + }, + 
ScenarioDefinition { + id: "same-context-local-import", + workload: "local-import", + runtime: ScenarioRuntime::NativeExecution, + mode: ScenarioMode::SameSessionReplay, + description: + "Warm import of the same local ESM graph by replaying executions against one reused JavaScript context after a compile-cache priming pass.", + fixture: "24-module local ESM graph", + entrypoint: "./bench/warm-local-import.mjs", + compile_cache: CompileCacheStrategy::Primed, + engine_reuse: EngineReuseStrategy::SharedContextAcrossScenario, + expect_import_metric: true, + env: ScenarioEnvironment::None, + }, + ScenarioDefinition { + id: "prewarmed-local-import", + workload: "local-import", + runtime: ScenarioRuntime::NativeExecution, + mode: ScenarioMode::SameEngineReplay, + description: + "Warm import of the same local ESM graph after compile-cache priming while one execution engine keeps materialized assets and builtin/polyfill prewarm state alive.", + fixture: "24-module local ESM graph", + entrypoint: "./bench/warm-local-import.mjs", + compile_cache: CompileCacheStrategy::Primed, + engine_reuse: EngineReuseStrategy::SharedAcrossScenario, + expect_import_metric: true, + env: ScenarioEnvironment::None, + }, + ScenarioDefinition { + id: "host-local-import", + workload: "local-import", + runtime: ScenarioRuntime::HostNode, + mode: ScenarioMode::HostControl, + description: + "Direct host-Node control for the same local ESM graph so later runs can separate native executor overhead from guest import work.", + fixture: "24-module local ESM graph", + entrypoint: "./bench/cold-local-import.mjs", + compile_cache: CompileCacheStrategy::Disabled, + engine_reuse: EngineReuseStrategy::FreshPerSample, + expect_import_metric: true, + env: ScenarioEnvironment::None, }, ScenarioDefinition { id: "builtin-import", + workload: "builtin-import", + runtime: ScenarioRuntime::NativeExecution, + mode: ScenarioMode::TrueColdStart, description: "Import of the common builtin path used by the wrappers and 
polyfill-adjacent bootstrap code.", fixture: "node:path + node:url + node:fs/promises", entrypoint: "./bench/builtin-import.mjs", compile_cache: CompileCacheStrategy::Disabled, + engine_reuse: EngineReuseStrategy::FreshPerSample, + expect_import_metric: true, + env: ScenarioEnvironment::None, + }, + ScenarioDefinition { + id: "hot-builtin-stream-import", + workload: "builtin-hot-import", + runtime: ScenarioRuntime::NativeExecution, + mode: ScenarioMode::SameEngineReplay, + description: + "Hot single-import microbench for `node:stream` after a priming pass inside one reused execution engine.", + fixture: "node:stream", + entrypoint: "./bench/hot-builtin-stream-import.mjs", + compile_cache: CompileCacheStrategy::Primed, + engine_reuse: EngineReuseStrategy::SharedAcrossScenario, + expect_import_metric: true, + env: ScenarioEnvironment::None, + }, + ScenarioDefinition { + id: "hot-builtin-stream-web-import", + workload: "builtin-hot-import", + runtime: ScenarioRuntime::NativeExecution, + mode: ScenarioMode::SameEngineReplay, + description: + "Hot single-import microbench for `node:stream/web` after a priming pass inside one reused execution engine.", + fixture: "node:stream/web", + entrypoint: "./bench/hot-builtin-stream-web-import.mjs", + compile_cache: CompileCacheStrategy::Primed, + engine_reuse: EngineReuseStrategy::SharedAcrossScenario, + expect_import_metric: true, + env: ScenarioEnvironment::None, + }, + ScenarioDefinition { + id: "hot-builtin-crypto-import", + workload: "builtin-hot-import", + runtime: ScenarioRuntime::NativeExecution, + mode: ScenarioMode::SameEngineReplay, + description: + "Hot single-import microbench for `node:crypto` after a priming pass inside one reused execution engine.", + fixture: "node:crypto", + entrypoint: "./bench/hot-builtin-crypto-import.mjs", + compile_cache: CompileCacheStrategy::Primed, + engine_reuse: EngineReuseStrategy::SharedAcrossScenario, + expect_import_metric: true, + env: ScenarioEnvironment::None, + }, + 
ScenarioDefinition { + id: "hot-builtin-zlib-import", + workload: "builtin-hot-import", + runtime: ScenarioRuntime::NativeExecution, + mode: ScenarioMode::SameEngineReplay, + description: + "Hot single-import microbench for `node:zlib` after a priming pass inside one reused execution engine.", + fixture: "node:zlib", + entrypoint: "./bench/hot-builtin-zlib-import.mjs", + compile_cache: CompileCacheStrategy::Primed, + engine_reuse: EngineReuseStrategy::SharedAcrossScenario, + expect_import_metric: true, + env: ScenarioEnvironment::None, + }, + ScenarioDefinition { + id: "hot-builtin-assert-import", + workload: "builtin-hot-import", + runtime: ScenarioRuntime::NativeExecution, + mode: ScenarioMode::SameEngineReplay, + description: + "Hot single-import microbench for `node:assert/strict` after a priming pass inside one reused execution engine.", + fixture: "node:assert/strict", + entrypoint: "./bench/hot-builtin-assert-import.mjs", + compile_cache: CompileCacheStrategy::Primed, + engine_reuse: EngineReuseStrategy::SharedAcrossScenario, + expect_import_metric: true, + env: ScenarioEnvironment::None, + }, + ScenarioDefinition { + id: "hot-builtin-url-import", + workload: "builtin-hot-import", + runtime: ScenarioRuntime::NativeExecution, + mode: ScenarioMode::SameEngineReplay, + description: + "Hot single-import microbench for `node:url` after a priming pass inside one reused execution engine.", + fixture: "node:url", + entrypoint: "./bench/hot-builtin-url-import.mjs", + compile_cache: CompileCacheStrategy::Primed, + engine_reuse: EngineReuseStrategy::SharedAcrossScenario, expect_import_metric: true, + env: ScenarioEnvironment::None, + }, + ScenarioDefinition { + id: "hot-projected-package-file-import", + workload: "projected-package-hot-import", + runtime: ScenarioRuntime::NativeExecution, + mode: ScenarioMode::SameEngineReplay, + description: + "Hot projected-package single-import microbench for the TypeScript compiler file with compile cache and projected-source 
manifest reuse enabled across repeated contexts.", + fixture: "projected TypeScript compiler file", + entrypoint: "./bench/hot-projected-package-file-import.mjs", + compile_cache: CompileCacheStrategy::Primed, + engine_reuse: EngineReuseStrategy::SharedAcrossScenario, + expect_import_metric: true, + env: ScenarioEnvironment::ProjectedWorkspaceNodeModules, }, ScenarioDefinition { id: "large-package-import", + workload: "large-package-import", + runtime: ScenarioRuntime::NativeExecution, + mode: ScenarioMode::TrueColdStart, description: "Cold import of the real-world `typescript` package from the workspace root `node_modules` tree.", fixture: "typescript", entrypoint: "./bench/large-package-import.mjs", compile_cache: CompileCacheStrategy::Disabled, + engine_reuse: EngineReuseStrategy::FreshPerSample, + expect_import_metric: true, + env: ScenarioEnvironment::None, + }, + ScenarioDefinition { + id: "projected-package-import", + workload: "projected-package-import", + runtime: ScenarioRuntime::NativeExecution, + mode: ScenarioMode::SameEngineReplay, + description: + "Projected-package guest-path import of TypeScript with compile cache and projected-source manifest reuse enabled across repeated contexts.", + fixture: "projected TypeScript guest-path import", + entrypoint: "./bench/projected-package-import.mjs", + compile_cache: CompileCacheStrategy::Primed, + engine_reuse: EngineReuseStrategy::SharedAcrossScenario, + expect_import_metric: true, + env: ScenarioEnvironment::ProjectedWorkspaceNodeModules, + }, + ScenarioDefinition { + id: "pdf-lib-startup", + workload: "pdf-lib-startup", + runtime: ScenarioRuntime::NativeExecution, + mode: ScenarioMode::TrueColdStart, + description: + "Cold import of `pdf-lib` plus representative document setup that creates a PDF page and embeds a standard font.", + fixture: "pdf-lib document creation", + entrypoint: "./bench/pdf-lib-startup.mjs", + compile_cache: CompileCacheStrategy::Disabled, + engine_reuse: 
EngineReuseStrategy::FreshPerSample, + expect_import_metric: true, + env: ScenarioEnvironment::None, + }, + ScenarioDefinition { + id: "jszip-startup", + workload: "jszip-startup", + runtime: ScenarioRuntime::NativeExecution, + mode: ScenarioMode::TrueColdStart, + description: + "Cold import of `jszip` plus representative archive staging that builds a nested archive structure.", + fixture: "jszip archive staging", + entrypoint: "./bench/jszip-startup.mjs", + compile_cache: CompileCacheStrategy::Disabled, + engine_reuse: EngineReuseStrategy::FreshPerSample, + expect_import_metric: true, + env: ScenarioEnvironment::None, + }, + ScenarioDefinition { + id: "jszip-end-to-end", + workload: "jszip-end-to-end", + runtime: ScenarioRuntime::NativeExecution, + mode: ScenarioMode::TrueColdStart, + description: + "Cold import of `jszip` plus a full compressed archive roundtrip that writes, compresses, reloads, and validates nested archive contents.", + fixture: "jszip end-to-end archive roundtrip", + entrypoint: "./bench/jszip-end-to-end.mjs", + compile_cache: CompileCacheStrategy::Disabled, + engine_reuse: EngineReuseStrategy::FreshPerSample, + expect_import_metric: true, + env: ScenarioEnvironment::None, + }, + ScenarioDefinition { + id: "jszip-repeated-session-compressed", + workload: "jszip-repeated-session-compressed", + runtime: ScenarioRuntime::NativeExecution, + mode: ScenarioMode::NewSessionReplay, + description: + "Repeated-session `jszip` workload after a compile-cache priming pass that compresses and reloads a nested archive in each fresh isolate.", + fixture: "jszip compressed archive roundtrip", + entrypoint: "./bench/jszip-repeated-session-compressed.mjs", + compile_cache: CompileCacheStrategy::Primed, + engine_reuse: EngineReuseStrategy::FreshPerSample, expect_import_metric: true, + env: ScenarioEnvironment::None, }, ] } @@ -478,23 +2360,32 @@ fn run_scenario( .root .join("compile-cache") .join(scenario.id.replace('-', "_")); + let mut shared_engine = match 
scenario.engine_reuse { + EngineReuseStrategy::FreshPerSample => None, + EngineReuseStrategy::SharedAcrossScenario + | EngineReuseStrategy::SharedContextAcrossScenario => { + Some(JavascriptExecutionEngine::default()) + } + }; + let mut shared_context = None; if scenario.compile_cache == CompileCacheStrategy::Primed { run_sample( workspace, &scenario, Some(compile_cache_root.clone()), - "prime-cache", + shared_engine.as_mut(), + &mut shared_context, )?; } - for warmup_index in 0..config.warmup_iterations { - let label = format!("warmup-{}", warmup_index + 1); + for _ in 0..config.warmup_iterations { run_sample( workspace, &scenario, compile_cache_root_for_strategy(scenario.compile_cache, &compile_cache_root), - &label, + shared_engine.as_mut(), + &mut shared_context, )?; } @@ -504,34 +2395,59 @@ fn run_scenario( } else { None }; + let mut context_setup_samples_ms = Vec::with_capacity(config.iterations); + let mut startup_samples_ms = Vec::with_capacity(config.iterations); + let mut completion_samples_ms = Vec::with_capacity(config.iterations); + let mut resource_usage_samples = BenchmarkResourceUsage::>::default(); - for iteration in 0..config.iterations { - let label = format!("measure-{}", iteration + 1); + for _ in 0..config.iterations { let sample = run_sample( workspace, &scenario, compile_cache_root_for_strategy(scenario.compile_cache, &compile_cache_root), - &label, + shared_engine.as_mut(), + &mut shared_context, )?; wall_samples_ms.push(sample.wall_ms); + context_setup_samples_ms.push(sample.context_setup_ms); + startup_samples_ms.push(sample.startup_ms); + completion_samples_ms.push(sample.completion_ms); if let (Some(import_ms), Some(samples)) = (sample.guest_import_ms, guest_import_samples_ms.as_mut()) { samples.push(import_ms); } + if let Some(resource_usage) = sample.resource_usage.as_ref() { + resource_usage_samples.push_sample(resource_usage); + } } let startup_overhead_samples_ms = guest_import_samples_ms.as_ref().map(|guest_samples| { - 
wall_samples_ms + context_setup_samples_ms .iter() + .zip(startup_samples_ms.iter()) + .zip(completion_samples_ms.iter()) .zip(guest_samples.iter()) - .map(|(wall_ms, import_ms)| wall_ms - import_ms) + .map(|(((context_ms, startup_ms), completion_ms), _guest_ms)| { + context_ms + startup_ms + completion_ms + }) .collect::>() }); + let phase_samples_ms = BenchmarkScenarioPhases { + context_setup_ms: context_setup_samples_ms, + startup_ms: startup_samples_ms, + guest_execution_ms: guest_import_samples_ms.clone(), + completion_ms: completion_samples_ms, + }; + let resource_usage_samples = resource_usage_samples.into_populated(); + Ok(BenchmarkScenarioReport { id: scenario.id, + workload: scenario.workload, + runtime: scenario.runtime.label(), + mode: scenario.mode.label(), description: scenario.description, fixture: scenario.fixture, compile_cache: scenario.compile_cache.label(), @@ -542,9 +2458,23 @@ fn run_scenario( startup_overhead_stats: startup_overhead_samples_ms .as_ref() .map(|samples| compute_stats(samples)), + phase_stats: BenchmarkScenarioPhases { + context_setup_ms: compute_stats(&phase_samples_ms.context_setup_ms), + startup_ms: compute_stats(&phase_samples_ms.startup_ms), + guest_execution_ms: phase_samples_ms + .guest_execution_ms + .as_ref() + .map(|samples| compute_stats(samples)), + completion_ms: compute_stats(&phase_samples_ms.completion_ms), + }, + resource_usage_stats: resource_usage_samples + .as_ref() + .and_then(compute_resource_usage_stats), wall_samples_ms, guest_import_samples_ms, startup_overhead_samples_ms, + phase_samples_ms, + resource_usage_samples, }) } @@ -559,26 +2489,70 @@ fn run_sample( workspace: &BenchmarkWorkspace, scenario: &ScenarioDefinition, compile_cache_root: Option, - _label: &str, + shared_engine: Option<&mut JavascriptExecutionEngine>, + shared_context: &mut Option, ) -> Result { - let mut engine = JavascriptExecutionEngine::default(); - let started_at = Instant::now(); - let context = 
engine.create_context(CreateJavascriptContextRequest { - vm_id: String::from("vm-bench"), - bootstrap_module: None, - compile_cache_root, - }); + match scenario.runtime { + ScenarioRuntime::NativeExecution => run_native_sample( + workspace, + scenario, + compile_cache_root, + shared_engine, + shared_context, + ), + ScenarioRuntime::HostNode => run_host_node_sample(workspace, scenario), + } +} + +fn run_native_sample( + workspace: &BenchmarkWorkspace, + scenario: &ScenarioDefinition, + compile_cache_root: Option, + shared_engine: Option<&mut JavascriptExecutionEngine>, + shared_context: &mut Option, +) -> Result { + let mut fresh_engine = JavascriptExecutionEngine::default(); + let engine = shared_engine.unwrap_or(&mut fresh_engine); + let context_started_at = Instant::now(); + let (context, context_setup_ms) = match scenario.engine_reuse { + EngineReuseStrategy::SharedContextAcrossScenario => { + if let Some(context) = shared_context.as_ref() { + (context.clone(), 0.0) + } else { + let context = engine.create_context(CreateJavascriptContextRequest { + vm_id: String::from("vm-bench"), + bootstrap_module: None, + compile_cache_root, + }); + let context_setup_ms = context_started_at.elapsed().as_secs_f64() * 1000.0; + *shared_context = Some(context.clone()); + (context, context_setup_ms) + } + } + _ => { + let context = engine.create_context(CreateJavascriptContextRequest { + vm_id: String::from("vm-bench"), + bootstrap_module: None, + compile_cache_root, + }); + let context_setup_ms = context_started_at.elapsed().as_secs_f64() * 1000.0; + (context, context_setup_ms) + } + }; + let startup_started_at = Instant::now(); let execution = engine.start_execution(StartJavascriptExecutionRequest { vm_id: String::from("vm-bench"), context_id: context.context_id, argv: vec![String::from(scenario.entrypoint)], - env: BTreeMap::new(), + env: scenario_env(workspace, scenario), cwd: workspace.root.clone(), })?; + let startup_ms = startup_started_at.elapsed().as_secs_f64() * 1000.0; 
+ let completion_started_at = Instant::now(); let result = execution.wait()?; - let wall_ms = started_at.elapsed().as_secs_f64() * 1000.0; + let completion_total_ms = completion_started_at.elapsed().as_secs_f64() * 1000.0; let stdout = String::from_utf8(result.stdout)?; let stderr = String::from_utf8(result.stderr)?; @@ -590,31 +2564,265 @@ fn run_sample( }); } - let guest_import_ms = if scenario.expect_import_metric { - Some(parse_benchmark_metric(scenario.id, &stdout)?) - } else { - None - }; + let parsed_metrics = + parse_benchmark_metrics(scenario.id, &stdout, scenario.expect_import_metric)?; + let guest_import_ms = parsed_metrics.import_ms; + let completion_ms = guest_import_ms + .map(|guest_ms| saturating_delta_ms(completion_total_ms, guest_ms)) + .unwrap_or(completion_total_ms); + let wall_ms = context_setup_ms + startup_ms + completion_total_ms; + + Ok(SampleMeasurement { + wall_ms, + guest_import_ms, + context_setup_ms, + startup_ms, + completion_ms, + resource_usage: parsed_metrics.resource_usage, + }) +} + +fn run_host_node_sample( + workspace: &BenchmarkWorkspace, + scenario: &ScenarioDefinition, +) -> Result { + let started_at = Instant::now(); + let output = Command::new(crate::node_process::node_binary()) + .arg(scenario.entrypoint) + .current_dir(&workspace.root) + .envs(scenario_env(workspace, scenario)) + .output()?; + let wall_ms = started_at.elapsed().as_secs_f64() * 1000.0; + let stdout = String::from_utf8(output.stdout)?; + let stderr = String::from_utf8(output.stderr)?; + + if !output.status.success() { + return Err(JavascriptBenchmarkError::NonZeroExit { + scenario: scenario.id, + exit_code: output.status.code().unwrap_or(-1), + stderr, + }); + } + + let parsed_metrics = + parse_benchmark_metrics(scenario.id, &stdout, scenario.expect_import_metric)?; + let guest_import_ms = parsed_metrics.import_ms; + let startup_ms = guest_import_ms + .map(|guest_ms| saturating_delta_ms(wall_ms, guest_ms)) + .unwrap_or(wall_ms); Ok(SampleMeasurement { 
wall_ms, guest_import_ms, + context_setup_ms: 0.0, + startup_ms, + completion_ms: 0.0, + resource_usage: parsed_metrics.resource_usage, }) } -fn parse_benchmark_metric( +fn scenario_env( + workspace: &BenchmarkWorkspace, + scenario: &ScenarioDefinition, +) -> BTreeMap { + match scenario.env { + ScenarioEnvironment::None => BTreeMap::new(), + ScenarioEnvironment::ProjectedWorkspaceNodeModules => { + let projected_node_modules = workspace.repo_root.join("node_modules"); + let projected_node_modules_json = + serde_json::to_string(&vec![projected_node_modules.display().to_string()]) + .expect("serialize projected node_modules read path"); + let guest_path_mappings = serde_json::json!([{ + "guestPath": "/root/node_modules", + "hostPath": projected_node_modules.display().to_string(), + }]) + .to_string(); + + BTreeMap::from([ + ( + String::from("AGENT_OS_EXTRA_FS_READ_PATHS"), + projected_node_modules_json, + ), + ( + String::from("AGENT_OS_GUEST_PATH_MAPPINGS"), + guest_path_mappings, + ), + ]) + } + } +} + +fn measure_transport_rtt( + workspace: &BenchmarkWorkspace, + config: &JavascriptBenchmarkConfig, +) -> Result, JavascriptBenchmarkError> { + let mut engine = JavascriptExecutionEngine::default(); + let context = engine.create_context(CreateJavascriptContextRequest { + vm_id: String::from("vm-transport"), + bootstrap_module: None, + compile_cache_root: None, + }); + let mut execution = engine.start_execution(StartJavascriptExecutionRequest { + vm_id: String::from("vm-transport"), + context_id: context.context_id, + argv: vec![String::from("./bench/transport-echo.mjs")], + env: BTreeMap::from([(String::from("AGENT_OS_KEEP_STDIN_OPEN"), String::from("1"))]), + cwd: workspace.root.clone(), + })?; + + let mut stdout_buffer = String::new(); + let mut stderr_buffer = String::new(); + let mut reports = Vec::with_capacity(TRANSPORT_RTT_PAYLOAD_BYTES.len()); + + for payload_bytes in TRANSPORT_RTT_PAYLOAD_BYTES { + for warmup_index in 0..config.warmup_iterations { + let label 
= format!("warmup-{}-{warmup_index}", payload_bytes); + measure_transport_roundtrip( + &mut execution, + payload_bytes, + &label, + &mut stdout_buffer, + &mut stderr_buffer, + )?; + } + + let mut samples_ms = Vec::with_capacity(config.iterations); + for iteration in 0..config.iterations { + let label = format!("measure-{}-{iteration}", payload_bytes); + samples_ms.push(measure_transport_roundtrip( + &mut execution, + payload_bytes, + &label, + &mut stdout_buffer, + &mut stderr_buffer, + )?); + } + + reports.push(BenchmarkTransportRttReport { + channel: TRANSPORT_RTT_CHANNEL, + payload_bytes, + stats: compute_stats(&samples_ms), + samples_ms, + }); + } + + execution.close_stdin()?; + let result = execution.wait()?; + if result.exit_code != 0 { + stderr_buffer.push_str(&String::from_utf8(result.stderr)?); + return Err(JavascriptBenchmarkError::TransportProbeExited { + exit_code: result.exit_code, + stderr: stderr_buffer, + }); + } + + Ok(reports) +} + +fn measure_transport_roundtrip( + execution: &mut crate::JavascriptExecution, + payload_bytes: usize, + label: &str, + stdout_buffer: &mut String, + stderr_buffer: &mut String, +) -> Result { + let payload = transport_probe_payload(payload_bytes, label); + let expected_line = format!("{payload}\n"); + let started_at = Instant::now(); + execution.write_stdin(expected_line.as_bytes())?; + + loop { + if let Some(line) = take_complete_line(stdout_buffer) { + if line == payload { + return Ok(started_at.elapsed().as_secs_f64() * 1000.0); + } + return Err(JavascriptBenchmarkError::InvalidTransportProbeResponse { + payload_bytes, + expected: payload, + actual: line, + }); + } + + match execution.poll_event(TRANSPORT_POLL_TIMEOUT)? 
{ + Some(crate::JavascriptExecutionEvent::Stdout(chunk)) => { + stdout_buffer.push_str(&String::from_utf8(chunk)?); + } + Some(crate::JavascriptExecutionEvent::Stderr(chunk)) => { + stderr_buffer.push_str(&String::from_utf8(chunk)?); + } + Some(crate::JavascriptExecutionEvent::Exited(exit_code)) => { + return Err(JavascriptBenchmarkError::TransportProbeExited { + exit_code, + stderr: stderr_buffer.clone(), + }); + } + None => { + return Err(JavascriptBenchmarkError::TransportProbeTimeout { payload_bytes }); + } + } + } +} + +fn transport_probe_payload(payload_bytes: usize, label: &str) -> String { + if payload_bytes == 0 { + return format!("transport:{label}:"); + } + + let header = format!("transport:{label}:"); + let fill_len = payload_bytes.saturating_sub(header.len()); + format!("{header}{}", "x".repeat(fill_len)) +} + +fn take_complete_line(buffer: &mut String) -> Option { + let newline_index = buffer.find('\n')?; + let line = buffer[..newline_index].trim_end_matches('\r').to_owned(); + buffer.drain(..=newline_index); + Some(line) +} + +#[derive(Debug, Default, Deserialize)] +struct ParsedBenchmarkMetrics { + #[serde(default)] + import_ms: Option, + #[serde(default)] + resource_usage: Option>, +} + +fn parse_benchmark_metrics( scenario_id: &'static str, stdout: &str, -) -> Result { + expect_import_metric: bool, +) -> Result { let raw_value = stdout .lines() + .rev() .find_map(|line| line.strip_prefix(BENCHMARK_MARKER_PREFIX)) .ok_or(JavascriptBenchmarkError::MissingBenchmarkMetric( scenario_id, - ))?; + ))? 
+ .trim(); + + if let Ok(parsed) = serde_json::from_str::(raw_value) { + let has_resource_usage = match parsed.resource_usage.as_ref() { + Some(resource_usage) => !resource_usage.is_empty(), + None => false, + }; + if parsed.import_ms.is_some() || has_resource_usage { + if expect_import_metric && parsed.import_ms.is_none() { + return Err(JavascriptBenchmarkError::MissingBenchmarkMetric( + scenario_id, + )); + } + return Ok(parsed); + } + } raw_value .parse::() + .map(|import_ms| ParsedBenchmarkMetrics { + import_ms: Some(import_ms), + resource_usage: None, + }) .map_err(|_| JavascriptBenchmarkError::InvalidBenchmarkMetric { scenario: scenario_id, raw_value: raw_value.to_owned(), @@ -630,6 +2838,16 @@ fn workspace_root() -> Result { .ok_or(JavascriptBenchmarkError::InvalidWorkspaceRoot(manifest_dir)) } +fn load_benchmark_artifact( + baseline_path: &Path, +) -> Result { + let raw = fs::read_to_string(baseline_path)?; + serde_json::from_str(&raw).map_err(|err| JavascriptBenchmarkError::InvalidBaselineReport { + path: baseline_path.to_path_buf(), + message: err.to_string(), + }) +} + fn benchmark_host() -> Result { let node_binary = crate::node_process::node_binary(); let output = Command::new(&node_binary) @@ -680,10 +2898,14 @@ fn write_benchmark_workspace(root: &Path) -> Result<(), JavascriptBenchmarkError last = LOCAL_GRAPH_MODULE_COUNT - 1 ), )?; + fs::write( + root.join("bench/benchmark-metrics.mjs"), + benchmark_metrics_module_source(), + )?; fs::write( root.join("bench/isolate-startup.mjs"), - "console.log('isolate-ready');\n", + resource_only_entrypoint_source("console.log('isolate-ready');"), )?; fs::write( root.join("bench/cold-local-import.mjs"), @@ -695,25 +2917,163 @@ fn write_benchmark_workspace(root: &Path) -> Result<(), JavascriptBenchmarkError )?; fs::write( root.join("bench/builtin-import.mjs"), - format!( - "import {{ performance }} from 'node:perf_hooks';\nconst started = performance.now();\nconst [pathMod, fsMod, urlMod] = await Promise.all([\n 
import('node:path'),\n import('node:fs/promises'),\n import('node:url'),\n]);\nif (typeof pathMod.basename !== 'function' || typeof fsMod.readFile !== 'function' || typeof urlMod.pathToFileURL !== 'function') {{\n throw new Error('builtin import fixture did not load expected exports');\n}}\nconsole.log('{BENCHMARK_MARKER_PREFIX}' + String(performance.now() - started));\n", + timed_entrypoint_source( + "const [pathMod, fsMod, urlMod] = await Promise.all([\n import('node:path'),\n import('node:fs/promises'),\n import('node:url'),\n]);\nif (typeof pathMod.basename !== 'function' || typeof fsMod.readFile !== 'function' || typeof urlMod.pathToFileURL !== 'function') {\n throw new Error('builtin import fixture did not load expected exports');\n}", + ), + )?; + fs::write( + root.join("bench/hot-builtin-stream-import.mjs"), + single_import_entrypoint_source( + "node:stream", + "typeof imported.Readable === 'function'", + "node:stream import did not expose Readable", + ), + )?; + fs::write( + root.join("bench/hot-builtin-stream-web-import.mjs"), + single_import_entrypoint_source( + "node:stream/web", + "typeof imported.ReadableStream === 'function'", + "node:stream/web import did not expose ReadableStream", + ), + )?; + fs::write( + root.join("bench/hot-builtin-crypto-import.mjs"), + single_import_entrypoint_source( + "node:crypto", + "typeof imported.createHash === 'function'", + "node:crypto import did not expose createHash", + ), + )?; + fs::write( + root.join("bench/hot-builtin-zlib-import.mjs"), + single_import_entrypoint_source( + "node:zlib", + "typeof imported.gzipSync === 'function'", + "node:zlib import did not expose gzipSync", + ), + )?; + fs::write( + root.join("bench/hot-builtin-assert-import.mjs"), + single_import_entrypoint_source( + "node:assert/strict", + "typeof imported.strictEqual === 'function'", + "node:assert/strict import did not expose strictEqual", + ), + )?; + fs::write( + root.join("bench/hot-builtin-url-import.mjs"), + 
single_import_entrypoint_source( + "node:url", + "typeof imported.pathToFileURL === 'function'", + "node:url import did not expose pathToFileURL", ), )?; fs::write( root.join("bench/large-package-import.mjs"), - format!( - "import {{ performance }} from 'node:perf_hooks';\nconst started = performance.now();\nconst typescript = await import('typescript');\nif (typeof typescript.transpileModule !== 'function') {{\n throw new Error('typescript import did not expose transpileModule');\n}}\nconsole.log('{BENCHMARK_MARKER_PREFIX}' + String(performance.now() - started));\n", + timed_entrypoint_source( + "const typescript = await import('typescript');\nif (typeof typescript.transpileModule !== 'function') {\n throw new Error('typescript import did not expose transpileModule');\n}", ), )?; + fs::write( + root.join("bench/hot-projected-package-file-import.mjs"), + projected_package_file_import_entrypoint_source(), + )?; + fs::write( + root.join("bench/projected-package-import.mjs"), + projected_package_import_entrypoint_source(), + )?; + fs::write( + root.join("bench/pdf-lib-startup.mjs"), + pdf_lib_startup_entrypoint_source(), + )?; + fs::write( + root.join("bench/jszip-startup.mjs"), + jszip_startup_entrypoint_source(), + )?; + fs::write( + root.join("bench/jszip-end-to-end.mjs"), + jszip_end_to_end_entrypoint_source(), + )?; + fs::write( + root.join("bench/jszip-repeated-session-compressed.mjs"), + jszip_repeated_session_compressed_entrypoint_source(), + )?; + fs::write( + root.join("bench/transport-echo.mjs"), + "import readline from 'node:readline';\nconst rl = readline.createInterface({ input: process.stdin, crlfDelay: Infinity });\nfor await (const line of rl) {\n process.stdout.write(`${line}\\n`);\n}\n", + )?; Ok(()) } fn local_import_entrypoint_source(final_value: usize) -> String { + timed_entrypoint_source(&format!( + "const graph = await import('./local-graph/root.mjs');\nif (graph.value !== {final_value} || graph.expected !== {final_value}) {{\n throw new 
Error(`local graph import returned ${{\n graph.value\n }} instead of {final_value}`);\n}}" + )) +} + +fn single_import_entrypoint_source( + specifier: &str, + validation_expression: &str, + error_message: &str, +) -> String { + timed_entrypoint_source(&format!( + "const imported = await import('{specifier}');\nif (!({validation_expression})) {{\n throw new Error('{error_message}');\n}}" + )) +} + +fn projected_package_file_import_entrypoint_source() -> String { + timed_entrypoint_source( + "const typescriptModule = await import('/root/node_modules/typescript/lib/typescript.js');\nconst typescript = typescriptModule.default ?? typescriptModule;\nif (typeof typescript.transpileModule !== 'function') {\n throw new Error('projected package file import did not expose transpileModule');\n}", + ) +} + +fn projected_package_import_entrypoint_source() -> String { + timed_entrypoint_source( + "const typescriptModule = await import('/root/node_modules/typescript/lib/typescript.js');\nconst typescript = typescriptModule.default ?? 
typescriptModule;\nconst sourceFile = typescript.createSourceFile(\n 'bench.ts',\n 'const answer: number = 42;',\n typescript.ScriptTarget.ES2022,\n true,\n);\nif (\n typeof typescript.transpileModule !== 'function' ||\n typeof typescript.createSourceFile !== 'function' ||\n !sourceFile ||\n sourceFile.statements.length !== 1\n) {\n throw new Error('projected package import did not expose TypeScript compiler APIs');\n}", + ) +} + +fn pdf_lib_startup_entrypoint_source() -> String { + timed_entrypoint_source( + "const pdfLib = await import('pdf-lib');\nconst pdfDoc = await pdfLib.PDFDocument.create();\nconst page = pdfDoc.addPage([612, 792]);\nconst font = await pdfDoc.embedFont(pdfLib.StandardFonts.Helvetica);\npage.drawText('Agent OS pdf-lib benchmark', {\n x: 50,\n y: 750,\n font,\n size: 18,\n});\nif (pdfDoc.getPageCount() !== 1 || page.getSize().width !== 612) {\n throw new Error('pdf-lib fixture did not create the expected document');\n}", + ) +} + +fn jszip_startup_entrypoint_source() -> String { + timed_entrypoint_source( + "const jszipModule = await import('jszip');\nconst JSZip = jszipModule.default ?? jszipModule;\nconst zip = new JSZip();\nzip.file('README.txt', 'agent-os benchmark archive');\nconst notes = zip.folder('notes');\nif (!notes) {\n throw new Error('jszip fixture failed to create nested folder');\n}\nnotes.file('todo.txt', 'benchmark staging payload');\nconst fileCount = Object.values(zip.files).filter((entry) => !entry.dir).length;\nif (typeof zip.generateAsync !== 'function' || fileCount !== 2) {\n throw new Error('jszip fixture did not stage the expected archive');\n}", + ) +} + +fn jszip_end_to_end_entrypoint_source() -> String { + timed_entrypoint_source( + "const jszipModule = await import('jszip');\nconst JSZip = jszipModule.default ?? 
jszipModule;\nconst zip = new JSZip();\nconst repeatedPayload = 'agent-os benchmark payload '.repeat(512);\nzip.file('README.txt', repeatedPayload);\nconst notes = zip.folder('notes');\nif (!notes) {\n throw new Error('jszip end-to-end fixture failed to create notes folder');\n}\nnotes.file('todo.txt', 'complete the archive roundtrip');\nconst data = zip.folder('data');\nif (!data) {\n throw new Error('jszip end-to-end fixture failed to create data folder');\n}\ndata.file('payload.json', JSON.stringify({\n repeatedPayloadLength: repeatedPayload.length,\n mode: 'cold-end-to-end',\n}));\nconst archiveBytes = await zip.generateAsync({\n type: 'uint8array',\n compression: 'DEFLATE',\n compressionOptions: { level: 6 },\n});\nconst restored = await JSZip.loadAsync(archiveBytes);\nconst restoredFileCount = Object.values(restored.files).filter((entry) => !entry.dir).length;\nconst restoredReadme = await restored.file('README.txt')?.async('string');\nconst restoredTodo = await restored.file('notes/todo.txt')?.async('string');\nconst restoredPayload = await restored.file('data/payload.json')?.async('string');\nif (\n archiveBytes.byteLength >= repeatedPayload.length ||\n restoredFileCount !== 3 ||\n restoredReadme !== repeatedPayload ||\n restoredTodo !== 'complete the archive roundtrip' ||\n !restoredPayload?.includes('cold-end-to-end')\n) {\n throw new Error('jszip end-to-end fixture did not complete the compressed archive roundtrip');\n}", + ) +} + +fn jszip_repeated_session_compressed_entrypoint_source() -> String { + timed_entrypoint_source( + "const jszipModule = await import('jszip');\nconst JSZip = jszipModule.default ?? 
jszipModule;\nconst zip = new JSZip();\nconst repeatedPayload = 'agent-os benchmark payload '.repeat(512);\nzip.file('README.txt', repeatedPayload);\nconst notes = zip.folder('notes');\nif (!notes) {\n throw new Error('jszip repeated-session fixture failed to create notes folder');\n}\nnotes.file('todo.txt', 'repeat this session workload');\nconst data = zip.folder('data');\nif (!data) {\n throw new Error('jszip repeated-session fixture failed to create data folder');\n}\ndata.file('payload.json', JSON.stringify({\n repeatedPayloadLength: repeatedPayload.length,\n repeatedSessions: true,\n}));\nconst archiveBytes = await zip.generateAsync({\n type: 'uint8array',\n compression: 'DEFLATE',\n compressionOptions: { level: 6 },\n});\nconst restored = await JSZip.loadAsync(archiveBytes);\nconst restoredFileCount = Object.values(restored.files).filter((entry) => !entry.dir).length;\nconst restoredReadme = await restored.file('README.txt')?.async('string');\nconst restoredTodo = await restored.file('notes/todo.txt')?.async('string');\nif (\n archiveBytes.byteLength >= repeatedPayload.length ||\n restoredFileCount !== 3 ||\n restoredReadme !== repeatedPayload ||\n restoredTodo !== 'repeat this session workload'\n) {\n throw new Error('jszip repeated-session fixture did not complete the compressed archive roundtrip');\n}", + ) +} + +fn benchmark_metrics_module_source() -> String { + format!( + "const BENCHMARK_MARKER_PREFIX = '{BENCHMARK_MARKER_PREFIX}';\n\nexport function emitBenchmarkMetrics(importMs) {{\n const memoryUsage = process.memoryUsage();\n const resourceUsage = typeof process.resourceUsage === 'function'\n ? process.resourceUsage()\n : null;\n const payload = {{\n resource_usage: {{\n rss_bytes: memoryUsage.rss,\n heap_used_bytes: memoryUsage.heapUsed,\n ...(resourceUsage\n ? 
{{\n cpu_user_us: resourceUsage.userCPUTime,\n cpu_system_us: resourceUsage.systemCPUTime,\n cpu_total_us: resourceUsage.userCPUTime + resourceUsage.systemCPUTime,\n }}\n : {{}}),\n }},\n }};\n\n if (typeof importMs === 'number') {{\n payload.import_ms = importMs;\n }}\n\n console.log(BENCHMARK_MARKER_PREFIX + JSON.stringify(payload));\n}}\n" + ) +} + +fn resource_only_entrypoint_source(body: &str) -> String { + format!( + "import {{ emitBenchmarkMetrics }} from './benchmark-metrics.mjs';\n{body}\nemitBenchmarkMetrics();\n" + ) +} + +fn timed_entrypoint_source(body: &str) -> String { format!( - "import {{ performance }} from 'node:perf_hooks';\nconst started = performance.now();\nconst graph = await import('./local-graph/root.mjs');\nif (graph.value !== {final_value} || graph.expected !== {final_value}) {{\n throw new Error(`local graph import returned ${{ - graph.value - }} instead of {final_value}`);\n}}\nconsole.log('{BENCHMARK_MARKER_PREFIX}' + String(performance.now() - started));\n" + "import {{ performance }} from 'node:perf_hooks';\nimport {{ emitBenchmarkMetrics }} from './benchmark-metrics.mjs';\nconst started = performance.now();\n{body}\nemitBenchmarkMetrics(performance.now() - started);\n" ) } @@ -727,20 +3087,80 @@ fn local_graph_terminal_value() -> usize { value } -fn compute_stats(samples: &[f64]) -> BenchmarkStats { +fn compute_distribution_stats(samples: &[f64]) -> BenchmarkDistributionStats { let mut sorted = samples.to_vec(); sorted.sort_by(|a, b| a.total_cmp(b)); - let mean_ms = sorted.iter().sum::() / sorted.len() as f64; + let mean = sorted.iter().sum::() / sorted.len() as f64; + + BenchmarkDistributionStats { + mean, + p50: percentile(&sorted, 50.0), + p95: percentile(&sorted, 95.0), + min: *sorted.first().unwrap_or(&0.0), + max: *sorted.last().unwrap_or(&0.0), + stddev: standard_deviation(&sorted, mean), + } +} + +fn compute_stats(samples: &[f64]) -> BenchmarkStats { + let stats = compute_distribution_stats(samples); BenchmarkStats { - 
mean_ms, - p50_ms: percentile(&sorted, 50.0), - p95_ms: percentile(&sorted, 95.0), - min_ms: *sorted.first().unwrap_or(&0.0), - max_ms: *sorted.last().unwrap_or(&0.0), + mean_ms: stats.mean, + p50_ms: stats.p50, + p95_ms: stats.p95, + min_ms: stats.min, + max_ms: stats.max, + stddev_ms: stats.stddev, } } +fn compute_resource_usage_stats( + samples: &BenchmarkResourceUsage>, +) -> Option> { + let stats = BenchmarkResourceUsage { + rss_bytes: samples + .rss_bytes + .as_ref() + .map(|samples| compute_distribution_stats(samples)), + heap_used_bytes: samples + .heap_used_bytes + .as_ref() + .map(|samples| compute_distribution_stats(samples)), + cpu_user_us: samples + .cpu_user_us + .as_ref() + .map(|samples| compute_distribution_stats(samples)), + cpu_system_us: samples + .cpu_system_us + .as_ref() + .map(|samples| compute_distribution_stats(samples)), + cpu_total_us: samples + .cpu_total_us + .as_ref() + .map(|samples| compute_distribution_stats(samples)), + }; + + (!stats.is_empty()).then_some(stats) +} + +fn standard_deviation(samples: &[f64], mean: f64) -> f64 { + if samples.is_empty() { + return 0.0; + } + + let variance = samples + .iter() + .map(|sample| { + let delta = sample - mean; + delta * delta + }) + .sum::() + / samples.len() as f64; + + variance.sqrt() +} + fn percentile(sorted: &[f64], p: f64) -> f64 { if sorted.is_empty() { return 0.0; @@ -775,20 +3195,351 @@ fn safe_ratio(lhs: f64, rhs: f64) -> f64 { } } +fn saturating_delta_ms(total_ms: f64, subtracted_ms: f64) -> f64 { + (total_ms - subtracted_ms).max(0.0) +} + fn format_ms(value: f64) -> String { format!("{value:.2}") } +fn format_hotspot_value(unit: &str, value: f64) -> String { + match unit { + "pct" => format!("{value:.1}%"), + "MiB" => format_mib(value), + _ => format_ms(value), + } +} + fn format_sample_list(samples: &[f64]) -> String { + format_scaled_sample_list(samples, std::convert::identity) +} + +fn format_scaled_sample_list(samples: &[f64], scale: impl Fn(f64) -> f64) -> String { let 
mut formatted = String::from("["); for (index, sample) in samples.iter().enumerate() { if index > 0 { formatted.push_str(", "); } - let _ = write!(&mut formatted, "{sample:.2}"); + let _ = write!(&mut formatted, "{:.2}", scale(*sample)); } formatted.push(']'); formatted } + +fn format_mib(value: f64) -> String { + format!("{value:.2}") +} + +fn format_label_list(labels: &[&str]) -> String { + labels + .iter() + .map(|label| format!("`{label}`")) + .collect::>() + .join(", ") +} + +fn format_string_label_list(labels: &[&str]) -> String { + labels + .iter() + .map(|label| format!("`{label}`")) + .collect::>() + .join(", ") +} + +fn push_unique_label<'a>(labels: &mut Vec<&'a str>, value: &'a str) { + if !labels.contains(&value) { + labels.push(value); + } +} + +fn format_delta_ms(value: f64) -> String { + format!("{value:+.2}") +} + +fn format_delta_pct(value: f64) -> String { + format!("{value:+.1}%") +} + +fn push_optional_sample(samples: &mut Option>, value: Option) { + if let Some(value) = value { + samples.get_or_insert_with(Vec::new).push(value); + } +} + +fn bytes_to_mib(value: f64) -> f64 { + value / (1024.0 * 1024.0) +} + +fn micros_to_ms(value: f64) -> f64 { + value / 1000.0 +} + +fn hotspot_wall_mean_ms(scenario: &BenchmarkScenarioReport) -> Option { + Some(scenario.wall_stats.mean_ms) +} + +fn hotspot_wall_stddev_ms(scenario: &BenchmarkScenarioReport) -> Option { + Some(scenario.wall_stats.stddev_ms) +} + +fn hotspot_wall_range_ms(scenario: &BenchmarkScenarioReport) -> Option { + Some(scenario.wall_range_ms()) +} + +fn hotspot_guest_import_mean_ms(scenario: &BenchmarkScenarioReport) -> Option { + scenario + .guest_import_stats + .as_ref() + .map(|stats| stats.mean_ms) +} + +fn hotspot_startup_overhead_mean_ms(scenario: &BenchmarkScenarioReport) -> Option { + scenario + .startup_overhead_stats + .as_ref() + .map(|stats| stats.mean_ms) +} + +fn hotspot_context_setup_mean_ms(scenario: &BenchmarkScenarioReport) -> Option { + 
Some(scenario.phase_stats.context_setup_ms.mean_ms) +} + +fn hotspot_startup_phase_mean_ms(scenario: &BenchmarkScenarioReport) -> Option { + Some(scenario.phase_stats.startup_ms.mean_ms) +} + +fn hotspot_guest_execution_mean_ms(scenario: &BenchmarkScenarioReport) -> Option { + scenario + .phase_stats + .guest_execution_ms + .as_ref() + .map(|stats| stats.mean_ms) +} + +fn hotspot_completion_mean_ms(scenario: &BenchmarkScenarioReport) -> Option { + Some(scenario.phase_stats.completion_ms.mean_ms) +} + +fn hotspot_startup_share_pct(scenario: &BenchmarkScenarioReport) -> Option { + scenario.mean_startup_share_pct() +} + +fn hotspot_rss_mean_mib(scenario: &BenchmarkScenarioReport) -> Option { + scenario + .resource_usage_stats + .as_ref()? + .rss_bytes + .as_ref() + .map(|stats| bytes_to_mib(stats.mean)) +} + +fn hotspot_heap_mean_mib(scenario: &BenchmarkScenarioReport) -> Option { + scenario + .resource_usage_stats + .as_ref()? + .heap_used_bytes + .as_ref() + .map(|stats| bytes_to_mib(stats.mean)) +} + +fn hotspot_total_cpu_mean_ms(scenario: &BenchmarkScenarioReport) -> Option { + scenario + .resource_usage_stats + .as_ref()? 
+ .cpu_total_us + .as_ref() + .map(|stats| micros_to_ms(stats.mean)) +} + +#[cfg(test)] +mod tests { + use super::*; + use std::cell::RefCell; + use tempfile::tempdir; + + fn synthetic_transport_reports() -> Vec { + TRANSPORT_RTT_PAYLOAD_BYTES + .iter() + .enumerate() + .map(|(index, payload_bytes)| { + let sample = index as f64 + 1.0; + BenchmarkTransportRttReport { + channel: TRANSPORT_RTT_CHANNEL, + payload_bytes: *payload_bytes, + samples_ms: vec![sample], + stats: compute_stats(&[sample]), + } + }) + .collect() + } + + fn synthetic_scenario_report( + definition: ScenarioDefinition, + wall_sample_ms: f64, + ) -> BenchmarkScenarioReport { + let context_setup_ms = wall_sample_ms / 5.0; + let startup_ms = wall_sample_ms / 4.0; + let guest_execution_ms = definition + .expect_import_metric + .then_some(wall_sample_ms / 3.0); + let completion_ms = + wall_sample_ms - context_setup_ms - startup_ms - guest_execution_ms.unwrap_or(0.0); + let startup_overhead_ms = definition + .expect_import_metric + .then_some(context_setup_ms + startup_ms + completion_ms); + let resource_usage_samples = BenchmarkResourceUsage { + rss_bytes: Some(vec![64.0 * 1024.0 * 1024.0]), + heap_used_bytes: Some(vec![12.0 * 1024.0 * 1024.0]), + cpu_user_us: None, + cpu_system_us: None, + cpu_total_us: Some(vec![wall_sample_ms * 1000.0]), + }; + + BenchmarkScenarioReport { + id: definition.id, + workload: definition.workload, + runtime: definition.runtime.label(), + mode: definition.mode.label(), + description: definition.description, + fixture: definition.fixture, + compile_cache: definition.compile_cache.label(), + wall_samples_ms: vec![wall_sample_ms], + wall_stats: compute_stats(&[wall_sample_ms]), + guest_import_samples_ms: guest_execution_ms.map(|sample| vec![sample]), + guest_import_stats: guest_execution_ms.map(|sample| compute_stats(&[sample])), + startup_overhead_samples_ms: startup_overhead_ms.map(|sample| vec![sample]), + startup_overhead_stats: startup_overhead_ms.map(|sample| 
compute_stats(&[sample])), + phase_samples_ms: BenchmarkScenarioPhases { + context_setup_ms: vec![context_setup_ms], + startup_ms: vec![startup_ms], + guest_execution_ms: guest_execution_ms.map(|sample| vec![sample]), + completion_ms: vec![completion_ms], + }, + phase_stats: BenchmarkScenarioPhases { + context_setup_ms: compute_stats(&[context_setup_ms]), + startup_ms: compute_stats(&[startup_ms]), + guest_execution_ms: guest_execution_ms.map(|sample| compute_stats(&[sample])), + completion_ms: compute_stats(&[completion_ms]), + }, + resource_usage_stats: compute_resource_usage_stats(&resource_usage_samples), + resource_usage_samples: Some(resource_usage_samples), + } + } + + fn synthetic_host() -> BenchmarkHost { + BenchmarkHost { + node_binary: String::from("node"), + node_version: String::from("v22.0.0"), + os: "linux", + arch: "x86_64", + logical_cpus: 8, + } + } + + #[test] + fn javascript_benchmark_orchestration_resumes_completed_stages_from_run_state() { + let tempdir = tempdir().expect("create tempdir"); + let repo_root = tempdir.path().join("repo"); + let artifact_dir = tempdir.path().join("artifacts"); + fs::create_dir_all(&repo_root).expect("create repo root"); + + let config = JavascriptBenchmarkConfig { + iterations: 1, + warmup_iterations: 0, + }; + let host = synthetic_host(); + let definitions = benchmark_scenarios(); + let mut state = StoredBenchmarkRunState::new(&config, &host, &repo_root); + state.record_transport_rtt(&synthetic_transport_reports()); + state.record_scenario(&synthetic_scenario_report(definitions[0], 10.0)); + persist_benchmark_run_state(&benchmark_run_state_path(&artifact_dir), &state) + .expect("persist initial run state"); + + let transport_calls = RefCell::new(0usize); + let scenario_calls = RefCell::new(Vec::new()); + let (report, resumed_stage_count, _) = orchestrate_javascript_benchmark_report( + &config, + &repo_root, + &host, + &artifact_dir, + || { + *transport_calls.borrow_mut() += 1; + Ok(synthetic_transport_reports()) 
+ }, + |definition| { + scenario_calls.borrow_mut().push(definition.id.to_owned()); + Ok(synthetic_scenario_report(definition, 20.0)) + }, + ) + .expect("resume benchmark orchestration"); + + assert_eq!(resumed_stage_count, 2); + assert_eq!(*transport_calls.borrow(), 0); + assert_eq!( + scenario_calls.borrow().as_slice(), + &definitions[1..] + .iter() + .map(|definition| definition.id.to_owned()) + .collect::>() + ); + assert_eq!( + report.transport_rtt.len(), + TRANSPORT_RTT_PAYLOAD_BYTES.len() + ); + assert_eq!(report.scenarios.len(), definitions.len()); + assert_eq!(report.scenarios[0].id, definitions[0].id); + assert_eq!(report.scenarios[1].id, definitions[1].id); + } + + #[test] + fn javascript_benchmark_orchestration_persists_completed_stages_before_failure() { + let tempdir = tempdir().expect("create tempdir"); + let repo_root = tempdir.path().join("repo"); + let artifact_dir = tempdir.path().join("artifacts"); + fs::create_dir_all(&repo_root).expect("create repo root"); + + let config = JavascriptBenchmarkConfig { + iterations: 1, + warmup_iterations: 0, + }; + let host = synthetic_host(); + let state_path = benchmark_run_state_path(&artifact_dir); + let failure = orchestrate_javascript_benchmark_report( + &config, + &repo_root, + &host, + &artifact_dir, + || Ok(synthetic_transport_reports()), + |definition| { + if definition.id == "cold-local-import" { + Err(JavascriptBenchmarkError::InvalidConfig("synthetic failure")) + } else { + Ok(synthetic_scenario_report(definition, 15.0)) + } + }, + ) + .expect_err("expected synthetic orchestration failure"); + + assert!(matches!( + failure, + JavascriptBenchmarkError::InvalidConfig("synthetic failure") + )); + + let stored_state = serde_json::from_str::( + &fs::read_to_string(&state_path).expect("read persisted run state"), + ) + .expect("parse persisted run state"); + assert!(stored_state.transport_rtt.is_some()); + assert_eq!( + stored_state + .scenarios + .iter() + .map(|scenario| scenario.id.as_str()) + 
.collect::>(), + vec!["isolate-startup", "prewarmed-isolate-startup"] + ); + } +} diff --git a/crates/execution/src/bin/node-import-bench.rs b/crates/execution/src/bin/node-import-bench.rs index f13d785aa..727658f06 100644 --- a/crates/execution/src/bin/node-import-bench.rs +++ b/crates/execution/src/bin/node-import-bench.rs @@ -1,10 +1,51 @@ -use agent_os_execution::benchmark::{run_javascript_benchmarks, JavascriptBenchmarkConfig}; +use agent_os_execution::benchmark::{ + run_javascript_benchmarks_with_recovery, JavascriptBenchmarkConfig, +}; +use std::path::PathBuf; + +struct CliConfig { + benchmark: JavascriptBenchmarkConfig, + baseline_path: Option, +} fn main() { match parse_config(std::env::args().skip(1)) { - Ok(config) => match run_javascript_benchmarks(&config) { - Ok(report) => { - print!("{}", report.render_markdown()); + Ok(cli_config) => match run_javascript_benchmarks_with_recovery( + &cli_config.benchmark, + cli_config.baseline_path.as_deref(), + ) { + Ok(output) => { + if output.resumed_stage_count > 0 { + eprintln!( + "Resumed {} completed benchmark stages from {}", + output.resumed_stage_count, + output + .artifact_paths + .json_path + .parent() + .expect("benchmark artifact parent directory") + .join("run-state.json") + .display() + ); + } + if let Some(path) = &cli_config.baseline_path { + eprintln!("Compared against baseline {}", path.display()); + } + eprintln!( + "Wrote Markdown report to {}", + output.artifact_paths.markdown_path.display() + ); + eprintln!( + "Wrote JSON report to {}", + output.artifact_paths.json_path.display() + ); + match std::fs::read_to_string(&output.artifact_paths.markdown_path) { + Ok(markdown) => print!("{markdown}"), + Err(err) => { + eprintln!("failed to read generated markdown report: {err}"); + std::process::exit(1); + } + } } Err(err) => { eprintln!("{err}"); @@ -14,16 +55,15 @@ fn main() { Err(err) => { eprintln!("{err}"); eprintln!(); - eprintln!("Usage: cargo run -p agent-os-execution --bin node-import-bench 
-- [--iterations N] [--warmup-iterations N]"); + eprintln!("Usage: cargo run -p agent-os-execution --bin node-import-bench -- [--iterations N] [--warmup-iterations N] [--baseline PATH]"); std::process::exit(2); } } } -fn parse_config( - args: impl IntoIterator, -) -> Result { - let mut config = JavascriptBenchmarkConfig::default(); +fn parse_config(args: impl IntoIterator) -> Result { + let mut benchmark = JavascriptBenchmarkConfig::default(); + let mut baseline_path = None; let mut args = args.into_iter(); while let Some(arg) = args.next() { @@ -32,13 +72,19 @@ fn parse_config( let value = args .next() .ok_or_else(|| String::from("missing value for --iterations"))?; - config.iterations = parse_usize_flag("--iterations", &value)?; + benchmark.iterations = parse_usize_flag("--iterations", &value)?; } "--warmup-iterations" => { let value = args .next() .ok_or_else(|| String::from("missing value for --warmup-iterations"))?; - config.warmup_iterations = parse_usize_flag("--warmup-iterations", &value)?; + benchmark.warmup_iterations = parse_usize_flag("--warmup-iterations", &value)?; + } + "--baseline" => { + let value = args + .next() + .ok_or_else(|| String::from("missing value for --baseline"))?; + baseline_path = Some(PathBuf::from(value)); } "--help" | "-h" => { return Err(String::from("help requested")); @@ -49,7 +95,10 @@ fn parse_config( } } - Ok(config) + Ok(CliConfig { + benchmark, + baseline_path, + }) } fn parse_usize_flag(flag: &str, value: &str) -> Result { diff --git a/crates/execution/src/node_import_cache.rs b/crates/execution/src/node_import_cache.rs index 2807b040a..c0c2b76a5 100644 --- a/crates/execution/src/node_import_cache.rs +++ b/crates/execution/src/node_import_cache.rs @@ -11,7 +11,7 @@ pub(crate) const NODE_IMPORT_CACHE_ASSET_ROOT_ENV: &str = "AGENT_OS_NODE_IMPORT_ const NODE_IMPORT_CACHE_PATH_ENV: &str = "AGENT_OS_NODE_IMPORT_CACHE_PATH"; const NODE_IMPORT_CACHE_LOADER_PATH_ENV: &str = "AGENT_OS_NODE_IMPORT_CACHE_LOADER_PATH"; const 
NODE_IMPORT_CACHE_SCHEMA_VERSION: &str = "1"; -const NODE_IMPORT_CACHE_LOADER_VERSION: &str = "4"; +const NODE_IMPORT_CACHE_LOADER_VERSION: &str = "5"; const NODE_IMPORT_CACHE_ASSET_VERSION: &str = "1"; const AGENT_OS_BUILTIN_SPECIFIER_PREFIX: &str = "agent-os:builtin/"; const AGENT_OS_POLYFILL_SPECIFIER_PREFIX: &str = "agent-os:polyfill/"; @@ -24,6 +24,9 @@ import { fileURLToPath, pathToFileURL } from 'node:url'; const GUEST_PATH_MAPPINGS = parseGuestPathMappings(process.env.AGENT_OS_GUEST_PATH_MAPPINGS); const ALLOWED_BUILTINS = new Set(parseJsonArray(process.env.AGENT_OS_ALLOWED_NODE_BUILTINS)); const CACHE_PATH = process.env.__NODE_IMPORT_CACHE_PATH_ENV__; +const PROJECTED_SOURCE_CACHE_ROOT = CACHE_PATH + ? path.join(path.dirname(CACHE_PATH), 'projected-sources') + : null; const ASSET_ROOT = process.env.__NODE_IMPORT_CACHE_ASSET_ROOT_ENV__; const DEBUG_ENABLED = process.env.__NODE_IMPORT_CACHE_DEBUG_ENV__ === '1'; const METRICS_PREFIX = '__NODE_IMPORT_CACHE_METRICS_PREFIX__'; @@ -60,6 +63,8 @@ const metrics = { packageTypeMisses: 0, moduleFormatHits: 0, moduleFormatMisses: 0, + sourceHits: 0, + sourceMisses: 0, }; export async function resolve(specifier, context, nextResolve) { @@ -184,6 +189,17 @@ export async function load(url, context, nextLoad) { return nextLoad(url, context); } + const projectedPackageSource = loadProjectedPackageSource(url, filePath, format); + if (projectedPackageSource != null) { + flushCacheState(); + emitMetrics(); + return { + shortCircuit: true, + format, + source: projectedPackageSource, + }; + } + const source = format === 'wasm' ? fs.readFileSync(filePath) @@ -266,6 +282,7 @@ function emptyCacheState() { resolutions: {}, packageTypes: {}, moduleFormats: {}, + projectedSources: {}, }; } @@ -286,6 +303,7 @@ function normalizeCacheState(value) { resolutions: isRecord(value.resolutions) ? value.resolutions : {}, packageTypes: isRecord(value.packageTypes) ? value.packageTypes : {}, moduleFormats: isRecord(value.moduleFormats) ? 
value.moduleFormats : {}, + projectedSources: isRecord(value.projectedSources) ? value.projectedSources : {}, }; } @@ -304,9 +322,64 @@ function mergeCacheStates(base, current) { ...base.moduleFormats, ...current.moduleFormats, }, + projectedSources: { + ...base.projectedSources, + ...current.projectedSources, + }, }; } +function loadProjectedPackageSource(url, filePath, format) { + if ( + format === 'wasm' || + !isProjectedPackageSource(filePath) || + !PROJECTED_SOURCE_CACHE_ROOT + ) { + return null; + } + + const cached = cacheState.projectedSources[url]; + if (cached && validateProjectedSourceEntry(cached, filePath, format)) { + metrics.sourceHits += 1; + return fs.readFileSync(cached.cachedPath, 'utf8'); + } + + metrics.sourceMisses += 1; + + const stat = statForPath(filePath); + if (!stat) { + return null; + } + + const source = rewriteBuiltinImports(fs.readFileSync(filePath, 'utf8'), filePath); + const cacheKey = hashString( + JSON.stringify({ + url, + format, + size: stat.size, + mtimeMs: stat.mtimeMs, + }), + ); + const extension = path.extname(filePath) || '.js'; + const cachedPath = path.join( + PROJECTED_SOURCE_CACHE_ROOT, + `${cacheKey}${extension}.cached`, + ); + fs.mkdirSync(path.dirname(cachedPath), { recursive: true }); + fs.writeFileSync(cachedPath, source); + + cacheState.projectedSources[url] = { + kind: 'text', + filePath, + format, + cachedPath, + size: stat.size, + mtimeMs: stat.mtimeMs, + }; + dirty = true; + return source; +} + function resolveAgentOsAsset(specifier) { if (typeof specifier !== 'string' || !ASSET_ROOT) { return null; @@ -530,6 +603,15 @@ function buildResolutionEntry(specifier, context, resolved) { return null; } +function isProjectedPackageSource(filePath) { + if (typeof filePath !== 'string' || isAssetPath(filePath)) { + return false; + } + + const guestPath = guestPathFromHostPath(filePath); + return typeof guestPath === 'string' && guestPath.includes('/node_modules/'); +} + function validateResolutionEntry(entry) { if 
(!isRecord(entry) || typeof entry.kind !== 'string') { return false; @@ -687,6 +769,29 @@ function validateModuleFormatEntry(entry) { return true; } +function validateProjectedSourceEntry(entry, filePath, format) { + if ( + !isRecord(entry) || + entry.kind !== 'text' || + typeof entry.filePath !== 'string' || + typeof entry.cachedPath !== 'string' || + typeof entry.format !== 'string' + ) { + return false; + } + + if (entry.filePath !== filePath || entry.format !== format) { + return false; + } + + const stat = statForPath(filePath); + if (!stat || stat.size !== entry.size || stat.mtimeMs !== entry.mtimeMs) { + return false; + } + + return statForPath(entry.cachedPath)?.isFile() ?? false; +} + function lookupPackageType(filePath) { let directory = path.dirname(filePath); diff --git a/crates/execution/tests/benchmark.rs b/crates/execution/tests/benchmark.rs index d22c616ae..e7525afd5 100644 --- a/crates/execution/tests/benchmark.rs +++ b/crates/execution/tests/benchmark.rs @@ -1,4 +1,117 @@ -use agent_os_execution::benchmark::{run_javascript_benchmarks, JavascriptBenchmarkConfig}; +use agent_os_execution::benchmark::{ + run_javascript_benchmarks, BenchmarkDistributionStats, BenchmarkHost, BenchmarkResourceUsage, + BenchmarkScenarioPhases, BenchmarkScenarioReport, BenchmarkStats, BenchmarkTransportRttReport, + JavascriptBenchmarkConfig, JavascriptBenchmarkReport, +}; +use serde_json::Value; +use std::fs; +use std::path::PathBuf; +use tempfile::tempdir; + +fn stats( + mean_ms: f64, + p50_ms: f64, + p95_ms: f64, + min_ms: f64, + max_ms: f64, + stddev_ms: f64, +) -> BenchmarkStats { + BenchmarkStats { + mean_ms, + p50_ms, + p95_ms, + min_ms, + max_ms, + stddev_ms, + } +} + +fn phase_samples( + context_setup_ms: Vec, + startup_ms: Vec, + guest_execution_ms: Option>, + completion_ms: Vec, +) -> BenchmarkScenarioPhases> { + BenchmarkScenarioPhases { + context_setup_ms, + startup_ms, + guest_execution_ms, + completion_ms, + } +} + +fn phase_stats( + context_setup_ms: 
BenchmarkStats, + startup_ms: BenchmarkStats, + guest_execution_ms: Option, + completion_ms: BenchmarkStats, +) -> BenchmarkScenarioPhases { + BenchmarkScenarioPhases { + context_setup_ms, + startup_ms, + guest_execution_ms, + completion_ms, + } +} + +fn transport_rtt( + payload_bytes: usize, + samples_ms: Vec, + stats: BenchmarkStats, +) -> BenchmarkTransportRttReport { + BenchmarkTransportRttReport { + channel: "execution-stdio-echo", + payload_bytes, + samples_ms, + stats, + } +} + +fn distribution_stats( + mean: f64, + p50: f64, + p95: f64, + min: f64, + max: f64, + stddev: f64, +) -> BenchmarkDistributionStats { + BenchmarkDistributionStats { + mean, + p50, + p95, + min, + max, + stddev, + } +} + +fn resource_samples( + rss_bytes: Option>, + heap_used_bytes: Option>, + cpu_total_us: Option>, +) -> BenchmarkResourceUsage> { + BenchmarkResourceUsage { + rss_bytes, + heap_used_bytes, + cpu_user_us: None, + cpu_system_us: None, + cpu_total_us, + } +} + +fn resource_stats( + rss_bytes: Option, + heap_used_bytes: Option, + cpu_total_us: Option, +) -> BenchmarkResourceUsage { + BenchmarkResourceUsage { + rss_bytes, + heap_used_bytes, + cpu_user_us: None, + cpu_system_us: None, + cpu_total_us, + } +} #[test] fn javascript_benchmark_harness_covers_required_startup_and_import_scenarios() { @@ -17,10 +130,26 @@ fn javascript_benchmark_harness_covers_required_startup_and_import_scenarios() { scenario_ids, vec![ "isolate-startup", + "prewarmed-isolate-startup", "cold-local-import", "warm-local-import", + "same-context-local-import", + "prewarmed-local-import", + "host-local-import", "builtin-import", + "hot-builtin-stream-import", + "hot-builtin-stream-web-import", + "hot-builtin-crypto-import", + "hot-builtin-zlib-import", + "hot-builtin-assert-import", + "hot-builtin-url-import", + "hot-projected-package-file-import", "large-package-import", + "projected-package-import", + "pdf-lib-startup", + "jszip-startup", + "jszip-end-to-end", + "jszip-repeated-session-compressed", 
] ); @@ -49,11 +178,954 @@ fn javascript_benchmark_harness_covers_required_startup_and_import_scenarios() { .len(), 1 ); + assert_eq!(warm.workload, "local-import"); + assert_eq!(warm.runtime, "native-execution"); + assert_eq!(warm.mode, "new-session-replay"); + + let same_context = report + .scenarios + .iter() + .find(|scenario| scenario.id == "same-context-local-import") + .expect("same-context-local-import scenario"); + assert_eq!(same_context.compile_cache, "primed"); + assert_eq!(same_context.workload, "local-import"); + assert_eq!(same_context.runtime, "native-execution"); + assert_eq!(same_context.mode, "same-session-replay"); + assert_eq!(same_context.wall_samples_ms.len(), 1); + + let prewarmed = report + .scenarios + .iter() + .find(|scenario| scenario.id == "prewarmed-local-import") + .expect("prewarmed-local-import scenario"); + assert_eq!(prewarmed.compile_cache, "primed"); + assert_eq!( + prewarmed + .guest_import_samples_ms + .as_ref() + .expect("prewarmed import samples") + .len(), + 1 + ); + assert_eq!( + prewarmed + .startup_overhead_samples_ms + .as_ref() + .expect("prewarmed startup samples") + .len(), + 1 + ); + assert_eq!(prewarmed.mode, "same-engine-replay"); + + let host = report + .scenarios + .iter() + .find(|scenario| scenario.id == "host-local-import") + .expect("host-local-import scenario"); + assert_eq!(host.workload, "local-import"); + assert_eq!(host.runtime, "host-node"); + assert_eq!(host.mode, "host-control"); + assert_eq!( + host.guest_import_samples_ms + .as_ref() + .expect("host import samples") + .len(), + 1 + ); + + let prewarmed_isolate = report + .scenarios + .iter() + .find(|scenario| scenario.id == "prewarmed-isolate-startup") + .expect("prewarmed-isolate-startup scenario"); + assert_eq!(prewarmed_isolate.workload, "startup-floor"); + assert_eq!(prewarmed_isolate.mode, "same-engine-replay"); + assert_eq!(prewarmed_isolate.compile_cache, "primed"); + assert!(prewarmed_isolate.guest_import_samples_ms.is_none()); + + let 
hot_builtin = report + .scenarios + .iter() + .find(|scenario| scenario.id == "hot-builtin-crypto-import") + .expect("hot-builtin-crypto-import scenario"); + assert_eq!(hot_builtin.workload, "builtin-hot-import"); + assert_eq!(hot_builtin.mode, "same-engine-replay"); + assert_eq!(hot_builtin.compile_cache, "primed"); + assert_eq!( + hot_builtin + .guest_import_samples_ms + .as_ref() + .expect("hot builtin import samples") + .len(), + 1 + ); + + let hot_projected = report + .scenarios + .iter() + .find(|scenario| scenario.id == "hot-projected-package-file-import") + .expect("hot-projected-package-file-import scenario"); + assert_eq!(hot_projected.workload, "projected-package-hot-import"); + assert_eq!(hot_projected.mode, "same-engine-replay"); + assert_eq!(hot_projected.compile_cache, "primed"); + assert_eq!( + hot_projected + .guest_import_samples_ms + .as_ref() + .expect("hot projected import samples") + .len(), + 1 + ); let rendered = report.render_markdown(); assert!(rendered.contains("ARC-021C")); assert!(rendered.contains("ARC-021D")); assert!(rendered.contains("ARC-022")); + assert!(rendered.contains("current import-cache materialization and builtin/polyfill prewarm")); assert!(rendered.contains("typescript")); + assert!(rendered.contains("projected TypeScript guest-path import")); + assert!(rendered.contains("projected-package-import")); + assert!(rendered.contains("pdf-lib document creation")); + assert!(rendered.contains("jszip archive staging")); + assert!(rendered.contains("jszip end-to-end archive roundtrip")); + assert!(rendered.contains("jszip compressed archive roundtrip")); + assert!(rendered.contains("prewarmed-isolate-startup")); + assert!(rendered.contains("prewarmed-local-import")); + assert!(rendered.contains("same-context-local-import")); + assert!(rendered.contains("host-local-import")); assert!(rendered.contains("node:path + node:url + node:fs/promises")); + assert!(rendered.contains("node:stream/web")); + 
assert!(rendered.contains("node:crypto")); + assert!(rendered.contains("projected TypeScript compiler file")); + assert!(rendered.contains("hot-projected-package-file-import")); + assert!(rendered.contains("## Transport RTT")); + assert!(rendered.contains("## Control Matrix")); + assert!(rendered.contains("## Ranked Hotspots")); + assert!(rendered.contains("### Wall Time (`time`, `ms`)")); + assert!(rendered.contains("### Startup Share Of Wall (`share`, `pct`)")); + assert!(rendered.contains("Mean context (ms)")); + assert!(rendered.contains("same-session-replay")); + assert!(rendered.contains("host-control")); + + let json = report.render_json().expect("render benchmark json"); + let parsed: Value = serde_json::from_str(&json).expect("parse benchmark json"); + assert_eq!(parsed["artifact_version"], 5); + assert_eq!(parsed["summary"]["scenario_count"], 21); + assert_eq!(parsed["summary"]["recorded_samples_per_scenario"], 1); + assert_eq!( + parsed["transport_rtt"] + .as_array() + .expect("transport rtt array") + .len(), + 3 + ); + let scenarios = parsed["scenarios"] + .as_array() + .expect("json scenarios array"); + assert_eq!(scenarios.len(), 21); + assert!( + parsed["summary"]["slowest_wall_scenario"]["id"].is_string(), + "expected a summarized slowest wall scenario: {json}" + ); + let startup_floor_matrix = parsed["summary"]["control_matrix"] + .as_array() + .expect("control matrix array") + .iter() + .find(|row| row["workload"] == "startup-floor") + .expect("startup-floor control matrix row"); + assert_eq!( + startup_floor_matrix["modes"].as_array().map(Vec::len), + Some(2) + ); + let local_import_matrix = parsed["summary"]["control_matrix"] + .as_array() + .expect("control matrix array") + .iter() + .find(|row| row["workload"] == "local-import") + .expect("local-import control matrix row"); + assert_eq!( + local_import_matrix["modes"].as_array().map(Vec::len), + Some(5) + ); + assert_eq!( + local_import_matrix["runtimes"].as_array().map(Vec::len), + Some(2) + 
); + let builtin_hot_matrix = parsed["summary"]["control_matrix"] + .as_array() + .expect("control matrix array") + .iter() + .find(|row| row["workload"] == "builtin-hot-import") + .expect("builtin-hot-import control matrix row"); + assert_eq!( + builtin_hot_matrix["scenario_ids"].as_array().map(Vec::len), + Some(6) + ); + let hotspot_rankings = parsed["summary"]["hotspot_rankings"] + .as_array() + .expect("hotspot rankings array"); + assert_eq!(hotspot_rankings.len(), 13); + assert_eq!(hotspot_rankings[0]["metric"], "wall_mean_ms"); + assert_eq!(hotspot_rankings[1]["metric"], "wall_stddev_ms"); + assert_eq!(hotspot_rankings[1]["dimension"], "stability"); + assert_eq!(hotspot_rankings[0]["unit"], "ms"); + assert!(scenarios + .iter() + .all(|scenario| scenario["wall_stats"]["stddev_ms"].is_number())); + assert!(scenarios.iter().any(|scenario| { + scenario["id"] == "prewarmed-isolate-startup" + && scenario["workload"] == "startup-floor" + && scenario["mode"] == "same-engine-replay" + && scenario["compile_cache"] == "primed" + && scenario["guest_import_stats"].is_null() + })); + assert!(scenarios.iter().any(|scenario| { + scenario["id"] == "same-context-local-import" + && scenario["workload"] == "local-import" + && scenario["runtime"] == "native-execution" + && scenario["mode"] == "same-session-replay" + && scenario["compile_cache"] == "primed" + })); + assert!(scenarios.iter().any(|scenario| { + scenario["id"] == "host-local-import" + && scenario["workload"] == "local-import" + && scenario["runtime"] == "host-node" + && scenario["mode"] == "host-control" + && scenario["guest_import_stats"]["mean_ms"].is_number() + })); + assert!(scenarios.iter().any(|scenario| { + scenario["id"] == "hot-builtin-stream-web-import" + && scenario["fixture"] == "node:stream/web" + && scenario["compile_cache"] == "primed" + && scenario["guest_import_stats"]["mean_ms"].is_number() + })); + assert!(scenarios.iter().any(|scenario| { + scenario["id"] == "hot-builtin-crypto-import" + && 
scenario["fixture"] == "node:crypto" + && scenario["compile_cache"] == "primed" + && scenario["guest_import_stats"]["mean_ms"].is_number() + })); + assert!(scenarios.iter().any(|scenario| { + scenario["id"] == "hot-projected-package-file-import" + && scenario["fixture"] == "projected TypeScript compiler file" + && scenario["compile_cache"] == "primed" + && scenario["guest_import_stats"]["mean_ms"].is_number() + })); + assert!(scenarios.iter().any(|scenario| { + scenario["id"] == "pdf-lib-startup" + && scenario["fixture"] == "pdf-lib document creation" + && scenario["guest_import_stats"]["mean_ms"].is_number() + })); + assert!(scenarios.iter().any(|scenario| { + scenario["id"] == "large-package-import" + && scenario["fixture"] == "typescript" + && scenario["compile_cache"] == "disabled" + && scenario["guest_import_stats"]["mean_ms"].is_number() + })); + assert!(scenarios.iter().any(|scenario| { + scenario["id"] == "jszip-startup" + && scenario["fixture"] == "jszip archive staging" + && scenario["guest_import_stats"]["mean_ms"].is_number() + })); + assert!(scenarios.iter().any(|scenario| { + scenario["id"] == "jszip-end-to-end" + && scenario["fixture"] == "jszip end-to-end archive roundtrip" + && scenario["compile_cache"] == "disabled" + && scenario["guest_import_stats"]["mean_ms"].is_number() + })); + assert!(scenarios.iter().any(|scenario| { + scenario["id"] == "jszip-repeated-session-compressed" + && scenario["fixture"] == "jszip compressed archive roundtrip" + && scenario["compile_cache"] == "primed" + && scenario["guest_import_stats"]["mean_ms"].is_number() + })); + assert!(scenarios.iter().any(|scenario| { + scenario["id"] == "prewarmed-local-import" + && scenario["fixture"] == "24-module local ESM graph" + && scenario["compile_cache"] == "primed" + && scenario["guest_import_stats"]["mean_ms"].is_number() + })); + assert!(scenarios.iter().any(|scenario| { + scenario["id"] == "projected-package-import" + && scenario["fixture"] == "projected TypeScript guest-path 
import" + && scenario["compile_cache"] == "primed" + && scenario["guest_import_stats"]["mean_ms"].is_number() + })); + assert!(scenarios.iter().any(|scenario| { + scenario["guest_import_samples_ms"].is_array() + && scenario["startup_overhead_samples_ms"].is_array() + && scenario["mean_startup_share_pct"].is_number() + && scenario["phase_stats"]["startup_ms"]["mean_ms"].is_number() + && scenario["phase_samples_ms"]["completion_ms"].is_array() + && scenario["resource_usage_stats"]["rss_bytes"]["mean"].is_number() + && scenario["resource_usage_stats"]["cpu_total_us"]["mean"].is_number() + && scenario["resource_usage_samples"]["heap_used_bytes"].is_array() + })); +} + +#[test] +fn javascript_benchmark_json_artifact_stays_stable_for_summary_and_samples() { + let report = JavascriptBenchmarkReport { + generated_at_unix_ms: 42, + config: JavascriptBenchmarkConfig { + iterations: 2, + warmup_iterations: 1, + }, + host: BenchmarkHost { + node_binary: String::from("node"), + node_version: String::from("v22.0.0"), + os: "linux", + arch: "x86_64", + logical_cpus: 8, + }, + repo_root: PathBuf::from("/repo"), + transport_rtt: vec![ + transport_rtt(32, vec![0.4, 0.6], stats(0.5, 0.4, 0.6, 0.4, 0.6, 0.1)), + transport_rtt(4096, vec![0.9, 1.1], stats(1.0, 0.9, 1.1, 0.9, 1.1, 0.1)), + transport_rtt(65536, vec![2.6, 3.0], stats(2.8, 2.6, 3.0, 2.6, 3.0, 0.2)), + ], + scenarios: vec![ + BenchmarkScenarioReport { + id: "fast-scenario", + workload: "fixture-a", + runtime: "native-execution", + mode: "true-cold-start", + description: "Faster benchmark path", + fixture: "fixture-a", + compile_cache: "disabled", + wall_samples_ms: vec![10.0, 14.0], + wall_stats: stats(12.0, 10.0, 14.0, 10.0, 14.0, 2.0), + guest_import_samples_ms: Some(vec![4.0, 6.0]), + guest_import_stats: Some(stats(5.0, 4.0, 6.0, 4.0, 6.0, 1.0)), + startup_overhead_samples_ms: Some(vec![6.0, 8.0]), + startup_overhead_stats: Some(stats(7.0, 6.0, 8.0, 6.0, 8.0, 1.0)), + phase_samples_ms: phase_samples( + vec![1.0, 2.0], + 
vec![2.0, 3.0], + Some(vec![4.0, 6.0]), + vec![3.0, 3.0], + ), + phase_stats: phase_stats( + stats(1.5, 1.0, 2.0, 1.0, 2.0, 0.5), + stats(2.5, 2.0, 3.0, 2.0, 3.0, 0.5), + Some(stats(5.0, 4.0, 6.0, 4.0, 6.0, 1.0)), + stats(3.0, 3.0, 3.0, 3.0, 3.0, 0.0), + ), + resource_usage_samples: Some(resource_samples( + Some(vec![32.0 * 1024.0 * 1024.0, 36.0 * 1024.0 * 1024.0]), + Some(vec![8.0 * 1024.0 * 1024.0, 10.0 * 1024.0 * 1024.0]), + Some(vec![4000.0, 6000.0]), + )), + resource_usage_stats: Some(resource_stats( + Some(distribution_stats( + 34.0 * 1024.0 * 1024.0, + 32.0 * 1024.0 * 1024.0, + 36.0 * 1024.0 * 1024.0, + 32.0 * 1024.0 * 1024.0, + 36.0 * 1024.0 * 1024.0, + 2.0 * 1024.0 * 1024.0, + )), + Some(distribution_stats( + 9.0 * 1024.0 * 1024.0, + 8.0 * 1024.0 * 1024.0, + 10.0 * 1024.0 * 1024.0, + 8.0 * 1024.0 * 1024.0, + 10.0 * 1024.0 * 1024.0, + 1.0 * 1024.0 * 1024.0, + )), + Some(distribution_stats( + 5000.0, 4000.0, 6000.0, 4000.0, 6000.0, 1000.0, + )), + )), + }, + BenchmarkScenarioReport { + id: "slow-scenario", + workload: "fixture-b", + runtime: "host-node", + mode: "host-control", + description: "Slower benchmark path", + fixture: "fixture-b", + compile_cache: "primed", + wall_samples_ms: vec![30.0, 34.0], + wall_stats: stats(32.0, 30.0, 34.0, 30.0, 34.0, 2.0), + guest_import_samples_ms: Some(vec![12.0, 14.0]), + guest_import_stats: Some(stats(13.0, 12.0, 14.0, 12.0, 14.0, 1.0)), + startup_overhead_samples_ms: Some(vec![18.0, 20.0]), + startup_overhead_stats: Some(stats(19.0, 18.0, 20.0, 18.0, 20.0, 1.0)), + phase_samples_ms: phase_samples( + vec![4.0, 4.0], + vec![5.0, 6.0], + Some(vec![12.0, 14.0]), + vec![9.0, 10.0], + ), + phase_stats: phase_stats( + stats(4.0, 4.0, 4.0, 4.0, 4.0, 0.0), + stats(5.5, 5.0, 6.0, 5.0, 6.0, 0.5), + Some(stats(13.0, 12.0, 14.0, 12.0, 14.0, 1.0)), + stats(9.5, 9.0, 10.0, 9.0, 10.0, 0.5), + ), + resource_usage_samples: Some(resource_samples( + Some(vec![64.0 * 1024.0 * 1024.0, 72.0 * 1024.0 * 1024.0]), + Some(vec![14.0 * 1024.0 * 
1024.0, 18.0 * 1024.0 * 1024.0]), + Some(vec![9000.0, 11000.0]), + )), + resource_usage_stats: Some(resource_stats( + Some(distribution_stats( + 68.0 * 1024.0 * 1024.0, + 64.0 * 1024.0 * 1024.0, + 72.0 * 1024.0 * 1024.0, + 64.0 * 1024.0 * 1024.0, + 72.0 * 1024.0 * 1024.0, + 4.0 * 1024.0 * 1024.0, + )), + Some(distribution_stats( + 16.0 * 1024.0 * 1024.0, + 14.0 * 1024.0 * 1024.0, + 18.0 * 1024.0 * 1024.0, + 14.0 * 1024.0 * 1024.0, + 18.0 * 1024.0 * 1024.0, + 2.0 * 1024.0 * 1024.0, + )), + Some(distribution_stats( + 10000.0, 9000.0, 11000.0, 9000.0, 11000.0, 1000.0, + )), + )), + }, + ], + }; + + let json = report.render_json().expect("render json"); + let parsed: Value = serde_json::from_str(&json).expect("parse json"); + + assert_eq!(parsed["artifact_version"], 5); + assert_eq!(parsed["generated_at_unix_ms"], 42); + assert_eq!( + parsed["command"].as_str(), + Some( + "cargo run -p agent-os-execution --bin node-import-bench -- --iterations 2 --warmup-iterations 1" + ) + ); + assert_eq!(parsed["summary"]["scenario_count"], 2); + assert_eq!(parsed["summary"]["recorded_samples_per_scenario"], 2); + assert_eq!( + parsed["summary"]["control_matrix"][0]["workload"].as_str(), + Some("fixture-a") + ); + assert_eq!( + parsed["summary"]["control_matrix"][1]["runtimes"][0].as_str(), + Some("host-node") + ); + assert_eq!( + parsed["transport_rtt"][2]["payload_bytes"].as_u64(), + Some(65536) + ); + assert_eq!(parsed["transport_rtt"][2]["stats"]["mean_ms"], 2.8); + assert_eq!( + parsed["summary"]["slowest_wall_scenario"]["id"].as_str(), + Some("slow-scenario") + ); + assert_eq!( + parsed["summary"]["slowest_guest_import_scenario"]["id"].as_str(), + Some("slow-scenario") + ); + assert_eq!( + parsed["summary"]["highest_startup_share_scenario"]["id"].as_str(), + Some("slow-scenario") + ); + let hotspot_rankings = parsed["summary"]["hotspot_rankings"] + .as_array() + .expect("hotspot rankings array"); + assert_eq!(hotspot_rankings.len(), 13); + 
assert_eq!(hotspot_rankings[0]["metric"], "wall_mean_ms"); + assert_eq!(hotspot_rankings[0]["label"], "Wall Time"); + assert_eq!( + hotspot_rankings[0]["ranked_scenarios"][0]["id"].as_str(), + Some("slow-scenario") + ); + assert_eq!(hotspot_rankings[0]["ranked_scenarios"][0]["rank"], 1); + assert_eq!(hotspot_rankings[3]["metric"], "guest_import_mean_ms"); + assert_eq!( + hotspot_rankings[3]["ranked_scenarios"][0]["value"].as_f64(), + Some(13.0) + ); + assert_eq!(hotspot_rankings[9]["metric"], "startup_share_pct"); + assert_eq!(hotspot_rankings[9]["unit"], "pct"); + assert_eq!(hotspot_rankings[10]["metric"], "rss_mean_mib"); + assert_eq!(hotspot_rankings[12]["metric"], "cpu_total_mean_ms"); + + let scenarios = parsed["scenarios"].as_array().expect("scenario array"); + assert_eq!(scenarios.len(), 2); + assert_eq!(scenarios[0]["workload"], "fixture-a"); + assert_eq!(scenarios[0]["runtime"], "native-execution"); + assert_eq!(scenarios[0]["mode"], "true-cold-start"); + assert_eq!(scenarios[0]["wall_stats"]["stddev_ms"], 2.0); + assert_eq!(scenarios[0]["mean_startup_share_pct"], 58.333333333333336); + assert_eq!( + scenarios[0]["resource_usage_stats"]["rss_bytes"]["mean"], + 35651584.0 + ); + assert_eq!( + scenarios[0]["resource_usage_stats"]["cpu_total_us"]["mean"], + 5000.0 + ); + assert_eq!( + scenarios[0]["phase_stats"]["context_setup_ms"]["mean_ms"], + 1.5 + ); + assert_eq!(scenarios[0]["phase_stats"]["completion_ms"]["mean_ms"], 3.0); + assert_eq!(scenarios[1]["mean_startup_share_pct"], 59.375); + assert_eq!(scenarios[1]["phase_stats"]["startup_ms"]["mean_ms"], 5.5); + assert_eq!( + scenarios[1]["resource_usage_stats"]["heap_used_bytes"]["mean"], + 16777216.0 + ); +} + +#[test] +fn javascript_benchmark_hotspot_rankings_handle_missing_metrics() { + let report = JavascriptBenchmarkReport { + generated_at_unix_ms: 42, + config: JavascriptBenchmarkConfig { + iterations: 2, + warmup_iterations: 1, + }, + host: BenchmarkHost { + node_binary: String::from("node"), + 
node_version: String::from("v22.0.0"), + os: "linux", + arch: "x86_64", + logical_cpus: 8, + }, + repo_root: PathBuf::from("/repo"), + transport_rtt: vec![], + scenarios: vec![ + BenchmarkScenarioReport { + id: "alpha", + workload: "fixture-a", + runtime: "native-execution", + mode: "true-cold-start", + description: "Alpha path", + fixture: "fixture-a", + compile_cache: "disabled", + wall_samples_ms: vec![15.0, 17.0], + wall_stats: stats(16.0, 15.0, 17.0, 15.0, 17.0, 1.0), + guest_import_samples_ms: Some(vec![7.0, 9.0]), + guest_import_stats: Some(stats(8.0, 7.0, 9.0, 7.0, 9.0, 1.0)), + startup_overhead_samples_ms: Some(vec![8.0, 8.0]), + startup_overhead_stats: Some(stats(8.0, 8.0, 8.0, 8.0, 8.0, 0.0)), + phase_samples_ms: phase_samples( + vec![2.0, 2.0], + vec![3.0, 3.0], + Some(vec![7.0, 9.0]), + vec![3.0, 3.0], + ), + phase_stats: phase_stats( + stats(2.0, 2.0, 2.0, 2.0, 2.0, 0.0), + stats(3.0, 3.0, 3.0, 3.0, 3.0, 0.0), + Some(stats(8.0, 7.0, 9.0, 7.0, 9.0, 1.0)), + stats(3.0, 3.0, 3.0, 3.0, 3.0, 0.0), + ), + resource_usage_samples: Some(resource_samples( + Some(vec![40.0 * 1024.0 * 1024.0, 44.0 * 1024.0 * 1024.0]), + None, + Some(vec![6000.0, 8000.0]), + )), + resource_usage_stats: Some(resource_stats( + Some(distribution_stats( + 42.0 * 1024.0 * 1024.0, + 40.0 * 1024.0 * 1024.0, + 44.0 * 1024.0 * 1024.0, + 40.0 * 1024.0 * 1024.0, + 44.0 * 1024.0 * 1024.0, + 2.0 * 1024.0 * 1024.0, + )), + None, + Some(distribution_stats( + 7000.0, 6000.0, 8000.0, 6000.0, 8000.0, 1000.0, + )), + )), + }, + BenchmarkScenarioReport { + id: "beta", + workload: "fixture-b", + runtime: "host-node", + mode: "host-control", + description: "Beta path", + fixture: "fixture-b", + compile_cache: "primed", + wall_samples_ms: vec![20.0, 24.0], + wall_stats: stats(22.0, 20.0, 24.0, 20.0, 24.0, 2.0), + guest_import_samples_ms: Some(vec![10.0, 12.0]), + guest_import_stats: Some(stats(11.0, 10.0, 12.0, 10.0, 12.0, 1.0)), + startup_overhead_samples_ms: Some(vec![9.0, 11.0]), + 
startup_overhead_stats: Some(stats(10.0, 9.0, 11.0, 9.0, 11.0, 1.0)), + phase_samples_ms: phase_samples( + vec![3.0, 3.0], + vec![4.0, 4.0], + Some(vec![10.0, 12.0]), + vec![5.0, 5.0], + ), + phase_stats: phase_stats( + stats(3.0, 3.0, 3.0, 3.0, 3.0, 0.0), + stats(4.0, 4.0, 4.0, 4.0, 4.0, 0.0), + Some(stats(11.0, 10.0, 12.0, 10.0, 12.0, 1.0)), + stats(5.0, 5.0, 5.0, 5.0, 5.0, 0.0), + ), + resource_usage_samples: Some(resource_samples( + Some(vec![60.0 * 1024.0 * 1024.0, 68.0 * 1024.0 * 1024.0]), + Some(vec![12.0 * 1024.0 * 1024.0, 14.0 * 1024.0 * 1024.0]), + Some(vec![9000.0, 12000.0]), + )), + resource_usage_stats: Some(resource_stats( + Some(distribution_stats( + 64.0 * 1024.0 * 1024.0, + 60.0 * 1024.0 * 1024.0, + 68.0 * 1024.0 * 1024.0, + 60.0 * 1024.0 * 1024.0, + 68.0 * 1024.0 * 1024.0, + 4.0 * 1024.0 * 1024.0, + )), + Some(distribution_stats( + 13.0 * 1024.0 * 1024.0, + 12.0 * 1024.0 * 1024.0, + 14.0 * 1024.0 * 1024.0, + 12.0 * 1024.0 * 1024.0, + 14.0 * 1024.0 * 1024.0, + 1.0 * 1024.0 * 1024.0, + )), + Some(distribution_stats( + 10500.0, 9000.0, 12000.0, 9000.0, 12000.0, 1500.0, + )), + )), + }, + BenchmarkScenarioReport { + id: "gamma", + workload: "fixture-c", + runtime: "native-execution", + mode: "baseline-control", + description: "Gamma path", + fixture: "fixture-c", + compile_cache: "disabled", + wall_samples_ms: vec![12.0, 14.0], + wall_stats: stats(13.0, 12.0, 14.0, 12.0, 14.0, 1.0), + guest_import_samples_ms: None, + guest_import_stats: None, + startup_overhead_samples_ms: None, + startup_overhead_stats: None, + phase_samples_ms: phase_samples( + vec![1.0, 1.0], + vec![2.0, 2.0], + None, + vec![4.0, 4.0], + ), + phase_stats: phase_stats( + stats(1.0, 1.0, 1.0, 1.0, 1.0, 0.0), + stats(2.0, 2.0, 2.0, 2.0, 2.0, 0.0), + None, + stats(4.0, 4.0, 4.0, 4.0, 4.0, 0.0), + ), + resource_usage_samples: Some(resource_samples( + Some(vec![24.0 * 1024.0 * 1024.0, 28.0 * 1024.0 * 1024.0]), + None, + None, + )), + resource_usage_stats: Some(resource_stats( + 
Some(distribution_stats( + 26.0 * 1024.0 * 1024.0, + 24.0 * 1024.0 * 1024.0, + 28.0 * 1024.0 * 1024.0, + 24.0 * 1024.0 * 1024.0, + 28.0 * 1024.0 * 1024.0, + 2.0 * 1024.0 * 1024.0, + )), + None, + None, + )), + }, + ], + }; + + let json = report.render_json().expect("render json"); + let parsed: Value = serde_json::from_str(&json).expect("parse json"); + let hotspot_rankings = parsed["summary"]["hotspot_rankings"] + .as_array() + .expect("hotspot rankings array"); + let wall_ranking = hotspot_rankings + .iter() + .find(|ranking| ranking["metric"] == "wall_mean_ms") + .expect("wall ranking"); + assert_eq!(wall_ranking["ranked_scenarios"][0]["id"], "beta"); + assert_eq!(wall_ranking["ranked_scenarios"][1]["id"], "alpha"); + assert_eq!(wall_ranking["ranked_scenarios"][2]["id"], "gamma"); + + let guest_execution_ranking = hotspot_rankings + .iter() + .find(|ranking| ranking["metric"] == "guest_execution_mean_ms") + .expect("guest execution ranking"); + assert_eq!(guest_execution_ranking["ranked_scenarios"][0]["id"], "beta"); + assert_eq!( + guest_execution_ranking["ranked_scenarios"][1]["id"], + "alpha" + ); + assert_eq!( + guest_execution_ranking["scenarios_without_metric"][0].as_str(), + Some("gamma") + ); + let rss_ranking = hotspot_rankings + .iter() + .find(|ranking| ranking["metric"] == "rss_mean_mib") + .expect("rss ranking"); + assert_eq!(rss_ranking["ranked_scenarios"][0]["id"], "beta"); + let cpu_ranking = hotspot_rankings + .iter() + .find(|ranking| ranking["metric"] == "cpu_total_mean_ms") + .expect("cpu ranking"); + assert_eq!(cpu_ranking["scenarios_without_metric"][0], "gamma"); + + let markdown = report.render_markdown(); + assert!(markdown.contains("## Ranked Hotspots")); + assert!(markdown.contains("## Stability And Resource Summary")); + assert!(markdown.contains("### Guest Execution Phase (`time`, `ms`)")); + assert!(markdown.contains("### RSS (`memory`, `MiB`)")); + assert!(markdown.contains("Missing metric for: `gamma`")); +} + +#[test] +fn 
javascript_benchmark_comparison_artifact_stays_stable_for_deltas() { + let report = JavascriptBenchmarkReport { + generated_at_unix_ms: 42, + config: JavascriptBenchmarkConfig { + iterations: 2, + warmup_iterations: 1, + }, + host: BenchmarkHost { + node_binary: String::from("node"), + node_version: String::from("v22.0.0"), + os: "linux", + arch: "x86_64", + logical_cpus: 8, + }, + repo_root: PathBuf::from("/repo"), + transport_rtt: vec![ + transport_rtt(32, vec![0.4, 0.6], stats(0.5, 0.4, 0.6, 0.4, 0.6, 0.1)), + transport_rtt(4096, vec![0.9, 1.1], stats(1.0, 0.9, 1.1, 0.9, 1.1, 0.1)), + transport_rtt(65536, vec![2.6, 3.0], stats(2.8, 2.6, 3.0, 2.6, 3.0, 0.2)), + ], + scenarios: vec![ + BenchmarkScenarioReport { + id: "fast-scenario", + workload: "fixture-a", + runtime: "native-execution", + mode: "true-cold-start", + description: "Faster benchmark path", + fixture: "fixture-a", + compile_cache: "disabled", + wall_samples_ms: vec![10.0, 14.0], + wall_stats: stats(12.0, 10.0, 14.0, 10.0, 14.0, 2.0), + guest_import_samples_ms: Some(vec![4.0, 6.0]), + guest_import_stats: Some(stats(5.0, 4.0, 6.0, 4.0, 6.0, 1.0)), + startup_overhead_samples_ms: Some(vec![6.0, 8.0]), + startup_overhead_stats: Some(stats(7.0, 6.0, 8.0, 6.0, 8.0, 1.0)), + phase_samples_ms: phase_samples( + vec![1.0, 2.0], + vec![2.0, 3.0], + Some(vec![4.0, 6.0]), + vec![3.0, 3.0], + ), + phase_stats: phase_stats( + stats(1.5, 1.0, 2.0, 1.0, 2.0, 0.5), + stats(2.5, 2.0, 3.0, 2.0, 3.0, 0.5), + Some(stats(5.0, 4.0, 6.0, 4.0, 6.0, 1.0)), + stats(3.0, 3.0, 3.0, 3.0, 3.0, 0.0), + ), + resource_usage_samples: None, + resource_usage_stats: None, + }, + BenchmarkScenarioReport { + id: "slow-scenario", + workload: "fixture-b", + runtime: "native-execution", + mode: "new-session-replay", + description: "Slower benchmark path", + fixture: "fixture-b", + compile_cache: "primed", + wall_samples_ms: vec![30.0, 34.0], + wall_stats: stats(32.0, 30.0, 34.0, 30.0, 34.0, 2.0), + guest_import_samples_ms: Some(vec![12.0, 
14.0]), + guest_import_stats: Some(stats(13.0, 12.0, 14.0, 12.0, 14.0, 1.0)), + startup_overhead_samples_ms: Some(vec![18.0, 20.0]), + startup_overhead_stats: Some(stats(19.0, 18.0, 20.0, 18.0, 20.0, 1.0)), + phase_samples_ms: phase_samples( + vec![4.0, 4.0], + vec![5.0, 6.0], + Some(vec![12.0, 14.0]), + vec![9.0, 10.0], + ), + phase_stats: phase_stats( + stats(4.0, 4.0, 4.0, 4.0, 4.0, 0.0), + stats(5.5, 5.0, 6.0, 5.0, 6.0, 0.5), + Some(stats(13.0, 12.0, 14.0, 12.0, 14.0, 1.0)), + stats(9.5, 9.0, 10.0, 9.0, 10.0, 0.5), + ), + resource_usage_samples: None, + resource_usage_stats: None, + }, + BenchmarkScenarioReport { + id: "current-only", + workload: "fixture-c", + runtime: "host-node", + mode: "host-control", + description: "Current-only scenario", + fixture: "fixture-c", + compile_cache: "disabled", + wall_samples_ms: vec![8.0, 10.0], + wall_stats: stats(9.0, 8.0, 10.0, 8.0, 10.0, 1.0), + guest_import_samples_ms: None, + guest_import_stats: None, + startup_overhead_samples_ms: None, + startup_overhead_stats: None, + phase_samples_ms: phase_samples( + vec![1.0, 1.0], + vec![2.0, 3.0], + None, + vec![5.0, 6.0], + ), + phase_stats: phase_stats( + stats(1.0, 1.0, 1.0, 1.0, 1.0, 0.0), + stats(2.5, 2.0, 3.0, 2.0, 3.0, 0.5), + None, + stats(5.5, 5.0, 6.0, 5.0, 6.0, 0.5), + ), + resource_usage_samples: None, + resource_usage_stats: None, + }, + ], + }; + + let tempdir = tempdir().expect("create tempdir"); + let baseline_path = tempdir.path().join("baseline.json"); + fs::write( + &baseline_path, + r#"{ + "artifact_version": 1, + "generated_at_unix_ms": 24, + "scenarios": [ + { + "id": "fast-scenario", + "wall_stats": { + "mean_ms": 15.0, + "p50_ms": 15.0, + "p95_ms": 15.0, + "min_ms": 15.0, + "max_ms": 15.0, + "stddev_ms": 0.0 + }, + "guest_import_stats": { + "mean_ms": 6.0, + "p50_ms": 6.0, + "p95_ms": 6.0, + "min_ms": 6.0, + "max_ms": 6.0, + "stddev_ms": 0.0 + }, + "startup_overhead_stats": { + "mean_ms": 9.0, + "p50_ms": 9.0, + "p95_ms": 9.0, + "min_ms": 9.0, + 
"max_ms": 9.0, + "stddev_ms": 0.0 + } + }, + { + "id": "slow-scenario", + "wall_stats": { + "mean_ms": 28.0, + "p50_ms": 28.0, + "p95_ms": 28.0, + "min_ms": 28.0, + "max_ms": 28.0, + "stddev_ms": 0.0 + }, + "guest_import_stats": { + "mean_ms": 11.0, + "p50_ms": 11.0, + "p95_ms": 11.0, + "min_ms": 11.0, + "max_ms": 11.0, + "stddev_ms": 0.0 + }, + "startup_overhead_stats": { + "mean_ms": 17.0, + "p50_ms": 17.0, + "p95_ms": 17.0, + "min_ms": 17.0, + "max_ms": 17.0, + "stddev_ms": 0.0 + } + }, + { + "id": "baseline-only", + "wall_stats": { + "mean_ms": 5.0, + "p50_ms": 5.0, + "p95_ms": 5.0, + "min_ms": 5.0, + "max_ms": 5.0, + "stddev_ms": 0.0 + } + } + ] +}"#, + ) + .expect("write baseline report"); + + let comparison = report + .compare_to_baseline_path(&baseline_path) + .expect("load comparison"); + let json = report + .render_json_with_comparison(Some(&comparison)) + .expect("render comparison json"); + let parsed: Value = serde_json::from_str(&json).expect("parse comparison json"); + + assert_eq!( + parsed["comparison"]["summary"]["compared_scenario_count"], + 2 + ); + assert_eq!( + parsed["comparison"]["summary"]["largest_wall_improvement"]["id"].as_str(), + Some("fast-scenario") + ); + assert_eq!( + parsed["comparison"]["summary"]["largest_wall_regression"]["id"].as_str(), + Some("slow-scenario") + ); + assert_eq!( + parsed["comparison"]["scenario_deltas"][0]["wall_mean_ms"]["delta_ms"], + -3.0 + ); + assert_eq!( + parsed["comparison"]["scenario_deltas"][1]["wall_mean_ms"]["delta_ms"], + 4.0 + ); + assert!( + parsed["comparison"]["scenario_deltas"][0]["phase_mean_ms"].is_null(), + "phase deltas should stay absent when the baseline artifact has no phase data" + ); + assert_eq!( + parsed["comparison"]["scenarios_missing_from_baseline"][0].as_str(), + Some("current-only") + ); + assert_eq!( + parsed["comparison"]["baseline_only_scenarios"][0].as_str(), + Some("baseline-only") + ); + + let markdown = report.render_markdown_with_comparison(Some(&comparison)); + 
assert!(markdown.contains("## Baseline Comparison")); + assert!(markdown.contains("Context delta (ms)")); + assert!(markdown.contains("Largest wall-time improvement: `fast-scenario`")); + assert!(markdown.contains("Largest wall-time regression: `slow-scenario`")); + assert!(markdown.contains("Scenarios missing from baseline: current-only")); + assert!(markdown.contains("Baseline-only scenarios: baseline-only")); } diff --git a/crates/execution/tests/javascript.rs b/crates/execution/tests/javascript.rs index a7d29ed2e..bbc69d832 100644 --- a/crates/execution/tests/javascript.rs +++ b/crates/execution/tests/javascript.rs @@ -20,6 +20,8 @@ struct NodeImportCacheMetrics { package_type_misses: usize, module_format_hits: usize, module_format_misses: usize, + source_hits: usize, + source_misses: usize, } #[derive(Debug, Clone, PartialEq, Eq)] @@ -80,6 +82,8 @@ fn parse_import_cache_metrics(stderr: &str) -> NodeImportCacheMetrics { package_type_misses: parse_metric_value(metrics_line, "packageTypeMisses"), module_format_hits: parse_metric_value(metrics_line, "moduleFormatHits"), module_format_misses: parse_metric_value(metrics_line, "moduleFormatMisses"), + source_hits: parse_metric_value(metrics_line, "sourceHits"), + source_misses: parse_metric_value(metrics_line, "sourceMisses"), } } @@ -1153,6 +1157,126 @@ console.log(`answer:${dep.answer}`); assert!(second_metrics.resolve_hits >= 2); } +#[test] +fn javascript_execution_reuses_and_invalidates_projected_package_source_cache() { + assert_node_available(); + + let temp = tempdir().expect("create temp dir"); + let projected_root = temp.path().join("projected-node-modules"); + let package_dir = projected_root.join("demo-projected"); + fs::create_dir_all(&package_dir).expect("create projected package dir"); + write_fixture( + &package_dir.join("package.json"), + "{\n \"name\": \"demo-projected\",\n \"type\": \"module\"\n}\n", + ); + write_fixture( + &package_dir.join("entry.js"), + "import { readFileSync } from 
'node:fs';\nexport const answer = 41;\nexport const fsReady = typeof readFileSync === 'function';\n", + ); + write_fixture( + &temp.path().join("entry.mjs"), + r#" +const mod = await import("/root/node_modules/demo-projected/entry.js"); +console.log(`answer:${mod.answer}`); +console.log(`fsReady:${mod.fsReady}`); +"#, + ); + + let mut engine = JavascriptExecutionEngine::default(); + let first_context = engine.create_context(CreateJavascriptContextRequest { + vm_id: String::from("vm-js"), + bootstrap_module: None, + compile_cache_root: None, + }); + let projected_root_host_path = projected_root.to_string_lossy().replace('\\', "\\\\"); + let extra_fs_read_paths_json = format!( + "[\"{}\"]", + projected_root.to_string_lossy().replace('\\', "\\\\") + ); + let debug_env = BTreeMap::from([ + ( + String::from("AGENT_OS_EXTRA_FS_READ_PATHS"), + extra_fs_read_paths_json, + ), + ( + String::from("AGENT_OS_GUEST_PATH_MAPPINGS"), + format!( + "[{{\"guestPath\":\"/root/node_modules\",\"hostPath\":\"{projected_root_host_path}\"}}]" + ), + ), + ( + String::from("AGENT_OS_NODE_IMPORT_CACHE_DEBUG"), + String::from("1"), + ), + ]); + + let (first_stdout, first_stderr, first_exit) = run_javascript_execution( + &mut engine, + first_context.context_id, + temp.path(), + vec![String::from("./entry.mjs")], + debug_env.clone(), + ); + let first_metrics = parse_import_cache_metrics(&first_stderr); + + assert_eq!(first_exit, 0, "stderr: {first_stderr}"); + assert!(first_stdout.contains("answer:41"), "stdout: {first_stdout}"); + assert!( + first_stdout.contains("fsReady:true"), + "stdout: {first_stdout}" + ); + assert_eq!(first_metrics.source_hits, 0, "stderr: {first_stderr}"); + assert!(first_metrics.source_misses >= 1, "stderr: {first_stderr}"); + + let second_context = engine.create_context(CreateJavascriptContextRequest { + vm_id: String::from("vm-js"), + bootstrap_module: None, + compile_cache_root: None, + }); + let (second_stdout, second_stderr, second_exit) = run_javascript_execution( 
+ &mut engine, + second_context.context_id, + temp.path(), + vec![String::from("./entry.mjs")], + debug_env.clone(), + ); + let second_metrics = parse_import_cache_metrics(&second_stderr); + + assert_eq!(second_exit, 0, "stderr: {second_stderr}"); + assert!( + second_stdout.contains("answer:41"), + "stdout: {second_stdout}" + ); + assert!(second_metrics.source_hits >= 1, "stderr: {second_stderr}"); + + write_fixture( + &package_dir.join("entry.js"), + "import { readFileSync } from 'node:fs';\nexport const answer = 42;\nexport const fsReady = typeof readFileSync === 'function';\n", + ); + + let third_context = engine.create_context(CreateJavascriptContextRequest { + vm_id: String::from("vm-js"), + bootstrap_module: None, + compile_cache_root: None, + }); + let (third_stdout, third_stderr, third_exit) = run_javascript_execution( + &mut engine, + third_context.context_id, + temp.path(), + vec![String::from("./entry.mjs")], + debug_env, + ); + let third_metrics = parse_import_cache_metrics(&third_stderr); + + assert_eq!(third_exit, 0, "stderr: {third_stderr}"); + assert!(third_stdout.contains("answer:42"), "stdout: {third_stdout}"); + assert!( + third_stdout.contains("fsReady:true"), + "stdout: {third_stdout}" + ); + assert!(third_metrics.source_misses >= 1, "stderr: {third_stderr}"); +} + #[test] fn javascript_execution_redirects_computed_node_fs_imports_through_builtin_assets() { assert_node_available(); diff --git a/package.json b/package.json index f1f2cc459..8b0c76d39 100644 --- a/package.json +++ b/package.json @@ -19,12 +19,14 @@ "devDependencies": { "@biomejs/biome": "^2.3", "@copilotkit/llmock": "^1.6.0", + "@rivet-dev/agent-os": "workspace:*", "@rivet-dev/agent-os-claude": "workspace:*", - "@rivet-dev/agent-os-common": "workspace:*", "@rivet-dev/agent-os-codex-agent": "workspace:*", - "@rivet-dev/agent-os": "workspace:*", + "@rivet-dev/agent-os-common": "workspace:*", "@rivet-dev/agent-os-pi": "workspace:*", "@types/node": "^22.19.15", + "jszip": "^3.10.1", + 
"pdf-lib": "^1.17.1", "turbo": "^2.5.6", "typescript": "^5.9.2" }, diff --git a/packages/core/tests/pi-headless.test.ts b/packages/core/tests/pi-headless.test.ts index b76345dd7..450fe58ef 100644 --- a/packages/core/tests/pi-headless.test.ts +++ b/packages/core/tests/pi-headless.test.ts @@ -126,14 +126,63 @@ console.log("messages:" + JSON.stringify(parsed.messages)); expect(stdout).toContain('messages:["hello"]'); }, 30_000); - // TODO: Full PI headless execution is blocked by two current VM limitations: - // 1. ESM module linking: V8 Rust runtime doesn't forward named exports from - // host-loaded modules (ModuleAccessFileSystem overlay). VFS modules work fine. - // PI's CLI must run as ESM (has async top-level main()), but ESM mode can't - // load host modules with named exports. - // 2. CJS mode: Works for loading PI's modules, but the V8 session doesn't - // process the event loop after synchronous code finishes, so async main() - // never completes. - // Fix: Either fix V8 module linking for overlay modules, or add event loop - // processing to CJS session mode. 
+ test("CLI-backed PI headless session completes a real prompt turn", async () => { + const { sessionId } = await vm.createSession("pi-cli", { + env: { + ANTHROPIC_API_KEY: "mock-key", + ANTHROPIC_BASE_URL: mockUrl, + }, + }); + + try { + const response = await vm.prompt( + sessionId, + "Reply with exactly: Hello from llmock", + ); + + expect(response.error).toBeUndefined(); + expect((response.result as { stopReason?: string }).stopReason).toBe( + "end_turn", + ); + expect(response.result).toBeDefined(); + expect( + vm + .listProcesses() + .some( + (process) => + process.running && + process.command === "node" && + process.args.some((arg) => arg.includes("pi-acp")), + ), + ).toBe(true); + } finally { + vm.closeSession(sessionId); + } + }, 90_000); + + test("standalone PI CLI is not exposed on the native sidecar PATH", async () => { + let stdout = ""; + let stderr = ""; + + const { pid } = vm.spawn("pi", ["-p", "--no-session", "hello"], { + onStdout: (data: Uint8Array) => { + stdout += new TextDecoder().decode(data); + }, + onStderr: (data: Uint8Array) => { + stderr += new TextDecoder().decode(data); + }, + env: { + HOME: "/home/user", + PI_OFFLINE: "1", + ANTHROPIC_API_KEY: "mock-key", + ANTHROPIC_BASE_URL: mockUrl, + }, + }); + + const exitCode = await vm.waitProcess(pid); + + expect(exitCode).toBe(1); + expect(stdout).toBe(""); + expect(stderr).toContain("command not found on native sidecar path: pi"); + }, 30_000); }); diff --git a/packages/core/tests/software-projection.test.ts b/packages/core/tests/software-projection.test.ts index 7f5982017..9044e5b7f 100644 --- a/packages/core/tests/software-projection.test.ts +++ b/packages/core/tests/software-projection.test.ts @@ -1,7 +1,7 @@ import { existsSync } from "node:fs"; -import { afterEach, describe, expect, test } from "vitest"; import common, { coreutils } from "@rivet-dev/agent-os-common"; import pi from "@rivet-dev/agent-os-pi"; +import { afterEach, describe, expect, test } from "vitest"; import { AgentOs } 
from "../src/agent-os.js"; const hasRegistryCommands = existsSync(coreutils.commandDir); @@ -71,6 +71,44 @@ describe("software projection on the sidecar path", () => { expect(stdout).toContain("agent true"); }); + test("keeps projected package roots read-only on the sidecar path", async () => { + vm = await AgentOs.create({ + moduleAccessCwd: "/tmp", + software: [pi], + }); + + let stdout = ""; + let stderr = ""; + const { pid } = vm.spawn( + "node", + [ + "-e", + [ + "const fs = require('node:fs');", + "try {", + " fs.appendFileSync('/root/node_modules/@rivet-dev/agent-os-pi/package.json', '\\nblocked');", + " console.log('write:unexpected-success');", + "} catch (error) {", + " console.log('writeError', error && error.code);", + "}", + ].join(" "), + ], + { + onStdout: (chunk) => { + stdout += Buffer.from(chunk).toString("utf8"); + }, + onStderr: (chunk) => { + stderr += Buffer.from(chunk).toString("utf8"); + }, + }, + ); + + const exitCode = await waitForExit(vm, pid); + expect({ exitCode, stderr }).toEqual({ exitCode: 0, stderr: "" }); + expect(stdout).not.toContain("write:unexpected-success"); + expect(stdout).toMatch(/writeError (ERR_ACCESS_DENIED|EACCES|EPERM|EROFS)/); + }); + test.skipIf(!hasRegistryCommands)( "preserves registry meta-package command injection on the sidecar path", async () => { diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 99a3cd084..da6499252 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -43,6 +43,12 @@ importers: '@types/node': specifier: ^22.19.15 version: 22.19.15 + jszip: + specifier: ^3.10.1 + version: 3.10.1 + pdf-lib: + specifier: ^1.17.1 + version: 1.17.1 turbo: specifier: ^2.5.6 version: 2.9.1 @@ -1353,6 +1359,7 @@ packages: '@copilotkit/llmock@1.6.0': resolution: {integrity: sha512-wq4J7ampjoEiOi6v2d7GMK5lTZcTnuhMduSPCIwmyxBTCPA3lekXyNKGJ4t3xM5OgoJReMQ5KmlfrMBVTRNGsA==} engines: {node: '>=20.15.0'} + deprecated: This package has moved to @copilotkit/aimock hasBin: true '@esbuild/aix-ppc64@0.21.5': @@ -1928,6 +1935,12 
@@ packages: cpu: [x64] os: [win32] + '@pdf-lib/standard-fonts@1.0.0': + resolution: {integrity: sha512-hU30BK9IUN/su0Mn9VdlVKsWBS6GyhVfqjwl1FjZN4TxP6cCw0jP2w7V3Hf5uX7M0AZJ16vey9yE0ny7Sa59ZA==} + + '@pdf-lib/upng@1.0.1': + resolution: {integrity: sha512-dQK2FUMQtowVP00mtIksrlZhdFXQZPC+taih1q4CvPZ5vqdxR/LKBaFg0oAfzd1GlHZXXSPdQfzQnt+ViGvEIQ==} + '@pinojs/redact@0.4.0': resolution: {integrity: sha512-k2ENnmBugE/rzQfEcdWHcCY+/FM3VLzH9cYEsbdsoqrvzAKRhUZeRNhAZvB8OitQJ1TBed3yqWtdjzS6wJKBwg==} @@ -3208,6 +3221,9 @@ packages: resolution: {integrity: sha512-Hs59xBNfUIunMFgWAbGX5cq6893IbWg4KnrjbYwX3tx0ztorVgTDA6B2sxf8ejHJ4wz8BqGUMYlnzNBer5NvGg==} engines: {node: '>= 4'} + immediate@3.0.6: + resolution: {integrity: sha512-XXOFtyqDjNDAQxVfYxuF7g9Il/IbWmmlQg2MYKOH8ExIT1qg6xc4zyS3HaEEATgs1btfzxq15ciUiY7gjSXRGQ==} + inherits@2.0.4: resolution: {integrity: sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==} @@ -3293,6 +3309,9 @@ packages: json-schema@0.4.0: resolution: {integrity: sha512-es94M3nTIfsEPisRafak+HDLfHXnKBhV3vU5eqPcS3flIWqcxJWgXHXiey3YrpaNsanY5ei1VoYEbOzijuq9BA==} + jszip@3.10.1: + resolution: {integrity: sha512-xXDvecyTpGLrqFrvkrUSoxxfJI5AH7U8zxxtVclpsUtMCq4JQ290LY8AW5c7Ggnr/Y/oK+bQMbqK2qmtk3pN4g==} + jwa@2.0.1: resolution: {integrity: sha512-hRF04fqJIP8Abbkq5NKGN0Bbr3JxlQ+qhZufXVr0DvujKy93ZCbXZMHDL4EOtodSbCWxOqR8MS1tXA5hwqCXDg==} @@ -3302,6 +3321,9 @@ packages: koffi@2.15.2: resolution: {integrity: sha512-r9tjJLVRSOhCRWdVyQlF3/Ugzeg13jlzS4czS82MAgLff4W+BcYOW7g8Y62t9O5JYjYOLAjAovAZDNlDfZNu+g==} + lie@3.3.0: + resolution: {integrity: sha512-UaiMJzeWRlEujzAuw5LokY1L5ecNQYZKfmyZ9L7wDHb/p5etKaxXhohBcrw0EYby+G/NA52vRSN4N39dxHAIwQ==} + lines-and-columns@1.2.4: resolution: {integrity: sha512-7ylylesZQ/PV29jhEDl3Ufjo6ZX7gCqJr5F7PKrqc93v7fzSymt1BpwEU8nAUXs8qzzvqhbjhK5QZg6Mt/HkBg==} @@ -3565,6 +3587,9 @@ packages: resolution: {integrity: sha512-Q3CG/cYvCO1ye4QKkuH7EXxs3VC/rI1/trd+qX2+PolbaKG0H+bgcZzrTt96mMyRtejk+JMCiLUn3y29W8qmFQ==} 
engines: {node: '>= 0.10'} + pdf-lib@1.17.1: + resolution: {integrity: sha512-V/mpyJAoTsN4cnP31vc0wfNA1+p20evqqnap0KLoRUN0Yk/p3wN52DOEsL4oBFcLdb76hlpKPtzJIgo67j/XLw==} + pend@1.2.0: resolution: {integrity: sha512-F3asv42UuXchdzt+xXqfW1OGlVBe+mxa2mqI0pg5yAHZPvFmY3Y6drSf/GQ1A86WgWEN9Kzh/WrgKa6iGcHXLg==} @@ -4000,6 +4025,9 @@ packages: ts-interface-checker@0.1.13: resolution: {integrity: sha512-Y/arvbn+rrz3JCKl9C4kVNfTfSm2/mEp5FSz5EsZSANGPSlQrpRI5M4PKF+mJnE52jOO90PnPSc3Ur3bTQw0gA==} + tslib@1.14.1: + resolution: {integrity: sha512-Xni35NKzjgMrwevysHTCArtLDpPvye8zV/0E4EyYn43P7/7qvQwPh9BGkHewbMulVntbigmcT7rdX3BNo9wRJg==} + tslib@2.8.1: resolution: {integrity: sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==} @@ -5325,6 +5353,14 @@ snapshots: '@oven/bun-windows-x64@1.3.11': optional: true + '@pdf-lib/standard-fonts@1.0.0': + dependencies: + pako: 1.0.11 + + '@pdf-lib/upng@1.0.1': + dependencies: + pako: 1.0.11 + '@pinojs/redact@0.4.0': {} '@protobufjs/aspromise@1.1.2': {} @@ -6846,6 +6882,8 @@ snapshots: ignore@7.0.5: {} + immediate@3.0.6: {} + inherits@2.0.4: {} ini@1.3.8: {} @@ -6920,6 +6958,13 @@ snapshots: json-schema@0.4.0: {} + jszip@3.10.1: + dependencies: + lie: 3.3.0 + pako: 1.0.11 + readable-stream: 2.3.8 + setimmediate: 1.0.5 + jwa@2.0.1: dependencies: buffer-equal-constant-time: 1.0.1 @@ -6934,6 +6979,10 @@ snapshots: koffi@2.15.2: optional: true + lie@3.3.0: + dependencies: + immediate: 3.0.6 + lines-and-columns@1.2.4: {} locate-path@6.0.0: @@ -7184,6 +7233,13 @@ snapshots: sha.js: 2.4.12 to-buffer: 1.2.2 + pdf-lib@1.17.1: + dependencies: + '@pdf-lib/standard-fonts': 1.0.0 + '@pdf-lib/upng': 1.0.1 + pako: 1.0.11 + tslib: 1.14.1 + pend@1.2.0: {} pi-acp@0.0.23: @@ -7705,6 +7761,8 @@ snapshots: ts-interface-checker@0.1.13: {} + tslib@1.14.1: {} + tslib@2.8.1: {} tsx@4.21.0: diff --git a/scripts/benchmarks/bench-utils.ts b/scripts/benchmarks/bench-utils.ts index c463ec1f5..6be00efa2 100644 --- 
a/scripts/benchmarks/bench-utils.ts +++ b/scripts/benchmarks/bench-utils.ts @@ -5,6 +5,7 @@ import codex from "@rivet-dev/agent-os-codex-agent"; import pi from "@rivet-dev/agent-os-pi"; import { LLMock } from "@copilotkit/llmock"; import os from "node:os"; +import { resolve } from "node:path"; // Benchmark parameters. Keep batch sizes minimal for fast iteration. export const BATCH_SIZES = [1, 10]; @@ -14,6 +15,15 @@ export const MAX_CONCURRENCY = Math.max(1, os.availableParallelism() - 4); export const ECHO_COMMAND = "echo hello"; export const EXPECTED_OUTPUT = "hello\n"; +export const PI_BENCHMARK_PROMPT = "Reply with exactly: Hello from llmock"; +export const PI_HEADLESS_BLOCKER_REFERENCE = + "packages/core/tests/pi-headless.test.ts"; +export const PI_HEADLESS_BLOCKER_REASON = + 'Standalone `spawn("pi", ...)` is not exposed on the native sidecar PATH; use `createSession("pi-cli")` to benchmark the native PI CLI RPC path tracked in packages/core/tests/pi-headless.test.ts.'; +const BENCHMARK_MODULE_ACCESS_CWD = resolve( + import.meta.dirname, + "../../packages/core", +); // ── Shared mock LLM server ───────────────────────────────────────── @@ -29,7 +39,10 @@ export async function ensureLlmock(): Promise<{ if (_llmock) return { url: _llmockUrl!, port: _llmockPort! }; _llmock = new LLMock({ port: 0, logLevel: "silent" }); _llmock.addFixtures([ - { match: { predicate: () => true }, response: { content: "ok" } }, + { + match: { predicate: () => true }, + response: { content: "Hello from llmock" }, + }, ]); _llmockUrl = await _llmock.start(); _llmockPort = Number(new URL(_llmockUrl).port); @@ -46,15 +59,32 @@ export async function stopLlmock(): Promise { } } +export function getLlmockRequestCount(): number { + return _llmock?.getRequests().length ?? 
0; +} + // ── Workload abstraction ──────────────────────────────────────────── +export interface WorkloadObservation { + promptCompleted?: boolean; + providerRequestCount?: number; + sessionUpdateCount?: number; + textEventCount?: number; + finalText?: string | null; + stopReason?: string; + workloadPath?: string; + substituteReason?: string; + blockerReference?: string; + blockerReason?: string; +} + /** A workload describes how to create a VM and start a long-running process for memory measurement. */ export interface Workload { name: string; description: string; createVm: () => Promise; /** Start a long-running process so the Worker thread stays alive. */ - start: (vm: AgentOs) => Promise | void; + start: (vm: AgentOs) => Promise | WorkloadObservation | void; /** Verify the expected processes are running. Throws if not. */ verify: (vm: AgentOs) => void; /** Time to wait after start for the process to fully initialize. */ @@ -104,6 +134,101 @@ function makeAgentSessionWorkload(opts: { }; } +function getTextEventPayload( + event: unknown, +): { text?: string; type?: string } | undefined { + if (!event || typeof event !== "object") { + return undefined; + } + const params = (event as { params?: unknown }).params; + if (!params || typeof params !== "object") { + return undefined; + } + return params as { text?: string; type?: string }; +} + +function makeAgentPromptWorkload(opts: { + agentId: string; + description: string; + software: SoftwareInput[]; + processMarker: string; + prompt: string; +}): Workload { + return { + name: `${opts.agentId}-prompt-turn`, + description: opts.description, + createVm: async () => { + const { port } = await ensureLlmock(); + return AgentOs.create({ + loopbackExemptPorts: [port], + moduleAccessCwd: BENCHMARK_MODULE_ACCESS_CWD, + software: opts.software, + }); + }, + start: async (vm) => { + const { url } = await ensureLlmock(); + const { sessionId } = await vm.createSession(opts.agentId, { + env: { + ANTHROPIC_API_KEY: "bench-key", 
+ ANTHROPIC_BASE_URL: url, + }, + }); + + const events: unknown[] = []; + const unsubscribe = vm.onSessionEvent(sessionId, (event) => { + events.push(event); + }); + const requestCountBefore = getLlmockRequestCount(); + + try { + const response = await vm.prompt(sessionId, opts.prompt); + if (response.error) { + throw new Error( + `${opts.agentId} prompt workload failed: ${response.error.message}`, + ); + } + const textEvents = events + .map(getTextEventPayload) + .filter((event) => event?.type === "text"); + const finalText = textEvents.at(-1)?.text ?? null; + const providerRequestCount = + getLlmockRequestCount() - requestCountBefore; + + return { + promptCompleted: true, + providerRequestCount, + sessionUpdateCount: events.length, + textEventCount: textEvents.length, + finalText, + stopReason: (response.result as { stopReason?: string } | undefined) + ?.stopReason, + workloadPath: + 'createSession("pi-cli") + vm.prompt(...) via pi-acp -> PI CLI --mode rpc', + blockerReference: PI_HEADLESS_BLOCKER_REFERENCE, + blockerReason: PI_HEADLESS_BLOCKER_REASON, + } satisfies WorkloadObservation; + } finally { + unsubscribe(); + } + }, + verify: (vm) => { + const procs = vm.listProcesses(); + const running = procs.filter((p) => p.running); + const hasAgent = running.some( + (p) => + p.command === "node" && + p.args.some((a) => a.includes(opts.processMarker)), + ); + if (!hasAgent) { + throw new Error( + `Expected running ${opts.processMarker} process, got: ${JSON.stringify(running.map((p) => ({ cmd: p.command, args: p.args })))}`, + ); + } + }, + settleMs: 2000, + }; +} + export const WORKLOADS: Record = { sleep: { name: "sleep", @@ -132,6 +257,14 @@ export const WORKLOADS: Record = { software: [pi], processMarker: "agent-os-pi", }), + "pi-prompt-turn": makeAgentPromptWorkload({ + agentId: "pi-cli", + description: + 'Native PI CLI headless benchmark path via createSession("pi-cli"), which drives the real PI CLI through pi-acp RPC mode and records a full prompt turn.', + 
software: [], + processMarker: "pi-acp", + prompt: PI_BENCHMARK_PROMPT, + }), "claude-session": makeAgentSessionWorkload({ agentId: "claude", description: "VM with Claude agent session via createSession", diff --git a/scripts/benchmarks/coldstart.bench.ts b/scripts/benchmarks/coldstart.bench.ts index f70ff7369..171222ca8 100644 --- a/scripts/benchmarks/coldstart.bench.ts +++ b/scripts/benchmarks/coldstart.bench.ts @@ -4,19 +4,26 @@ * Measures time from AgentOs.create() through workload ready: * --workload=echo Minimal VM + first exec("echo hello") completing * --workload=pi-session VM + createSession("pi") completing (ACP handshake done) + * --workload=pi-prompt-turn VM + createSession("pi-cli") + first prompt turn completing * --workload=claude-session VM + createSession("claude") completing (ACP handshake done) * --workload=codex-session VM + createSession("codex") completing (ACP handshake done) * + * `pi-prompt-turn` now benchmarks the native PI CLI path through + * `createSession("pi-cli")`, which uses `pi-acp` to drive the real PI CLI in + * RPC mode. The same PI headless test file documents that raw `spawn("pi", ...)` + * is still not exposed on the native sidecar PATH. + * * Pass --iterations=N to override default (5). 
* * Usage: - * npx tsx scripts/benchmarks/coldstart.bench.ts --workload=echo - * npx tsx scripts/benchmarks/coldstart.bench.ts --workload=pi-session --iterations=3 - * npx tsx scripts/benchmarks/coldstart.bench.ts --workload=claude-session --iterations=3 + * pnpm exec tsx scripts/benchmarks/coldstart.bench.ts --workload=echo + * pnpm exec tsx scripts/benchmarks/coldstart.bench.ts --workload=pi-session --iterations=3 + * pnpm exec tsx scripts/benchmarks/coldstart.bench.ts --workload=claude-session --iterations=3 */ import { ITERATIONS, + type WorkloadObservation, WARMUP_ITERATIONS, WORKLOADS, createBenchVm, @@ -29,9 +36,19 @@ import { stopLlmock, } from "./bench-utils.js"; -const VALID_WORKLOADS = ["echo", ...Object.keys(WORKLOADS).filter((k) => k.endsWith("-session"))]; +const VALID_WORKLOADS = [ + "echo", + ...Object.keys(WORKLOADS).filter( + (k) => k.endsWith("-session") || k.endsWith("-turn"), + ), +]; + +interface Measurement { + ms: number; + observation?: WorkloadObservation; +} -async function measureEcho(): Promise { +async function measureEcho(): Promise { const t0 = performance.now(); const vm = await createBenchVm(); const result = await vm.exec(ECHO_COMMAND); @@ -40,17 +57,17 @@ async function measureEcho(): Promise { throw new Error(`Unexpected output: ${JSON.stringify(result.stdout)}`); } await vm.dispose(); - return ms; + return { ms }; } -async function measureAgentSession(workloadName: string): Promise { +async function measureAgentSession(workloadName: string): Promise { const workload = WORKLOADS[workloadName]; const t0 = performance.now(); const vm = await workload.createVm(); - await workload.start(vm); + const observation = await workload.start(vm); const ms = performance.now() - t0; await vm.dispose(); - return ms; + return { ms, observation }; } function parseArgs(): { workload: string; iterations: number } { @@ -59,7 +76,7 @@ function parseArgs(): { workload: string; iterations: number } { if (!wArg) { console.error( - `Usage: npx tsx 
coldstart.bench.ts --workload=${VALID_WORKLOADS.join("|")} [--iterations=N]`, + `Usage: pnpm exec tsx coldstart.bench.ts --workload=${VALID_WORKLOADS.join("|")} [--iterations=N]`, ); process.exit(1); } @@ -91,11 +108,15 @@ async function main() { console.error(`Iterations: ${iterations} (+ ${WARMUP_ITERATIONS} warmup)`); const samples: number[] = []; + let lastObservation: WorkloadObservation | undefined; for (let i = 0; i < WARMUP_ITERATIONS + iterations; i++) { - const ms = await measure(); + const { ms, observation } = await measure(); if (i >= WARMUP_ITERATIONS) { samples.push(ms); + if (observation) { + lastObservation = observation; + } } console.error( ` iter ${i}: ${round(ms)}ms${i < WARMUP_ITERATIONS ? " (warmup)" : ""}`, @@ -109,8 +130,27 @@ async function main() { [["cold start", `${s.mean}ms`, `${s.p50}ms`, `${s.p95}ms`, `${s.min}ms`, `${s.max}ms`]], ); + if (lastObservation) { + console.error( + `observed work: providerRequests=${lastObservation.providerRequestCount ?? 0} textEvents=${lastObservation.textEventCount ?? 0} stopReason=${lastObservation.stopReason ?? 
"n/a"}`, + ); + if (lastObservation.finalText) { + console.error(`final text: ${JSON.stringify(lastObservation.finalText)}`); + } + } + console.log( - JSON.stringify({ hardware, workload, iterations, coldStart: s }, null, 2), + JSON.stringify( + { + hardware, + workload, + iterations, + coldStart: s, + observation: lastObservation, + }, + null, + 2, + ), ); await stopLlmock(); diff --git a/scripts/benchmarks/run-benchmarks.sh b/scripts/benchmarks/run-benchmarks.sh index 0f731be18..4ee43b67f 100755 --- a/scripts/benchmarks/run-benchmarks.sh +++ b/scripts/benchmarks/run-benchmarks.sh @@ -16,7 +16,7 @@ run() { shift echo "" >&2 echo "=== Running $name ===" >&2 - npx tsx "$@" \ + pnpm exec tsx "$@" \ 1> "$RESULTS_DIR/${name}.json" \ 2> >(tee "$RESULTS_DIR/${name}.log" >&2) } @@ -25,6 +25,9 @@ run() { run "coldstart-echo" \ scripts/benchmarks/coldstart.bench.ts --workload=echo +run "coldstart-pi-prompt-turn" \ + scripts/benchmarks/coldstart.bench.ts --workload=pi-prompt-turn --iterations=3 + # Memory benchmarks # run "memory-sleep" \ # --expose-gc scripts/benchmarks/memory.bench.ts --workload=sleep --count=5 diff --git a/scripts/ralph/prd.json b/scripts/ralph/prd.json deleted file mode 100644 index 1eb17e05c..000000000 --- a/scripts/ralph/prd.json +++ /dev/null @@ -1,208 +0,0 @@ -{ - "project": "agentOS", - "branchName": "04-01-feat_rust_kernel_sidecar", - "description": "Close remaining parity gaps between the Rust kernel sidecar and the old in-process TypeScript kernel", - "userStories": [ - { - "id": "US-001", - "title": "Implement real socketTable and processTable on NativeKernel", - "description": "As a developer running kernel tests, I need NativeKernel's socketTable and processTable to return real state from the Rust sidecar so that existing callers and tests work correctly.", - "acceptanceCriteria": [ - "socketTable.findListener() queries the sidecar and returns the matching listener or null (not always null)", - "socketTable.findBoundUdp() queries the sidecar 
and returns the matching bound socket or null (not always null)", - "processTable.getSignalState() returns the actual signal handler map from the sidecar (not an empty map)", - "registry/tests/kernel/cross-runtime-network.test.ts passes against the real sidecar", - "registry/tests/wasmvm/signal-handler.test.ts passes against the real sidecar", - "Typecheck passes", - "Tests pass" - ], - "priority": 1, - "passes": true, - "notes": "packages/core/src/runtime.ts:1855-1860 — socketTable is a minimal stub, processTable.getSignalState() returns empty map. Callers at registry/tests/kernel/cross-runtime-network.test.ts:43,:153 and registry/tests/wasmvm/signal-handler.test.ts:150 expect real data." - }, - { - "id": "US-002", - "title": "Add sidecar protocol support for socketTable and processTable queries", - "description": "As the NativeKernel implementation, I need sidecar protocol request/response types to query socket listeners, bound UDP sockets, and process signal state so US-001 can proxy real data.", - "acceptanceCriteria": [ - "New protocol request types: FindListener, FindBoundUdp, GetSignalState added to crates/sidecar/src/protocol.rs", - "Sidecar handles these requests and returns current kernel state", - "NativeSidecarKernelProxy or NativeSidecarProcessClient exposes methods for these queries", - "Typecheck passes" - ], - "priority": 2, - "passes": true, - "notes": "This is the Rust-side counterpart to US-001. The sidecar protocol currently has no request types for observability queries." 
- }, - { - "id": "US-003", - "title": "Implement proper hard link in js_bridge filesystem", - "description": "As a user performing link() on a js_bridge-backed mount, I need real hard-link semantics instead of read-then-write so that inode identity and link counts are preserved.", - "acceptanceCriteria": [ - "crates/sidecar/src/service.rs link() uses a proper bridge link operation instead of read_file + write_file", - "After link(a, b), both paths share the same inode identity", - "Link count reflects the number of hard links", - "Writing to one path is visible through the other path", - "Typecheck passes", - "Tests pass" - ], - "priority": 3, - "passes": true, - "notes": "crates/sidecar/src/service.rs:520 implements link() as read-then-write, losing hard-link identity and link-count semantics." - }, - { - "id": "US-004", - "title": "Implement chown and utimes in js_bridge filesystem", - "description": "As a user performing chown() or utimes() on a js_bridge-backed mount, I need these operations to actually update metadata instead of silently no-opping.", - "acceptanceCriteria": [ - "chown() updates the owner/group metadata via the bridge", - "utimes() updates atime/mtime metadata via the bridge", - "stat() after chown/utimes reflects the updated values", - "Typecheck passes", - "Tests pass" - ], - "priority": 4, - "passes": true, - "notes": "crates/sidecar/src/service.rs:538 chown() and :542 utimes() are no-ops returning Ok(())." 
- }, - { - "id": "US-005", - "title": "Add symlink, readlink, link, chmod, chown, utimes support to sandbox_agent plugin", - "description": "As a user of sandbox_agent mounts, I need filesystem operations beyond basic read/write/stat so that tools relying on symlinks, permissions, or timestamps work correctly.", - "acceptanceCriteria": [ - "symlink() creates a symbolic link via the sandbox agent API (or returns a clear not-supported-by-remote error if the API lacks it)", - "read_link() resolves symlinks via the sandbox agent API", - "realpath() resolves remote symlinks instead of just normalizing the path locally", - "link() creates hard links or returns a clear error", - "chmod() updates permissions or returns a clear error", - "chown() updates ownership or returns a clear error", - "utimes() updates timestamps or returns a clear error", - "Typecheck passes", - "Tests pass" - ], - "priority": 5, - "passes": true, - "notes": "crates/sidecar/src/sandbox_agent_plugin.rs:283 realpath() doesn't resolve remote symlinks. Lines 287,293,308,314,320,326 return unsupported. Line 332 truncate() uses full-file buffering." - }, - { - "id": "US-006", - "title": "Improve sandbox_agent truncate to avoid full-file buffering", - "description": "As a user truncating large files on sandbox_agent mounts, I need truncate() to work without reading the entire file into memory.", - "acceptanceCriteria": [ - "truncate() for non-zero lengths does not read the entire file contents", - "truncate() uses a range-aware API call or server-side truncation", - "truncate(path, 0) still works via write_file with empty data", - "Typecheck passes", - "Tests pass" - ], - "priority": 6, - "passes": true, - "notes": "crates/sidecar/src/sandbox_agent_plugin.rs:332 reads entire file, truncates in memory, writes back. Unacceptable for large files." 
- }, - { - "id": "US-007", - "title": "Configure host filesystem bridge for stdio sidecar path", - "description": "As a user of the local/stdin-stdout sidecar workflow, I need the LocalBridge to support host filesystem operations so that bridge-backed host FS behavior works.", - "acceptanceCriteria": [ - "LocalBridge filesystem operations (read_file, write_file, etc.) delegate to the host filesystem instead of returning 'not configured' errors", - "A local sidecar session can read and write files on the host through the bridge", - "Typecheck passes", - "Tests pass" - ], - "priority": 7, - "passes": true, - "notes": "crates/sidecar/src/stdio.rs:190 starts a LocalBridge whose filesystem operations all return 'host filesystem bridge is not configured' errors." - }, - { - "id": "US-008", - "title": "Separate stderr from stdout in openShell output", - "description": "As a developer using openShell(), I need stderr and stdout to be delivered through separate channels so that error output can be distinguished from normal output.", - "acceptanceCriteria": [ - "openShell() routes stderr to a separate handler set, not the same stdoutHandlers", - "Shell onData callback receives only stdout", - "A new onStderr callback (or tagged output) delivers stderr separately", - "Existing tests that consume shell output continue to pass", - "Typecheck passes", - "Tests pass" - ], - "priority": 8, - "passes": true, - "notes": "native-kernel-proxy.ts:368-370 — onStderr handler iterates stdoutHandlers instead of a separate set, merging stderr into stdout." - }, - { - "id": "US-009", - "title": "Support full signal set in signalProcess instead of SIGKILL/SIGTERM only", - "description": "As a developer sending signals to VM processes, I need the sidecar to accept arbitrary POSIX signals so that SIGUSR1, SIGSTOP, SIGCONT, etc. 
work correctly.", - "acceptanceCriteria": [ - "signalProcess() maps signal numbers to their correct POSIX signal names (not just 9→SIGKILL, everything-else→SIGTERM)", - "KillProcess protocol message accepts the full signal name string", - "Sending SIGUSR1 (10), SIGSTOP (19), SIGCONT (18) delivers the correct signal to the guest process", - "Typecheck passes", - "Tests pass" - ], - "priority": 9, - "passes": true, - "notes": "native-kernel-proxy.ts:631 — signal === 9 ? 'SIGKILL' : 'SIGTERM' discards all other signal types." - }, - { - "id": "US-010", - "title": "Add integration test for connectTerminal", - "description": "As a developer, I need test coverage for connectTerminal() to verify it correctly wires stdin/stdout to a PTY-backed shell.", - "acceptanceCriteria": [ - "New test in packages/core/tests/ calls connectTerminal() and verifies a PID is returned", - "Test writes input and verifies output is received", - "Test verifies the shell is functional (e.g., echo command produces output)", - "Typecheck passes", - "Tests pass" - ], - "priority": 10, - "passes": true, - "notes": "connectTerminal() at native-kernel-proxy.ts:400-402 is implemented but has zero test coverage anywhere in the codebase." - }, - { - "id": "US-011", - "title": "Remove or exercise the dead diagnostics() protocol path", - "description": "As a maintainer, I need the diagnostics() client method to either be called from somewhere useful or removed, so there is no dead code in the protocol layer.", - "acceptanceCriteria": [ - "Either: diagnostics() is wired into AgentOs or a health-check path and has a test proving it works", - "Or: diagnostics() method and Diagnostics protocol type are removed from client and protocol.rs", - "No dead protocol paths remain", - "Typecheck passes" - ], - "priority": 11, - "passes": true, - "notes": "native-process-client.ts:970-995 implements diagnostics(). Protocol has Diagnostics request type. Neither is called anywhere." 
- }, - { - "id": "US-012", - "title": "Replace panics with error returns in sidecar service.rs", - "description": "As a sidecar operator, I need unexpected protocol responses to produce errors instead of crashing the process with panic!().", - "acceptanceCriteria": [ - "service.rs:3028 panic on unexpected auth response replaced with Err return", - "service.rs:3043 panic on unexpected session response replaced with Err return", - "service.rs:3067 panic on unexpected VM response replaced with Err return", - "Sidecar does not crash on malformed responses; returns descriptive error instead", - "Typecheck passes", - "Tests pass" - ], - "priority": 12, - "passes": true, - "notes": "Three panic!() calls in service.rs crash the entire sidecar process on unexpected protocol responses instead of returning errors." - }, - { - "id": "US-013", - "title": "Track zombie process count from sidecar instead of hardcoding 0", - "description": "As a developer monitoring VM health, I need zombieTimerCount to reflect the actual number of zombie processes tracked by the sidecar.", - "acceptanceCriteria": [ - "zombieTimerCount queries or is updated from the sidecar's process table", - "After a child process exits without being waited on, zombieTimerCount reflects the zombie", - "After waitpid cleans up, zombieTimerCount decrements", - "Typecheck passes", - "Tests pass" - ], - "priority": 13, - "passes": true, - "notes": "native-kernel-proxy.ts:124 — readonly zombieTimerCount = 0; never updated." 
- } - ] -} diff --git a/scripts/ralph/progress.txt b/scripts/ralph/progress.txt deleted file mode 100644 index 48820b1e9..000000000 --- a/scripts/ralph/progress.txt +++ /dev/null @@ -1,230 +0,0 @@ -# Ralph Progress Log -Started: Sat Apr 4 02:05:35 PM PDT 2026 ---- -## Codebase Patterns -- When `NativeKernel` creates a sidecar VM with `disableDefaultBaseLayer: true`, rely on the sidecar's minimal root for default POSIX directories instead of re-bootstrapping paths like `/bin` and `/usr/bin/env`, or VM creation will fail with `EEXIST`. -- In this workspace, run registry-targeted Vitest files through `packages/core`'s Vitest installation and config with `--root /home/nathan/a5/registry`; invoking `registry/vitest.config.ts` directly fails because the registry package cannot resolve `vitest/config`. -- For sidecar observability tests, poll `findListener()`, `findBoundUdp()`, or `getSignalState()` directly instead of waiting on short-lived `process_output` events; the query itself is the stable readiness signal. -- For sidecar-managed guest processes, let the real execution exit event drive kernel-handle cleanup; routing non-terminating external signals like `SIGUSR1`, `SIGSTOP`, or `SIGCONT` through `KernelVm::kill_process()` hits the stub driver and incorrectly marks the process exited. -- For `js_bridge` mounts, preserve hard-link semantics inside `HostFilesystem` with sidecar-local inode/link tracking; the bridge contract only exposes path-based file primitives and does not provide native hard-link or inode metadata. -- For `js_bridge` mounts, keep ownership and timestamp mutations in `HostFilesystem` sidecar state keyed by the tracked inode; the bridge `FileMetadata` contract only reports `mode`, `size`, and `kind`, so `stat()` must overlay `uid`/`gid`/time fields locally. 
-- For `sandbox_agent` mounts on `sandbox-agent@0.4.2`, the HTTP fs API only exposes basic file/dir primitives; implement symlink/readlink/realpath/link/chmod/chown/utimes through `/v1/processes/run`, and fail with `ENOSYS` when the remote process API or helper runtime is unavailable. -- For `sandbox_agent` mounts, prefer `/v1/processes/run` helpers for mutating filesystem operations that the HTTP fs API cannot do natively, such as non-zero `truncate()`, so large files are handled server-side instead of via full-file buffering. -- For stdio-sidecar `js_bridge` coverage, mount the guest path to the same absolute host temp directory you want to expose; `ScopedHostFilesystem` prefixes mount-relative paths before they reach `LocalBridge`, so matching the guest mount path to the host path gives a direct end-to-end host filesystem check. -- For shell consumers on the native sidecar path, treat `OpenShellOptions.onStderr` as the separate error channel; `ShellHandle.onData` is stdout-only, so terminal-style UIs must wire both if they want a combined display. -- For native-sidecar `connectTerminal()` coverage, mock `process.stdin`/`stdout` listener registration and drive the captured stdin callback directly; the API returns the shell PID immediately and cleans up host-terminal hooks asynchronously when `shell.wait()` settles. -- For sidecar integration tests, prefer supported requests like `CreateVm`, `DisposeVm`, or `GetSignalState` for ownership and lifecycle assertions instead of adding test-only protocol introspection. -- In sidecar service tests, decode `DispatchResult` payloads through small `Result`-returning helpers so malformed fixtures surface `SidecarError::InvalidState` messages instead of `panic!`ing inside shared setup. -- In the kernel process table, `waitpid()` should reap exited entries immediately and cancel their zombie timer; callers that need zombie-count assertions must observe the count before `waitpid`, not after. 
- -## [2026-04-04 14:31:10 PDT] - US-001 -- Implemented focused coverage for sidecar-backed socket and signal-state queries in `packages/core/tests/native-sidecar-process.test.ts`, including direct protocol checks and `NativeKernel` cache checks. -- Fixed `NativeKernel` sidecar VM initialization in `packages/core/src/runtime.ts` so the sidecar bootstrap no longer collides with the minimal root snapshot on paths like `/bin`. -- Files changed: - - `packages/core/src/runtime.ts` - - `packages/core/tests/native-sidecar-process.test.ts` -- **Learnings for future iterations:** - - The sidecar VM builder inserts a minimal root snapshot when `disableDefaultBaseLayer` is enabled and no lowers are provided; that snapshot already contains the standard root directories and `/usr/bin/env`. - - The real sidecar protocol can be integration-tested without the optional WASM fixture build by using short-lived Node programs that open TCP/UDP sockets or emit `__AGENT_OS_SIGNAL_STATE__:` control messages. - - `registry/tests/kernel/cross-runtime-network.test.ts` and `registry/tests/wasmvm/signal-handler.test.ts` currently skip in this workspace because the WASM binaries are not built, so story closure still depends on a fixture-enabled run. ---- -## [2026-04-04 14:34:51 PDT] - US-001 -- Verified the committed `US-001` implementation by running `pnpm --dir /home/nathan/a5/packages/core check-types` and `pnpm --dir /home/nathan/a5/packages/core exec vitest run tests/native-sidecar-process.test.ts`. -- Ran the story's registry coverage via `pnpm --dir /home/nathan/a5/packages/core exec vitest run --config /home/nathan/a5/packages/core/vitest.config.ts --root /home/nathan/a5/registry tests/kernel/cross-runtime-network.test.ts tests/wasmvm/signal-handler.test.ts`; both suites skipped because the WASM fixtures are not built in this workspace, matching the existing note above. -- Marked `US-001` as passing in `prd.json`. 
-- Files changed: - - `prd.json` - - `progress.txt` -- **Learnings for future iterations:** - - In this checkout, run registry-targeted Vitest files through `packages/core`'s Vitest installation and config while overriding `--root /home/nathan/a5/registry`; invoking `registry/vitest.config.ts` directly fails because the registry package cannot resolve `vitest/config`. - - Fixture-gated registry suites still produce useful verification here: a clean skip confirms the code path loads, while a fixture-enabled environment is still needed for end-to-end execution. ---- -## [2026-04-04 14:41:56 PDT] - US-002 -- Added a Rust-side integration test in `crates/sidecar/tests/socket_state_queries.rs` that exercises `FindListener`, `FindBoundUdp`, and `GetSignalState` against a real sidecar VM with live TCP, UDP, and signal-state fixtures. -- Stabilized `packages/core/tests/native-sidecar-process.test.ts` so the query coverage waits on the observability APIs themselves and explicitly kills the long-lived signal-state fixture during cleanup. -- Marked `US-002` as passing in `prd.json`. -- Files changed: - - `crates/sidecar/tests/socket_state_queries.rs` - - `packages/core/tests/native-sidecar-process.test.ts` - - `prd.json` - - `progress.txt` -- **Learnings for future iterations:** - - The Rust sidecar can exercise these observability queries directly in crate tests by creating a JavaScript VM with `env.AGENT_OS_ALLOWED_NODE_BUILTINS` set to `["net","dgram"]`. - - For socket and signal-state coverage, polling the query endpoints is more reliable than treating `process_output` as the readiness contract. - - If a fixture is kept alive with `setInterval()` for stable observation, the test must send an explicit `killProcess()` before waiting for `process_exited`. 
---- -## [2026-04-04 14:50:33 PDT] - US-003 -- Replaced the `js_bridge` hard-link stub in `crates/sidecar/src/service.rs` with sidecar-local inode tracking so linked paths share contents, preserve `ino`/`nlink`, survive writes through either name, and keep working after the original path is removed. -- Added a `js_bridge` mount regression test in `crates/sidecar/src/service.rs` that exercises link creation, shared writes, inode identity, and unlinking the original path through the mounted VM filesystem. -- Verified the story with `cargo check -p agent-os-sidecar` and `cargo test -p agent-os-sidecar`. -- Files changed: - - `crates/sidecar/src/service.rs` - - `scripts/ralph/prd.json` - - `scripts/ralph/progress.txt` -- **Learnings for future iterations:** - - `HostFilesystem` needs to merge sidecar-tracked hard-link aliases into both stat paths and directory listings because the bridge only knows about the single backing path. - - The `RecordingBridge` fixture does not infer parent directories from seeded files; tests that validate `link()` destination parents must seed the containing directory explicitly. - - Removing the canonical hard-link path on a `js_bridge` mount has to rename the single backing bridge file onto a surviving alias before dropping the old path from sidecar state. ---- -## [2026-04-04 14:57:04 PDT] - US-004 -- Implemented sidecar-local `js_bridge` metadata tracking in `crates/sidecar/src/service.rs` so `chown()` and `utimes()` persist `uid`/`gid` and timestamp overrides through `HostFilesystem` and surface them via `stat()`. -- Added a `js_bridge` regression test in `crates/sidecar/src/service.rs` that updates ownership and timestamps across hard-linked paths and verifies both aliases report the shared metadata. -- Verified the story with `cargo check -p agent-os-sidecar`, `cargo test -p agent-os-sidecar`, and `cargo test -p agent-os-sidecar configure_vm_js_bridge_mount_preserves_ -- --nocapture`. 
-- Files changed: - - `crates/sidecar/src/service.rs` - - `scripts/ralph/prd.json` - - `scripts/ralph/progress.txt` -- **Learnings for future iterations:** - - `HostFilesystem::stat()` needs a follow-symlink metadata lookup path that can reuse tracked inode state even when the original user path is just an alias to the tracked target. - - For `js_bridge`, metadata updates should reuse the same tracked inode state as hard-link aliases so `uid`/`gid` and timestamps stay consistent across every linked path. - - `cargo fmt --all --check` currently reports unrelated formatting drift in `crates/execution` and `crates/kernel`, so story verification here should rely on targeted formatting for touched files plus package-specific check/test commands. ---- -## [2026-04-04 15:08:33 PDT] - US-005 -- Implemented process-backed sandbox-agent filesystem fallbacks in `crates/sidecar/src/sandbox_agent_plugin.rs` so `realpath`, `symlink`, `read_link`, `link`, `chmod`, `chown`, and `utimes` work against remote sandboxes even though the direct HTTP fs API only exposes basic file/dir endpoints. -- Added mock `/v1/processes/run` coverage plus regression tests for the happy path and the clear `ENOSYS` fallback when the remote process API is unavailable. -- Verified the story with `cargo fmt --all -- crates/sidecar/src/sandbox_agent_plugin.rs`, `cargo check -p agent-os-sidecar`, `cargo test -p agent-os-sidecar sandbox_agent_plugin -- --nocapture`, and `cargo test -p agent-os-sidecar`. -- Files changed: - - `crates/sidecar/src/sandbox_agent_plugin.rs` - - `CLAUDE.md` - - `scripts/ralph/prd.json` - - `scripts/ralph/progress.txt` -- **Learnings for future iterations:** - - `sandbox-agent@0.4.2` only exposes `entries`, `file`, `mkdir`, `move`, and `stat` over the fs HTTP API, so richer filesystem semantics need a separate helper path. 
- - The sidecar plugin can safely probe `python3`, `python`, then `node` through `/v1/processes/run` and cache the first working runtime for subsequent filesystem helper calls. - - Mock process helpers that execute on the host must rewrite absolute sandbox paths into the mock root and sanitize JSON path results back to guest-visible paths, or symlink/realpath tests accidentally target the host filesystem. ---- -## [2026-04-04 15:12:28 PDT] - US-006 -- Implemented non-zero `truncate()` in `crates/sidecar/src/sandbox_agent_plugin.rs` through the existing remote process helper path, so sandbox-agent mounts now truncate or extend files server-side instead of downloading the whole file into memory. -- Added a regression test that truncates and extends a large file with `max_full_read_bytes` set below the file size, verifies the on-disk result, confirms `/v1/processes/run` is used, and proves no full-file `GET /v1/fs/file` occurs; also verified `truncate(path, 0)` still uses the empty-write fallback. -- Verified the story with `cargo check -p agent-os-sidecar`, `cargo test -p agent-os-sidecar sandbox_agent_plugin -- --nocapture`, and `cargo test -p agent-os-sidecar`. -- Files changed: - - `crates/sidecar/src/sandbox_agent_plugin.rs` - - `scripts/ralph/prd.json` - - `scripts/ralph/progress.txt` -- **Learnings for future iterations:** - - For sandbox-agent mounts, non-zero truncate should go through `/v1/processes/run` instead of the basic fs API because the HTTP surface cannot do ranged or server-side truncation. - - The mock sandbox-agent request log is enough to assert transport behavior, so regression tests can prove a mount operation avoided `/v1/fs/file` without depending on implementation details. - - Keep `truncate(path, 0)` on the direct `write_file` path; it stays simple and does not need the process helper. 
---- -## [2026-04-04 15:19:17 PDT] - US-007 -- Implemented real host-backed filesystem operations in `crates/sidecar/src/stdio.rs` for the stdio `LocalBridge`, covering reads, writes, metadata, directory listing, mkdir/rmdir, rename, symlink/readlink, chmod, truncate, and existence checks instead of the previous “not configured” errors. -- Added an end-to-end stdio binary regression in `crates/sidecar/tests/stdio_binary.rs` that configures a `js_bridge` mount over a host temp directory, reads a pre-seeded host file through the VM, and writes a new file back onto the host. -- Verified the story with `cargo fmt --all -- crates/sidecar/src/stdio.rs crates/sidecar/tests/stdio_binary.rs`, `cargo check -p agent-os-sidecar`, `cargo test -p agent-os-sidecar --test stdio_binary`, and `cargo test -p agent-os-sidecar`. -- Files changed: - - `crates/sidecar/src/stdio.rs` - - `crates/sidecar/tests/stdio_binary.rs` - - `scripts/ralph/prd.json` - - `scripts/ralph/progress.txt` -- **Learnings for future iterations:** - - The stdio sidecar path can now satisfy `js_bridge` host filesystem calls directly from the local host without any extra bridge bootstrap. - - For end-to-end stdio bridge tests, mounting a guest path that exactly matches the host tempdir path is the simplest way to prove `ScopedHostFilesystem` and `LocalBridge` cooperate correctly. - - `LocalBridge::exists()` should use `symlink_metadata()` rather than `Path::exists()` so dangling symlinks still count as existing bridge entries. ---- -## [2026-04-04 15:23:46 PDT] - US-008 -- Implemented separate stderr routing for native sidecar `openShell()` calls by adding `OpenShellOptions.onStderr`, keeping `ShellHandle.onData` stdout-only, and fixing `native-kernel-proxy.ts` to use a dedicated stderr handler set. -- Updated the headless `TerminalHarness` to subscribe to both stdout and stderr so terminal-style tests still render a combined stream when they need one. 
-- Added a native sidecar regression test that opens a shell, writes stdin, and proves stdout and stderr arrive on distinct callbacks. -- Verified the story with `pnpm --dir /home/nathan/a5/packages/core check-types` and `pnpm --dir /home/nathan/a5/packages/core exec vitest run tests/native-sidecar-process.test.ts tests/shell-flat-api.test.ts`. -- Files changed: - - `packages/core/src/runtime.ts` - - `packages/core/src/sidecar/native-kernel-proxy.ts` - - `packages/core/src/test/terminal-harness.ts` - - `packages/core/tests/native-sidecar-process.test.ts` - - `scripts/ralph/prd.json` - - `scripts/ralph/progress.txt` -- **Learnings for future iterations:** - - `openShell()` on the native sidecar path should treat stderr as an opt-in callback on `OpenShellOptions`; that keeps the existing shell handle shape stable while stopping stderr from polluting stdout-only consumers. - - Terminal-oriented helpers such as `TerminalHarness` should explicitly subscribe to both channels if they want interactive stderr to remain visible after the split. - - A stdin-driven `node -e` shell fixture is a reliable regression test here because it avoids races between shell startup and callback registration. ---- -## [2026-04-04 15:39:00 PDT] - US-009 -- Implemented platform-aware signal-number translation in `packages/core/src/sidecar/native-kernel-proxy.ts` so sidecar protocol kills no longer collapse every non-`9` signal to `SIGTERM`. -- Expanded `crates/sidecar/src/service.rs` signal parsing to accept the broader POSIX signal-name set and stopped mirroring external signals into the kernel stub process table, so non-terminating signals no longer appear to exit immediately. -- Added unit coverage for the TypeScript translation helper and Rust parser, plus a real-sidecar regression in `packages/core/tests/native-sidecar-process.test.ts` that verifies `SIGSTOP`/`SIGCONT` over the protocol using the returned host PID. 
-- Verified the story with `cargo check -p agent-os-sidecar`, `cargo test -p agent-os-sidecar parse_signal_accepts_posix_names_and_aliases -- --nocapture`, `pnpm --dir /home/nathan/a5/packages/core check-types`, and `pnpm --dir /home/nathan/a5/packages/core exec vitest run tests/native-sidecar-process.test.ts`. -- Files changed: - - `crates/sidecar/src/service.rs` - - `packages/core/src/sidecar/native-kernel-proxy.ts` - - `packages/core/tests/native-sidecar-process.test.ts` - - `scripts/ralph/prd.json` - - `scripts/ralph/progress.txt` -- **Learnings for future iterations:** - - The sidecar currently tracks guest runtime processes in `KernelVm` with a stub driver handle, so only real execution exit events should mark those entries finished; synthetic `kill_process()` bookkeeping is wrong for non-terminating signals. - - In Node, `os.constants.signals` is the right source for platform-specific numeric-to-name translation, but platform-conditional names require string-indexed access instead of direct typed property indexing. - - For native sidecar signal regressions, `SIGSTOP`/`SIGCONT` are more reliable to validate via the returned host PID and `ps -o state=` than via guest stdout callbacks. ---- -## [2026-04-04 15:45:26 PDT] - US-010 -- Restored native-sidecar `connectTerminal()` host-terminal wiring in `packages/core/src/sidecar/native-kernel-proxy.ts` so it forwards host stdin to the shell, routes stdout through the optional `onData` callback or host stdout, mirrors stderr to host stderr by default, and cleans up terminal listeners after the shell exits while still returning the shell PID immediately. -- Moved `ConnectTerminalOptions` onto the shared runtime types and re-exported it from `packages/core/src/agent-os.ts` so kernel and AgentOs callers both see the `onData` callback contract. 
-- Added a focused integration regression in `packages/core/tests/native-sidecar-process.test.ts` that mocks host terminal hooks, calls `connectTerminal()`, verifies a PID is returned, feeds stdin through the registered host listener, and asserts the echoed output arrives plus cleanup runs. -- Verified the story with `pnpm --dir /home/nathan/a5/packages/core check-types` and `pnpm --dir /home/nathan/a5/packages/core exec vitest run tests/native-sidecar-process.test.ts`. -- Marked `US-010` as passing in `prd.json`. -- Files changed: - - `packages/core/src/runtime.ts` - - `packages/core/src/agent-os.ts` - - `packages/core/src/sidecar/native-kernel-proxy.ts` - - `packages/core/tests/native-sidecar-process.test.ts` - - `scripts/ralph/prd.json` - - `scripts/ralph/progress.txt` -- **Learnings for future iterations:** - - `connectTerminal()` on the native sidecar path returns the shell PID immediately, so listener cleanup must happen in a detached `shell.wait().finally(...)` path rather than around the method return. - - The shared `ConnectTerminalOptions` type belongs in `runtime.ts`; otherwise `AgentOs` and direct `Kernel` consumers drift and `onData` silently disappears from one public surface. - - A Vitest spy on `process.stdin.on("data", ...)` is enough to exercise host-stdin forwarding deterministically without trying to drive the real terminal in CI. ---- -## [2026-04-04 15:54:20 PDT] - US-011 -- Removed the unused diagnostics protocol path from the sidecar TypeScript client, Rust protocol enums/structs, and the Rust service dispatch layer so no dead request or response variants remain. -- Reworked the affected Rust integration tests to assert ownership and lifecycle behavior through supported requests like `CreateVm` and `GetSignalState`, and replaced the old process-count assertion with a real rerun/recreate flow after cleanup. -- Verified the story with `pnpm --dir /home/nathan/a5/packages/core check-types` and `cargo test -p agent-os-sidecar`. 
-- Marked `US-011` as passing in `prd.json`. -- Files changed: - - `packages/core/src/sidecar/native-process-client.ts` - - `crates/sidecar/src/protocol.rs` - - `crates/sidecar/src/service.rs` - - `crates/sidecar/tests/connection_auth.rs` - - `crates/sidecar/tests/kill_cleanup.rs` - - `crates/sidecar/tests/protocol.rs` - - `crates/sidecar/tests/session_isolation.rs` - - `crates/sidecar/tests/vm_lifecycle.rs` - - `scripts/ralph/prd.json` - - `scripts/ralph/progress.txt` -- **Learnings for future iterations:** - - The diagnostics protocol was only acting as a test-only introspection hook, so ownership and cleanup regressions are better covered with real supported requests instead of hidden observability APIs. - - After terminating or disposing a sidecar guest process, a stronger regression is proving the VM or session can still service a fresh `execute()` or `CreateVm` request than checking internal counters. - - Removing a protocol variant requires updating both codec/response-tracker tests and any integration tests that were using it as a convenience assertion path. ---- -## [2026-04-04 15:59:26 PDT] - US-012 -- Replaced the three `service.rs` test-helper `panic!` paths for auth, session, and VM setup responses with `Result`-returning payload decoders that emit descriptive `SidecarError::InvalidState` messages. -- Added focused regressions that construct malformed `DispatchResult` payloads and assert those helpers now return errors instead of crashing. -- Verified the story with `cargo check -p agent-os-sidecar`, `cargo test -p agent-os-sidecar returns_error_for_unexpected_response -- --nocapture`, and `cargo test -p agent-os-sidecar`. -- Marked `US-012` as passing in `prd.json`. 
-- Files changed: - - `crates/sidecar/src/service.rs` - - `scripts/ralph/prd.json` - - `scripts/ralph/progress.txt` -- **Learnings for future iterations:** - - The auth/session/create-VM setup helpers in `crates/sidecar/src/service.rs` should treat unexpected response kinds as `InvalidState` failures so malformed fixtures fail descriptively without aborting the whole test process. - - Small payload-decoder helpers make it easy to unit-test malformed protocol responses directly with synthetic `DispatchResult` values instead of forcing end-to-end setup to reach each branch. - - No `AGENTS.md` files exist under this workspace path today, so reusable sidecar patterns need to be captured in `progress.txt` until module-level agent guidance is added. ---- -## [2026-04-04 16:10:05 PDT] - US-013 -- Implemented a real zombie-count query path from the Rust sidecar through the native TypeScript proxy, replacing the hardcoded `zombieTimerCount = 0` behavior. -- Fixed kernel `waitpid()` semantics so it reaps exited entries immediately and clears their scheduled zombie timer, then added Rust regressions covering the sidecar request and protocol tracker plus a Vitest regression for the proxy refresh path. -- Verified the story with `cargo test -p agent-os-kernel waitpid_resolves_for_exiting_and_already_exited_processes -- --nocapture`, `cargo test -p agent-os-sidecar --lib get_zombie_timer_count_reports_kernel_state_before_and_after_waitpid -- --nocapture`, `cargo test -p agent-os-sidecar --test protocol response_tracker_accepts_zombie_timer_count_responses -- --nocapture`, `pnpm --dir /home/nathan/a5/packages/core check-types`, and `pnpm --dir /home/nathan/a5/packages/core exec vitest run tests/native-sidecar-process.test.ts`. 
-- Files changed: - - `crates/kernel/src/process_table.rs` - - `crates/kernel/src/kernel.rs` - - `crates/kernel/tests/process_table.rs` - - `crates/sidecar/src/protocol.rs` - - `crates/sidecar/src/service.rs` - - `crates/sidecar/tests/protocol.rs` - - `packages/core/src/runtime.ts` - - `packages/core/src/sidecar/native-kernel-proxy.ts` - - `packages/core/src/sidecar/native-process-client.ts` - - `packages/core/tests/native-sidecar-process.test.ts` - - `scripts/ralph/prd.json` - - `scripts/ralph/progress.txt` -- **Learnings for future iterations:** - - The native sidecar path can expose synchronous kernel state like `zombieTimerCount` by returning the last cached value and kicking off an async sidecar refresh on property access, matching the existing `socketTable`/`processTable` pattern. - - `ProcessTable::waitpid()` is the correct place to reap zombies and cancel reaper deadlines; otherwise any exported zombie-count metric stays artificially high after callers have already waited the child. - - No relevant `AGENTS.md` files exist near `crates/kernel`, `crates/sidecar`, or `packages/core`, so reusable guidance for those modules still needs to live in `progress.txt`. ---- diff --git a/se6-module-loading-perf-audit.md b/se6-module-loading-perf-audit.md new file mode 100644 index 000000000..04299b581 --- /dev/null +++ b/se6-module-loading-perf-audit.md @@ -0,0 +1,57 @@ +# `se6` `module-loading-perf` Audit For Agent OS + +Scope: + +- Compared `/home/nathan/se6` `main...module-loading-perf`. +- Covered every substantive change set from `f32fdfd3` through `3a46a184`. +- Collapsed the many generated benchmark output files under `packages/secure-exec/benchmarks/results/module-load/**` into a single artifact row instead of listing every generated JSON/Markdown file separately. +- Line references below are repo-relative and point at the branch tip for `se6` and the current Agent OS checkout in `a6`. 
+ +Status legend: + +- `Applies`: still missing in Agent OS and worth porting. +- `Partially applies`: the idea still matters, but the original implementation targeted old secure-exec/V8 bridge architecture. +- `Mostly absorbed`: largely covered by a native equivalent already; only a narrow remainder is still worth porting. +- `Already absorbed`: Agent OS already has a native equivalent. +- `Compat-only`: only relevant to the legacy compat runtime under `packages/core/src/compat-runtime/**`. +- `No longer applies`: tied to old secure-exec transport/bootstrap mechanics that no longer exist in native Agent OS. +- `Mostly no longer applies`: only small parts remain relevant; the rest was specific to the old repo layout and workflow. +- `Do not port wholesale`: generated or reference data; keep only representative pieces rather than the full tree. +- `Docs-only`: no runtime delta to port. + +## Change Matrix + +| Change set | Status in Agent OS | Secure-exec refs | Agent OS refs | Migration note | +| --- | --- | --- | --- | --- | +| `f32fdfd3` Initial IPC observability + module-load benchmark | `Partially applies` | `se6/packages/secure-exec/benchmarks/module-load/run-module-load-benchmarks.ts:52-58,96-104,458-525`
`se6/packages/secure-exec/benchmarks/module-load/summary.ts:420-520,1155-1183` | `a6/crates/execution/src/benchmark.rs:73-179,333-470`
`a6/crates/execution/src/bin/node-import-bench.rs:1-40`
`a6/crates/execution/src/node_import_cache.rs:53-63,65-176` | Agent OS already has a native node-import benchmark and import-cache metrics, but it does not have the old ndjson/prometheus/summary/comparison artifact stack. | +| `41215f48` `US-001` real Pi CLI end-to-end benchmark workload | `Partially applies` | `se6/packages/secure-exec/benchmarks/module-load/scenario-catalog.ts:149-162` | `a6/scripts/benchmarks/bench-utils.ts:64-192`
`a6/scripts/benchmarks/coldstart.bench.ts:4-16,81-114`
`a6/packages/core/src/agent-os.ts:2359-2416` | Agent OS benchmarks `createSession("pi")`, but not the old standalone NodeRuntime Pi CLI path that secure-exec was timing. | +| `3772f039` `US-002` machine-readable summaries and deltas | `Applies` | `se6/packages/secure-exec/benchmarks/module-load/run-module-load-benchmarks.ts:505-523`
`se6/packages/secure-exec/benchmarks/module-load/summary.ts:2688-2823` | `a6/crates/execution/src/bin/node-import-bench.rs:3-19`
`a6/crates/execution/src/benchmark.rs:73-179` | Current Agent OS benchmark prints Markdown only. There is no native JSON summary or before/after comparison artifact yet. | +| `9adad215` `US-003` raw UDS RTT + per-session phase attribution | `Applies` | `se6/packages/secure-exec/benchmarks/module-load/run-module-load-benchmarks.ts:52-58`
`se6/packages/secure-exec/benchmarks/module-load/summary.ts:437-455,1695-1705,2328-2338,2776-2790` | `a6/crates/execution/src/benchmark.rs:50-61,181-245`
`a6/crates/sidecar/src/protocol.rs:737-817,1054-1095` | Agent OS can validate typed request/response flow, but it does not benchmark transport RTT or break time into create/inject/execute/destroy phases. | +| `6197d51a` `US-011` direct Pi CLI headless execution inside standalone NodeRuntime | `Partially applies` | `se6/packages/secure-exec/benchmarks/module-load/scenario-catalog.ts:149-162` | `a6/packages/core/src/agent-os.ts:2359-2416`
`a6/packages/core/tests/pi-headless.test.ts:129-138` | `createSession("pi")` exists natively, but the old standalone-runtime headless path is not the main Agent OS target. The legacy V8/compat PI headless TODO is still open. | +| `88d1992e` `US-012` JSZip + pdf-lib scenarios | `Applies` | `se6/packages/secure-exec/benchmarks/module-load/scenario-catalog.ts:101-130` | `a6/crates/execution/src/benchmark.rs:422-469`
`a6/crates/execution/tests/benchmark.rs:3-59` | Native Agent OS benchmark still covers only five synthetic scenarios. | +| `4bb099df` `US-013` repeated-session compressed JSZip benchmarking | `Applies` | `se6/packages/secure-exec/benchmarks/module-load/scenario-catalog.ts:118-130`
`se6/packages/secure-exec/benchmarks/module-load/orchestration.ts:17-24,203-253` | `a6/crates/execution/src/benchmark.rs:422-469` | This only makes sense if the richer module-load suite is ported into Agent OS. | +| `4aae2210` `US-004` eliminate repeated `_loadPolyfill` round-trips | `Compat-only` | `se6/packages/core/isolate-runtime/src/inject/require-setup.ts:4024-4041,4048-4090,4573-4589` | `a6/packages/core/src/compat-runtime/kernel/isolate-runtime/require-setup.js:4008-4024`
`a6/crates/execution/src/node_import_cache.rs:65-176` | The native Agent OS Node path no longer uses `_loadPolyfill`; only the legacy compat runtime still does. | +| `15fe7482` `US-014` split `_loadPolyfill` attribution from `__bd:*` dispatch | `Compat-only` | `se6/packages/secure-exec/benchmarks/module-load/summary.ts:1129-1146,1364-1445,2696-2823` | `a6/packages/core/src/compat-runtime/nodejs/bridge.js:7027-7072` | Useful only if compat-runtime observability is ported. It does not map to the native Node executor. | +| `96164a4c` `US-005` shrink `_loadPolyfill` payload transfer with id/hash caching | `Compat-only` | `se6/packages/v8/src/runtime.ts:110-230,729-761,793-805` | `a6/packages/core/src/compat-runtime/kernel/isolate-runtime/require-setup.js:4008-4024`
`a6/packages/core/src/compat-runtime/nodejs/bridge-setup.js:1718-1729` | Native Agent OS uses filesystem-backed loader assets and env wiring, not repeated polyfill payload IPC. | +| `91c688ce` `US-015` restore full-suite `jszip-end-to-end` completion | `Applies` | `se6/packages/secure-exec/benchmarks/module-load/scenario-catalog.ts:125-130`
`se6/packages/secure-exec/benchmarks/module-load/orchestration.ts:17-24,431-470` | `a6/crates/execution/src/benchmark.rs:422-469` | The scenario is simply absent from the native benchmark suite today. | +| `a5f06534` `US-006` stop resending large bridge bootstrap payloads | `No longer applies` | `se6/packages/v8/src/runtime.ts:131-137,620-663,793-805,816-833`
`se6/native/v8-runtime/src/snapshot.rs:143-220` | `a6/crates/execution/src/javascript.rs:364-380,403-550`
`a6/crates/execution/src/node_import_cache.rs:2410-2450` | Agent OS materializes loader/bootstrap files once and passes paths/env to Node. It is not shipping bridge source blobs per execution. | +| `9efe5dc1` `US-016` dedupe static `postRestoreScript` bytes | `No longer applies` | `se6/packages/v8/src/runtime.ts:135-137,614-663,799-833` | `a6/crates/execution/src/javascript.rs:528-550` | There is no native Agent OS equivalent to secure-exec's `postRestoreScript` payload channel. | +| `be83b6fd` `US-007` cache module resolution, package metadata, and filesystem probes | `Already absorbed` | `se6/packages/core/isolate-runtime/src/inject/require-setup.ts:4034-4090` | `a6/crates/execution/src/node_import_cache.rs:53-63,65-176,487-603,690-775` | This is already present natively as `NodeImportCache` with persisted resolution/module-format/package-type caches and validation. | +| `834a057f` `US-008` preload or snapshot hottest bootstrap assets | `Mostly absorbed` | `se6/native/v8-runtime/src/snapshot.rs:15-26,143-220`
`se6/packages/core/isolate-runtime/src/inject/require-setup.ts:4029-4033,4578-4589` | `a6/crates/execution/src/javascript.rs:17-42,329-380,553-619`
`a6/crates/execution/src/node_import_cache.rs:2332-2450` | Agent OS already materializes builtin/polyfill assets and prewarms hot imports. The part that remains unported is the broader warm-pool/snapshot layer. | +| `d22ee524` `US-018` bypass `_loadPolyfill` for hot `__bd:*` bridge-dispatch wrappers | `Compat-only` | `se6/packages/nodejs/src/execution-driver.ts:713-776` | `a6/packages/core/src/compat-runtime/nodejs/bridge.js:7027-7072` | Native Agent OS does not route bridge calls through `__bd:*`. This is only relevant if the compat runtime stays performance-critical. | +| `7f2467c1` `US-019` preload or manifest-cache hot projected package source files | `Partially applies` | `se6/packages/core/isolate-runtime/src/inject/require-setup.ts:4034-4090`
`se6/packages/secure-exec/benchmarks/module-load/scenario-catalog.ts:79-85` | `a6/packages/core/tests/software-projection.test.ts:34-72`
`a6/crates/execution/src/node_import_cache.rs:179-196,487-530,690-775` | Projected packages exist and metadata caching exists, but there is no explicit manifest/preload layer for the hottest projected source files. | +| `ba7f25d5` `US-019` docs-only follow-up | `Docs-only` | `se6/CLAUDE.md`
`se6/scripts/ralph/prd.json`
`se6/scripts/ralph/progress.txt` | `a6/crates/execution/benchmarks/node-import-baseline.md:1-24` | No runtime delta. This was only benchmark planning/progress tracking. | +| `c12515b2` `US-020` explicit session-destroy acknowledgment | `Already absorbed` | `se6/packages/v8/src/runtime.ts:380-389,893-940`
`se6/native/v8-runtime/src/main.rs:227-255` | `a6/crates/sidecar/src/service.rs:1066-1071,1933-2015`
`a6/crates/sidecar/src/protocol.rs:737-817,1054-1095` | Agent OS already has typed request/response expectations and a direct disposal path. There is no ping/pong teardown pattern to port. | +| `1f83e050` `US-009` reduce fixed per-session overhead | `Docs-only` | `se6/CLAUDE.md`
`se6/docs-internal/todo.md`
`se6/scripts/ralph/prd.json`
`se6/scripts/ralph/progress.txt` | `a6/crates/execution/src/benchmark.rs:209-243`
`a6/crates/execution/benchmarks/node-import-baseline.md:21-24` | The secure-exec commit was planning only. The goal still matters in Agent OS, but there is no direct code patch to transplant. | +| `11a33bb0` `US-010` expand benchmark controls for cold, warm, and host comparisons | `Applies` | `se6/packages/secure-exec/benchmarks/module-load/orchestration.ts:17-24,38-50,203-253,431-470`
`se6/packages/secure-exec/benchmarks/module-load/summary.ts:2748-2790` | `a6/crates/execution/src/benchmark.rs:422-469`
`a6/crates/execution/tests/benchmark.rs:16-24` | Native Agent OS has no mode matrix for true cold start, same-session replay, snapshot on/off, or host controls. | +| `71fde781` `US-021` ranked bridge-target hotspots in summaries | `Applies` | `se6/packages/secure-exec/benchmarks/module-load/summary.ts:1364-1445,2696-2705,2823-2832` | `a6/crates/execution/src/benchmark.rs:141-245` | Agent OS currently emits guidance prose, not ranked hotspot tables or target-level deltas. | +| `a8a9fabc` `US-024` stabilize top-level module-load benchmark orchestration | `Applies` | `se6/packages/secure-exec/benchmarks/module-load/orchestration.ts:52-111,431-470` | `a6/crates/execution/src/benchmark.rs:333-350` | The current native harness is simple and single-stage. The old staged orchestration only matters if the broader suite is ported. | +| `0f9e3096` `US-022` isolation microbenchmarks for empty session and hot single imports | `Partially applies` | `se6/packages/secure-exec/benchmarks/module-load/scenario-catalog.ts:21-85` | `a6/crates/execution/src/benchmark.rs:422-469`
`a6/crates/execution/tests/benchmark.rs:16-24` | Agent OS already has `isolate-startup`, `builtin-import`, local-graph, and large-package cases, but not the fuller hot-import microbench set (`stream`, `stream/web`, `crypto`, `zlib`, `assert`, `url`, projected package files). | +| `3a46a184` `US-023` stability + resource-usage reporting | `Partially applies` | `se6/packages/secure-exec/benchmarks/module-load/summary.ts:432-455,1690-1705,2331-2342,2721-2745`
`se6/packages/secure-exec/benchmarks/module-load/run-module-load-benchmarks.ts:467-480` | `a6/scripts/benchmarks/memory.bench.ts:45-56,117-175,206-241`
`a6/crates/execution/src/benchmark.rs:73-179` | Agent OS has a separate memory benchmark, but the native node-import benchmark does not integrate per-scenario stability or host resource usage into one report. | +| Generated benchmark results under `packages/secure-exec/benchmarks/results/module-load/**` | `Do not port wholesale` | `se6/packages/secure-exec/benchmarks/results/module-load/**` | `a6/crates/execution/benchmarks/node-import-baseline.md:1-24` | Keep only representative baselines in Agent OS. The generated result tree is reference data, not code to integrate. | +| Packaging/docs/test plumbing around the old benchmark runner | `Mostly no longer applies` | `se6/packages/secure-exec/tests/module-load-summary.test.ts`
`se6/CLAUDE.md`
`se6/scripts/ralph/prd.json`
`se6/scripts/ralph/progress.txt` | `a6/crates/execution/src/bin/node-import-bench.rs:1-40`
`a6/scripts/benchmarks/coldstart.bench.ts:1-116` | Only port the parts that support a deliberate native benchmark-suite expansion. Most secure-exec package plumbing was specific to the old repo layout and reporting workflow. | + +## What I Would Actually Port + +1. Port the benchmark/reporting pieces first: `US-002`, `US-003`, `US-010`, `US-021`, `US-022`, `US-023`, and the orchestration parts of `US-024`. +2. Then add the missing real-library workloads from `US-012`, `US-013`, and `US-015`. +3. Treat `US-007`, `US-008`, and `US-020` as already largely or fully absorbed by the native Agent OS runtime. +4. Do not port `US-004`, `US-005`, `US-014`, or `US-018` into the native path unless the legacy compat runtime under `packages/core/src/compat-runtime/**` becomes a deliberate optimization target. +5. Do not port generated benchmark output trees wholesale. Keep a small native baseline artifact instead.