From 7e23bfb557f1d498f3b49061394a3440375f679b Mon Sep 17 00:00:00 2001
From: Kulesh Shanmugasundaram <164083+kulesh@users.noreply.github.com>
Date: Thu, 5 Feb 2026 19:35:01 -0500
Subject: [PATCH 1/8] docs: add refactor plan and FLY analysis

---
 docs/analysis/fly-callgraph.md  |  95 +++++++++++++
 docs/analysis/fly-invariants.md |  45 ++++++
 docs/codex-refactor-02052026.md | 236 ++++++++++++++++++++++++++++++++
 3 files changed, 376 insertions(+)
 create mode 100644 docs/analysis/fly-callgraph.md
 create mode 100644 docs/analysis/fly-invariants.md
 create mode 100644 docs/codex-refactor-02052026.md

diff --git a/docs/analysis/fly-callgraph.md b/docs/analysis/fly-callgraph.md
new file mode 100644
index 0000000..a86c3a4
--- /dev/null
+++ b/docs/analysis/fly-callgraph.md
@@ -0,0 +1,95 @@
+# FLY Call Graph (2026-02-05)
+
+This document maps the current FLY execution flow from UI actions down to orchestration and execution. The goal is to identify extraction boundaries for a dedicated execution controller.
+
+## Entry Points (User Actions)
+
+- `FlyScreen.on_mount()`
+  - `coordinator.reset_stale_in_progress()`
+  - `_refresh_waypoint_list()`
+  - `_select_next_waypoint(include_in_progress=True)`
+  - `_update_git_status()` + timer
+  - `_update_project_metrics()`
+
+- `action_start()`
+  - Handles retry of selected failed waypoint
+  - Handles resume from `PAUSED`
+  - Handles start from `READY` or after `CHART_REVIEW` / `LAND_REVIEW`
+  - Transitions via `coordinator.transition(...)`
+  - Sets `execution_state = RUNNING`
+  - `_execute_current_waypoint()`
+
+- `action_pause()`
+  - Sets `execution_state = PAUSE_PENDING`
+  - Cancels executor if running (logs pause)
+
+- `action_skip()`
+  - Marks the current waypoint as skipped (via selection change)
+  - `_select_next_waypoint()`
+
+- `action_back()`
+  - Transitions `FLY_* -> CHART_REVIEW`
+  - Switches phase to `chart`
+
+- `action_forward()`
+  - Validates `LAND_REVIEW` availability
+  - `coordinator.transition(LAND_REVIEW)` + `_switch_to_land_screen()`
+
+- Intervention flow
+  - `_handle_intervention(...)` → `InterventionModal` → `_on_intervention_result(...)`
+
+## Execution Flow
+
+- `_execute_current_waypoint()`
+  - Marks waypoint `IN_PROGRESS` + saves flight plan
+  - Builds `WaypointExecutor` with callbacks and limits
+  - `run_worker(self._run_executor())`
+
+- `_run_executor()`
+  - `WaypointExecutor.execute()` → returns `ExecutionResult`
+
+- `on_worker_state_changed()`
+  - Handles `InterventionNeededError` or other failures
+  - Calls `_handle_execution_result(result)`
+
+- `_handle_execution_result(result)`
+  - SUCCESS
+    - Mark COMPLETE + save
+    - Commit via git (receipt validation)
+    - Parent epic check
+    - Select next waypoint
+    - If all complete: transition `LAND_REVIEW`
+  - INTERVENTION_NEEDED / MAX_ITERATIONS / FAILED
+    - Mark FAILED
+    - Transition `FLY_INTERVENTION`
+  - CANCELLED
+    - Transition `FLY_PAUSED`
+
+## Cross-Cutting Services
+
+- `JourneyCoordinator`
+  - Transition validation and persistence
+  - Waypoint selection and completion checks
+
+- `WaypointExecutor`
+  - Iterative execution loop
+  - Calls progress callback with `ExecutionContext`
+
+- `ExecutionLogReader` / `ExecutionLogWriter`
+  - Audit trail for each waypoint
+
+- `GitService` + `ReceiptValidator`
+  - Receipt validation
+  - Commit/tag integration
+
+---
+
+## Extraction Boundary (Target)
+
+Introduce `ExecutionController` to own the flow currently distributed across `FlyScreen`:
+- `start / pause / resume / skip / retry`
+- State transitions
+- Selection logic + execution sequencing
+- Handling of `ExecutionResult`
+
+`FlyScreen` should become a thin UI layer: inputs, rendering, and modal handling.
diff --git a/docs/analysis/fly-invariants.md b/docs/analysis/fly-invariants.md
new file mode 100644
index 0000000..daecf69
--- /dev/null
+++ b/docs/analysis/fly-invariants.md
@@ -0,0 +1,45 @@
+# FLY Invariants (2026-02-05)
+
+These invariants define the expected behavior of FLY execution. They should be preserved during the refactor and enforced through tests.
+
+## State and Transition Invariants
+
+- `JourneyCoordinator.transition(...)` is the single source of truth for journey state transitions.
+- `ExecutionState` is a UI execution mode, but must be consistent with `JourneyState`:
+  - `ExecutionState.RUNNING` implies `JourneyState.FLY_EXECUTING`.
+  - `ExecutionState.PAUSED` implies `JourneyState.FLY_PAUSED`.
+  - `ExecutionState.INTERVENTION` implies `JourneyState.FLY_INTERVENTION`.
+  - `ExecutionState.DONE` implies all waypoints complete and `JourneyState.LAND_REVIEW` is reachable.
+- Non-recoverable states should not be persisted as resume checkpoints.
+
+## Waypoint Status Invariants
+
+- When execution starts, the current waypoint becomes `IN_PROGRESS`.
+- On success, waypoint must be marked `COMPLETE`, persisted, and logged.
+- On intervention or failure, waypoint must be marked `FAILED` (or `SKIPPED` for explicit skips).
+- Parent epic completion is checked after a child completes, but epics are not auto-completed.
+
+## Selection Invariants
+
+- Selection prefers resumable waypoints (`IN_PROGRESS`, `FAILED`) when resuming.
+- Selection should not allow a waypoint whose dependencies are incomplete.
+- Epics become eligible only when all children complete.
+
+## Execution Invariants
+
+- Execution uses `WaypointExecutor` exclusively.
+- The UI must remain responsive (execution runs in a background worker).
+- Progress updates are handled on the main thread via `call_later`.
+- `ExecutionResult` drives state transitions; no silent fall-through.
+
+## Logging and Metrics Invariants
+
+- Each waypoint execution produces an execution log.
+- Cost and token metrics are updated after each waypoint.
+- Receipt validation must occur before auto-commit.
+
+## Recovery Invariants
+
+- Stale `IN_PROGRESS` waypoints are reset to `PENDING` on screen mount.
+- Intervention must surface a modal with explicit user action choices.
+- Rollback is best-effort and must not corrupt the flight plan state.
diff --git a/docs/codex-refactor-02052026.md b/docs/codex-refactor-02052026.md
new file mode 100644
index 0000000..accb858
--- /dev/null
+++ b/docs/codex-refactor-02052026.md
@@ -0,0 +1,236 @@
+# Waypoints Refactor Plan (2026-02-05)
+
+**Goal**: Address the current philosophical shortcomings (simplicity drift in FLY, incomplete flight tests, missing decision records, residual TODOs) while strengthening domain boundaries, testability, and iteration discipline.
+
+This plan follows the Waypoints philosophy: bikes not Rube Goldberg; explicit domain language; alternatives considered; staged implementation; tests first; artifacts and UX quality treated as first-class.
+
+---
+
+## 1) Problem Framing (Symptoms vs Root Causes)
+
+### Symptoms
+- `src/waypoints/tui/screens/fly.py` mixes UI, orchestration, execution, git, and process management.
+- Flight tests described in `docs/testing-strategy.md` are not implemented (only `flight-tests/self-host/` exists).
+- Architectural decisions are not documented in a durable, discoverable format (no ADRs).
+- TODOs indicate incomplete reliability paths (rollback, project status, prompt summarization).
+
+### Root Causes
+- FLY phase lacks a dedicated orchestration boundary with a minimal interface.
+- Testing strategy is documented but not operationalized into a repeatable pipeline.
+- Decision making is visible in review docs but not captured as formal architectural records.
+- Recovery and rollback are acknowledged but not embedded in execution flow.
+
+---
+
+## 2) Solution Space (Alternatives)
+
+### A) Minimal Reorg (Low risk, lowest impact)
+- Move a few helper methods out of `fly.py` and keep orchestration in the screen.
+- Add one flight test (L0) to prove the pipeline.
+- Document a single ADR.
+
+**Pros**: Fast, minimal change.
+**Cons**: Doesn’t address cross-layer coupling or reliability; doesn’t scale.
+
+### B) Domain-First Refactor (Recommended)
+- Introduce a dedicated FLY orchestration service to separate UI from execution.
+- Implement a flight test harness with L0–L2 coverage.
+- Add a lightweight ADR system and document the most significant changes.
+
+**Pros**: Aligns with “bicycles,” clarifies boundaries, testable in isolation.
+**Cons**: Moderate effort; requires careful migrations.
+
+### C) Full Protocol-Driven Execution (High impact, high risk)
+- Redesign FLY as a strict protocol engine with structured reports and stateful iteration.
+- Add schema validation for all JSONL artifacts.
+- Build a full QA system for acceptance criteria.
+
+**Pros**: Strong correctness guarantees.
+**Cons**: Large refactor; not necessary to address current shortcomings.
+
+**Chosen**: **B) Domain-First Refactor**. It fixes current issues while keeping scope tight and allowing iterative upgrades to protocol rigor later.
+
+---
+
+## 3) Design Principles and Ubiquitous Language
+
+### Domain Language
+- **Execution Session**: the lifecycle of executing a flight plan.
+- **Execution Controller**: domain service that governs run/pause/resume, metrics, and waypoint transitions.
+- **Execution Report**: structured record emitted per waypoint attempt.
+- **Flight Test**: an input spec with expected artifacts and a validation script.
+- **Checkpoint**: a recoverable state boundary with persisted artifacts.
+
+### Boundaries
+- **TUI Screen**: render state, bind keys, dispatch domain commands.
+- **Orchestration Layer**: enforce business invariants and state transitions.
+- **Execution Engine**: run a waypoint with a protocol, return a report.
+- **Persistence Layer**: versioned artifacts, recovery, replay.
+
+---
+
+## 4) Implementation Plan (Phased)
+
+### Phase 0 — Baseline and Discovery (1-2 sessions)
+**Objective**: Confirm scope, inventory risk, and lock acceptance tests before the refactor.
+
+**Tasks**
+- Map current FLY flow (screen → coordinator → executor → logs) into a call graph.
+- List all FLY entry points, side effects, and persistence paths.
+- Identify contract surfaces for extraction (inputs, outputs, invariants).
+
+**Artifacts**
+- `docs/analysis/fly-callgraph.md` (new)
+- `docs/analysis/fly-invariants.md` (new)
+
+**Acceptance Criteria**
+- All current FLY interactions are documented and traceable to code.
+
+---
+
+### Phase 1 — Extract Execution Controller (Core Refactor)
+**Objective**: Remove orchestration logic from `fly.py` and centralize it in the domain layer.
+
+**Design**
+- Create `src/waypoints/orchestration/execution_controller.py`.
+- Provide a narrow interface:
+  - `start_execution()`
+  - `pause_execution()`
+  - `resume_execution()`
+  - `execute_next()`
+  - `handle_intervention()`
+- The controller owns:
+  - transitions between `FLY_READY`, `FLY_EXECUTING`, `FLY_PAUSED`, `FLY_INTERVENTION`, `LAND_REVIEW`
+  - selection of next waypoint via coordinator
+  - metrics aggregation per waypoint
+  - persistence of execution reports/logs
+
+**Tasks**
+- Move execution state transitions and waypoint selection logic into controller.
+- Keep UI-specific concerns in `fly.py` (rendering, key bindings, modal display).
+- Introduce an `ExecutionReport` data model in `src/waypoints/fly/`.
+
+**Tests (TDD)**
+- Add `tests/test_execution_controller.py` with happy-path and failure-path tests.
+- Ensure controller behavior is deterministic and easily mocked in TUI tests.
+
+**Acceptance Criteria**
+- `fly.py` no longer manages execution state transitions directly.
+- `ExecutionController` is test-covered and used by TUI.
+- All existing tests pass.
+
+---
+
+### Phase 2 — Flight Test Harness (BDD)
+**Objective**: Operationalize the documented testing strategy with L0–L2 flight tests.
+
+**Design**
+- Create structure:
+  - `flight-tests/L0-hello-world/`
+  - `flight-tests/L1-todo-cli/`
+  - `flight-tests/L2-rest-api/`
+- For each flight test:
+  - `input/idea.txt`
+  - `expected/min_files.txt`
+  - `expected/smoke_test.sh`
+  - `results/<timestamp>/` (generated)
+
+**Tasks**
+- Add a small runner in `scripts/run_flight_test.py`.
+- Document usage in `docs/testing-strategy.md`.
+
+**Tests**
+- Add `tests/test_flight_test_runner.py` to validate runner behavior.
+
+**Acceptance Criteria**
+- L0–L2 tests are runnable and repeatable locally.
+- Results are stored in timestamped directories.
+
+---
+
+### Phase 3 — Decision Records (ADR system)
+**Objective**: Capture architectural decisions in a durable, searchable format.
+
+**Design**
+- Add `docs/adr/README.md` (index).
+- Create ADRs for:
+  - FLY execution boundary extraction
+  - Flight test harness
+  - Execution report model
+
+**Acceptance Criteria**
+- ADR index linked from `docs/README.md` and `README.md`.
+
+---
+
+### Phase 4 — Reliability Polish (Targeted TODOs)
+**Objective**: Resolve remaining TODOs that affect reliability and trust.
+
+**Tasks**
+- Implement rollback in coordinator when GitService supports it (or record an explicit TODO with an issue ID).
+- Add `status` to `Project` model if still missing.
+- Replace prompt prefix usage in `llm/prompts/fly.py` with proper spec summary.
+
+**Acceptance Criteria**
+- All TODOs found by `rg "TODO"` in the core runtime are resolved or turned into tracked issues.
+
+---
+
+## 5) Acceptance Criteria (Global)
+
+- FLY orchestration is isolated in `ExecutionController` and test-covered.
+- TUI screens are thin and focused on display and user interaction.
+- L0–L2 flight tests can be executed with a single command.
+- ADRs exist and are linked from doc indexes.
+- No runtime TODOs remain untracked.
+
+---
+
+## 6) Testing Strategy Alignment
+
+**Unit**
+- `tests/test_execution_controller.py`
+- `tests/test_flight_test_runner.py`
+
+**Integration**
+- FLY screen tests should mock the controller and verify UI flow only.
+
+**BDD / Acceptance**
+- L0–L2 flight tests with smoke tests and minimal expected files.
+
+---
+
+## 7) Migration & Compatibility
+
+- Provide adapters in `fly.py` to minimize UI breakage during the refactor.
+- Keep existing log formats; add new `ExecutionReport` as additive data.
+- If schema versioning is introduced, add migration in `models/schema.py`.
+
+---
+
+## 8) Work Breakdown (Issue-Oriented)
+
+1. **Execution Controller extraction**
+2. **Execution report model**
+3. **TUI FLY screen integration**
+4. **Flight test runner + L0**
+5. **L1 and L2 flight tests**
+6. **ADR system + first three ADRs**
+7. **TODO reliability fixes**
+
+---
+
+## 9) Definition of Done
+
+- New architecture reviewed, tests passing, and behavior preserved.
+- All acceptance criteria satisfied.
+- Docs updated and consistent with implementation.
+- Flight tests operational and repeatable.
+
+---
+
+## 10) Ownership and Iteration
+
+This plan is staged to keep every step testable and reviewable. Each phase is an MVP for the next: the execution controller enables better tests, the flight tests expose reliability gaps, and ADRs preserve context for future contributors.
+
+If any step requires scope expansion, create a new ADR and update the plan rather than silently extending complexity.

From ec022b4a729adb4cbe3182998723c99c742964af Mon Sep 17 00:00:00 2001
From: Kulesh Shanmugasundaram <164083+kulesh@users.noreply.github.com>
Date: Thu, 5 Feb 2026 19:35:11 -0500
Subject: [PATCH 2/8] feat(fly): extract execution controller

---
 src/waypoints/fly/execution_report.py         |  21 +
 src/waypoints/fly/state.py                    |  16 +
 src/waypoints/orchestration/__init__.py       |   6 +
 .../orchestration/execution_controller.py     | 336 +++++++++++++++
 src/waypoints/tui/screens/fly.py              | 395 +++++-------------
 tests/test_execution_controller.py            | 170 ++++++++
 tests/test_fly_screen.py                      |   6 +-
 7 files changed, 659 insertions(+), 291 deletions(-)
 create mode 100644 src/waypoints/fly/execution_report.py
 create mode 100644 src/waypoints/fly/state.py
 create mode 100644 src/waypoints/orchestration/execution_controller.py
 create mode 100644 tests/test_execution_controller.py

diff --git a/src/waypoints/fly/execution_report.py b/src/waypoints/fly/execution_report.py
new file mode 100644
index 0000000..5762ab4
--- /dev/null
+++ b/src/waypoints/fly/execution_report.py
@@ -0,0 +1,21 @@
+"""Execution report for a waypoint run."""
+
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+from datetime import datetime
+
+from waypoints.fly.executor import ExecutionResult
+
+
+@dataclass(frozen=True, slots=True)
+class ExecutionReport:
+    """Structured summary of a waypoint execution attempt."""
+
+    waypoint_id: str
+    result: ExecutionResult
+    started_at: datetime | None = None
+    completed_at: datetime | None = None
+    iterations: int | None = None
+    total_iterations: int | None = None
+    criteria_completed: set[int] = field(default_factory=set)
diff --git a/src/waypoints/fly/state.py b/src/waypoints/fly/state.py
new file mode 100644
index 0000000..9cfd1c1
--- /dev/null
+++ b/src/waypoints/fly/state.py
@@ -0,0 +1,16 @@
+"""Execution state model for the FLY phase."""
+
+from __future__ import annotations
+
+from enum import Enum
+
+
+class ExecutionState(Enum):
+    """State of waypoint execution."""
+
+    IDLE = "idle"
+    RUNNING = "running"
+    PAUSE_PENDING = "pause_pending"  # Pause requested, finishing current waypoint
+    PAUSED = "paused"
+    DONE = "done"
+    INTERVENTION = "intervention"
diff --git a/src/waypoints/orchestration/__init__.py b/src/waypoints/orchestration/__init__.py
index b8fc042..319ab02 100644
--- a/src/waypoints/orchestration/__init__.py
+++ b/src/waypoints/orchestration/__init__.py
@@ -13,6 +13,10 @@
 """
 
 from waypoints.orchestration.coordinator import JourneyCoordinator
+from waypoints.orchestration.execution_controller import (
+    ExecutionController,
+    ExecutionDirective,
+)
 from waypoints.orchestration.types import (
     ChunkCallback,
     CompletionStatus,
@@ -24,6 +28,8 @@
 
 __all__ = [
     "JourneyCoordinator",
+    "ExecutionController",
+    "ExecutionDirective",
     "NextAction",
     "CompletionStatus",
     "ProgressCallback",
diff --git a/src/waypoints/orchestration/execution_controller.py b/src/waypoints/orchestration/execution_controller.py
new file mode 100644
index 0000000..c0be62a
--- /dev/null
+++ b/src/waypoints/orchestration/execution_controller.py
@@ -0,0 +1,336 @@
+"""Execution controller for FLY phase orchestration."""
+
+from __future__ import annotations
+
+import logging
+from collections.abc import Callable
+from dataclasses import dataclass
+from datetime import UTC, datetime
+from typing import TYPE_CHECKING, Literal
+
+from waypoints.fly.execution_report import ExecutionReport
+from waypoints.fly.executor import (
+    ExecutionContext,
+    ExecutionResult,
+    WaypointExecutor,
+)
+from waypoints.fly.intervention import Intervention, InterventionAction, InterventionResult
+from waypoints.fly.state import ExecutionState
+from waypoints.models import JourneyState, Waypoint, WaypointStatus
+from waypoints.orchestration.coordinator import JourneyCoordinator
+
+if TYPE_CHECKING:
+    from waypoints.llm.metrics import MetricsCollector
+
+logger = logging.getLogger(__name__)
+
+
+@dataclass(frozen=True, slots=True)
+class ExecutionDirective:
+    """Directive returned by the controller after an execution event."""
+
+    action: Literal["execute", "pause", "intervention", "land", "noop"]
+    waypoint: Waypoint | None = None
+    message: str | None = None
+    completed: Waypoint | None = None
+
+
+class ExecutionController:
+    """Orchestrates execution logic for the FLY phase."""
+
+    def __init__(self, coordinator: JourneyCoordinator) -> None:
+        self.coordinator = coordinator
+        self.execution_state = ExecutionState.IDLE
+        self._current_intervention: Intervention | None = None
+        self._additional_iterations = 0
+        self._execution_started_at: datetime | None = None
+        self.last_report: ExecutionReport | None = None
+
+    @property
+    def current_waypoint(self) -> Waypoint | None:
+        """Get the current waypoint from the coordinator."""
+        return self.coordinator.current_waypoint
+
+    @current_waypoint.setter
+    def current_waypoint(self, waypoint: Waypoint | None) -> None:
+        """Set the current waypoint on the coordinator."""
+        self.coordinator.current_waypoint = waypoint
+
+    def initialize(self) -> None:
+        """Initialize execution state on screen mount."""
+        self.coordinator.reset_stale_in_progress()
+        self.select_next_waypoint(include_in_progress=True)
+
+    def select_next_waypoint(self, include_in_progress: bool = False) -> Waypoint | None:
+        """Select the next eligible waypoint and update execution state."""
+        wp = self.coordinator.select_next_waypoint(include_failed=include_in_progress)
+        if wp:
+            return wp
+
+        status = self.coordinator.get_completion_status()
+        pending = status.pending + status.in_progress
+
+        if status.all_complete:
+            self.execution_state = ExecutionState.DONE
+        elif status.blocked > 0 or pending > 0 or status.failed > 0:
+            self.execution_state = ExecutionState.PAUSED
+        else:
+            self.execution_state = ExecutionState.PAUSED
+
+        return None
+
+    def start(self, selected_waypoint: Waypoint | None) -> ExecutionDirective:
+        """Start or resume execution."""
+        if selected_waypoint and selected_waypoint.status == WaypointStatus.FAILED:
+            selected_waypoint.status = WaypointStatus.PENDING
+            self.coordinator.save_flight_plan()
+            self.current_waypoint = selected_waypoint
+            self._transition_to_executing()
+            self.execution_state = ExecutionState.RUNNING
+            return ExecutionDirective(
+                action="execute",
+                waypoint=selected_waypoint,
+                message=f"Retrying {selected_waypoint.id}",
+            )
+
+        if self.execution_state == ExecutionState.DONE:
+            status = self.coordinator.get_completion_status()
+            if status.failed > 0 or status.blocked > 0:
+                return ExecutionDirective(
+                    action="noop",
+                    message="Select a failed waypoint and press 'r' to retry",
+                )
+            return ExecutionDirective(action="noop", message="All waypoints complete!")
+
+        if self.execution_state == ExecutionState.PAUSED:
+            self.select_next_waypoint(include_in_progress=True)
+            if not self.current_waypoint:
+                status = self.coordinator.get_completion_status()
+                if status.failed > 0:
+                    return ExecutionDirective(
+                        action="noop",
+                        message="Select a failed waypoint and press 'r' to retry",
+                    )
+                return ExecutionDirective(action="noop", message="No waypoints to resume")
+            self._transition_to_executing()
+            self.execution_state = ExecutionState.RUNNING
+            return ExecutionDirective(action="execute", waypoint=self.current_waypoint)
+
+        if not self.current_waypoint:
+            self.select_next_waypoint()
+            if not self.current_waypoint:
+                return ExecutionDirective(
+                    action="noop", message="No waypoints ready to execute"
+                )
+
+        self._transition_to_executing()
+        self.execution_state = ExecutionState.RUNNING
+        return ExecutionDirective(action="execute", waypoint=self.current_waypoint)
+
+    def request_pause(self) -> bool:
+        """Request pause after current waypoint."""
+        if self.execution_state != ExecutionState.RUNNING:
+            return False
+        self.execution_state = ExecutionState.PAUSE_PENDING
+        return True
+
+    def build_executor(
+        self,
+        *,
+        waypoint: Waypoint,
+        spec: str,
+        on_progress: Callable[[ExecutionContext], None] | None,
+        max_iterations: int,
+        metrics_collector: "MetricsCollector | None",
+        host_validations_enabled: bool,
+    ) -> WaypointExecutor:
+        """Create a WaypointExecutor and mark waypoint as in progress."""
+        waypoint.status = WaypointStatus.IN_PROGRESS
+        self.coordinator.save_flight_plan()
+        self._execution_started_at = datetime.now(UTC)
+
+        total_iterations = max_iterations + self.consume_additional_iterations()
+        return WaypointExecutor(
+            project=self.coordinator.project,
+            waypoint=waypoint,
+            spec=spec,
+            on_progress=on_progress,
+            max_iterations=total_iterations,
+            metrics_collector=metrics_collector,
+            host_validations_enabled=host_validations_enabled,
+        )
+
+    def handle_execution_result(
+        self, result: ExecutionResult | None
+    ) -> ExecutionDirective:
+        """Handle execution result and update state."""
+        waypoint = self.current_waypoint
+        completed_at = datetime.now(UTC)
+        normalized = result or ExecutionResult.FAILED
+
+        if waypoint:
+            self.last_report = ExecutionReport(
+                waypoint_id=waypoint.id,
+                result=normalized,
+                started_at=self._execution_started_at,
+                completed_at=completed_at,
+            )
+
+        if normalized == ExecutionResult.SUCCESS:
+            if waypoint:
+                waypoint.status = WaypointStatus.COMPLETE
+                waypoint.completed_at = completed_at
+                self.coordinator.save_flight_plan()
+                self.coordinator.check_parent_completion(waypoint)
+
+            if self.execution_state == ExecutionState.PAUSE_PENDING:
+                self.coordinator.transition(JourneyState.FLY_PAUSED)
+                self.execution_state = ExecutionState.PAUSED
+                return ExecutionDirective(
+                    action="pause", waypoint=waypoint, completed=waypoint
+                )
+
+            if self.execution_state == ExecutionState.RUNNING:
+                next_wp = self.select_next_waypoint()
+                if next_wp:
+                    return ExecutionDirective(
+                        action="execute",
+                        waypoint=next_wp,
+                        completed=waypoint,
+                    )
+                if self.execution_state == ExecutionState.DONE:
+                    self.coordinator.transition(JourneyState.LAND_REVIEW)
+                    return ExecutionDirective(action="land", completed=waypoint)
+                return ExecutionDirective(action="pause", completed=waypoint)
+
+            return ExecutionDirective(action="noop", completed=waypoint)
+
+        if normalized == ExecutionResult.CANCELLED:
+            self.coordinator.transition(JourneyState.FLY_PAUSED)
+            self.execution_state = ExecutionState.PAUSED
+            return ExecutionDirective(action="pause", waypoint=waypoint)
+
+        if normalized in (
+            ExecutionResult.INTERVENTION_NEEDED,
+            ExecutionResult.MAX_ITERATIONS,
+            ExecutionResult.FAILED,
+        ):
+            self._mark_waypoint_failed()
+            self.coordinator.transition(JourneyState.FLY_INTERVENTION)
+            self.execution_state = ExecutionState.INTERVENTION
+            message = self._result_message(normalized)
+            return ExecutionDirective(
+                action="intervention", waypoint=waypoint, message=message
+            )
+
+        self._mark_waypoint_failed()
+        self.coordinator.transition(JourneyState.FLY_INTERVENTION)
+        self.execution_state = ExecutionState.INTERVENTION
+        return ExecutionDirective(
+            action="intervention",
+            waypoint=waypoint,
+            message="Waypoint execution failed",
+        )
+
+    def prepare_intervention(self, intervention: Intervention) -> ExecutionDirective:
+        """Record an intervention and transition state."""
+        self._current_intervention = intervention
+        self._mark_waypoint_failed()
+        self.coordinator.transition(JourneyState.FLY_INTERVENTION)
+        self.execution_state = ExecutionState.INTERVENTION
+        return ExecutionDirective(
+            action="intervention",
+            waypoint=intervention.waypoint,
+            message=intervention.error_summary,
+        )
+
+    def resolve_intervention(
+        self, result: InterventionResult | None
+    ) -> ExecutionDirective:
+        """Resolve an intervention and return next directive."""
+        if result is None:
+            return ExecutionDirective(action="noop", message="Intervention cancelled")
+
+        if not self._current_intervention:
+            return ExecutionDirective(action="noop", message="No intervention to resolve")
+
+        waypoint = self._current_intervention.waypoint
+
+        if result.action == InterventionAction.RETRY:
+            self._additional_iterations = result.additional_iterations
+            waypoint.status = WaypointStatus.IN_PROGRESS
+            self.coordinator.save_flight_plan()
+            self.coordinator.transition(JourneyState.FLY_EXECUTING)
+            self.execution_state = ExecutionState.RUNNING
+            self._current_intervention = None
+            return ExecutionDirective(action="execute", waypoint=waypoint)
+
+        if result.action == InterventionAction.SKIP:
+            waypoint.status = WaypointStatus.SKIPPED
+            self.coordinator.save_flight_plan()
+            self.coordinator.transition(JourneyState.FLY_PAUSED)
+            self.coordinator.transition(JourneyState.FLY_EXECUTING)
+            self.execution_state = ExecutionState.RUNNING
+            next_wp = self.select_next_waypoint()
+            self._current_intervention = None
+            if next_wp:
+                return ExecutionDirective(action="execute", waypoint=next_wp)
+            if self.execution_state == ExecutionState.DONE:
+                self.coordinator.transition(JourneyState.LAND_REVIEW)
+                return ExecutionDirective(action="land")
+            return ExecutionDirective(action="pause")
+
+        if result.action == InterventionAction.EDIT:
+            self.coordinator.transition(JourneyState.FLY_PAUSED)
+            self.execution_state = ExecutionState.PAUSED
+            self._current_intervention = None
+            return ExecutionDirective(
+                action="pause",
+                message="Edit waypoint in flight plan, then retry",
+            )
+
+        if result.action == InterventionAction.ROLLBACK:
+            self.coordinator.transition(JourneyState.FLY_PAUSED)
+            self.coordinator.transition(JourneyState.FLY_READY)
+            self.execution_state = ExecutionState.IDLE
+            self._current_intervention = None
+            return ExecutionDirective(action="noop", message="Rollback requested")
+
+        if result.action == InterventionAction.ABORT:
+            self.coordinator.transition(JourneyState.FLY_PAUSED)
+            self.execution_state = ExecutionState.PAUSED
+            self._current_intervention = None
+            return ExecutionDirective(action="pause", message="Execution aborted")
+
+        self._current_intervention = None
+        return ExecutionDirective(action="pause")
+
+    def consume_additional_iterations(self) -> int:
+        """Consume additional iterations requested during intervention."""
+        extra = self._additional_iterations
+        self._additional_iterations = 0
+        return extra
+
+    def _transition_to_executing(self) -> None:
+        journey = self.coordinator.project.journey
+        if journey and journey.state in (
+            JourneyState.CHART_REVIEW,
+            JourneyState.LAND_REVIEW,
+        ):
+            self.coordinator.transition(JourneyState.FLY_READY)
+        self.coordinator.transition(JourneyState.FLY_EXECUTING)
+
+    def _mark_waypoint_failed(self) -> None:
+        if self.current_waypoint:
+            self.current_waypoint.status = WaypointStatus.FAILED
+            self.coordinator.save_flight_plan()
+
+    @staticmethod
+    def _result_message(result: ExecutionResult) -> str:
+        if result == ExecutionResult.INTERVENTION_NEEDED:
+            return "Human intervention needed"
+        if result == ExecutionResult.MAX_ITERATIONS:
+            return "Max iterations reached"
+        if result == ExecutionResult.FAILED:
+            return "Execution failed"
+        return "Execution failed"
diff --git a/src/waypoints/tui/screens/fly.py b/src/waypoints/tui/screens/fly.py
index deccd50..62dcfbe 100644
--- a/src/waypoints/tui/screens/fly.py
+++ b/src/waypoints/tui/screens/fly.py
@@ -4,7 +4,6 @@
 import re
 import subprocess
 from datetime import UTC, datetime
-from enum import Enum
 from pathlib import Path
 from typing import TYPE_CHECKING, Any, cast
 
@@ -23,9 +22,7 @@
     from waypoints.git.receipt import ChecklistReceipt
     from waypoints.tui.app import WaypointsApp
 
-from waypoints.fly.execution_log import (
-    ExecutionLog as ExecLogType,
-)
+from waypoints.fly.execution_log import ExecutionLog as ExecLogType
 from waypoints.fly.execution_log import (
     ExecutionLogReader,
 )
@@ -41,11 +38,12 @@
     InterventionNeededError,
     InterventionResult,
 )
+from waypoints.fly.state import ExecutionState
 from waypoints.git import GitConfig, GitService, ReceiptValidator
 from waypoints.models import JourneyState, Project
 from waypoints.models.flight_plan import FlightPlan
 from waypoints.models.waypoint import Waypoint, WaypointStatus
-from waypoints.orchestration import JourneyCoordinator
+from waypoints.orchestration import ExecutionController, JourneyCoordinator
 from waypoints.tui.screens.intervention import InterventionModal
 from waypoints.tui.utils import (
     format_token_count,
@@ -95,17 +93,6 @@ def _format_project_metrics(
     return " · ".join(parts) if parts else ""
 
 
-class ExecutionState(Enum):
-    """State of waypoint execution."""
-
-    IDLE = "idle"
-    RUNNING = "running"
-    PAUSE_PENDING = "pause_pending"  # Pause requested, finishing current waypoint
-    PAUSED = "paused"
-    DONE = "done"
-    INTERVENTION = "intervention"
-
-
 # Regex patterns for markdown
 CODE_BLOCK_PATTERN = re.compile(r"```(\w+)?\n(.*?)```", re.DOTALL)
 BOLD_PATTERN = re.compile(r"\*\*(.+?)\*\*")
@@ -1256,10 +1243,9 @@ def __init__(
             project=project,
             flight_plan=flight_plan,
         )
+        self.execution_controller = ExecutionController(self.coordinator)
 
         self._executor: WaypointExecutor | None = None
-        self._current_intervention: Intervention | None = None
-        self._additional_iterations: int = 0
 
         # Timer tracking
         self._execution_start: datetime | None = None
@@ -1276,12 +1262,12 @@ def waypoints_app(self) -> "WaypointsApp":
     @property
     def current_waypoint(self) -> Waypoint | None:
         """Get the currently selected waypoint (delegated to coordinator)."""
-        return self.coordinator.current_waypoint
+        return self.execution_controller.current_waypoint
 
     @current_waypoint.setter
     def current_waypoint(self, waypoint: Waypoint | None) -> None:
         """Set the currently selected waypoint (delegated to coordinator)."""
-        self.coordinator.current_waypoint = waypoint
+        self.execution_controller.current_waypoint = waypoint
 
     def compose(self) -> ComposeResult:
         yield StatusHeader()
@@ -1313,14 +1299,17 @@ def on_mount(self) -> None:
         # Reflect initial state in status bar
         self._update_status_bar(self.execution_state)
 
-        # Clean up stale IN_PROGRESS from previous sessions (via coordinator)
-        self.coordinator.reset_stale_in_progress()
+        # Clean up stale IN_PROGRESS from previous sessions and select next waypoint
+        self.execution_controller.initialize()
 
         # Update waypoint list with cost data
         self._refresh_waypoint_list()
 
-        # Select resumable waypoint (failed/in-progress) or first pending
-        self._select_next_waypoint(include_in_progress=True)
+        # Sync UI with selected waypoint (if any)
+        self._sync_current_waypoint_details()
+
+        # Sync execution state after initialization
+        self.execution_state = self.execution_controller.execution_state
 
         # Update status bar with initial state (watcher doesn't fire on mount)
         self._update_status_bar(self.execution_state)
@@ -1424,6 +1413,24 @@ def _get_waypoint_tokens(self, waypoint_id: str) -> tuple[int, int] | None:
             return tokens_by_waypoint.get(waypoint_id)
         return None
 
+    def _sync_current_waypoint_details(
+        self, active_waypoint_id: str | None = None
+    ) -> None:
+        """Show the current waypoint, with its cost and tokens, in the detail panel.
+
+        Does nothing when no waypoint is currently selected.
+        """
+        waypoint = self.current_waypoint
+        if not waypoint:
+            return
+        self.query_one("#waypoint-detail", WaypointDetailPanel).show_waypoint(
+            waypoint,
+            project=self.project,
+            active_waypoint_id=active_waypoint_id,
+            cost=self._get_waypoint_cost(waypoint.id),
+            tokens=self._get_waypoint_tokens(waypoint.id),
+        )
+
     def _get_completion_status(self) -> tuple[bool, int, int, int]:
         """Analyze waypoint completion status.
 
@@ -1449,40 +1456,19 @@ def _select_next_waypoint(self, include_in_progress: bool = False) -> None:
             "=== Selection round (include_in_progress=%s) ===", include_in_progress
         )
 
-        # Delegate selection to coordinator
-        wp = self.coordinator.select_next_waypoint(include_failed=include_in_progress)
+        # Delegate selection to execution controller
+        wp = self.execution_controller.select_next_waypoint(
+            include_in_progress=include_in_progress
+        )
 
         if wp:
             # Waypoint selected - update UI
-            logger.info("SELECTED %s via coordinator", wp.id)
-            detail_panel = self.query_one("#waypoint-detail", WaypointDetailPanel)
-            cost = self._get_waypoint_cost(wp.id)
-            tokens = self._get_waypoint_tokens(wp.id)
-            detail_panel.show_waypoint(
-                wp,
-                project=self.project,
-                active_waypoint_id=None,
-                cost=cost,
-                tokens=tokens,
-            )
+            logger.info("SELECTED %s via execution controller", wp.id)
+            self._sync_current_waypoint_details()
             return
 
-        # No eligible waypoints found - check why
-        all_complete, pending, failed, blocked = self._get_completion_status()
-
-        if all_complete:
-            logger.info("All waypoints complete - DONE")
-            self.execution_state = ExecutionState.DONE
-        elif blocked > 0:
-            logger.info("Waypoints blocked by %d failed waypoint(s)", failed)
-            self.execution_state = ExecutionState.PAUSED
-        elif pending > 0:
-            logger.info("%d waypoints pending with unmet dependencies", pending)
-            self.execution_state = ExecutionState.PAUSED
-        else:
-            # Only failed waypoints remain
-            logger.info("Only failed waypoints remain (%d)", failed)
-            self.execution_state = ExecutionState.PAUSED
+        # No eligible waypoints found - sync state from controller
+        self.execution_state = self.execution_controller.execution_state
 
     def _get_state_message(self, state: ExecutionState) -> str:
         """Get the status bar message for a given execution state."""
@@ -1611,89 +1597,20 @@ def watch_execution_state(self, state: ExecutionState) -> None:
 
     def action_start(self) -> None:
         """Start or resume waypoint execution."""
-        # Check if user has selected a specific failed waypoint to retry
         list_panel = self.query_one("#waypoint-list", WaypointListPanel)
         selected = list_panel.selected_waypoint
 
-        if selected and selected.status == WaypointStatus.FAILED:
-            # User wants to retry this specific failed waypoint
-            selected.status = WaypointStatus.PENDING
-            self._save_flight_plan()
-            self._refresh_waypoint_list()
-            self.current_waypoint = selected
-            self.notify(f"Retrying {selected.id}")
-
-            # Update detail panel to show this waypoint
-            detail_panel = self.query_one("#waypoint-detail", WaypointDetailPanel)
-            cost = self._get_waypoint_cost(selected.id)
-            tokens = self._get_waypoint_tokens(selected.id)
-            detail_panel.show_waypoint(
-                selected,
-                project=self.project,
-                active_waypoint_id=None,
-                cost=cost,
-                tokens=tokens,
-            )
-
-            # Transition journey state and execute
-            journey = self.project.journey
-            if journey and journey.state in (
-                JourneyState.FLY_PAUSED,
-                JourneyState.FLY_INTERVENTION,
-            ):
-                self.coordinator.transition(JourneyState.FLY_EXECUTING)
-            elif journey and journey.state == JourneyState.CHART_REVIEW:
-                self.coordinator.transition(JourneyState.FLY_READY)
-                self.coordinator.transition(JourneyState.FLY_EXECUTING)
-            else:
-                self.coordinator.transition(JourneyState.FLY_EXECUTING)
-            self.execution_state = ExecutionState.RUNNING
-            self._execute_current_waypoint()
-            return
+        directive = self.execution_controller.start(selected)
+        if directive.message:
+            self.notify(directive.message)
 
-        if self.execution_state == ExecutionState.DONE:
-            # Check if there are actually failed waypoints to retry
-            _, _, failed, blocked = self._get_completion_status()
-            if failed > 0 or blocked > 0:
-                self.notify("Select a failed waypoint and press 'r' to retry")
-            else:
-                self.notify("All waypoints complete!")
-            return
+        self.execution_state = self.execution_controller.execution_state
 
-        # Handle resume from paused state
-        if self.execution_state == ExecutionState.PAUSED:
-            # Find waypoint to resume (in_progress first, then pending)
-            self._select_next_waypoint(include_in_progress=True)
-            if not self.current_waypoint:
-                # Check if there are failed waypoints user could retry
-                _, _, failed, blocked = self._get_completion_status()
-                if failed > 0:
-                    self.notify("Select a failed waypoint and press 'r' to retry")
-                else:
-                    self.notify("No waypoints to resume")
-                return
-            # Transition journey state: FLY_PAUSED -> FLY_EXECUTING
-            self.coordinator.transition(JourneyState.FLY_EXECUTING)
-            self.execution_state = ExecutionState.RUNNING
-            self._execute_current_waypoint()
+        if directive.action != "execute":
             return
 
-        if not self.current_waypoint:
-            self._select_next_waypoint()
-            if not self.current_waypoint:
-                self.notify("No waypoints ready to execute")
-                return
-
-        # Transition journey state to FLY_EXECUTING
-        # Handle case where we came from Chart or Land (CHART_REVIEW/LAND_REVIEW)
-        journey = self.project.journey
-        if journey and journey.state in (
-            JourneyState.CHART_REVIEW,
-            JourneyState.LAND_REVIEW,
-        ):
-            self.coordinator.transition(JourneyState.FLY_READY)
-        self.coordinator.transition(JourneyState.FLY_EXECUTING)
-        self.execution_state = ExecutionState.RUNNING
+        self._refresh_waypoint_list()
+        self._sync_current_waypoint_details()
         self._execute_current_waypoint()
 
     def action_toggle_host_validations(self) -> None:
@@ -1714,8 +1631,8 @@ def action_toggle_host_validations(self) -> None:
 
     def action_pause(self) -> None:
         """Pause execution after current waypoint."""
-        if self.execution_state == ExecutionState.RUNNING:
-            self.execution_state = ExecutionState.PAUSE_PENDING
+        if self.execution_controller.request_pause():
+            self.execution_state = self.execution_controller.execution_state
             if self._executor:
                 self._executor.cancel()
                 # Log pause request
@@ -1815,10 +1732,6 @@ def _execute_current_waypoint(self) -> None:
         detail_panel = self.query_one("#waypoint-detail", WaypointDetailPanel)
         log = detail_panel.execution_log
 
-        # Update status to IN_PROGRESS
-        self.current_waypoint.status = WaypointStatus.IN_PROGRESS
-        self._save_flight_plan()
-
         # Mark this as the active waypoint for output tracking
         detail_panel._showing_output_for = self.current_waypoint.id
 
@@ -1839,16 +1752,12 @@ def _execute_current_waypoint(self) -> None:
         # Calculate max iterations (default + any additional from retry)
         from waypoints.fly.executor import MAX_ITERATIONS
 
-        max_iters = MAX_ITERATIONS + self._additional_iterations
-        self._additional_iterations = 0  # Reset for next execution
-
         # Create executor with progress callback
-        self._executor = WaypointExecutor(
-            project=self.project,
+        self._executor = self.execution_controller.build_executor(
             waypoint=self.current_waypoint,
             spec=self.spec,
             on_progress=self._on_execution_progress,
-            max_iterations=max_iters,
+            max_iterations=MAX_ITERATIONS,
             metrics_collector=self.waypoints_app.metrics_collector,
             host_validations_enabled=self.waypoints_app.host_validations_enabled,
         )
@@ -1973,98 +1882,64 @@ def _handle_execution_result(self, result: ExecutionResult | None) -> None:
         detail_panel = self.query_one("#waypoint-detail", WaypointDetailPanel)
         log = detail_panel.execution_log
 
+        completed_waypoint = self.current_waypoint
+
         # Update header cost display after execution
         self.waypoints_app.update_header_cost()
 
         # Update project metrics (cost and time) after execution
         self._update_project_metrics()
 
-        if self.current_waypoint:
-            cost = self._get_waypoint_cost(self.current_waypoint.id)
-            tokens = self._get_waypoint_tokens(self.current_waypoint.id)
+        if completed_waypoint:
+            cost = self._get_waypoint_cost(completed_waypoint.id)
+            tokens = self._get_waypoint_tokens(completed_waypoint.id)
             detail_panel.update_metrics(cost, tokens)
 
-        if result == ExecutionResult.SUCCESS:
-            # Mark complete
-            if self.current_waypoint:
-                self.current_waypoint.status = WaypointStatus.COMPLETE
-                self.current_waypoint.completed_at = datetime.now(UTC)
-                log.log_success(f"Waypoint {self.current_waypoint.id} complete!")
-
-                self._live_criteria_completed = (
-                    ExecutionLogReader.get_completed_criteria(
-                        self.project,
-                        self.current_waypoint.id,
-                    )
-                )
+        directive = self.execution_controller.handle_execution_result(result)
+        self.execution_state = self.execution_controller.execution_state
 
-                # Show verification summary
-                self._log_verification_summary(self.current_waypoint, log)
+        if directive.completed:
+            log.log_success(f"Waypoint {directive.completed.id} complete!")
 
-                # Check if parent epic should be auto-completed
-                self._check_parent_completion(self.current_waypoint)
+            self._live_criteria_completed = ExecutionLogReader.get_completed_criteria(
+                self.project,
+                directive.completed.id,
+            )
 
-                self._save_flight_plan()
+            # Show verification summary
+            self._log_verification_summary(directive.completed, log)
 
-                # Commit waypoint completion (validates receipt first)
-                self._commit_waypoint(self.current_waypoint)
+            # Commit waypoint completion (validates receipt first)
+            self._commit_waypoint(directive.completed)
 
-                # Reset live criteria tracking for next waypoint
-                self._live_criteria_completed = set()
+            # Reset live criteria tracking for next waypoint
+            self._live_criteria_completed = set()
 
             detail_panel.clear_iteration()
             self._refresh_waypoint_list()
 
-            # Move to next waypoint if not paused/pausing
-            if self.execution_state == ExecutionState.RUNNING:
-                self._select_next_waypoint()
-                if self.current_waypoint:
-                    self._execute_current_waypoint()
-                else:
-                    # _select_next_waypoint sets execution_state appropriately
-                    # Only transition to LAND_REVIEW if truly all complete
-                    # (state is DONE). mypy doesn't track state modification.
-                    if self.execution_state == ExecutionState.DONE:  # type: ignore[comparison-overlap]
-                        self.coordinator.transition(JourneyState.LAND_REVIEW)
-                        self._switch_to_land_screen()
-            elif self.execution_state == ExecutionState.PAUSE_PENDING:
-                # Pause was requested, now actually pause
-                # Transition journey state: FLY_EXECUTING -> FLY_PAUSED
-                self.coordinator.transition(JourneyState.FLY_PAUSED)
-                self.execution_state = ExecutionState.PAUSED
+        if directive.action == "execute":
+            self._sync_current_waypoint_details()
+            self._execute_current_waypoint()
+            return
 
-        elif result == ExecutionResult.INTERVENTION_NEEDED:
-            log.log_error("Human intervention needed")
-            self._mark_waypoint_failed()
-            # Transition journey state: FLY_EXECUTING -> FLY_INTERVENTION
-            self.coordinator.transition(JourneyState.FLY_INTERVENTION)
-            self.execution_state = ExecutionState.INTERVENTION
-            self.query_one(StatusHeader).set_error()
-            self.notify("Waypoint needs human intervention", severity="warning")
-
-        elif result == ExecutionResult.MAX_ITERATIONS:
-            log.log_error("Max iterations reached without completion")
-            self._mark_waypoint_failed()
-            # Transition journey state: FLY_EXECUTING -> FLY_INTERVENTION
-            self.coordinator.transition(JourneyState.FLY_INTERVENTION)
-            self.execution_state = ExecutionState.INTERVENTION
-            self.query_one(StatusHeader).set_error()
-            self.notify("Max iterations reached", severity="error")
-
-        elif result == ExecutionResult.CANCELLED:
-            log.write_log("Execution cancelled")
-            # Transition journey state: FLY_EXECUTING -> FLY_PAUSED
-            self.coordinator.transition(JourneyState.FLY_PAUSED)
-            self.execution_state = ExecutionState.PAUSED
-
-        else:  # FAILED or None
-            log.log_error("Execution failed")
-            self._mark_waypoint_failed()
-            # Transition journey state: FLY_EXECUTING -> FLY_INTERVENTION
-            self.coordinator.transition(JourneyState.FLY_INTERVENTION)
-            self.execution_state = ExecutionState.INTERVENTION
+        if directive.action == "land":
+            self._switch_to_land_screen()
+            return
+
+        if directive.action == "intervention":
+            log.log_error(directive.message or "Human intervention needed")
             self.query_one(StatusHeader).set_error()
-            self.notify("Waypoint execution failed", severity="error")
+            if directive.message:
+                self.notify(directive.message, severity="warning")
+            self._refresh_waypoint_list()
+            return
+
+        if directive.action == "pause":
+            if result == ExecutionResult.CANCELLED:
+                log.write_log("Execution cancelled")
+            self._refresh_waypoint_list()
+            return
 
     def _handle_intervention(self, intervention: Intervention) -> None:
         """Handle an intervention request by showing the modal."""
@@ -2076,15 +1951,9 @@ def _handle_intervention(self, intervention: Intervention) -> None:
         log.log_error(f"Intervention needed: {type_label}")
         log.write_log(intervention.error_summary[:500])
 
-        # Store the intervention for retry handling
-        self._current_intervention = intervention
-
-        # Mark waypoint as failed (can be retried)
-        self._mark_waypoint_failed()
-
-        # Transition journey state: FLY_EXECUTING -> FLY_INTERVENTION
-        self.coordinator.transition(JourneyState.FLY_INTERVENTION)
-        self.execution_state = ExecutionState.INTERVENTION
+        # Record the intervention and update state
+        self.execution_controller.prepare_intervention(intervention)
+        self.execution_state = self.execution_controller.execution_state
         self.query_one(StatusHeader).set_error()
 
         # Show the intervention modal
@@ -2118,82 +1987,38 @@ def _on_intervention_result(self, result: InterventionResult | None) -> None:
             )
 
         if result.action == InterventionAction.RETRY:
-            # Retry with additional iterations
             log.write_log(
                 f"Retrying with {result.additional_iterations} additional iterations"
             )
-            self._additional_iterations = result.additional_iterations
-
-            # Reset waypoint status for retry
-            if self.current_waypoint:
-                self.current_waypoint.status = WaypointStatus.IN_PROGRESS
-                self._save_flight_plan()
-                self._refresh_waypoint_list()
-
-            # Transition: FLY_INTERVENTION -> FLY_EXECUTING
-            self.coordinator.transition(JourneyState.FLY_EXECUTING)
-            self.execution_state = ExecutionState.RUNNING
-            self.query_one(StatusHeader).set_normal()
-            self._execute_current_waypoint()
-
         elif result.action == InterventionAction.SKIP:
-            # Skip this waypoint and move to next
             log.write_log("Skipping waypoint")
-            if self.current_waypoint:
-                self.current_waypoint.status = WaypointStatus.SKIPPED
-                self._save_flight_plan()
-                self._refresh_waypoint_list()
-
-            # Transition: FLY_INTERVENTION -> FLY_PAUSED -> FLY_EXECUTING
-            self.coordinator.transition(JourneyState.FLY_PAUSED)
-            self.coordinator.transition(JourneyState.FLY_EXECUTING)
-            self.execution_state = ExecutionState.RUNNING
-            self.query_one(StatusHeader).set_normal()
-            self._select_next_waypoint()
-            if self.current_waypoint:
-                self._execute_current_waypoint()
-            else:
-                # _select_next_waypoint sets execution_state appropriately
-                # Only transition to LAND_REVIEW and notify if truly all complete
-                if self.execution_state == ExecutionState.DONE:
-                    self.coordinator.transition(JourneyState.LAND_REVIEW)
-                    self.notify("All waypoints complete!")
-                    self._switch_to_land_screen()
-
         elif result.action == InterventionAction.EDIT:
-            # Open waypoint editor - for now, just notify
             log.write_log("Edit waypoint requested")
             self.notify(
                 "Edit waypoint in flight plan, then press 'r' to retry",
                 severity="information",
             )
-            # Stay in intervention state until user edits and retries
-            # Transition: FLY_INTERVENTION -> FLY_PAUSED
-            self.coordinator.transition(JourneyState.FLY_PAUSED)
-            self.execution_state = ExecutionState.PAUSED
-            self.query_one(StatusHeader).set_normal()
-
         elif result.action == InterventionAction.ROLLBACK:
-            # Rollback to last safe tag
             log.write_log("Rolling back to last safe tag")
             self._rollback_to_safe_tag(result.rollback_tag)
-            # Transition: FLY_INTERVENTION -> FLY_READY
-            self.coordinator.transition(JourneyState.FLY_PAUSED)
-            self.coordinator.transition(JourneyState.FLY_READY)
-            self.execution_state = ExecutionState.IDLE
-            self.query_one(StatusHeader).set_normal()
-
         elif result.action == InterventionAction.ABORT:
-            # Abort execution
             log.write_log("Execution aborted")
-            # Transition: FLY_INTERVENTION -> FLY_PAUSED
-            self.coordinator.transition(JourneyState.FLY_PAUSED)
-            self.execution_state = ExecutionState.PAUSED
-            self.query_one(StatusHeader).set_normal()
             self.notify("Execution aborted")
 
-        # Clear the current intervention
-        self._current_intervention = None
+        directive = self.execution_controller.resolve_intervention(result)
+        if directive.message:
+            self.notify(directive.message)
+        self.execution_state = self.execution_controller.execution_state
+        self.query_one(StatusHeader).set_normal()
+
+        self._refresh_waypoint_list()
+
+        if directive.action == "execute":
+            self._sync_current_waypoint_details()
+            self._execute_current_waypoint()
+        elif directive.action == "land":
+            self.notify("All waypoints complete!")
+            self._switch_to_land_screen()
 
     def _rollback_to_safe_tag(self, tag: str | None) -> None:
         """Rollback git to the specified tag or find the last safe one."""
@@ -2231,14 +2056,6 @@ def _rollback_to_safe_tag(self, tag: str | None) -> None:
         else:
             self.notify(f"Rollback failed: {result.message}", severity="error")
 
-    def _mark_waypoint_failed(self) -> None:
-        """Mark the current waypoint as failed and save."""
-        if self.current_waypoint:
-            self.current_waypoint.status = WaypointStatus.FAILED
-            self._save_flight_plan()
-            # Update the tree display
-            self._refresh_waypoint_list()
-
     def _refresh_waypoint_list(
         self, execution_state: ExecutionState | None = None
     ) -> None:
diff --git a/tests/test_execution_controller.py b/tests/test_execution_controller.py
new file mode 100644
index 0000000..6e5c168
--- /dev/null
+++ b/tests/test_execution_controller.py
@@ -0,0 +1,170 @@
+"""Tests for ExecutionController behavior."""
+
+from __future__ import annotations
+
+from pathlib import Path
+
+import pytest
+
+from waypoints.fly.executor import ExecutionResult
+from waypoints.fly.intervention import Intervention, InterventionAction, InterventionResult, InterventionType
+from waypoints.fly.state import ExecutionState
+from waypoints.models.flight_plan import FlightPlan
+from waypoints.models.journey import Journey, JourneyState
+from waypoints.models.waypoint import Waypoint, WaypointStatus
+from waypoints.orchestration import ExecutionController, JourneyCoordinator
+
+
+class DummyProject:
+    """Minimal project stub for execution controller tests."""
+
+    def __init__(self, *, journey_state: JourneyState) -> None:
+        self.slug = "test-project"
+        self.journey = Journey(state=journey_state, project_slug=self.slug)
+
+    def get_path(self) -> Path:
+        return Path("/tmp/test-project")
+
+    def save(self) -> None:
+        return None
+
+    def transition_journey(self, target: JourneyState, reason: str | None = None) -> None:
+        if self.journey is None:
+            self.journey = Journey.new(self.slug)
+        self.journey = self.journey.transition(target, reason=reason)
+
+
+def make_controller(
+    *,
+    journey_state: JourneyState,
+    flight_plan: FlightPlan,
+    current_waypoint: Waypoint | None = None,
+) -> ExecutionController:
+    """Build a controller over a stub project with a preset current waypoint."""
+    stub = DummyProject(journey_state=journey_state)
+    journey_coordinator = JourneyCoordinator(project=stub, flight_plan=flight_plan)
+    journey_coordinator.current_waypoint = current_waypoint
+    return ExecutionController(journey_coordinator)
+
+
+def test_start_retries_failed_selected() -> None:
+    flight_plan = FlightPlan()
+    failed = Waypoint(
+        id="WP-001",
+        title="Failed",
+        objective="Fix it",
+        status=WaypointStatus.FAILED,
+    )
+    flight_plan.add_waypoint(failed)
+
+    controller = make_controller(
+        journey_state=JourneyState.FLY_PAUSED,
+        flight_plan=flight_plan,
+        current_waypoint=failed,
+    )
+
+    directive = controller.start(failed)
+
+    assert directive.action == "execute"
+    assert controller.execution_state == ExecutionState.RUNNING
+    assert failed.status == WaypointStatus.PENDING
+    assert controller.coordinator.project.journey.state == JourneyState.FLY_EXECUTING
+
+
+def test_handle_success_transitions_to_land() -> None:
+    flight_plan = FlightPlan()
+    waypoint = Waypoint(
+        id="WP-001",
+        title="Only",
+        objective="Complete",
+        status=WaypointStatus.PENDING,
+    )
+    flight_plan.add_waypoint(waypoint)
+
+    controller = make_controller(
+        journey_state=JourneyState.FLY_EXECUTING,
+        flight_plan=flight_plan,
+        current_waypoint=waypoint,
+    )
+    controller.execution_state = ExecutionState.RUNNING
+
+    directive = controller.handle_execution_result(ExecutionResult.SUCCESS)
+
+    assert directive.action == "land"
+    assert waypoint.status == WaypointStatus.COMPLETE
+    assert waypoint.completed_at is not None
+    assert controller.coordinator.project.journey.state == JourneyState.LAND_REVIEW
+
+
+def test_prepare_intervention_marks_failed() -> None:
+    flight_plan = FlightPlan()
+    waypoint = Waypoint(
+        id="WP-002",
+        title="Needs help",
+        objective="Intervene",
+        status=WaypointStatus.PENDING,
+    )
+    flight_plan.add_waypoint(waypoint)
+
+    controller = make_controller(
+        journey_state=JourneyState.FLY_EXECUTING,
+        flight_plan=flight_plan,
+        current_waypoint=waypoint,
+    )
+
+    intervention = Intervention(
+        type=InterventionType.EXECUTION_ERROR,
+        waypoint=waypoint,
+        iteration=1,
+        max_iterations=10,
+        error_summary="boom",
+    )
+
+    directive = controller.prepare_intervention(intervention)
+
+    assert directive.action == "intervention"
+    assert controller.execution_state == ExecutionState.INTERVENTION
+    assert waypoint.status == WaypointStatus.FAILED
+    assert controller.coordinator.project.journey.state == JourneyState.FLY_INTERVENTION
+
+
+def test_resolve_intervention_skip_selects_next() -> None:
+    flight_plan = FlightPlan()
+    first = Waypoint(
+        id="WP-001",
+        title="First",
+        objective="Skip",
+        status=WaypointStatus.PENDING,
+    )
+    second = Waypoint(
+        id="WP-002",
+        title="Second",
+        objective="Next",
+        status=WaypointStatus.PENDING,
+    )
+    flight_plan.add_waypoint(first)
+    flight_plan.add_waypoint(second)
+
+    controller = make_controller(
+        journey_state=JourneyState.FLY_INTERVENTION,
+        flight_plan=flight_plan,
+        current_waypoint=first,
+    )
+
+    intervention = Intervention(
+        type=InterventionType.EXECUTION_ERROR,
+        waypoint=first,
+        iteration=2,
+        max_iterations=10,
+        error_summary="skip",
+    )
+    controller.prepare_intervention(intervention)
+
+    directive = controller.resolve_intervention(
+        InterventionResult(action=InterventionAction.SKIP)
+    )
+
+    assert first.status == WaypointStatus.SKIPPED
+    assert directive.action == "execute"
+    assert directive.waypoint == second
+    assert controller.execution_state == ExecutionState.RUNNING
diff --git a/tests/test_fly_screen.py b/tests/test_fly_screen.py
index 1d9a168..0f8f1ca 100644
--- a/tests/test_fly_screen.py
+++ b/tests/test_fly_screen.py
@@ -4,8 +4,9 @@
 
 from waypoints.models.flight_plan import FlightPlan
 from waypoints.models.waypoint import Waypoint, WaypointStatus
-from waypoints.orchestration import JourneyCoordinator
-from waypoints.tui.screens.fly import ExecutionState, FlyScreen
+from waypoints.orchestration import ExecutionController, JourneyCoordinator
+from waypoints.fly.state import ExecutionState
+from waypoints.tui.screens.fly import FlyScreen
 
 
 def make_test_screen(flight_plan: FlightPlan) -> FlyScreen:
@@ -28,6 +29,7 @@ def get_path(self):
         project=MockProject(),  # type: ignore
         flight_plan=flight_plan,
     )
+    screen.execution_controller = ExecutionController(screen.coordinator)
     return screen
 
 

From 74edefb2c263132167e5e90116444e87d22b78ec Mon Sep 17 00:00:00 2001
From: Kulesh Shanmugasundaram <164083+kulesh@users.noreply.github.com>
Date: Thu, 5 Feb 2026 19:35:23 -0500
Subject: [PATCH 3/8] feat(testing): add flight test harness and fixtures

---
 docs/testing-strategy.md                      |  29 +++-
 .../L0-hello-world/expected/min_files.txt     |   4 +
 .../L0-hello-world/expected/smoke_test.sh     |   6 +
 flight-tests/L0-hello-world/input/idea.txt    |   1 +
 .../L1-todo-cli/expected/min_files.txt        |   4 +
 .../L1-todo-cli/expected/smoke_test.sh        |   5 +
 flight-tests/L1-todo-cli/input/idea.txt       |   1 +
 .../L2-rest-api/expected/min_files.txt        |   4 +
 .../L2-rest-api/expected/smoke_test.sh        |   5 +
 flight-tests/L2-rest-api/input/idea.txt       |   1 +
 scripts/run_flight_test.py                    | 138 ++++++++++++++++++
 tests/test_flight_test_runner.py              |  82 +++++++++++
 12 files changed, 273 insertions(+), 7 deletions(-)
 create mode 100644 flight-tests/L0-hello-world/expected/min_files.txt
 create mode 100755 flight-tests/L0-hello-world/expected/smoke_test.sh
 create mode 100644 flight-tests/L0-hello-world/input/idea.txt
 create mode 100644 flight-tests/L1-todo-cli/expected/min_files.txt
 create mode 100755 flight-tests/L1-todo-cli/expected/smoke_test.sh
 create mode 100644 flight-tests/L1-todo-cli/input/idea.txt
 create mode 100644 flight-tests/L2-rest-api/expected/min_files.txt
 create mode 100755 flight-tests/L2-rest-api/expected/smoke_test.sh
 create mode 100644 flight-tests/L2-rest-api/input/idea.txt
 create mode 100644 scripts/run_flight_test.py
 create mode 100644 tests/test_flight_test_runner.py

diff --git a/docs/testing-strategy.md b/docs/testing-strategy.md
index 097b57e..501589e 100644
--- a/docs/testing-strategy.md
+++ b/docs/testing-strategy.md
@@ -55,23 +55,38 @@ A set of reference projects with increasing complexity that Waypoints should be
 flight-tests/
 ├── L0-hello-world/
 │   ├── input/
-│   │   └── idea.txt              # "A CLI that prints hello world"
+│   │   └── idea.txt              # "A Python CLI named hello_world..."
 │   ├── expected/
 │   │   ├── min_files.txt         # Minimum expected files
-│   │   └── smoke_test.sh         # ./hello should print "Hello"
+│   │   └── smoke_test.sh         # uv run python -m hello_world
 │   └── results/                  # Generated on each run
-│       └── 2026-01-08-run1/
+│       └── 2026-02-05-120000/
 ├── L1-todo-cli/
 │   ├── input/
 │   │   └── idea.txt
 │   ├── expected/
 │   │   ├── min_files.txt
-│   │   ├── acceptance_criteria.yaml
+│   │   └── smoke_test.sh
+│   └── results/
+├── L2-rest-api/
+│   ├── input/
+│   │   └── idea.txt
+│   ├── expected/
+│   │   ├── min_files.txt
 │   │   └── smoke_test.sh
 │   └── results/
 ...
 ```
 
+Run a flight test against an existing generated project:
+
+```bash
+uv run python scripts/run_flight_test.py flight-tests/L0-hello-world --project-path /path/to/generated/project
+```
+
+Each run writes a `meta.json` summary and, when the smoke test runs, a
+`smoke_test.log` to the results directory for auditability and regression tracking.
+
 ### Flight Test Metrics
 
 For each flight test run, capture:
@@ -294,9 +309,9 @@ def verify_artifacts(project: Project) -> ArtifactReport:
 ## Implementation Roadmap
 
 ### Phase 1: Foundation
-1. Create `flight-tests/` directory structure
-2. Implement L0-L1 flight tests (hello world, todo CLI)
-3. Create smoke test runner
+1. Create `flight-tests/` directory structure (done)
+2. Implement L0-L2 flight tests (hello world, todo CLI, REST API) (done)
+3. Create smoke test runner (`scripts/run_flight_test.py`) (done)
 
 ### Phase 2: Quality Gates
 4. Implement LLM-as-judge for idea briefs and specs
diff --git a/flight-tests/L0-hello-world/expected/min_files.txt b/flight-tests/L0-hello-world/expected/min_files.txt
new file mode 100644
index 0000000..0a4e2a8
--- /dev/null
+++ b/flight-tests/L0-hello-world/expected/min_files.txt
@@ -0,0 +1,4 @@
+README.md
+pyproject.toml
+src/hello_world/__init__.py
+src/hello_world/__main__.py
diff --git a/flight-tests/L0-hello-world/expected/smoke_test.sh b/flight-tests/L0-hello-world/expected/smoke_test.sh
new file mode 100755
index 0000000..f51e8a4
--- /dev/null
+++ b/flight-tests/L0-hello-world/expected/smoke_test.sh
@@ -0,0 +1,6 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+uv sync --quiet
+output=$(uv run python -m hello_world)
+echo "$output" | grep -q "Hello, World"
diff --git a/flight-tests/L0-hello-world/input/idea.txt b/flight-tests/L0-hello-world/input/idea.txt
new file mode 100644
index 0000000..0181c04
--- /dev/null
+++ b/flight-tests/L0-hello-world/input/idea.txt
@@ -0,0 +1 @@
+Build a Python CLI named hello_world that prints "Hello, World!" when run.
diff --git a/flight-tests/L1-todo-cli/expected/min_files.txt b/flight-tests/L1-todo-cli/expected/min_files.txt
new file mode 100644
index 0000000..7868809
--- /dev/null
+++ b/flight-tests/L1-todo-cli/expected/min_files.txt
@@ -0,0 +1,4 @@
+README.md
+pyproject.toml
+src/todo_cli/__init__.py
+src/todo_cli/__main__.py
diff --git a/flight-tests/L1-todo-cli/expected/smoke_test.sh b/flight-tests/L1-todo-cli/expected/smoke_test.sh
new file mode 100755
index 0000000..7ace12a
--- /dev/null
+++ b/flight-tests/L1-todo-cli/expected/smoke_test.sh
@@ -0,0 +1,5 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+uv sync --quiet
+uv run python -m todo_cli --help >/dev/null
diff --git a/flight-tests/L1-todo-cli/input/idea.txt b/flight-tests/L1-todo-cli/input/idea.txt
new file mode 100644
index 0000000..d3ca1fb
--- /dev/null
+++ b/flight-tests/L1-todo-cli/input/idea.txt
@@ -0,0 +1 @@
+Build a Python CLI named todo_cli with commands add/list/done and store tasks in a local JSON file.
diff --git a/flight-tests/L2-rest-api/expected/min_files.txt b/flight-tests/L2-rest-api/expected/min_files.txt
new file mode 100644
index 0000000..f7959ec
--- /dev/null
+++ b/flight-tests/L2-rest-api/expected/min_files.txt
@@ -0,0 +1,4 @@
+README.md
+pyproject.toml
+src/todo_api/__init__.py
+src/todo_api/__main__.py
diff --git a/flight-tests/L2-rest-api/expected/smoke_test.sh b/flight-tests/L2-rest-api/expected/smoke_test.sh
new file mode 100755
index 0000000..f35f5d6
--- /dev/null
+++ b/flight-tests/L2-rest-api/expected/smoke_test.sh
@@ -0,0 +1,5 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+uv sync --quiet
+uv run python -m todo_api --help >/dev/null
diff --git a/flight-tests/L2-rest-api/input/idea.txt b/flight-tests/L2-rest-api/input/idea.txt
new file mode 100644
index 0000000..807c028
--- /dev/null
+++ b/flight-tests/L2-rest-api/input/idea.txt
@@ -0,0 +1 @@
+Build a Python REST API server named todo_api with CRUD endpoints for /todos and a /health endpoint. Use SQLite for storage.
diff --git a/scripts/run_flight_test.py b/scripts/run_flight_test.py
new file mode 100644
index 0000000..b07a859
--- /dev/null
+++ b/scripts/run_flight_test.py
@@ -0,0 +1,138 @@
+"""Flight test runner for Waypoints reference projects."""
+
+from __future__ import annotations
+
+import argparse
+import json
+import shutil
+import subprocess
+from dataclasses import dataclass
+from datetime import UTC, datetime
+from pathlib import Path
+
+
+@dataclass(frozen=True, slots=True)
+class SmokeResult:
+    """Outcome of smoke test execution."""
+
+    ran: bool  # False when the smoke script was skipped or does not exist
+    exit_code: int | None = None  # exit status of the bash process; None if not run
+    log_path: Path | None = None  # captured-output log file; None if not run
+
+
+def _timestamp() -> str:
+    return f"{datetime.now(tz=UTC):%Y-%m-%d-%H%M%S}"
+
+
+def _read_lines(path: Path) -> list[str]:
+    return [s for s in map(str.strip, path.read_text("utf-8").splitlines()) if s]
+
+
+def _run_smoke_test(script: Path, project_path: Path, results_dir: Path) -> SmokeResult:
+    """Run *script* with bash inside *project_path* and log its output."""
+    log_path = results_dir / "smoke_test.log"
+    # The child runs with cwd=project_path, so a relative script path would be
+    # looked up inside the project; resolve it against our own cwd first.
+    command = ["bash", str(script.resolve())]
+    result = subprocess.run(
+        command, cwd=project_path, capture_output=True, text=True, check=False
+    )
+    log_path.write_text(
+        f"$ bash {script}\n\n{result.stdout}\n{result.stderr}",
+        encoding="utf-8",
+    )
+    return SmokeResult(ran=True, exit_code=result.returncode, log_path=log_path)
+
+
+def _write_meta(results_dir: Path, data: dict[str, object]) -> None:
+    """Serialize *data* as indented JSON into ``meta.json`` in *results_dir*."""
+    (results_dir / "meta.json").write_text(json.dumps(data, indent=2), encoding="utf-8")
+
+
+def run(flight_test_dir: Path, project_path: Path, *, skip_smoke: bool) -> int:
+    """Validate *project_path* against a flight test; return 0 on pass, 1 on fail."""
+    input_dir = flight_test_dir / "input"
+    expected_dir = flight_test_dir / "expected"
+    idea_path = input_dir / "idea.txt"
+    min_files_path = expected_dir / "min_files.txt"
+    smoke_script = expected_dir / "smoke_test.sh"
+
+    if not idea_path.exists():
+        raise FileNotFoundError(f"Missing idea file: {idea_path}")
+    if not min_files_path.exists():
+        raise FileNotFoundError(f"Missing min_files file: {min_files_path}")
+    if not project_path.exists():
+        raise FileNotFoundError(f"Project path not found: {project_path}")
+
+    results_dir = flight_test_dir / "results" / _timestamp()
+    started_at = datetime.now(UTC)
+    results_dir.mkdir(parents=True, exist_ok=True)
+
+    shutil.copy(idea_path, results_dir / "idea.txt")
+
+    required_files = _read_lines(min_files_path)
+    missing = [
+        str(path)
+        for path in required_files
+        if not (project_path / path).exists()
+    ]
+
+    smoke_result = SmokeResult(ran=False)  # stays as-is when skipped or no script
+    if smoke_script.exists() and not skip_smoke:
+        smoke_result = _run_smoke_test(smoke_script, project_path, results_dir)
+
+    success = not missing and (
+        not smoke_result.ran or smoke_result.exit_code == 0
+    )
+
+    _write_meta(
+        results_dir,
+        {
+            "flight_test": flight_test_dir.name,
+            "project_path": str(project_path),
+            "started_at": started_at.isoformat(),
+            "completed_at": datetime.now(UTC).isoformat(),
+            "required_files": required_files,
+            "missing_files": missing,
+            "smoke_test": {
+                "ran": smoke_result.ran,
+                "exit_code": smoke_result.exit_code,
+                "log_path": str(smoke_result.log_path) if smoke_result.log_path else None,
+            },
+            "success": success,
+        },
+    )
+
+    summary = "PASS" if success else "FAIL"
+    print(f"{flight_test_dir.name}: {summary}")
+    if missing:
+        print("Missing files:")
+        for path in missing:
+            print(f"  - {path}")
+    if smoke_result.ran and smoke_result.exit_code != 0:
+        print(f"Smoke test failed (exit {smoke_result.exit_code}). See {smoke_result.log_path}")
+
+    return 0 if success else 1
+
+
+def main() -> int:
+    """Parse command-line arguments and run the selected flight test."""
+    parser = argparse.ArgumentParser(description="Run a Waypoints flight test.")
+    parser.add_argument("flight_test", type=Path, help="Path to flight test directory")
+    parser.add_argument(
+        "--project-path",
+        type=Path,
+        required=True,
+        help="Path to generated project to validate",
+    )
+    parser.add_argument(
+        "--skip-smoke", action="store_true", help="Skip running smoke_test.sh"
+    )
+    options = parser.parse_args()
+    return run(
+        options.flight_test, options.project_path, skip_smoke=options.skip_smoke
+    )
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/tests/test_flight_test_runner.py b/tests/test_flight_test_runner.py
new file mode 100644
index 0000000..6224258
--- /dev/null
+++ b/tests/test_flight_test_runner.py
@@ -0,0 +1,82 @@
+"""Tests for the flight test runner."""
+
+from __future__ import annotations
+
+import json
+import sys
+from pathlib import Path
+
+ROOT = Path(__file__).resolve().parents[1]
+sys.path.append(str(ROOT))
+
+from scripts.run_flight_test import run
+
+
+def _setup_flight_test(root: Path, *, min_files: list[str]) -> Path:
+    flight_test = root / "flight-test"
+    (flight_test / "input").mkdir(parents=True)
+    (flight_test / "expected").mkdir(parents=True)
+
+    (flight_test / "input" / "idea.txt").write_text(
+        "A test idea", encoding="utf-8"
+    )
+    (flight_test / "expected" / "min_files.txt").write_text(
+        "\n".join(min_files) + "\n", encoding="utf-8"
+    )
+    return flight_test
+
+
+def _latest_results(results_dir: Path) -> Path:
+    results = sorted(results_dir.iterdir())
+    assert results
+    return results[-1]
+
+
+def test_runner_records_success(tmp_path: Path) -> None:
+    flight_test = _setup_flight_test(tmp_path, min_files=["README.md"])
+    project_path = tmp_path / "project"
+    project_path.mkdir()
+    (project_path / "README.md").write_text("ok", encoding="utf-8")
+
+    exit_code = run(flight_test, project_path, skip_smoke=True)
+
+    assert exit_code == 0
+    results_dir = _latest_results(flight_test / "results")
+    meta = json.loads((results_dir / "meta.json").read_text(encoding="utf-8"))
+    assert meta["success"] is True
+    assert (results_dir / "idea.txt").exists()
+
+
+def test_runner_detects_missing_files(tmp_path: Path) -> None:
+    flight_test = _setup_flight_test(tmp_path, min_files=["README.md", "missing.txt"])
+    project_path = tmp_path / "project"
+    project_path.mkdir()
+    (project_path / "README.md").write_text("ok", encoding="utf-8")
+
+    exit_code = run(flight_test, project_path, skip_smoke=True)
+
+    assert exit_code == 1
+    results_dir = _latest_results(flight_test / "results")
+    meta = json.loads((results_dir / "meta.json").read_text(encoding="utf-8"))
+    assert meta["success"] is False
+    assert "missing.txt" in meta["missing_files"]
+
+
+def test_runner_writes_smoke_log(tmp_path: Path) -> None:
+    flight_test = _setup_flight_test(tmp_path, min_files=["README.md"])
+    project_path = tmp_path / "project"
+    project_path.mkdir()
+    (project_path / "README.md").write_text("ok", encoding="utf-8")
+
+    smoke_script = flight_test / "expected" / "smoke_test.sh"
+    smoke_script.write_text("#!/usr/bin/env bash\nexit 0\n", encoding="utf-8")
+    smoke_script.chmod(0o755)
+
+    exit_code = run(flight_test, project_path, skip_smoke=False)
+
+    assert exit_code == 0
+    results_dir = _latest_results(flight_test / "results")
+    meta = json.loads((results_dir / "meta.json").read_text(encoding="utf-8"))
+    assert meta["smoke_test"]["ran"] is True
+    log_path = Path(meta["smoke_test"]["log_path"])
+    assert log_path.exists()

From cfe88e83dd13deefd0ad83dbf2d2efafbb272a5a Mon Sep 17 00:00:00 2001
From: Kulesh Shanmugasundaram <164083+kulesh@users.noreply.github.com>
Date: Thu, 5 Feb 2026 19:35:30 -0500
Subject: [PATCH 4/8] docs: add ADRs and link documentation

---
 README.md                             |  1 +
 docs/README.md                        |  1 +
 docs/adr/0001-execution-controller.md | 28 +++++++++++++++++++++++++++
 docs/adr/0002-flight-test-harness.md  | 28 +++++++++++++++++++++++++++
 docs/adr/0003-execution-report.md     | 21 ++++++++++++++++++++
 docs/adr/README.md                    |  9 +++++++++
 6 files changed, 88 insertions(+)
 create mode 100644 docs/adr/0001-execution-controller.md
 create mode 100644 docs/adr/0002-flight-test-harness.md
 create mode 100644 docs/adr/0003-execution-report.md
 create mode 100644 docs/adr/README.md

diff --git a/README.md b/README.md
index e9b00b2..5d02844 100644
--- a/README.md
+++ b/README.md
@@ -167,6 +167,7 @@ docs/
 - [docs/journey-state-machine.md](./docs/journey-state-machine.md) - Journey states and transitions
 - [docs/genspec-format.md](./docs/genspec-format.md) - Genspec format reference
 - [docs/testing-strategy.md](./docs/testing-strategy.md) - Testing approach
+- [docs/adr/README.md](./docs/adr/README.md) - Architecture decision records
 - [docs/architecture-roadmap.md](./docs/architecture-roadmap.md) - Architecture roadmap
 
 ## How It Works
diff --git a/docs/README.md b/docs/README.md
index dfcf9ad..8a85713 100644
--- a/docs/README.md
+++ b/docs/README.md
@@ -13,6 +13,7 @@ This folder contains the product, architecture, and testing references for Waypo
 - [journey-state-machine.md](./journey-state-machine.md) - Journey states and transitions
 - [architecture-roadmap.md](./architecture-roadmap.md) - Long-term architecture plan
 - [unix-architecture-plan.md](./unix-architecture-plan.md) - UNIX-style architecture notes
+- [adr/README.md](./adr/README.md) - Architecture decision records
 
 ## Protocols and Formats
 
diff --git a/docs/adr/0001-execution-controller.md b/docs/adr/0001-execution-controller.md
new file mode 100644
index 0000000..6dd43f3
--- /dev/null
+++ b/docs/adr/0001-execution-controller.md
@@ -0,0 +1,28 @@
+# ADR 0001: Extract Execution Controller
+
+Date: 2026-02-05
+Status: Accepted
+
+## Context
+
+The FLY phase mixed UI, orchestration, execution, and state transitions inside
+`src/waypoints/tui/screens/fly.py`. This coupling made the execution flow harder
+to test, reason about, and evolve. A dedicated orchestration boundary was
+needed to align with the “bicycle” philosophy and centralize execution logic.
+
+## Decision
+
+Introduce `ExecutionController` in `src/waypoints/orchestration/` to own:
+- Execution state transitions
+- Waypoint selection and sequencing
+- Result handling and intervention flow
+
+Move `ExecutionState` into `src/waypoints/fly/state.py` to make it a shared
+execution concept rather than a UI-local enum.
+
+## Consequences
+
+- FLY screen becomes thinner and more focused on UI concerns.
+- Execution logic is testable in isolation with unit tests.
+- Additional orchestration features (rollback, richer reports) have a clear
+  home without bloating the UI layer.
diff --git a/docs/adr/0002-flight-test-harness.md b/docs/adr/0002-flight-test-harness.md
new file mode 100644
index 0000000..3f3f461
--- /dev/null
+++ b/docs/adr/0002-flight-test-harness.md
@@ -0,0 +1,28 @@
+# ADR 0002: Flight Test Harness
+
+Date: 2026-02-05
+Status: Accepted
+
+## Context
+
+The testing strategy defined flight tests (L0–L5) but lacked operational tooling.
+To improve iteration discipline, we needed a repeatable harness that records
+results and validates generated projects against minimal expectations.
+
+## Decision
+
+Add `scripts/run_flight_test.py` to execute a flight test against an existing
+project directory. The runner:
+- Creates timestamped results directories
+- Validates minimum expected files
+- Runs optional smoke tests
+- Writes a `meta.json` summary
+
+Seed L0–L2 fixtures under `flight-tests/` to make the harness immediately usable.
+
+## Consequences
+
+- Provides a repeatable baseline for flight test validation.
+- Creates an audit trail for regressions and improvements.
+- Keeps generation concerns decoupled from validation so the harness is usable
+  before full automation is in place.
diff --git a/docs/adr/0003-execution-report.md b/docs/adr/0003-execution-report.md
new file mode 100644
index 0000000..a062847
--- /dev/null
+++ b/docs/adr/0003-execution-report.md
@@ -0,0 +1,21 @@
+# ADR 0003: Execution Report Model
+
+Date: 2026-02-05
+Status: Accepted
+
+## Context
+
+Execution outcomes were logged but lacked a structured report for summarizing
+waypoint attempts. This made it hard to aggregate metrics or build future
+observability features on top of execution artifacts.
+
+## Decision
+
+Introduce `ExecutionReport` as a structured summary of a waypoint execution
+attempt, capturing result, timestamps, and completion data.
+
+## Consequences
+
+- Establishes a durable schema for execution summaries.
+- Enables future aggregation and reporting without parsing logs.
+- Keeps the report model independent of UI layers.
diff --git a/docs/adr/README.md b/docs/adr/README.md
new file mode 100644
index 0000000..bc7727f
--- /dev/null
+++ b/docs/adr/README.md
@@ -0,0 +1,9 @@
+# Architecture Decision Records
+
+This directory captures the key architectural decisions for Waypoints.
+
+## Index
+
+- [ADR 0001: Extract Execution Controller](./0001-execution-controller.md)
+- [ADR 0002: Flight Test Harness](./0002-flight-test-harness.md)
+- [ADR 0003: Execution Report Model](./0003-execution-report.md)

From 7f0d477fc97a086bb29f779c44dc9862c4f1e815 Mon Sep 17 00:00:00 2001
From: Kulesh Shanmugasundaram <164083+kulesh@users.noreply.github.com>
Date: Thu, 5 Feb 2026 19:35:39 -0500
Subject: [PATCH 5/8] feat(reliability): add project status and rollback

---
 src/waypoints/llm/prompts/fly.py           |  3 +--
 src/waypoints/models/__init__.py           |  3 ++-
 src/waypoints/models/project.py            | 17 +++++++++++++++++
 src/waypoints/orchestration/coordinator.py | 20 ++++++++++++++++----
 src/waypoints/tui/screens/land.py          |  4 +++-
 tests/test_models.py                       | 22 +++++++++++++++++++++-
 6 files changed, 60 insertions(+), 9 deletions(-)

diff --git a/src/waypoints/llm/prompts/fly.py b/src/waypoints/llm/prompts/fly.py
index 2a50812..fd74d3b 100644
--- a/src/waypoints/llm/prompts/fly.py
+++ b/src/waypoints/llm/prompts/fly.py
@@ -47,8 +47,7 @@ def build_execution_prompt(
 {criteria_list}
 {resolution_notes}
 
-## Product Spec Summary
-## TODO: We should use proper summary of the spec not prefix!
+## Product Spec Excerpt (first 2000 chars)
 {spec[:2000]}{"..." if len(spec) > 2000 else ""}
 
 ## Working Directory
diff --git a/src/waypoints/models/__init__.py b/src/waypoints/models/__init__.py
index 0d35aba..ae1127e 100644
--- a/src/waypoints/models/__init__.py
+++ b/src/waypoints/models/__init__.py
@@ -12,7 +12,7 @@
     Journey,
     JourneyState,
 )
-from .project import Project, slugify
+from .project import Project, ProjectStatus, slugify
 from .session import SessionReader, SessionWriter
 from .state_manager import JourneyStateManager, StateGuardError
 from .waypoint import Waypoint, WaypointStatus
@@ -29,6 +29,7 @@
     "MessageRole",
     "PHASE_TO_STATE",
     "Project",
+    "ProjectStatus",
     "JourneyStateManager",
     "RECOVERABLE_STATES",
     "RECOVERY_MAP",
diff --git a/src/waypoints/models/project.py b/src/waypoints/models/project.py
index a192d35..3bcb2f6 100644
--- a/src/waypoints/models/project.py
+++ b/src/waypoints/models/project.py
@@ -6,6 +6,7 @@
 import re
 import shutil
 from dataclasses import dataclass, field
+from enum import Enum
 from datetime import UTC, datetime
 from pathlib import Path
 from typing import TYPE_CHECKING, Any
@@ -36,6 +37,13 @@ def slugify(name: str) -> str:
     return slug or "unnamed-project"
 
 
+class ProjectStatus(Enum):
+    """Lifecycle status for a project."""
+
+    ACTIVE = "active"  # default for new projects and for unknown stored values
+    CLOSED = "closed"  # set when the user closes the project from the LAND screen
+
+
 @dataclass
 class Project:
     """A waypoints project containing sessions and documents."""
@@ -46,6 +54,7 @@ class Project:
     updated_at: datetime
     initial_idea: str = ""
     summary: str = ""  # LLM-generated project summary
+    status: ProjectStatus = ProjectStatus.ACTIVE
     journey: Journey | None = field(default=None, repr=False)
 
     @classmethod
@@ -107,6 +116,7 @@ def to_dict(self) -> dict[str, Any]:
             "updated_at": self.updated_at.isoformat(),
             "initial_idea": self.initial_idea,
             "summary": self.summary,
+            "status": self.status.value,
         }
         if self.journey is not None:
             data["journey"] = self.journey.to_dict()
@@ -121,6 +131,12 @@ def from_dict(cls, data: dict[str, Any]) -> "Project":
         if "journey" in data:
             journey = Journey.from_dict(data["journey"])
 
+        status_value = data.get("status", ProjectStatus.ACTIVE.value)
+        try:
+            status = ProjectStatus(status_value)
+        except ValueError:
+            status = ProjectStatus.ACTIVE
+
         return cls(
             name=data["name"],
             slug=data["slug"],
@@ -128,6 +144,7 @@ def from_dict(cls, data: dict[str, Any]) -> "Project":
             updated_at=datetime.fromisoformat(data["updated_at"]),
             initial_idea=data.get("initial_idea", ""),
             summary=data.get("summary", ""),
+            status=status,
             journey=journey,
         )
 
diff --git a/src/waypoints/orchestration/coordinator.py b/src/waypoints/orchestration/coordinator.py
index 7f38407..bde4cf8 100644
--- a/src/waypoints/orchestration/coordinator.py
+++ b/src/waypoints/orchestration/coordinator.py
@@ -461,10 +461,22 @@ def handle_intervention(
                 return NextAction(action="complete")
 
         elif action == InterventionAction.ROLLBACK:
-            # Rollback to tag and pause
-            # TODO: Implement rollback when GitService supports it
-            # if self.git and rollback_tag:
-            #     self.git.rollback_to_tag(rollback_tag)
+            if not rollback_tag:
+                return NextAction(action="pause", message="Rollback tag required")
+
+            if self.git:
+                result = self.git.reset_hard(rollback_tag)
+                if not result.success:
+                    return NextAction(
+                        action="pause",
+                        message=f"Rollback failed: {result.message}",
+                    )
+            else:
+                return NextAction(
+                    action="pause",
+                    message="Rollback requested but git is not configured",
+                )
+
             waypoint.status = WaypointStatus.PENDING
             self.save_flight_plan()
             return NextAction(action="pause", message=f"Rolled back to {rollback_tag}")
diff --git a/src/waypoints/tui/screens/land.py b/src/waypoints/tui/screens/land.py
index d3691df..bb6c0de 100644
--- a/src/waypoints/tui/screens/land.py
+++ b/src/waypoints/tui/screens/land.py
@@ -25,6 +25,7 @@
 from waypoints.git.service import GitService
 from waypoints.llm.metrics import MetricsCollector
 from waypoints.models import JourneyState, Project
+from waypoints.models.project import ProjectStatus
 from waypoints.models.flight_plan import FlightPlan, FlightPlanReader
 from waypoints.models.waypoint import WaypointStatus
 from waypoints.orchestration import JourneyCoordinator
@@ -995,7 +996,8 @@ def action_new_iteration(self) -> None:
 
     def action_close_project(self) -> None:
         """Mark project as closed."""
-        # TODO: Add status field to Project model
+        self.project.status = ProjectStatus.CLOSED
+        self.project.save()
         self.notify(f"Project '{self.project.name}' marked as closed")
         from waypoints.tui.screens.project_selection import ProjectSelectionScreen
 
diff --git a/tests/test_models.py b/tests/test_models.py
index b109632..18fd69d 100644
--- a/tests/test_models.py
+++ b/tests/test_models.py
@@ -7,7 +7,7 @@
 import pytest
 
 from waypoints.models.flight_plan import FlightPlan, FlightPlanReader, FlightPlanWriter
-from waypoints.models.project import Project, slugify
+from waypoints.models.project import Project, ProjectStatus, slugify
 from waypoints.models.waypoint import Waypoint, WaypointStatus
 
 
@@ -664,6 +664,26 @@ def test_create_project(self, temp_projects_dir: Path) -> None:
         assert project.journey is not None
         assert (temp_projects_dir / "test-project" / "project.json").exists()
 
+    def test_project_status_defaults_active(self, temp_projects_dir: Path) -> None:
+        """New projects should default to ACTIVE status."""
+        project = Project.create("Status Test")
+
+        assert project.status == ProjectStatus.ACTIVE
+        assert project.to_dict()["status"] == ProjectStatus.ACTIVE.value
+
+    def test_project_status_from_dict(self) -> None:
+        """Deserialize project status from dictionary."""
+        data = {
+            "name": "Closed Project",
+            "slug": "closed-project",
+            "created_at": "2026-01-10T10:00:00",
+            "updated_at": "2026-01-10T11:00:00",
+            "status": "closed",
+        }
+        project = Project.from_dict(data)
+
+        assert project.status == ProjectStatus.CLOSED
+
     def test_project_directories_created(self, temp_projects_dir: Path) -> None:
         """Project directories are created on creation."""
         project = Project.create("Dir Test")

From cb0925889026251d3a3a2dbf1bea219391ac197d Mon Sep 17 00:00:00 2001
From: Kulesh Shanmugasundaram <164083+kulesh@users.noreply.github.com>
Date: Thu, 5 Feb 2026 19:54:30 -0500
Subject: [PATCH 6/8] chore: fix lint and typing issues

---
 scripts/run_flight_test.py                    | 12 ++++++++--
 src/waypoints/models/project.py               |  2 +-
 .../orchestration/execution_controller.py     | 21 +++++++++++++-----
 src/waypoints/tui/screens/land.py             |  2 +-
 tests/test_execution_controller.py            | 13 +++++++----
 tests/test_flight_test_runner.py              | 22 ++++++++++++++-----
 tests/test_fly_screen.py                      |  2 +-
 7 files changed, 54 insertions(+), 20 deletions(-)

diff --git a/scripts/run_flight_test.py b/scripts/run_flight_test.py
index b07a859..f13cc16 100644
--- a/scripts/run_flight_test.py
+++ b/scripts/run_flight_test.py
@@ -85,6 +85,10 @@ def run(flight_test_dir: Path, project_path: Path, *, skip_smoke: bool) -> int:
         not smoke_result.ran or smoke_result.exit_code == 0
     )
 
+    log_path_value = (
+        str(smoke_result.log_path) if smoke_result.log_path else None
+    )
+
     _write_meta(
         results_dir,
         {
@@ -97,7 +101,7 @@ def run(flight_test_dir: Path, project_path: Path, *, skip_smoke: bool) -> int:
             "smoke_test": {
                 "ran": smoke_result.ran,
                 "exit_code": smoke_result.exit_code,
-                "log_path": str(smoke_result.log_path) if smoke_result.log_path else None,
+                "log_path": log_path_value,
             },
             "success": success,
         },
@@ -110,7 +114,11 @@ def run(flight_test_dir: Path, project_path: Path, *, skip_smoke: bool) -> int:
         for path in missing:
             print(f"  - {path}")
     if smoke_result.ran and smoke_result.exit_code != 0:
-        print(f"Smoke test failed (exit {smoke_result.exit_code}). See {smoke_result.log_path}")
+        message = (
+            f"Smoke test failed (exit {smoke_result.exit_code}). "
+            f"See {smoke_result.log_path}"
+        )
+        print(message)
 
     return 0 if success else 1
 
diff --git a/src/waypoints/models/project.py b/src/waypoints/models/project.py
index 3bcb2f6..a132442 100644
--- a/src/waypoints/models/project.py
+++ b/src/waypoints/models/project.py
@@ -6,8 +6,8 @@
 import re
 import shutil
 from dataclasses import dataclass, field
-from enum import Enum
 from datetime import UTC, datetime
+from enum import Enum
 from pathlib import Path
 from typing import TYPE_CHECKING, Any
 
diff --git a/src/waypoints/orchestration/execution_controller.py b/src/waypoints/orchestration/execution_controller.py
index c0be62a..7b96bff 100644
--- a/src/waypoints/orchestration/execution_controller.py
+++ b/src/waypoints/orchestration/execution_controller.py
@@ -14,7 +14,11 @@
     ExecutionResult,
     WaypointExecutor,
 )
-from waypoints.fly.intervention import Intervention, InterventionAction, InterventionResult
+from waypoints.fly.intervention import (
+    Intervention,
+    InterventionAction,
+    InterventionResult,
+)
 from waypoints.fly.state import ExecutionState
 from waypoints.models import JourneyState, Waypoint, WaypointStatus
 from waypoints.orchestration.coordinator import JourneyCoordinator
@@ -61,7 +65,9 @@ def initialize(self) -> None:
         self.coordinator.reset_stale_in_progress()
         self.select_next_waypoint(include_in_progress=True)
 
-    def select_next_waypoint(self, include_in_progress: bool = False) -> Waypoint | None:
+    def select_next_waypoint(
+        self, include_in_progress: bool = False
+    ) -> Waypoint | None:
         """Select the next eligible waypoint and update execution state."""
         wp = self.coordinator.select_next_waypoint(include_failed=include_in_progress)
         if wp:
@@ -111,7 +117,9 @@ def start(self, selected_waypoint: Waypoint | None) -> ExecutionDirective:
                         action="noop",
                         message="Select a failed waypoint and press 'r' to retry",
                     )
-                return ExecutionDirective(action="noop", message="No waypoints to resume")
+                return ExecutionDirective(
+                    action="noop", message="No waypoints to resume"
+                )
             self._transition_to_executing()
             self.execution_state = ExecutionState.RUNNING
             return ExecutionDirective(action="execute", waypoint=self.current_waypoint)
@@ -198,7 +206,8 @@ def handle_execution_result(
                         waypoint=next_wp,
                         completed=waypoint,
                     )
-                if self.execution_state == ExecutionState.DONE:
+                status = self.coordinator.get_completion_status()
+                if status.all_complete:
                     self.coordinator.transition(JourneyState.LAND_REVIEW)
                     return ExecutionDirective(action="land", completed=waypoint)
                 return ExecutionDirective(action="pause", completed=waypoint)
@@ -252,7 +261,9 @@ def resolve_intervention(
             return ExecutionDirective(action="noop", message="Intervention cancelled")
 
         if not self._current_intervention:
-            return ExecutionDirective(action="noop", message="No intervention to resolve")
+            return ExecutionDirective(
+                action="noop", message="No intervention to resolve"
+            )
 
         waypoint = self._current_intervention.waypoint
 
diff --git a/src/waypoints/tui/screens/land.py b/src/waypoints/tui/screens/land.py
index bb6c0de..e4908ce 100644
--- a/src/waypoints/tui/screens/land.py
+++ b/src/waypoints/tui/screens/land.py
@@ -25,8 +25,8 @@
 from waypoints.git.service import GitService
 from waypoints.llm.metrics import MetricsCollector
 from waypoints.models import JourneyState, Project
-from waypoints.models.project import ProjectStatus
 from waypoints.models.flight_plan import FlightPlan, FlightPlanReader
+from waypoints.models.project import ProjectStatus
 from waypoints.models.waypoint import WaypointStatus
 from waypoints.orchestration import JourneyCoordinator
 from waypoints.tui.utils import format_duration, format_token_count
diff --git a/tests/test_execution_controller.py b/tests/test_execution_controller.py
index 6e5c168..8820081 100644
--- a/tests/test_execution_controller.py
+++ b/tests/test_execution_controller.py
@@ -4,10 +4,13 @@
 
 from pathlib import Path
 
-import pytest
-
 from waypoints.fly.executor import ExecutionResult
-from waypoints.fly.intervention import Intervention, InterventionAction, InterventionResult, InterventionType
+from waypoints.fly.intervention import (
+    Intervention,
+    InterventionAction,
+    InterventionResult,
+    InterventionType,
+)
 from waypoints.fly.state import ExecutionState
 from waypoints.models.flight_plan import FlightPlan
 from waypoints.models.journey import Journey, JourneyState
@@ -28,7 +31,9 @@ def get_path(self) -> Path:
     def save(self) -> None:
         return None
 
-    def transition_journey(self, target: JourneyState, reason: str | None = None) -> None:
+    def transition_journey(
+        self, target: JourneyState, reason: str | None = None
+    ) -> None:
         if self.journey is None:
             self.journey = Journey.new(self.slug)
         self.journey = self.journey.transition(target, reason=reason)
diff --git a/tests/test_flight_test_runner.py b/tests/test_flight_test_runner.py
index 6224258..cd9d5c9 100644
--- a/tests/test_flight_test_runner.py
+++ b/tests/test_flight_test_runner.py
@@ -2,14 +2,30 @@
 
 from __future__ import annotations
 
+import importlib.util
 import json
-import sys
+from collections.abc import Callable
 from pathlib import Path
 
 ROOT = Path(__file__).resolve().parents[1]
-sys.path.append(str(ROOT))
 
-from scripts.run_flight_test import run
+
+def _load_run() -> Callable[..., int]:
+    """Load ``run`` from scripts/run_flight_test.py without touching sys.path.
+
+    NOTE(review): the module is executed but never registered in
+    ``sys.modules``; confirm the runner does not depend on that.
+    """
+    module_path = ROOT / "scripts" / "run_flight_test.py"
+    spec = importlib.util.spec_from_file_location("run_flight_test", module_path)
+    if spec is None or spec.loader is None:
+        raise RuntimeError("Unable to load flight test runner")
+    module = importlib.util.module_from_spec(spec)
+    spec.loader.exec_module(module)
+    return module.run
+
+
+RUN = _load_run()
 
 
 def _setup_flight_test(root: Path, *, min_files: list[str]) -> Path:
@@ -38,7 +54,7 @@ def test_runner_records_success(tmp_path: Path) -> None:
     project_path.mkdir()
     (project_path / "README.md").write_text("ok", encoding="utf-8")
 
-    exit_code = run(flight_test, project_path, skip_smoke=True)
+    exit_code = RUN(flight_test, project_path, skip_smoke=True)
 
     assert exit_code == 0
     results_dir = _latest_results(flight_test / "results")
@@ -53,7 +69,7 @@ def test_runner_detects_missing_files(tmp_path: Path) -> None:
     project_path.mkdir()
     (project_path / "README.md").write_text("ok", encoding="utf-8")
 
-    exit_code = run(flight_test, project_path, skip_smoke=True)
+    exit_code = RUN(flight_test, project_path, skip_smoke=True)
 
     assert exit_code == 1
     results_dir = _latest_results(flight_test / "results")
@@ -72,7 +88,7 @@ def test_runner_writes_smoke_log(tmp_path: Path) -> None:
     smoke_script.write_text("#!/usr/bin/env bash\nexit 0\n", encoding="utf-8")
     smoke_script.chmod(0o755)
 
-    exit_code = run(flight_test, project_path, skip_smoke=False)
+    exit_code = RUN(flight_test, project_path, skip_smoke=False)
 
     assert exit_code == 0
     results_dir = _latest_results(flight_test / "results")
diff --git a/tests/test_fly_screen.py b/tests/test_fly_screen.py
index 0f8f1ca..79ab2f8 100644
--- a/tests/test_fly_screen.py
+++ b/tests/test_fly_screen.py
@@ -2,10 +2,10 @@
 
 import pytest
 
+from waypoints.fly.state import ExecutionState
 from waypoints.models.flight_plan import FlightPlan
 from waypoints.models.waypoint import Waypoint, WaypointStatus
 from waypoints.orchestration import ExecutionController, JourneyCoordinator
-from waypoints.fly.state import ExecutionState
 from waypoints.tui.screens.fly import FlyScreen
 
 

From 09f79459a51b8a0fbf409048cbe68a2028be7a5c Mon Sep 17 00:00:00 2001
From: Kulesh Shanmugasundaram <164083+kulesh@users.noreply.github.com>
Date: Thu, 5 Feb 2026 20:08:05 -0500
Subject: [PATCH 7/8] refactor(fly): delegate commit handling to coordinator

---
 src/waypoints/orchestration/__init__.py    |   4 +
 src/waypoints/orchestration/coordinator.py | 139 ++++++++++++++++-----
 src/waypoints/orchestration/types.py       |  20 +++
 src/waypoints/tui/screens/fly.py           | 107 +++-------------
 4 files changed, 149 insertions(+), 121 deletions(-)

diff --git a/src/waypoints/orchestration/__init__.py b/src/waypoints/orchestration/__init__.py
index 319ab02..ad9c5cc 100644
--- a/src/waypoints/orchestration/__init__.py
+++ b/src/waypoints/orchestration/__init__.py
@@ -19,6 +19,8 @@
 )
 from waypoints.orchestration.types import (
     ChunkCallback,
+    CommitNotice,
+    CommitOutcome,
     CompletionStatus,
     NextAction,
     ProgressCallback,
@@ -28,6 +30,8 @@
 
 __all__ = [
     "JourneyCoordinator",
+    "CommitNotice",
+    "CommitOutcome",
     "ExecutionController",
     "ExecutionDirective",
     "NextAction",
diff --git a/src/waypoints/orchestration/coordinator.py b/src/waypoints/orchestration/coordinator.py
index bde4cf8..60a427f 100644
--- a/src/waypoints/orchestration/coordinator.py
+++ b/src/waypoints/orchestration/coordinator.py
@@ -58,6 +58,8 @@
 )
 from waypoints.orchestration.types import (
     ChunkCallback,
+    CommitNotice,
+    CommitOutcome,
     CompletionStatus,
     NextAction,
     ProgressCallback,
@@ -360,7 +362,7 @@ def handle_execution_result(
 
             # Commit if git is available
             if self.git:
-                self._commit_waypoint(waypoint)
+                self.commit_waypoint(waypoint)
 
             # Find next waypoint
             next_wp = self.select_next_waypoint()
@@ -542,47 +544,116 @@ def get_completion_status(self) -> CompletionStatus:
 
     # ─── FLY Phase: Git Integration ──────────────────────────────────────
 
-    def _commit_waypoint(self, waypoint: Waypoint) -> bool:
-        """Commit waypoint changes to git.
-
-        Validates receipt exists before committing.
+    def commit_waypoint(self, waypoint: Waypoint) -> CommitOutcome:
+        """Commit waypoint completion, validating its receipt when configured.
 
         Returns:
-            True if commit successful, False otherwise
+            CommitOutcome with status "skipped", "success" or "failure".
         """
-        if self.git is None:
-            return False
+        from waypoints.git import GitConfig, GitService, ReceiptValidator
+
+        config = GitConfig.load(self.project.slug)
+        if not config.auto_commit:
+            logger.debug("Auto-commit disabled, skipping")
+            return CommitOutcome(status="skipped", reason="auto_commit_disabled")
+
+        git = self.git or GitService(self.project.get_path())
+        notices: list[CommitNotice] = []
+
+        if not git.is_git_repo():
+            if config.auto_init:
+                init_result = git.init_repo()
+                if init_result.success:
+                    notices.append(
+                        CommitNotice(
+                            message="Initialized git repository",
+                            severity="info",
+                        )
+                    )
+                else:
+                    logger.warning("Failed to init git repo: %s", init_result.message)
+                    return CommitOutcome(
+                        status="skipped",
+                        reason="auto_init_failed",
+                        notices=tuple(notices),
+                    )
+            else:
+                logger.debug("Not a git repo and auto-init disabled")
+                return CommitOutcome(status="skipped", reason="auto_init_disabled")
+
+        if config.run_checklist:
+            validator = ReceiptValidator()
+            receipt_path = validator.find_latest_receipt(self.project, waypoint.id)
+
+            if receipt_path:
+                validation_result = validator.validate(receipt_path)
+                if not validation_result.valid:
+                    logger.warning(
+                        "Skipping commit - receipt invalid: %s",
+                        validation_result.message,
+                    )
+                    notices.append(
+                        CommitNotice(
+                            message=f"Skipping commit: {validation_result.message}",
+                            severity="warning",
+                        )
+                    )
+                    return CommitOutcome(
+                        status="skipped",
+                        reason="receipt_invalid",
+                        notices=tuple(notices),
+                    )
+                logger.info("Receipt validated: %s", receipt_path)
+            else:
+                logger.warning("Skipping commit - no receipt found for %s", waypoint.id)
+                notices.append(
+                    CommitNotice(
+                        message=f"Skipping commit: no receipt for {waypoint.id}",
+                        severity="warning",
+                    )
+                )
+                return CommitOutcome(
+                    status="skipped",
+                    reason="receipt_missing",
+                    notices=tuple(notices),
+                )
 
-        from waypoints.git.receipt import ReceiptValidator
+        git.stage_project_files(self.project.slug)
 
-        validator = ReceiptValidator()
-        receipt_path = validator.find_latest_receipt(self.project, waypoint.id)
+        commit_msg = f"feat({self.project.slug}): Complete {waypoint.title}"
+        result = git.commit(commit_msg)
 
-        if receipt_path is None:
-            logger.warning("No receipt found for %s, skipping commit", waypoint.id)
-            return False
+        if result.success:
+            if "Nothing to commit" in result.message:
+                logger.info("Nothing to commit for %s", waypoint.id)
+                return CommitOutcome(status="skipped", reason="nothing_to_commit")
 
-        result = validator.validate(receipt_path)
-        if not result.valid:
-            logger.warning("Receipt invalid for %s: %s", waypoint.id, result.message)
-            return False
+            notices.append(
+                CommitNotice(message=f"Committed: {waypoint.id}", severity="info")
+            )
+            commit_hash = git.get_head_commit() or ""
 
-        # Create commit
-        try:
-            # Stage all changed files
-            self.git.stage_files(".")
-            commit_result = self.git.commit(f"feat({waypoint.id}): {waypoint.title}")
-            if not commit_result.success:
-                logger.warning(
-                    "Commit failed for %s: %s", waypoint.id, commit_result.message
-                )
-                return False
-            self.git.tag(f"waypoint/{waypoint.id}")
-            logger.info("Committed waypoint: %s", waypoint.id)
-            return True
-        except Exception as e:
-            logger.error("Failed to commit waypoint %s: %s", waypoint.id, e)
-            return False
+            if config.create_waypoint_tags:
+                tag_name = f"{self.project.slug}/{waypoint.id}"
+                git.tag(tag_name, f"Completed waypoint: {waypoint.title}")
+
+            return CommitOutcome(
+                status="success",
+                commit_hash=commit_hash,
+                commit_msg=commit_msg,
+                notices=tuple(notices),
+            )
+
+        logger.error("Commit failed: %s", result.message)
+        notices.append(
+            CommitNotice(message=f"Commit failed: {result.message}", severity="error")
+        )
+        return CommitOutcome(
+            status="failure",
+            commit_msg=commit_msg,
+            message=result.message,
+            notices=tuple(notices),
+        )
 
     # ─── CHART Phase: Flight Plan Generation ─────────────────────────────
 
diff --git a/src/waypoints/orchestration/types.py b/src/waypoints/orchestration/types.py
index a80f2e4..876d612 100644
--- a/src/waypoints/orchestration/types.py
+++ b/src/waypoints/orchestration/types.py
@@ -38,6 +38,26 @@ class ProgressUpdate:
 # --- Result Types ---
 
 
+@dataclass(frozen=True, slots=True)
+class CommitNotice:
+    """User-facing notice from git commit handling."""
+
+    message: str  # text to show to the user
+    severity: Literal["info", "warning", "error"] = "info"  # notice level
+
+
+@dataclass(frozen=True, slots=True)
+class CommitOutcome:
+    """Outcome of a git commit attempt for a waypoint."""
+
+    status: Literal["skipped", "success", "failure"]
+    commit_hash: str | None = None  # from git.get_head_commit() on success
+    commit_msg: str | None = None  # message passed to git.commit()
+    message: str | None = None  # git's result message when the commit failed
+    reason: str | None = None  # skip code, e.g. "receipt_missing"
+    notices: tuple[CommitNotice, ...] = ()  # notices for the caller to surface
+
+
 @dataclass
 class NextAction:
     """What should happen next after an operation.
diff --git a/src/waypoints/tui/screens/fly.py b/src/waypoints/tui/screens/fly.py
index 62dcfbe..b750e4d 100644
--- a/src/waypoints/tui/screens/fly.py
+++ b/src/waypoints/tui/screens/fly.py
@@ -39,7 +39,7 @@
     InterventionResult,
 )
 from waypoints.fly.state import ExecutionState
-from waypoints.git import GitConfig, GitService, ReceiptValidator
+from waypoints.git import GitService, ReceiptValidator
 from waypoints.models import JourneyState, Project
 from waypoints.models.flight_plan import FlightPlan
 from waypoints.models.waypoint import Waypoint, WaypointStatus
@@ -1910,7 +1910,25 @@ def _handle_execution_result(self, result: ExecutionResult | None) -> None:
             self._log_verification_summary(directive.completed, log)
 
             # Commit waypoint completion (validates receipt first)
-            self._commit_waypoint(directive.completed)
+            commit_outcome = self.coordinator.commit_waypoint(directive.completed)
+            for notice in commit_outcome.notices:
+                if notice.severity == "info":
+                    self.notify(notice.message)
+                else:
+                    self.notify(notice.message, severity=notice.severity)
+            if self._executor and self._executor._log_writer:
+                if commit_outcome.status == "success":
+                    self._executor._log_writer.log_git_commit(
+                        True,
+                        commit_outcome.commit_hash or "",
+                        commit_outcome.commit_msg or "",
+                    )
+                elif commit_outcome.status == "failure":
+                    self._executor._log_writer.log_git_commit(
+                        False,
+                        "",
+                        commit_outcome.message or "Commit failed",
+                    )
 
             # Reset live criteria tracking for next waypoint
             self._live_criteria_completed = set()
@@ -2119,91 +2137,6 @@ def _log_verification_summary(self, waypoint: Waypoint, log: ExecutionLog) -> No
         else:
             log.write_log("[yellow]⚠ No receipt found[/]")
 
-    def _commit_waypoint(self, waypoint: Waypoint) -> None:
-        """Commit waypoint completion if receipt is valid.
-
-        Implements the "trust but verify" pattern:
-        - Model already produced receipt during execution
-        - We validate receipt exists and is well-formed
-        - If valid, commit the changes
-        - If invalid, skip commit but don't block
-        """
-        project_path = self.project.get_path()
-        config = GitConfig.load(self.project.slug)
-
-        if not config.auto_commit:
-            logger.debug("Auto-commit disabled, skipping")
-            return
-
-        git = GitService(project_path)
-
-        # Auto-init if needed
-        if not git.is_git_repo():
-            if config.auto_init:
-                init_result = git.init_repo()
-                if init_result.success:
-                    self.notify("Initialized git repository")
-                else:
-                    logger.warning("Failed to init git repo: %s", init_result.message)
-                    return
-            else:
-                logger.debug("Not a git repo and auto-init disabled")
-                return
-
-        # Validate receipt (the "dog" checking the "pilot's" work)
-        if config.run_checklist:
-            validator = ReceiptValidator()
-            receipt_path = validator.find_latest_receipt(self.project, waypoint.id)
-
-            if receipt_path:
-                validation_result = validator.validate(receipt_path)
-                if not validation_result.valid:
-                    logger.warning(
-                        "Skipping commit - receipt invalid: %s",
-                        validation_result.message,
-                    )
-                    self.notify(
-                        f"Skipping commit: {validation_result.message}",
-                        severity="warning",
-                    )
-                    return
-                logger.info("Receipt validated: %s", receipt_path)
-            else:
-                logger.warning("Skipping commit - no receipt found for %s", waypoint.id)
-                self.notify(
-                    f"Skipping commit: no receipt for {waypoint.id}", severity="warning"
-                )
-                return
-
-        # Stage project files and commit
-        git.stage_project_files(self.project.slug)
-
-        # Build commit message
-        commit_msg = f"feat({self.project.slug}): Complete {waypoint.title}"
-        result = git.commit(commit_msg)
-
-        if result.success:
-            if "Nothing to commit" not in result.message:
-                logger.info("Committed: %s", commit_msg)
-                self.notify(f"Committed: {waypoint.id}")
-                # Log successful git commit
-                if self._executor and self._executor._log_writer:
-                    commit_hash = git.get_head_commit() or ""
-                    self._executor._log_writer.log_git_commit(
-                        True, commit_hash, commit_msg
-                    )
-
-                # Create tag for waypoint if configured
-                if config.create_waypoint_tags:
-                    tag_name = f"{self.project.slug}/{waypoint.id}"
-                    git.tag(tag_name, f"Completed waypoint: {waypoint.title}")
-        else:
-            logger.error("Commit failed: %s", result.message)
-            self.notify(f"Commit failed: {result.message}", severity="error")
-            # Log failed git commit
-            if self._executor and self._executor._log_writer:
-                self._executor._log_writer.log_git_commit(False, "", result.message)
-
     def _check_parent_completion(self, completed_waypoint: Waypoint) -> None:
         """Check if parent epic is ready for execution.
 

From a2f181501b13f8cf4c746de84c7833385c152b4d Mon Sep 17 00:00:00 2001
From: Kulesh Shanmugasundaram <164083+kulesh@users.noreply.github.com>
Date: Thu, 5 Feb 2026 21:02:42 -0500
Subject: [PATCH 8/8] refactor(fly): move rollback and verification into
 orchestration

---
 src/waypoints/orchestration/__init__.py       |   4 +
 src/waypoints/orchestration/coordinator.py    |  70 +++++++++---
 .../orchestration/execution_controller.py     |  39 ++++++-
 src/waypoints/orchestration/types.py          |  22 ++++
 src/waypoints/tui/screens/fly.py              | 103 ++++++------------
 5 files changed, 152 insertions(+), 86 deletions(-)

diff --git a/src/waypoints/orchestration/__init__.py b/src/waypoints/orchestration/__init__.py
index ad9c5cc..2149e07 100644
--- a/src/waypoints/orchestration/__init__.py
+++ b/src/waypoints/orchestration/__init__.py
@@ -25,7 +25,9 @@
     NextAction,
     ProgressCallback,
     ProgressUpdate,
+    RollbackOutcome,
     TextStream,
+    VerificationSummary,
 )
 
 __all__ = [
@@ -38,6 +40,8 @@
     "CompletionStatus",
     "ProgressCallback",
     "ProgressUpdate",
+    "RollbackOutcome",
     "ChunkCallback",
     "TextStream",
+    "VerificationSummary",
 ]
diff --git a/src/waypoints/orchestration/coordinator.py b/src/waypoints/orchestration/coordinator.py
index 60a427f..49510fd 100644
--- a/src/waypoints/orchestration/coordinator.py
+++ b/src/waypoints/orchestration/coordinator.py
@@ -64,6 +64,8 @@
     NextAction,
     ProgressCallback,
     ProgressUpdate,
+    RollbackOutcome,
+    VerificationSummary,
 )
 
 if TYPE_CHECKING:
@@ -422,6 +424,35 @@ def check_parent_completion(self, waypoint: Waypoint) -> None:
 
     # ─── FLY Phase: Intervention Handling ────────────────────────────────
 
+    def rollback_to_tag(self, tag: str | None) -> RollbackOutcome:
+        """Hard-reset the repository to ``tag`` and reload the flight plan.
+
+        Returns a failure outcome when no tag is given, the project is not
+        a git repository, or the reset fails. On success the current
+        waypoint is cleared and the reloaded plan is attached.
+        """
+        def _failure(reason: str) -> RollbackOutcome:
+            return RollbackOutcome(status="failure", message=reason)
+
+        if not tag:
+            return _failure("Rollback tag required")
+
+        from waypoints.git import GitService
+
+        repo = self.git or GitService(self.project.get_path())
+        if not repo.is_git_repo():
+            return _failure("Not a git repository - cannot rollback")
+
+        reset = repo.reset_hard(tag)
+        if not reset.success:
+            return _failure(f"Rollback failed: {reset.message}")
+
+        self._flight_plan = self._load_flight_plan()
+        self._current_waypoint = None
+        summary = f"Rolled back to {tag}"
+        plan = self._flight_plan
+        return RollbackOutcome(status="success", message=summary, flight_plan=plan)
+
     def handle_intervention(
         self,
         intervention: Intervention,
@@ -466,22 +497,10 @@ def handle_intervention(
             if not rollback_tag:
                 return NextAction(action="pause", message="Rollback tag required")
 
-            if self.git:
-                result = self.git.reset_hard(rollback_tag)
-                if not result.success:
-                    return NextAction(
-                        action="pause",
-                        message=f"Rollback failed: {result.message}",
-                    )
-            else:
-                return NextAction(
-                    action="pause",
-                    message="Rollback requested but git is not configured",
-                )
-
-            waypoint.status = WaypointStatus.PENDING
-            self.save_flight_plan()
-            return NextAction(action="pause", message=f"Rolled back to {rollback_tag}")
+            outcome = self.rollback_to_tag(rollback_tag)
+            # Success and failure both pause execution; only the message
+            # from the rollback outcome tells the two apart.
+            return NextAction(action="pause", message=outcome.message)
 
         elif action == InterventionAction.ABORT:
             # Mark failed and stop
@@ -544,6 +563,25 @@ def get_completion_status(self) -> CompletionStatus:
 
     # ─── FLY Phase: Git Integration ──────────────────────────────────────
 
+    def build_verification_summary(
+        self, waypoint: Waypoint, completed_criteria: set[int]
+    ) -> VerificationSummary:
+        """Summarise criteria progress and receipt validation for a waypoint.
+
+        The latest receipt, when one exists, is validated as part of the call.
+        """
+        from waypoints.git.receipt import ReceiptValidator
+
+        checker = ReceiptValidator()
+        latest = checker.find_latest_receipt(self.project, waypoint.id)
+        verdict = checker.validate(latest) if latest else None
+        return VerificationSummary(
+            total_criteria=len(waypoint.acceptance_criteria),
+            completed_criteria=frozenset(completed_criteria),
+            receipt_path=latest,
+            receipt_validation=verdict,
+        )
+
     def commit_waypoint(self, waypoint: Waypoint) -> CommitOutcome:
         """Commit waypoint completion if receipt is valid.
 
diff --git a/src/waypoints/orchestration/execution_controller.py b/src/waypoints/orchestration/execution_controller.py
index 7b96bff..68c053e 100644
--- a/src/waypoints/orchestration/execution_controller.py
+++ b/src/waypoints/orchestration/execution_controller.py
@@ -37,6 +37,7 @@ class ExecutionDirective:
     waypoint: Waypoint | None = None
     message: str | None = None
     completed: Waypoint | None = None
+    reload_flight_plan: bool = False
 
 
 class ExecutionController:
@@ -241,6 +242,28 @@ def handle_execution_result(
             message="Waypoint execution failed",
         )
 
+    def request_land(self) -> ExecutionDirective:
+        """Decide whether the journey may move on to LAND.
+
+        Yields a "land" directive when the journey is already in
+        LAND_REVIEW, or after transitioning there once the completion
+        status reports ``all_complete``; otherwise a "pause" directive.
+        """
+        current = self.coordinator.project.journey
+        in_review = current and current.state == JourneyState.LAND_REVIEW
+
+        if not in_review:
+            if not self.coordinator.get_completion_status().all_complete:
+                if self.execution_state == ExecutionState.DONE:
+                    why = "Cannot land yet - some waypoints are blocked or failed"
+                else:
+                    why = "Cannot land yet - waypoints still in progress"
+                return ExecutionDirective(action="pause", message=why)
+            # Completion reported: enter review before handing off.
+            self.coordinator.transition(JourneyState.LAND_REVIEW)
+
+        return ExecutionDirective(action="land")
+
     def prepare_intervention(self, intervention: Intervention) -> ExecutionDirective:
         """Record an intervention and transition state."""
         self._current_intervention = intervention
@@ -301,11 +324,21 @@ def resolve_intervention(
             )
 
         if result.action == InterventionAction.ROLLBACK:
+            outcome = self.coordinator.rollback_to_tag(result.rollback_tag)
             self.coordinator.transition(JourneyState.FLY_PAUSED)
-            self.coordinator.transition(JourneyState.FLY_READY)
-            self.execution_state = ExecutionState.IDLE
             self._current_intervention = None
-            return ExecutionDirective(action="noop", message="Rollback requested")
+
+            if outcome.status == "success":
+                self.coordinator.transition(JourneyState.FLY_READY)
+                self.execution_state = ExecutionState.IDLE
+                return ExecutionDirective(
+                    action="pause",
+                    message=outcome.message,
+                    reload_flight_plan=True,
+                )
+
+            self.execution_state = ExecutionState.PAUSED
+            return ExecutionDirective(action="pause", message=outcome.message)
 
         if result.action == InterventionAction.ABORT:
             self.coordinator.transition(JourneyState.FLY_PAUSED)
diff --git a/src/waypoints/orchestration/types.py b/src/waypoints/orchestration/types.py
index 876d612..f9a85a9 100644
--- a/src/waypoints/orchestration/types.py
+++ b/src/waypoints/orchestration/types.py
@@ -6,10 +6,13 @@
 
 from collections.abc import AsyncIterator, Callable
 from dataclasses import dataclass, field
+from pathlib import Path
 from typing import TYPE_CHECKING, Literal
 
 if TYPE_CHECKING:
     from waypoints.fly.intervention import Intervention
+    from waypoints.git.receipt import ReceiptValidationResult
+    from waypoints.models.flight_plan import FlightPlan
     from waypoints.models.waypoint import Waypoint
 
 
@@ -58,6 +61,25 @@ class CommitOutcome:
     notices: tuple[CommitNotice, ...] = ()
 
 
+@dataclass(frozen=True, slots=True)
+class VerificationSummary:
+    """Summary of acceptance criteria and receipt validation for a waypoint."""
+
+    total_criteria: int  # len(waypoint.acceptance_criteria)
+    completed_criteria: frozenset[int]  # presumably criterion indices - confirm
+    receipt_path: Path | None = None  # latest receipt found, if any
+    receipt_validation: "ReceiptValidationResult | None" = None  # None if no receipt
+
+
+@dataclass(frozen=True, slots=True)
+class RollbackOutcome:
+    """Outcome of a rollback attempt."""
+
+    status: Literal["success", "failure"]
+    message: str  # human-readable result or failure explanation
+    flight_plan: "FlightPlan | None" = None  # reloaded plan, set on success only
+
+
 @dataclass
 class NextAction:
     """What should happen next after an operation.
diff --git a/src/waypoints/tui/screens/fly.py b/src/waypoints/tui/screens/fly.py
index b750e4d..9afc730 100644
--- a/src/waypoints/tui/screens/fly.py
+++ b/src/waypoints/tui/screens/fly.py
@@ -39,7 +39,6 @@
     InterventionResult,
 )
 from waypoints.fly.state import ExecutionState
-from waypoints.git import GitService, ReceiptValidator
 from waypoints.models import JourneyState, Project
 from waypoints.models.flight_plan import FlightPlan
 from waypoints.models.waypoint import Waypoint, WaypointStatus
@@ -415,11 +414,16 @@ class WaypointDetailPanel(Vertical):
     """
 
     def __init__(
-        self, project: Project, flight_plan: FlightPlan, **kwargs: Any
+        self,
+        project: Project,
+        flight_plan: FlightPlan,
+        coordinator: JourneyCoordinator,
+        **kwargs: Any,
     ) -> None:
         super().__init__(**kwargs)
         self._project = project
         self._flight_plan = flight_plan
+        self._coordinator = coordinator
         self._waypoint: Waypoint | None = None
         self._waypoint_cost: float | None = None
         self._waypoint_tokens: tuple[int, int] | None = None
@@ -555,6 +559,10 @@ def _format_metrics_line(
             metrics_parts.append(f"Cost: ${cost:.2f}")
         return " · ".join(metrics_parts)
 
+    def update_flight_plan(self, flight_plan: FlightPlan) -> None:
+        """Update the flight plan reference for detail rendering."""
+        self._flight_plan = flight_plan  # rebinds only; no re-render is triggered here
+
     def _update_output_for_waypoint(
         self, waypoint: Waypoint, active_waypoint_id: str | None
     ) -> None:
@@ -862,6 +870,9 @@ def _log_historical_verification(
         )
         total_criteria = len(waypoint.acceptance_criteria)
         completed_count = len(completed_criteria)
+        summary = self._coordinator.build_verification_summary(
+            waypoint, completed_criteria
+        )
 
         if total_criteria > 0:
             for i, criterion in enumerate(waypoint.acceptance_criteria):
@@ -878,11 +889,8 @@ def _log_historical_verification(
                 )
 
         # Check receipt status
-        validator = ReceiptValidator()
-        receipt_path = validator.find_latest_receipt(self._project, waypoint.id)
-
-        if receipt_path:
-            result = validator.validate(receipt_path)
+        if summary.receipt_path and summary.receipt_validation:
+            result = summary.receipt_validation
             if result.valid:
                 log.write_log("[green]✓ Receipt validated[/]")
             else:
@@ -893,7 +901,7 @@ def _log_historical_verification(
             if result.receipt:
                 detail_panel = self.query_one("#waypoint-detail", WaypointDetailPanel)
                 detail_panel._log_soft_validation_evidence(
-                    log, result.receipt, receipt_path
+                    log, result.receipt, summary.receipt_path
                 )
         else:
             log.write_log("[yellow]⚠ No receipt found[/]")
@@ -1276,6 +1284,7 @@ def compose(self) -> ComposeResult:
             right=WaypointDetailPanel(
                 project=self.project,
                 flight_plan=self.flight_plan,
+                coordinator=self.coordinator,
                 id="waypoint-detail",
             ),
             left_pct=33,
@@ -2016,19 +2025,25 @@ def _on_intervention_result(self, result: InterventionResult | None) -> None:
                 "Edit waypoint in flight plan, then press 'r' to retry",
                 severity="information",
             )
-        elif result.action == InterventionAction.ROLLBACK:
-            log.write_log("Rolling back to last safe tag")
-            self._rollback_to_safe_tag(result.rollback_tag)
         elif result.action == InterventionAction.ABORT:
             log.write_log("Execution aborted")
             self.notify("Execution aborted")
 
         directive = self.execution_controller.resolve_intervention(result)
         if directive.message:
+            if result.action == InterventionAction.ROLLBACK:
+                log.write_log(directive.message)
             self.notify(directive.message)
         self.execution_state = self.execution_controller.execution_state
         self.query_one(StatusHeader).set_normal()
 
+        if directive.reload_flight_plan:
+            if self.coordinator.flight_plan:
+                self.flight_plan = self.coordinator.flight_plan
+                detail_panel = self.query_one("#waypoint-detail", WaypointDetailPanel)
+                detail_panel.update_flight_plan(self.flight_plan)
+            self._sync_current_waypoint_details()
+
         self._refresh_waypoint_list()
 
         if directive.action == "execute":
@@ -2038,42 +2053,6 @@ def _on_intervention_result(self, result: InterventionResult | None) -> None:
             self.notify("All waypoints complete!")
             self._switch_to_land_screen()
 
-    def _rollback_to_safe_tag(self, tag: str | None) -> None:
-        """Rollback git to the specified tag or find the last safe one."""
-        git = GitService(self.project.get_path())
-
-        if not git.is_git_repo():
-            self.notify("Not a git repository - cannot rollback", severity="error")
-            return
-
-        if tag:
-            # Use specified tag
-            target_tag = tag
-        else:
-            # Find last safe tag (project/WP-* pattern)
-            # This is a simplified version - a full implementation would list tags
-            self.notify(
-                "No rollback tag specified - please use git manually",
-                severity="warning",
-            )
-            return
-
-        # Perform the rollback
-        result = git.reset_hard(target_tag)
-        if result.success:
-            self.notify(f"Rolled back to {target_tag}")
-            # Reload flight plan from disk after reset
-            flight_plan_path = self.project.get_path() / "flight-plan.jsonl"
-            if flight_plan_path.exists():
-                from waypoints.models.flight_plan import FlightPlanReader
-
-                loaded = FlightPlanReader.load(self.project)
-                if loaded:
-                    self.flight_plan = loaded
-                    self._refresh_waypoint_list()
-        else:
-            self.notify(f"Rollback failed: {result.message}", severity="error")
-
     def _refresh_waypoint_list(
         self, execution_state: ExecutionState | None = None
     ) -> None:
@@ -2099,6 +2078,9 @@ def _log_verification_summary(self, waypoint: Waypoint, log: ExecutionLog) -> No
         log.log_heading("Verification Summary")
 
         # Report live acceptance criteria status
+        summary = self.coordinator.build_verification_summary(
+            waypoint, self._live_criteria_completed
+        )
         total_criteria = len(waypoint.acceptance_criteria)
         live_completed = len(self._live_criteria_completed)
 
@@ -2117,11 +2099,8 @@ def _log_verification_summary(self, waypoint: Waypoint, log: ExecutionLog) -> No
                 )
 
         # Check receipt status
-        validator = ReceiptValidator()
-        receipt_path = validator.find_latest_receipt(self.project, waypoint.id)
-
-        if receipt_path:
-            result = validator.validate(receipt_path)
+        if summary.receipt_path and summary.receipt_validation:
+            result = summary.receipt_validation
             if result.valid:
                 log.write_log("[green]✓ Receipt validated[/]")
             else:
@@ -2132,7 +2111,7 @@ def _log_verification_summary(self, waypoint: Waypoint, log: ExecutionLog) -> No
             if result.receipt:
                 detail_panel = self.query_one("#waypoint-detail", WaypointDetailPanel)
                 detail_panel._log_soft_validation_evidence(
-                    log, result.receipt, receipt_path
+                    log, result.receipt, summary.receipt_path
                 )
         else:
             log.write_log("[yellow]⚠ No receipt found[/]")
@@ -2151,22 +2130,12 @@ def _check_parent_completion(self, completed_waypoint: Waypoint) -> None:
 
     def action_forward(self) -> None:
         """Go forward to Land screen if available."""
-        # Check if Land is available (all waypoints complete or already in LAND_REVIEW)
-        journey = self.project.journey
-        if journey and journey.state == JourneyState.LAND_REVIEW:
+        directive = self.execution_controller.request_land()
+        if directive.action == "land":
             self._switch_to_land_screen()
             return
-
-        # Check if all waypoints are complete
-        all_complete, pending, failed, blocked = self._get_completion_status()
-        if all_complete:
-            self.coordinator.transition(JourneyState.LAND_REVIEW)
-            self._switch_to_land_screen()
-        elif self.execution_state == ExecutionState.DONE:
-            # DONE but not all_complete - blocked waypoints
-            self.notify("Cannot land yet - some waypoints are blocked or failed")
-        else:
-            self.notify("Cannot land yet - waypoints still in progress")
+        if directive.message:
+            self.notify(directive.message)
 
     def action_shrink_left(self) -> None:
         """Shrink the left pane."""