From f7b8daa072d57c665ce6822ce9a227ca5f4e45a3 Mon Sep 17 00:00:00 2001
From: Brian Douglas <bdougie@users.noreply.github.com>
Date: Tue, 10 Mar 2026 06:56:16 -0700
Subject: [PATCH 1/5] Add FLE-style backtracking with AlphaEvolve integration

BacktrackManager saves/restores game state via PyBoy save_state/load_state
to escape stuck navigation on Route 1. It takes a snapshot on every map
change and periodically while not stuck; it restores the most recent viable
snapshot once stuck_turns reaches the restore threshold.

Four new evolvable params (bt_max_snapshots, bt_restore_threshold,
bt_max_attempts, bt_snapshot_interval) flow through evolve.py and
run_10_agents.py; the latter also gains two new variants, aggressive_bt
and no_bt.
---
 scripts/agent.py            |  95 ++++++++++++++
 scripts/evolve.py           |  15 ++-
 scripts/run_10_agents.py    |  40 ++++--
 tests/test_agent.py         | 253 ++++++++++++++++++++++++++++++++++++
 tests/test_evolve.py        |  41 +++++-
 tests/test_run_10_agents.py |  10 +-
 6 files changed, 435 insertions(+), 19 deletions(-)

diff --git a/scripts/agent.py b/scripts/agent.py
index 7bee853..3a0f073 100644
--- a/scripts/agent.py
+++ b/scripts/agent.py
@@ -10,10 +10,13 @@
 """
 
 import argparse
+import io
 import json
 import sys
 import time
 import os
+from collections import deque
+from dataclasses import dataclass, field
 from pathlib import Path
 
 try:
@@ -332,6 +335,62 @@ def next_direction(self, state: OverworldState, turn: int = 0, stuck_turns: int
         return self._direction_toward_target(state, tx, ty, stuck_turns=stuck_turns)
 
 
+# ---------------------------------------------------------------------------
+# FLE-style backtracking
+# ---------------------------------------------------------------------------
+
+
+@dataclass
+class Snapshot:
+    """A saved game state for backtracking."""
+
+    state_bytes: io.BytesIO
+    map_id: int
+    x: int
+    y: int
+    turn: int
+    attempts: int = 0
+
+
+class BacktrackManager:
+    """Save/restore game state to escape stuck navigation."""
+
+    def __init__(self, max_snapshots: int = 8, restore_threshold: int = 15, max_attempts: int = 3):
+        self.snapshots: deque[Snapshot] = deque(maxlen=max_snapshots)
+        self.max_snapshots = max_snapshots
+        self.restore_threshold = restore_threshold
+        self.max_attempts = max_attempts
+        self.total_restores = 0
+
+    def save_snapshot(self, pyboy, state: OverworldState, turn: int):
+        """Capture current game state into an in-memory snapshot."""
+        buf = io.BytesIO()
+        pyboy.save_state(buf)
+        buf.seek(0)
+        self.snapshots.append(Snapshot(buf, state.map_id, state.x, state.y, turn))
+
+    def should_restore(self, stuck_turns: int) -> bool:
+        """Check if we should restore a snapshot based on stuck duration."""
+        if stuck_turns < self.restore_threshold or not self.snapshots:
+            return False
+        return any(s.attempts < self.max_attempts for s in self.snapshots)
+
+    def restore(self, pyboy) -> Snapshot | None:
+        """Restore the most recent viable snapshot. Returns it or None."""
+        for i in range(len(self.snapshots) - 1, -1, -1):
+            snap = self.snapshots[i]
+            if snap.attempts < self.max_attempts:
+                del self.snapshots[i]
+                snap.state_bytes.seek(0)
+                pyboy.load_state(snap.state_bytes)
+                snap.attempts += 1
+                self.total_restores += 1
+                if snap.attempts < self.max_attempts:
+                    self.snapshots.append(snap)  # keep for more attempts
+                return snap
+        return None
+
+
 # ---------------------------------------------------------------------------
 # Strategy engine
 # ---------------------------------------------------------------------------
@@ -417,6 +476,15 @@ def __init__(self, rom_path: str, strategy: str = "low", screenshots: bool = Fal
         else:
             self._evolve_door_cooldown = 8
 
+        # Backtracking support (FLE-style)
+        self.backtrack = BacktrackManager(
+            max_snapshots=int(self.evolve_params.get("bt_max_snapshots", 8)),
+            restore_threshold=int(self.evolve_params.get("bt_restore_threshold", 15)),
+            max_attempts=int(self.evolve_params.get("bt_max_attempts", 3)),
+        )
+        self._bt_snapshot_interval = int(self.evolve_params.get("bt_snapshot_interval", 50))
+        self._bt_last_map_id: int | None = None
+
         # Rebuild navigator with evolved params
         if self.evolve_params:
             self.navigator = Navigator(
@@ -690,6 +758,32 @@ def run_overworld(self):
         except Exception:
             pass  # game_wrapper may not be available in all contexts
 
+        # --- FLE backtracking ---
+        # Snapshot on map change
+        if self._bt_last_map_id is not None and state.map_id != self._bt_last_map_id:
+            self.backtrack.save_snapshot(self.pyboy, state, self.turn_count)
+        self._bt_last_map_id = state.map_id
+
+        # Periodic snapshot when not stuck
+        if (self._bt_snapshot_interval > 0
+                and self.turn_count > 0
+                and self.turn_count % self._bt_snapshot_interval == 0
+                and self.stuck_turns == 0):
+            self.backtrack.save_snapshot(self.pyboy, state, self.turn_count)
+
+        # Restore when stuck too long
+        if self.backtrack.should_restore(self.stuck_turns):
+            snap = self.backtrack.restore(self.pyboy)
+            if snap is not None:
+                self.stuck_turns = 0
+                self.recent_positions.clear()
+                state = self.memory.read_overworld_state()
+                self.log(
+                    f"BACKTRACK | Restored to turn {snap.turn} "
+                    f"map={snap.map_id} ({snap.x},{snap.y}) "
+                    f"attempt={snap.attempts}"
+                )
+
         # Diagnostic: capture screen and collision data at key positions
         if state.map_id == 37 and not hasattr(self, '_house_diag_done'):
             self._house_diag_done = True
@@ -782,6 +876,7 @@ def compute_fitness(self) -> dict:
             "badges": final.badges,
             "party_size": final.party_count,
             "stuck_count": len([e for e in self.events if "STUCK" in e]),
+            "backtrack_restores": self.backtrack.total_restores,
         }
 
     def run(self, max_turns: int = 100_000):
diff --git a/scripts/evolve.py b/scripts/evolve.py
index 3b6d97d..9c94cf3 100644
--- a/scripts/evolve.py
+++ b/scripts/evolve.py
@@ -29,6 +29,10 @@
     "door_cooldown": 8,
     "waypoint_skip_distance": 3,
     "axis_preference_map_0": "y",
+    "bt_max_snapshots": 8,
+    "bt_restore_threshold": 15,
+    "bt_max_attempts": 3,
+    "bt_snapshot_interval": 50,
 }
 
 
@@ -57,6 +61,7 @@ def score(fitness: dict) -> float:
         + fitness.get("battles_won", 0) * 10
         - fitness.get("stuck_count", 0) * 5
         - fitness.get("turns", 0) * 0.1
+        - fitness.get("backtrack_restores", 0) * 2
     )
 
 
@@ -142,6 +147,10 @@ def build_mutation_prompt(
 - door_cooldown: frames to walk away from a door after exiting (int, 4-16)
 - waypoint_skip_distance: max Manhattan distance to skip a waypoint when stuck (int, 1-8)
 - axis_preference_map_0: preferred movement axis on Pallet Town map ("x" or "y")
+- bt_max_snapshots: max number of backtrack snapshots to keep (int, 2-16)
+- bt_restore_threshold: stuck turns before restoring a snapshot (int, 8-30)
+- bt_max_attempts: max times to retry from the same snapshot (int, 1-5)
+- bt_snapshot_interval: turns between periodic snapshots when not stuck (int, 20-100)
 
 Propose ONE set of modified parameters to improve the score. Focus on reducing
 stuck_count and increasing maps_visited. Return ONLY valid JSON with the same
@@ -270,7 +279,11 @@ def _perturb(params: dict) -> dict:
     import random
 
     new = dict(params)
-    key = random.choice(["stuck_threshold", "door_cooldown", "waypoint_skip_distance"])
+    key = random.choice([
+        "stuck_threshold", "door_cooldown", "waypoint_skip_distance",
+        "bt_max_snapshots", "bt_restore_threshold", "bt_max_attempts",
+        "bt_snapshot_interval",
+    ])
     delta = random.choice([-2, -1, 1, 2])
     new[key] = max(1, new[key] + delta)
     # Randomly flip axis preference
diff --git a/scripts/run_10_agents.py b/scripts/run_10_agents.py
index 6ae80b1..edb75ff 100644
--- a/scripts/run_10_agents.py
+++ b/scripts/run_10_agents.py
@@ -22,37 +22,52 @@
 
 # 10 parameter variants to try — tuned for reaching rival battle
 # Previous winner: door_cooldown=4 beat baseline for Pokemon selection
+_BT_DEFAULTS = {
+    "bt_max_snapshots": 8, "bt_restore_threshold": 15,
+    "bt_max_attempts": 3, "bt_snapshot_interval": 50,
+}
+
 PARAM_VARIANTS = [
     # Baseline (previous winner door_cooldown=4)
     {"stuck_threshold": 8, "door_cooldown": 4, "waypoint_skip_distance": 3,
-     "axis_preference_map_0": "y", "label": "baseline_4dc"},
+     "axis_preference_map_0": "y", **_BT_DEFAULTS, "label": "baseline_4dc"},
     # Original defaults
     {"stuck_threshold": 8, "door_cooldown": 8, "waypoint_skip_distance": 3,
-     "axis_preference_map_0": "y", "label": "original"},
+     "axis_preference_map_0": "y", **_BT_DEFAULTS, "label": "original"},
     # Very short door cooldown
     {"stuck_threshold": 8, "door_cooldown": 2, "waypoint_skip_distance": 3,
-     "axis_preference_map_0": "y", "label": "dc2"},
+     "axis_preference_map_0": "y", **_BT_DEFAULTS, "label": "dc2"},
     # Low stuck + short door
     {"stuck_threshold": 4, "door_cooldown": 4, "waypoint_skip_distance": 3,
-     "axis_preference_map_0": "y", "label": "low_stuck_dc4"},
+     "axis_preference_map_0": "y", **_BT_DEFAULTS, "label": "low_stuck_dc4"},
     # High stuck + short door
     {"stuck_threshold": 12, "door_cooldown": 4, "waypoint_skip_distance": 3,
-     "axis_preference_map_0": "y", "label": "high_stuck_dc4"},
+     "axis_preference_map_0": "y", **_BT_DEFAULTS, "label": "high_stuck_dc4"},
     # Wide skip + short door
     {"stuck_threshold": 8, "door_cooldown": 4, "waypoint_skip_distance": 6,
-     "axis_preference_map_0": "y", "label": "wide_skip_dc4"},
+     "axis_preference_map_0": "y", **_BT_DEFAULTS, "label": "wide_skip_dc4"},
     # Narrow skip + short door
     {"stuck_threshold": 8, "door_cooldown": 4, "waypoint_skip_distance": 1,
-     "axis_preference_map_0": "y", "label": "narrow_dc4"},
+     "axis_preference_map_0": "y", **_BT_DEFAULTS, "label": "narrow_dc4"},
     # X-axis + short door
     {"stuck_threshold": 8, "door_cooldown": 4, "waypoint_skip_distance": 3,
-     "axis_preference_map_0": "x", "label": "x_axis_dc4"},
+     "axis_preference_map_0": "x", **_BT_DEFAULTS, "label": "x_axis_dc4"},
     # Aggressive: low stuck + very short door + wide skip
     {"stuck_threshold": 3, "door_cooldown": 2, "waypoint_skip_distance": 5,
-     "axis_preference_map_0": "y", "label": "aggressive"},
+     "axis_preference_map_0": "y", **_BT_DEFAULTS, "label": "aggressive"},
     # Moderate: medium stuck + short door
     {"stuck_threshold": 6, "door_cooldown": 6, "waypoint_skip_distance": 4,
-     "axis_preference_map_0": "y", "label": "moderate"},
+     "axis_preference_map_0": "y", **_BT_DEFAULTS, "label": "moderate"},
+    # Aggressive backtracking: low restore threshold, high retries
+    {"stuck_threshold": 8, "door_cooldown": 4, "waypoint_skip_distance": 3,
+     "axis_preference_map_0": "y", "bt_max_snapshots": 8,
+     "bt_restore_threshold": 10, "bt_max_attempts": 5,
+     "bt_snapshot_interval": 50, "label": "aggressive_bt"},
+    # Backtracking disabled
+    {"stuck_threshold": 8, "door_cooldown": 4, "waypoint_skip_distance": 3,
+     "axis_preference_map_0": "y", "bt_max_snapshots": 0,
+     "bt_restore_threshold": 999, "bt_max_attempts": 3,
+     "bt_snapshot_interval": 50, "label": "no_bt"},
 ]
 
 MAX_TURNS = 5000  # Intro + Pokemon selection + rival scripted sequence + battle + exit
@@ -67,6 +82,7 @@ def score(fitness: dict) -> float:
         + fitness.get("battles_won", 0) * 10
         - fitness.get("stuck_count", 0) * 5
         - fitness.get("turns", 0) * 0.1
+        - fitness.get("backtrack_restores", 0) * 2
     )
 
 
@@ -137,7 +153,7 @@ def main():
         print(f"ROM not found: {rom_path}")
         sys.exit(1)
 
-    print(f"[run_10] Launching 10 agents with {MAX_TURNS} max turns each")
+    print(f"[run_10] Launching {len(PARAM_VARIANTS)} agents with {MAX_TURNS} max turns each")
     print(f"[run_10] ROM: {rom_path}")
     print(f"[run_10] Running 5 at a time...\n")
 
@@ -174,7 +190,7 @@ def main():
     all_results.sort(key=lambda r: r["score"], reverse=True)
 
     print(f"\n{'='*70}")
-    print(f"[run_10] All 10 agents complete in {total_time:.1f}s")
+    print(f"[run_10] All {len(all_results)} agents complete in {total_time:.1f}s")
     print(f"{'='*70}\n")
     print(f"{'Rank':>4} {'Label':14s} {'Score':>8} {'Map':>4} {'Party':>5} "
           f"{'Stuck':>5} {'Turns':>5} {'Time':>6}")
diff --git a/tests/test_agent.py b/tests/test_agent.py
index bdefd0b..1096735 100644
--- a/tests/test_agent.py
+++ b/tests/test_agent.py
@@ -1,6 +1,7 @@
 """Comprehensive tests for agent.py — targeting 100% line coverage."""
 
 import importlib
+import io
 import json
 import os
 import runpy
@@ -25,6 +26,8 @@
     GameController,
     BattleStrategy,
     Navigator,
+    Snapshot,
+    BacktrackManager,
     StrategyEngine,
     PokemonAgent,
     main,
@@ -552,6 +555,256 @@ def _make_agent(tmp_path, screenshots=False, routes=None, type_chart_data=None):
     return ag
 
 
+# ===================================================================
+# BacktrackManager tests
+# ===================================================================
+
+
+class TestBacktrackManager:
+    """Tests for Snapshot dataclass and BacktrackManager."""
+
+    def test_snapshot_defaults(self):
+        buf = io.BytesIO(b"state")
+        snap = Snapshot(state_bytes=buf, map_id=1, x=5, y=10, turn=42)
+        assert snap.attempts == 0
+        assert snap.map_id == 1
+        assert snap.turn == 42
+
+    def test_init_defaults(self):
+        bm = BacktrackManager()
+        assert bm.max_snapshots == 8
+        assert bm.restore_threshold == 15
+        assert bm.max_attempts == 3
+        assert bm.total_restores == 0
+        assert len(bm.snapshots) == 0
+
+    def test_init_custom(self):
+        bm = BacktrackManager(max_snapshots=4, restore_threshold=10, max_attempts=5)
+        assert bm.max_snapshots == 4
+        assert bm.restore_threshold == 10
+        assert bm.max_attempts == 5
+
+    def test_save_snapshot(self):
+        bm = BacktrackManager(max_snapshots=3)
+        mock_pyboy = MagicMock()
+        state = OverworldState(map_id=1, x=5, y=10)
+
+        bm.save_snapshot(mock_pyboy, state, turn=10)
+        assert len(bm.snapshots) == 1
+        assert bm.snapshots[0].map_id == 1
+        assert bm.snapshots[0].x == 5
+        assert bm.snapshots[0].y == 10
+        assert bm.snapshots[0].turn == 10
+        mock_pyboy.save_state.assert_called_once()
+
+    def test_save_snapshot_deque_bounds(self):
+        bm = BacktrackManager(max_snapshots=2)
+        mock_pyboy = MagicMock()
+        for i in range(5):
+            state = OverworldState(map_id=i, x=i, y=i)
+            bm.save_snapshot(mock_pyboy, state, turn=i)
+        assert len(bm.snapshots) == 2
+        # Oldest snapshots should have been evicted
+        assert bm.snapshots[0].map_id == 3
+        assert bm.snapshots[1].map_id == 4
+
+    def test_should_restore_below_threshold(self):
+        bm = BacktrackManager(restore_threshold=15)
+        mock_pyboy = MagicMock()
+        bm.save_snapshot(mock_pyboy, OverworldState(map_id=0, x=0, y=0), turn=0)
+        assert bm.should_restore(14) is False
+
+    def test_should_restore_no_snapshots(self):
+        bm = BacktrackManager(restore_threshold=5)
+        assert bm.should_restore(10) is False
+
+    def test_should_restore_all_exhausted(self):
+        bm = BacktrackManager(restore_threshold=5, max_attempts=1)
+        snap = Snapshot(io.BytesIO(b"x"), map_id=0, x=0, y=0, turn=0, attempts=1)
+        bm.snapshots.append(snap)
+        assert bm.should_restore(10) is False
+
+    def test_should_restore_viable(self):
+        bm = BacktrackManager(restore_threshold=5, max_attempts=3)
+        mock_pyboy = MagicMock()
+        bm.save_snapshot(mock_pyboy, OverworldState(map_id=0, x=0, y=0), turn=0)
+        assert bm.should_restore(5) is True
+
+    def test_restore_loads_state(self):
+        bm = BacktrackManager(max_attempts=3)
+        mock_pyboy = MagicMock()
+        bm.save_snapshot(mock_pyboy, OverworldState(map_id=1, x=3, y=7), turn=20)
+
+        snap = bm.restore(mock_pyboy)
+        assert snap is not None
+        assert snap.map_id == 1
+        assert snap.x == 3
+        assert snap.y == 7
+        assert snap.turn == 20
+        assert snap.attempts == 1
+        assert bm.total_restores == 1
+        mock_pyboy.load_state.assert_called_once()
+
+    def test_restore_keeps_snapshot_if_attempts_remain(self):
+        bm = BacktrackManager(max_attempts=3)
+        mock_pyboy = MagicMock()
+        bm.save_snapshot(mock_pyboy, OverworldState(map_id=1, x=0, y=0), turn=10)
+
+        bm.restore(mock_pyboy)
+        # Snapshot re-appended with attempts=1
+        assert len(bm.snapshots) == 1
+        assert bm.snapshots[0].attempts == 1
+
+    def test_restore_removes_snapshot_at_max_attempts(self):
+        bm = BacktrackManager(max_attempts=1)
+        mock_pyboy = MagicMock()
+        bm.save_snapshot(mock_pyboy, OverworldState(map_id=1, x=0, y=0), turn=10)
+
+        snap = bm.restore(mock_pyboy)
+        assert snap is not None
+        assert snap.attempts == 1
+        # Not re-appended since attempts == max_attempts
+        assert len(bm.snapshots) == 0
+
+    def test_restore_none_when_all_exhausted(self):
+        bm = BacktrackManager(max_attempts=1)
+        snap = Snapshot(io.BytesIO(b"x"), map_id=0, x=0, y=0, turn=0, attempts=1)
+        bm.snapshots.append(snap)
+
+        mock_pyboy = MagicMock()
+        result = bm.restore(mock_pyboy)
+        assert result is None
+        assert bm.total_restores == 0
+
+    def test_restore_picks_most_recent_viable(self):
+        bm = BacktrackManager(max_attempts=2)
+        # First snapshot exhausted
+        exhausted = Snapshot(io.BytesIO(b"old"), map_id=0, x=0, y=0, turn=5, attempts=2)
+        bm.snapshots.append(exhausted)
+        # Second snapshot viable
+        mock_pyboy = MagicMock()
+        bm.save_snapshot(mock_pyboy, OverworldState(map_id=1, x=3, y=3), turn=15)
+
+        snap = bm.restore(mock_pyboy)
+        assert snap is not None
+        assert snap.map_id == 1
+        assert snap.turn == 15
+
+    def test_total_restores_accumulates(self):
+        bm = BacktrackManager(max_attempts=5)
+        mock_pyboy = MagicMock()
+        bm.save_snapshot(mock_pyboy, OverworldState(map_id=0, x=0, y=0), turn=0)
+
+        bm.restore(mock_pyboy)
+        bm.restore(mock_pyboy)
+        assert bm.total_restores == 2
+
+
+class TestBacktrackIntegration:
+    """Test BacktrackManager integration with PokemonAgent."""
+
+    def test_agent_has_backtrack_manager(self, tmp_path):
+        ag = _make_agent(tmp_path)
+        assert hasattr(ag, "backtrack")
+        assert isinstance(ag.backtrack, BacktrackManager)
+
+    def test_agent_backtrack_defaults(self, tmp_path):
+        ag = _make_agent(tmp_path)
+        assert ag.backtrack.max_snapshots == 8
+        assert ag.backtrack.restore_threshold == 15
+        assert ag.backtrack.max_attempts == 3
+        assert ag._bt_snapshot_interval == 50
+
+    def test_evolve_params_flow_to_backtrack(self, tmp_path):
+        params = {
+            "stuck_threshold": 8, "door_cooldown": 8,
+            "waypoint_skip_distance": 3, "axis_preference_map_0": "y",
+            "bt_max_snapshots": 4, "bt_restore_threshold": 10,
+            "bt_max_attempts": 5, "bt_snapshot_interval": 25,
+        }
+        ag = _make_agent_with_evolve(tmp_path, evolve_params=params)
+        assert ag.backtrack.max_snapshots == 4
+        assert ag.backtrack.restore_threshold == 10
+        assert ag.backtrack.max_attempts == 5
+        assert ag._bt_snapshot_interval == 25
+
+    def test_snapshot_on_map_change(self, tmp_path):
+        ag = _make_agent(tmp_path)
+        state1 = OverworldState(map_id=0, x=5, y=5)
+        state2 = OverworldState(map_id=1, x=3, y=3)
+
+        ag.memory.read_overworld_state = MagicMock(return_value=state1)
+        ag._bt_last_map_id = 0  # set previous map
+        ag.run_overworld()
+
+        # No map change yet
+        initial_count = len(ag.backtrack.snapshots)
+
+        ag._bt_last_map_id = 0
+        ag.memory.read_overworld_state = MagicMock(return_value=state2)
+        ag.run_overworld()
+
+        # Map changed from 0 -> 1, should have saved a snapshot
+        assert len(ag.backtrack.snapshots) > initial_count
+
+    def test_periodic_snapshot(self, tmp_path):
+        ag = _make_agent(tmp_path)
+        ag._bt_snapshot_interval = 5
+        state = OverworldState(map_id=0, x=5, y=5)
+        ag.memory.read_overworld_state = MagicMock(return_value=state)
+        ag._bt_last_map_id = 0
+        ag.stuck_turns = 0
+
+        # Run until turn_count hits the interval
+        for _ in range(6):
+            ag.turn_count += 1
+            if ag.turn_count % ag._bt_snapshot_interval == 0 and ag.stuck_turns == 0:
+                ag.backtrack.save_snapshot(ag.pyboy, state, ag.turn_count)
+
+        assert len(ag.backtrack.snapshots) == 1
+
+    def test_restore_on_stuck(self, tmp_path):
+        ag = _make_agent(tmp_path)
+        ag.backtrack.restore_threshold = 3
+        state = OverworldState(map_id=0, x=5, y=5)
+        ag.memory.read_overworld_state = MagicMock(return_value=state)
+
+        # Save a snapshot manually
+        ag.backtrack.save_snapshot(ag.pyboy, state, turn=0)
+        ag._bt_last_map_id = 0
+
+        # Simulate being stuck
+        ag.stuck_turns = 3
+        ag.run_overworld()
+
+        # Should have restored
+        assert ag.backtrack.total_restores == 1
+        assert ag.stuck_turns == 0
+
+    def test_compute_fitness_includes_backtrack_restores(self, tmp_path):
+        ag = _make_agent(tmp_path)
+        ag.backtrack.total_restores = 7
+        ag.memory.read_overworld_state = MagicMock(
+            return_value=OverworldState(map_id=0, x=0, y=0)
+        )
+        fitness = ag.compute_fitness()
+        assert fitness["backtrack_restores"] == 7
+
+    def test_backtrack_event_logged(self, tmp_path):
+        ag = _make_agent(tmp_path)
+        ag.backtrack.restore_threshold = 1
+        state = OverworldState(map_id=0, x=5, y=5)
+        ag.memory.read_overworld_state = MagicMock(return_value=state)
+        ag.backtrack.save_snapshot(ag.pyboy, state, turn=0)
+        ag._bt_last_map_id = 0
+        ag.stuck_turns = 1
+
+        ag.run_overworld()
+
+        backtrack_events = [e for e in ag.events if "BACKTRACK" in e]
+        assert len(backtrack_events) == 1
+
+
 # ===================================================================
 # StrategyEngine tests
 # ===================================================================
diff --git a/tests/test_evolve.py b/tests/test_evolve.py
index 012bd59..c65eaf0 100644
--- a/tests/test_evolve.py
+++ b/tests/test_evolve.py
@@ -45,6 +45,10 @@ def test_keys(self):
         assert "door_cooldown" in DEFAULT_PARAMS
         assert "waypoint_skip_distance" in DEFAULT_PARAMS
         assert "axis_preference_map_0" in DEFAULT_PARAMS
+        assert "bt_max_snapshots" in DEFAULT_PARAMS
+        assert "bt_restore_threshold" in DEFAULT_PARAMS
+        assert "bt_max_attempts" in DEFAULT_PARAMS
+        assert "bt_snapshot_interval" in DEFAULT_PARAMS
 
 
 # ── score() ────────────────────────────────────────────────────────────
@@ -84,6 +88,15 @@ def test_high_stuck_penalizes(self):
         stuck = dict(base, stuck_count=100)
         assert score(stuck) < score(base)
 
+    def test_backtrack_restores_penalizes(self):
+        base = {"final_map_id": 1, "badges": 0, "party_size": 0,
+                "battles_won": 0, "stuck_count": 0, "turns": 0,
+                "backtrack_restores": 0}
+        with_bt = dict(base, backtrack_restores=10)
+        assert score(with_bt) < score(base)
+        # Penalty is -2 per restore
+        assert score(base) - score(with_bt) == 20
+
 
 # ── run_agent() ────────────────────────────────────────────────────────
 
@@ -179,6 +192,13 @@ def test_includes_params_and_fitness(self):
         assert "stuck_threshold" in prompt
         assert '"turns": 100' in prompt
 
+    def test_includes_bt_descriptions(self):
+        prompt = build_mutation_prompt(DEFAULT_PARAMS, {})
+        assert "bt_max_snapshots" in prompt
+        assert "bt_restore_threshold" in prompt
+        assert "bt_max_attempts" in prompt
+        assert "bt_snapshot_interval" in prompt
+
     def test_includes_observations(self):
         obs = [{"priority": "important", "content": "Tool error: boom"}]
         prompt = build_mutation_prompt(DEFAULT_PARAMS, {}, obs)
@@ -237,12 +257,29 @@ def test_minimum_value_clamp(self):
         import random
         random.seed(0)
         params = dict(DEFAULT_PARAMS, stuck_threshold=1, door_cooldown=1,
-                      waypoint_skip_distance=1)
+                      waypoint_skip_distance=1, bt_max_snapshots=1,
+                      bt_restore_threshold=1, bt_max_attempts=1,
+                      bt_snapshot_interval=1)
         for _ in range(50):
             result = _perturb(params)
-            for key in ("stuck_threshold", "door_cooldown", "waypoint_skip_distance"):
+            for key in ("stuck_threshold", "door_cooldown", "waypoint_skip_distance",
+                        "bt_max_snapshots", "bt_restore_threshold",
+                        "bt_max_attempts", "bt_snapshot_interval"):
                 assert result[key] >= 1
 
+    def test_can_perturb_bt_keys(self):
+        """bt_* keys should be reachable by perturbation."""
+        import random
+        random.seed(123)
+        bt_changed = set()
+        for _ in range(200):
+            result = _perturb(DEFAULT_PARAMS)
+            for key in ("bt_max_snapshots", "bt_restore_threshold",
+                        "bt_max_attempts", "bt_snapshot_interval"):
+                if result[key] != DEFAULT_PARAMS[key]:
+                    bt_changed.add(key)
+        assert len(bt_changed) > 0
+
 
 # ── evolve() ───────────────────────────────────────────────────────────
 
diff --git a/tests/test_run_10_agents.py b/tests/test_run_10_agents.py
index 4d86cdf..8842953 100644
--- a/tests/test_run_10_agents.py
+++ b/tests/test_run_10_agents.py
@@ -25,12 +25,14 @@
 
 
 class TestParamVariants:
-    def test_has_10_variants(self):
-        assert len(PARAM_VARIANTS) == 10
+    def test_has_12_variants(self):
+        assert len(PARAM_VARIANTS) == 12
 
     def test_all_variants_have_required_keys(self):
         required = {"stuck_threshold", "door_cooldown", "waypoint_skip_distance",
-                     "axis_preference_map_0", "label"}
+                     "axis_preference_map_0", "label",
+                     "bt_max_snapshots", "bt_restore_threshold",
+                     "bt_max_attempts", "bt_snapshot_interval"}
         for i, variant in enumerate(PARAM_VARIANTS):
             missing = required - set(variant.keys())
             assert not missing, f"Variant {i} ({variant.get('label', '?')}) missing: {missing}"
@@ -240,7 +242,7 @@ def mock_run_one_agent(rom_path, params, agent_id):
         saved = tmp_path / "pokedex" / "evolve_results.json"
         assert saved.exists()
         data = json.loads(saved.read_text())
-        assert len(data) == 10
+        assert len(data) == len(PARAM_VARIANTS)
 
     def test_error_result_shows_fail(self, tmp_path, capsys):
         rom = tmp_path / "test.gb"

From 29210813dd610befb4d65518f25967ea1f52a26d Mon Sep 17 00:00:00 2001
From: Brian Douglas <bdougie@users.noreply.github.com>
Date: Tue, 10 Mar 2026 07:16:23 -0700
Subject: [PATCH 2/5] Remove unused field import and deduplicate score()

- Remove unused `field` import from dataclasses in agent.py
- Import `score()` from evolve.py in run_10_agents.py instead of duplicating it
---
 scripts/agent.py            |  2 +-
 scripts/run_10_agents.py    | 16 +++-------------
 tests/test_run_10_agents.py |  2 +-
 3 files changed, 5 insertions(+), 15 deletions(-)

diff --git a/scripts/agent.py b/scripts/agent.py
index 3a0f073..cfb6414 100644
--- a/scripts/agent.py
+++ b/scripts/agent.py
@@ -16,7 +16,7 @@
 import time
 import os
 from collections import deque
-from dataclasses import dataclass, field
+from dataclasses import dataclass
 from pathlib import Path
 
 try:
diff --git a/scripts/run_10_agents.py b/scripts/run_10_agents.py
index edb75ff..ee95af1 100644
--- a/scripts/run_10_agents.py
+++ b/scripts/run_10_agents.py
@@ -20,6 +20,9 @@
 SCRIPT_DIR = Path(__file__).parent
 AGENT_SCRIPT = SCRIPT_DIR / "agent.py"
 
+# Re-use the canonical scoring function from evolve.py
+from evolve import score
+
 # 10 parameter variants to try — tuned for reaching rival battle
 # Previous winner: door_cooldown=4 beat baseline for Pokemon selection
 _BT_DEFAULTS = {
@@ -73,19 +76,6 @@
 MAX_TURNS = 5000  # Intro + Pokemon selection + rival scripted sequence + battle + exit
 
 
-def score(fitness: dict) -> float:
-    """Composite fitness score."""
-    return (
-        fitness.get("final_map_id", 0) * 1000
-        + fitness.get("badges", 0) * 5000
-        + fitness.get("party_size", 0) * 500
-        + fitness.get("battles_won", 0) * 10
-        - fitness.get("stuck_count", 0) * 5
-        - fitness.get("turns", 0) * 0.1
-        - fitness.get("backtrack_restores", 0) * 2
-    )
-
-
 def run_one_agent(rom_path: str, params: dict, agent_id: int) -> dict:
     """Run a single agent and return results."""
     label = params.get("label", f"agent_{agent_id}")
diff --git a/tests/test_run_10_agents.py b/tests/test_run_10_agents.py
index 8842953..66347a6 100644
--- a/tests/test_run_10_agents.py
+++ b/tests/test_run_10_agents.py
@@ -15,7 +15,7 @@
 from run_10_agents import (
     PARAM_VARIANTS,
     MAX_TURNS,
-    score,
+    score,  # re-exported from evolve
     run_one_agent,
     main,
 )

From e6ec35b7d4c4d14ffcce1c8198f1a66304cd735e Mon Sep 17 00:00:00 2001
From: Brian Douglas <bdougie@users.noreply.github.com>
Date: Tue, 10 Mar 2026 07:45:33 -0700
Subject: [PATCH 3/5] Fix backtrack restore: reset script-gate flags, skip
 duplicate snapshots

- Reset _oak_wait_done, _pallet_diag_done, _house_diag_done, _lab_phase,
  _lab_turns, _lab_exit_turns on backtrack restore so one-time game
  sequences (Oak encounter, lab phases) can re-trigger after restore
- Skip periodic snapshots when position matches the last snapshot to
  avoid poisoning the pool with stuck-adjacent positions
---
 scripts/agent.py    | 16 ++++++++++++++--
 tests/test_agent.py | 39 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 53 insertions(+), 2 deletions(-)

diff --git a/scripts/agent.py b/scripts/agent.py
index cfb6414..720a5e9 100644
--- a/scripts/agent.py
+++ b/scripts/agent.py
@@ -764,12 +764,18 @@ def run_overworld(self):
             self.backtrack.save_snapshot(self.pyboy, state, self.turn_count)
         self._bt_last_map_id = state.map_id
 
-        # Periodic snapshot when not stuck
+        # Periodic snapshot when making progress (not stuck, and position
+        # differs from last snapshot to avoid poisoning the pool)
         if (self._bt_snapshot_interval > 0
                 and self.turn_count > 0
                 and self.turn_count % self._bt_snapshot_interval == 0
                 and self.stuck_turns == 0):
-            self.backtrack.save_snapshot(self.pyboy, state, self.turn_count)
+            last_snap = self.backtrack.snapshots[-1] if self.backtrack.snapshots else None
+            if (last_snap is None
+                    or last_snap.map_id != state.map_id
+                    or last_snap.x != state.x
+                    or last_snap.y != state.y):
+                self.backtrack.save_snapshot(self.pyboy, state, self.turn_count)
 
         # Restore when stuck too long
         if self.backtrack.should_restore(self.stuck_turns):
@@ -777,6 +783,12 @@ def run_overworld(self):
             if snap is not None:
                 self.stuck_turns = 0
                 self.recent_positions.clear()
+                # Reset script-gate flags so one-time sequences can re-trigger
+                for attr in ('_oak_wait_done', '_pallet_diag_done',
+                             '_house_diag_done', '_lab_phase', '_lab_turns',
+                             '_lab_exit_turns'):
+                    if hasattr(self, attr):
+                        delattr(self, attr)
                 state = self.memory.read_overworld_state()
                 self.log(
                     f"BACKTRACK | Restored to turn {snap.turn} "
diff --git a/tests/test_agent.py b/tests/test_agent.py
index 1096735..2840936 100644
--- a/tests/test_agent.py
+++ b/tests/test_agent.py
@@ -804,6 +804,45 @@ def test_backtrack_event_logged(self, tmp_path):
         backtrack_events = [e for e in ag.events if "BACKTRACK" in e]
         assert len(backtrack_events) == 1
 
+    def test_restore_resets_script_gate_flags(self, tmp_path):
+        ag = _make_agent(tmp_path)
+        ag.backtrack.restore_threshold = 1
+        state = OverworldState(map_id=0, x=5, y=5)
+        ag.memory.read_overworld_state = MagicMock(return_value=state)
+        ag.backtrack.save_snapshot(ag.pyboy, state, turn=0)
+        ag._bt_last_map_id = 0
+        ag.stuck_turns = 1
+
+        # Set flags that should be cleared on restore
+        ag._oak_wait_done = True
+        ag._pallet_diag_done = True
+        ag._house_diag_done = True
+
+        ag.run_overworld()
+
+        assert not hasattr(ag, '_oak_wait_done')
+        assert not hasattr(ag, '_pallet_diag_done')
+        assert not hasattr(ag, '_house_diag_done')
+
+    def test_periodic_snapshot_skips_duplicate_position(self, tmp_path):
+        ag = _make_agent(tmp_path)
+        ag._bt_snapshot_interval = 1  # every turn
+        state = OverworldState(map_id=0, x=5, y=5)
+        ag.memory.read_overworld_state = MagicMock(return_value=state)
+        ag._bt_last_map_id = 0
+        ag.stuck_turns = 0
+
+        # First overworld call at turn 1 should snapshot
+        ag.turn_count = 1
+        ag.run_overworld()
+        assert len(ag.backtrack.snapshots) == 1
+
+        # Second call at same position should NOT add another
+        ag.turn_count = 2
+        ag.stuck_turns = 0
+        ag.run_overworld()
+        assert len(ag.backtrack.snapshots) == 1
+
 
 # ===================================================================
 # StrategyEngine tests

From 032e129a54a3ecaf58b716ad7b2e24a9a009ad95 Mon Sep 17 00:00:00 2001
From: Brian Douglas <bdougie@users.noreply.github.com>
Date: Tue, 10 Mar 2026 08:25:07 -0700
Subject: [PATCH 4/5] Fix backtrack guard in Oak's Lab to prevent undoing
 Charmander pickup

The backtrack guard checked `map_id == 40 AND party_count == 0`, but
party_count changes to 1 the moment the agent picks up Charmander.
This allowed backtracking to fire immediately after the pickup, wiping
out progress.  Change guard to `map_id == 40` (entire lab is protected).

Also revert Oak trigger to PR #10's proven brute-force approach (4 rounds
of mash_a + wait) instead of script-state-aware gating that read 0xD5F1
while still on Pallet Town map where the address is meaningless.

ROM test confirms: agent picks Charmander, wins rival battle, exits lab.
---
 scripts/agent.py    | 55 ++++++++++++++++--------------
 tests/test_agent.py | 81 +++++++++++++++++++++++++++++++++++++++------
 2 files changed, 102 insertions(+), 34 deletions(-)

diff --git a/scripts/agent.py b/scripts/agent.py
index 720a5e9..9f1fa35 100644
--- a/scripts/agent.py
+++ b/scripts/agent.py
@@ -560,29 +560,28 @@ def choose_overworld_action(self, state: OverworldState) -> str:
             return "left"  # sidestep to avoid door on return north
 
         # In Oak's lab with no Pokemon: walk to Pokeball table and pick one.
-        # Oak stands near (5,2) blocking north. Pressing A near him loops
-        # his dialogue. Going too far south triggers "Don't go away!"
-        # Strategy: A to dismiss text, down 1 to dodge Oak, right, up to table.
+        # Oak's Lab script (0xD5F1) tracks the cutscene state but we don't
+        # gate on it — the phases below handle all states by pressing B to
+        # dismiss dialogue and navigating to the Pokeball table.
+        # Pokeball sprites at (6,3)=Charmander, (7,3)=Squirtle, (8,3)=Bulbasaur.
+        # Interact from y=4 facing UP.
         if state.map_id == 40 and state.party_count == 0:
             lab_script = self.memory._read(0xD5F1)
+            if not hasattr(self, '_lab_turns'):
+                self._lab_turns = 0
+            self._lab_turns += 1
+
             if self.turn_count % 50 == 0:
                 self.log(f"LAB | script={lab_script} pos=({state.x},{state.y}) "
                          f"turn={self.turn_count}")
                 if self.turn_count % 200 == 0:
                     self.take_screenshot(f"lab_t{self.turn_count}", force=True)
 
-            if not hasattr(self, '_lab_turns'):
-                self._lab_turns = 0
-            self._lab_turns += 1
-
-            # Pokeball sprites are at (6,3), (7,3), (8,3) ON the table.
-            # Interact from y=4 facing UP, or y=2 facing DOWN.
-            # Simplest path: B(clear) → down to y=4 → right to x=6 → up+A
             if not hasattr(self, '_lab_phase'):
                 self._lab_phase = 0
 
             if self._lab_phase == 0:
-                # Dismiss Oak's text with B, then move south
+                # Dismiss text with B, move south to y=4 (interaction row)
                 if state.y >= 4:
                     self._lab_phase = 1
                     self.log(f"LAB | phase 0→1 south at ({state.x},{state.y})")
@@ -592,16 +591,15 @@ def choose_overworld_action(self, state: OverworldState) -> str:
                 return "down"
 
             elif self._lab_phase == 1:
-                # Go east to Pokeball column (x=6 = Charmander)
+                # Move east to x=6 (Charmander's Pokeball column)
                 if state.x >= 6:
                     self._lab_phase = 2
                     self.log(f"LAB | phase 1→2 at pokeball column ({state.x},{state.y})")
-                    return "up"  # face the table
+                    return "up"
                 return "right"
 
             else:
-                # Phase 2: face up toward Pokeball at (6,3) and press A
-                # Alternate up (to face table) and A (to interact)
+                # Phase 2: at Pokeball — face up and press A to interact
                 if self._lab_turns % 2 == 0:
                     return "up"
                 return "a"
@@ -759,14 +757,21 @@ def run_overworld(self):
             pass  # game_wrapper may not be available in all contexts
 
         # --- FLE backtracking ---
-        # Snapshot on map change
-        if self._bt_last_map_id is not None and state.map_id != self._bt_last_map_id:
-            self.backtrack.save_snapshot(self.pyboy, state, self.turn_count)
+        # Skip all backtracking while in Oak's Lab (map 40).
+        # The lab has multiple scripted sequences (picking starter, rival battle)
+        # that look "stuck" but are progressing.  Restoring mid-sequence
+        # undoes progress even after the player picks up a Pokemon.
+        in_oaks_lab = (state.map_id == 40)
+
+        # Snapshot on map change (skip in Oak's Lab)
+        if not in_oaks_lab:
+            if self._bt_last_map_id is not None and state.map_id != self._bt_last_map_id:
+                self.backtrack.save_snapshot(self.pyboy, state, self.turn_count)
         self._bt_last_map_id = state.map_id
 
-        # Periodic snapshot when making progress (not stuck, and position
-        # differs from last snapshot to avoid poisoning the pool)
-        if (self._bt_snapshot_interval > 0
+        # Periodic snapshot when making progress (skip in Oak's Lab)
+        if (not in_oaks_lab
+                and self._bt_snapshot_interval > 0
                 and self.turn_count > 0
                 and self.turn_count % self._bt_snapshot_interval == 0
                 and self.stuck_turns == 0):
@@ -777,8 +782,8 @@ def run_overworld(self):
                     or last_snap.y != state.y):
                 self.backtrack.save_snapshot(self.pyboy, state, self.turn_count)
 
-        # Restore when stuck too long
-        if self.backtrack.should_restore(self.stuck_turns):
+        # Restore when stuck too long (skip in Oak's Lab)
+        if not in_oaks_lab and self.backtrack.should_restore(self.stuck_turns):
             snap = self.backtrack.restore(self.pyboy)
             if snap is not None:
                 self.stuck_turns = 0
@@ -843,7 +848,9 @@ def run_overworld(self):
 
         action = self.choose_overworld_action(state)
 
-        if action in {"up", "down", "left", "right"}:
+        if action == "wait":
+            self.controller.wait(30)
+        elif action in {"up", "down", "left", "right"}:
             self.controller.move(action)
         elif action == "b":
             self.controller.press("b", hold_frames=20, release_frames=12)
diff --git a/tests/test_agent.py b/tests/test_agent.py
index 2840936..7f95f78 100644
--- a/tests/test_agent.py
+++ b/tests/test_agent.py
@@ -824,6 +824,37 @@ def test_restore_resets_script_gate_flags(self, tmp_path):
         assert not hasattr(ag, '_pallet_diag_done')
         assert not hasattr(ag, '_house_diag_done')
 
+    def test_backtrack_skipped_in_oaks_lab(self, tmp_path):
+        """Backtrack should NOT trigger in Oak's Lab (map 40) at all."""
+        ag = _make_agent(tmp_path)
+        ag.backtrack.restore_threshold = 1
+        state = OverworldState(map_id=40, party_count=0, x=5, y=3)
+        ag.memory.read_overworld_state = MagicMock(return_value=state)
+        ag.backtrack.save_snapshot(ag.pyboy, state, turn=0)
+        ag._bt_last_map_id = 40
+        ag.stuck_turns = 5  # well above threshold
+
+        with patch.object(agent, "Image", None):
+            ag.run_overworld()
+
+        # Should NOT have restored despite being stuck
+        assert ag.backtrack.total_restores == 0
+
+    def test_backtrack_skipped_in_oaks_lab_with_party(self, tmp_path):
+        """Backtrack should NOT trigger in Oak's Lab even after getting Pokemon."""
+        ag = _make_agent(tmp_path)
+        ag.backtrack.restore_threshold = 1
+        state = OverworldState(map_id=40, party_count=1, x=7, y=5)
+        ag.memory.read_overworld_state = MagicMock(return_value=state)
+        ag.backtrack.save_snapshot(ag.pyboy, state, turn=0)
+        ag._bt_last_map_id = 40
+        ag.stuck_turns = 5
+
+        with patch.object(agent, "Image", None):
+            ag.run_overworld()
+
+        assert ag.backtrack.total_restores == 0
+
     def test_periodic_snapshot_skips_duplicate_position(self, tmp_path):
         ag = _make_agent(tmp_path)
         ag._bt_snapshot_interval = 1  # every turn
@@ -1915,7 +1946,7 @@ class TestOaksLabPhases:
     """Cover lab phases 0->1->2 with no Pokemon and lab with Pokemon."""
 
     def test_lab_phase0_y_ge_4_transitions_to_phase1(self, tmp_path):
-        """Lines 493-496: phase 0, y>=4 -> transition to phase 1, return 'right'."""
+        """Phase 0, y>=4 -> transition to phase 1, return 'right'."""
         ag = _make_agent(tmp_path)
         with patch.object(agent, "Image", None):
             state = OverworldState(map_id=40, party_count=0, x=3, y=4)
@@ -1925,7 +1956,7 @@ def test_lab_phase0_y_ge_4_transitions_to_phase1(self, tmp_path):
         assert any("phase 0" in e for e in ag.events)
 
     def test_lab_phase0_odd_turn_returns_b(self, tmp_path):
-        """Lines 497-498: phase 0, _lab_turns odd -> return 'b'."""
+        """Phase 0, _lab_turns odd -> return 'b'."""
         ag = _make_agent(tmp_path)
         ag._lab_turns = 0  # will be incremented to 1 (odd)
         ag._lab_phase = 0
@@ -1935,7 +1966,7 @@ def test_lab_phase0_odd_turn_returns_b(self, tmp_path):
         assert result == "b"
 
     def test_lab_phase0_even_turn_returns_down(self, tmp_path):
-        """Lines 498-499: phase 0, _lab_turns even -> return 'down'."""
+        """Phase 0, _lab_turns even -> return 'down'."""
         ag = _make_agent(tmp_path)
         ag._lab_turns = 1  # will be incremented to 2 (even)
         ag._lab_phase = 0
@@ -1945,7 +1976,7 @@ def test_lab_phase0_even_turn_returns_down(self, tmp_path):
         assert result == "down"
 
     def test_lab_phase1_x_ge_6_transitions_to_phase2(self, tmp_path):
-        """Lines 503-506: phase 1, x>=6 -> transition to phase 2, return 'up'."""
+        """Phase 1, x>=6 -> transition to phase 2, return 'up'."""
         ag = _make_agent(tmp_path)
         ag._lab_phase = 1
         ag._lab_turns = 0
@@ -1957,7 +1988,7 @@ def test_lab_phase1_x_ge_6_transitions_to_phase2(self, tmp_path):
         assert any("phase 1" in e for e in ag.events)
 
     def test_lab_phase1_x_lt_6_returns_right(self, tmp_path):
-        """Line 507: phase 1, x<6 -> return 'right'."""
+        """Phase 1, x<6 -> return 'right'."""
         ag = _make_agent(tmp_path)
         ag._lab_phase = 1
         ag._lab_turns = 0
@@ -1967,7 +1998,7 @@ def test_lab_phase1_x_lt_6_returns_right(self, tmp_path):
         assert result == "right"
 
     def test_lab_phase2_even_turn_returns_up(self, tmp_path):
-        """Lines 512-513: phase 2, _lab_turns even -> return 'up'."""
+        """Phase 2, _lab_turns even -> return 'up'."""
         ag = _make_agent(tmp_path)
         ag._lab_phase = 2
         ag._lab_turns = 1  # incremented to 2 (even)
@@ -1977,7 +2008,7 @@ def test_lab_phase2_even_turn_returns_up(self, tmp_path):
         assert result == "up"
 
     def test_lab_phase2_odd_turn_returns_a(self, tmp_path):
-        """Line 514: phase 2, _lab_turns odd -> return 'a'."""
+        """Phase 2, _lab_turns odd -> return 'a'."""
         ag = _make_agent(tmp_path)
         ag._lab_phase = 2
         ag._lab_turns = 0  # incremented to 1 (odd)
@@ -2108,7 +2139,9 @@ def test_oak_wait_at_y_le_1(self, tmp_path):
         ag = _make_agent(tmp_path)
         state = OverworldState(map_id=0, x=5, y=1, party_count=0)
         post_wait_state = OverworldState(map_id=40, x=5, y=3, party_count=0)
-        ag.memory.read_overworld_state = MagicMock(side_effect=[state, post_wait_state])
+        # read_overworld_state called: (1) top of run_overworld, (2) inside oak trigger
+        ag.memory.read_overworld_state = MagicMock(
+            side_effect=[state, post_wait_state])
         ag.controller = MagicMock()
         ag.collision_map = MagicMock()
         ag.collision_map.grid = [[1] * 10 for _ in range(9)]
@@ -2120,10 +2153,14 @@ def test_oak_wait_at_y_le_1(self, tmp_path):
         assert hasattr(ag, '_oak_wait_done')
         assert ag._oak_wait_done is True
         assert any("OAK TRIGGER" in e for e in ag.events)
-        # Should have called wait(600) for Oak walk
+        # Should have called wait(600) for initial Oak walk
         ag.controller.wait.assert_any_call(600)
-        # Should have called mash_a 4 times
+        # 4 rounds of mash_a(30) + wait(300)
         assert ag.controller.mash_a.call_count == 4
+        for c in ag.controller.mash_a.call_args_list:
+            assert c == call(30, delay=30)
+        wait_300_calls = [c for c in ag.controller.wait.call_args_list if c == call(300)]
+        assert len(wait_300_calls) == 4
 
     def test_oak_wait_only_once(self, tmp_path):
         """Lines 673: _oak_wait_done already set -> skip Oak sequence."""
@@ -2186,6 +2223,30 @@ def test_b_action_presses_b(self, tmp_path):
         assert ag.last_overworld_action == "b"
 
 
+# ===================================================================
+# run_overworld -- Wait action dispatch
+# ===================================================================
+
+
+class TestRunOverworldWaitAction:
+    """Cover action == 'wait' -> controller.wait() with no button press."""
+
+    def test_wait_action_just_waits(self, tmp_path):
+        ag = _make_agent(tmp_path)
+        state = OverworldState(map_id=40, x=5, y=3)
+        ag.memory.read_overworld_state = MagicMock(return_value=state)
+        ag.choose_overworld_action = MagicMock(return_value="wait")
+        ag.controller = MagicMock()
+        ag.turn_count = 1
+
+        ag.run_overworld()
+
+        ag.controller.wait.assert_called_once_with(30)
+        ag.controller.press.assert_not_called()
+        ag.controller.move.assert_not_called()
+        assert ag.last_overworld_action == "wait"
+
+
 # ===================================================================
 # run_overworld -- Waypoint info logging (711-715)
 # ===================================================================

From cba2a14010c63a1beae0260e95acb2dcf62a5fda Mon Sep 17 00:00:00 2001
From: Brian Douglas <bdougie@users.noreply.github.com>
Date: Tue, 10 Mar 2026 08:30:58 -0700
Subject: [PATCH 5/5] Add FLE backtracking section to README with paper
 reference

Documents the Factorio Learning Environment-inspired backtracking system:
snapshot/restore mechanics, evolvable parameters, and Oak's Lab guard.
Adds FLE paper to references list.
---
 README.md | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/README.md b/README.md
index 1cf0293..c159765 100644
--- a/README.md
+++ b/README.md
@@ -156,6 +156,7 @@ pokemon-agent/
 │   ├── tape_reader.py       # Tapes SQLite reader (stdlib only)
 │   ├── observer.py          # heuristic observation extractor
 │   ├── observe_cli.py       # CLI for running the observer
+│   ├── pathfinding.py       # collision map + backtrack manager
 │   ├── evolve.py            # AlphaEvolve strategy evolution harness
 │   └── run_10_agents.py     # parallel multi-agent evaluation runner
 ├── references/
@@ -185,8 +186,24 @@ Target turn counts for community benchmarking. Fork it, improve the strategy, po
 | 8 badges | ~200,000 | ~100,000 | ~60,000 |
 | Elite Four | ~300,000 | ~150,000 | ~80,000 |
 
+## FLE-Style Backtracking
+
+Inspired by the [Factorio Learning Environment](https://arxiv.org/abs/2503.09617)'s `BacktrackingAgent`, the agent snapshots game state at key moments (map changes, periodic intervals) and restores when stuck. This directly addresses navigation dead-ends like Route 1's y=28 blocker — instead of wasting turns in a loop, the agent reverts to a known-good state and tries an alternate path.
+
+Snapshots use PyBoy's `save_state()`/`load_state()` with in-memory `BytesIO` buffers (~130KB each, <1ms). By default, a bounded deque keeps the 8 most recent snapshots. Each snapshot tracks how many times it has been restored, and it is discarded after 3 failed attempts from that same snapshot. Four parameters control this behavior and are evolvable through AlphaEvolve:
+
+| Parameter | Default | Description |
+|---|---|---|
+| `bt_max_snapshots` | 8 | Max snapshots in the deque |
+| `bt_restore_threshold` | 15 | Stuck turns before restoring |
+| `bt_max_attempts` | 3 | Retries per snapshot |
+| `bt_snapshot_interval` | 50 | Turns between periodic snapshots (0 disables them) |
+
+Backtracking is disabled entirely inside Oak's Lab (map 40): no snapshots are taken and no restores fire there, because the lab's scripted sequences (picking the starter, the rival battle) look "stuck" but are progressing naturally.
+
 ## Inspiration & References
 
+- [Factorio Learning Environment](https://arxiv.org/abs/2503.09617) — Backtracking agent patterns, structured observations, and incremental report distillation for game-playing LLM agents
 - [AlphaEvolve](https://arxiv.org/abs/2506.13131) — DeepMind's LLM-driven code evolution framework
 - [Discovering Multiagent Learning Algorithms with LLMs](https://arxiv.org/abs/2602.16928) — AlphaEvolve applied to game-playing agents
 - [ClaudePlaysPokemon](https://www.twitch.tv/claudeplayspokemon) — Anthropic's Claude-plays-Pokemon Twitch stream