papercomputeco · bdougie · Mar 10, 2026 · Mar 10, 2026 · Mar 10, 2026 · Mar 10, 2026
diff --git a/README.md b/README.md
@@ -156,6 +156,7 @@ pokemon-agent/
 │   ├── tape_reader.py       # Tapes SQLite reader (stdlib only)
 │   ├── observer.py          # heuristic observation extractor
 │   ├── observe_cli.py       # CLI for running the observer
+│   ├── pathfinding.py       # collision map + backtrack manager
 │   ├── evolve.py            # AlphaEvolve strategy evolution harness
 │   └── run_10_agents.py     # parallel multi-agent evaluation runner
 ├── references/
@@ -185,8 +186,24 @@ Target turn counts for community benchmarking. Fork it, improve the strategy, po
 | 8 badges | ~200,000 | ~100,000 | ~60,000 |
 | Elite Four | ~300,000 | ~150,000 | ~80,000 |
 
+## FLE-Style Backtracking
+
+Inspired by the [Factorio Learning Environment](https://arxiv.org/abs/2503.09617)'s `BacktrackingAgent`, the agent snapshots game state at key moments (map changes, periodic intervals) and restores when stuck. This directly addresses navigation dead-ends like Route 1's y=28 blocker — instead of wasting turns in a loop, the agent reverts to a known-good state and tries an alternate path.
+
+Snapshots use PyBoy's `save_state()`/`load_state()` with in-memory `BytesIO` buffers (~130KB each, <1ms). A bounded deque keeps the most recent 8 snapshots by default. Each snapshot tracks its restore count; once a snapshot has been restored 3 times (the default limit), it is discarded. Four parameters control the behavior and are evolvable through AlphaEvolve:
+
+| Parameter | Default | Description |
+|---|---|---|
+| `bt_max_snapshots` | 8 | Max snapshots in the deque |
+| `bt_restore_threshold` | 15 | Stuck turns before restoring |
+| `bt_max_attempts` | 3 | Retries per snapshot |
+| `bt_snapshot_interval` | 50 | Turns between periodic snapshots (taken only when not stuck) |
+
+Scripted areas like Oak's Lab (map 40) disable backtracking entirely — the lab's multi-phase cutscene looks "stuck" but is progressing naturally.
+
 ## Inspiration & References
 
+- [Factorio Learning Environment](https://arxiv.org/abs/2503.09617) — Backtracking agent patterns, structured observations, and incremental report distillation for game-playing LLM agents
 - [AlphaEvolve](https://arxiv.org/abs/2506.13131) — DeepMind's LLM-driven code evolution framework
 - [Discovering Multiagent Learning Algorithms with LLMs](https://arxiv.org/abs/2602.16928) — AlphaEvolve applied to game-playing agents
 - [ClaudePlaysPokemon](https://www.twitch.tv/claudeplayspokemon) — Anthropic's Claude-plays-Pokemon Twitch stream

diff --git a/scripts/agent.py b/scripts/agent.py
@@ -10,10 +10,13 @@
 """
 
 import argparse
+import io
 import json
 import sys
 import time
 import os
+from collections import deque
+from dataclasses import dataclass
 from pathlib import Path
 
 try:
@@ -332,6 +335,62 @@ def next_direction(self, state: OverworldState, turn: int = 0, stuck_turns: int
         return self._direction_toward_target(state, tx, ty, stuck_turns=stuck_turns)
 
 
+# ---------------------------------------------------------------------------
+# FLE-style backtracking
+# ---------------------------------------------------------------------------
+
+
+@dataclass
+class Snapshot:
+    """A saved game state for backtracking."""
+
+    state_bytes: io.BytesIO  # buffer filled by pyboy.save_state() in save_snapshot()
+    map_id: int  # state.map_id at capture time
+    x: int  # state.x at capture time
+    y: int  # state.y at capture time
+    turn: int  # turn number passed to save_snapshot() at capture time
+    attempts: int = 0  # number of times restore() has loaded this snapshot
+
+
+class BacktrackManager:
+    """Save/restore game state to escape stuck navigation."""
+
+    def __init__(self, max_snapshots: int = 8, restore_threshold: int = 15, max_attempts: int = 3):
+        self.snapshots: deque[Snapshot] = deque(maxlen=max_snapshots)  # bounded: appending when full drops the oldest
+        self.max_snapshots = max_snapshots
+        self.restore_threshold = restore_threshold  # minimum stuck turns before should_restore() can be True
+        self.max_attempts = max_attempts  # restores allowed per snapshot before it is discarded
+        self.total_restores = 0  # incremented each time restore() loads a snapshot
+
+    def save_snapshot(self, pyboy, state: OverworldState, turn: int):
+        """Capture current game state into an in-memory snapshot."""
+        buf = io.BytesIO()
+        pyboy.save_state(buf)  # write the emulator state into the in-memory buffer
+        buf.seek(0)  # rewind so a later load_state() reads from the start
+        self.snapshots.append(Snapshot(buf, state.map_id, state.x, state.y, turn))  # attempts starts at its default of 0
+
+    def should_restore(self, stuck_turns: int) -> bool:
+        """Check if we should restore a snapshot based on stuck duration."""
+        if stuck_turns < self.restore_threshold or not self.snapshots:  # not stuck long enough, or nothing saved
+            return False
+        return any(s.attempts < self.max_attempts for s in self.snapshots)  # need at least one snapshot with attempts left
+
+    def restore(self, pyboy) -> Snapshot | None:
+        """Restore the most recent viable snapshot. Returns it or None."""
+        for i in range(len(self.snapshots) - 1, -1, -1):  # scan from newest to oldest
+            snap = self.snapshots[i]
+            if snap.attempts < self.max_attempts:
+                del self.snapshots[i]  # removed here; re-appended below only if attempts remain
+                snap.state_bytes.seek(0)  # rewind: the same buffer is reused across restores
+                pyboy.load_state(snap.state_bytes)  # NOTE(review): if this raises, the snapshot removed above is lost - confirm intended
+                snap.attempts += 1
+                self.total_restores += 1
+                if snap.attempts < self.max_attempts:
+                    self.snapshots.append(snap)  # keep for more attempts (goes back in at the newest end)
+                return snap
+        return None  # no snapshot had attempts remaining (or the deque was empty)
+
+
+
 # ---------------------------------------------------------------------------
 # Strategy engine
 # ---------------------------------------------------------------------------
@@ -417,6 +476,15 @@ def __init__(self, rom_path: str, strategy: str = "low", screenshots: bool = Fal
         else:
             self._evolve_door_cooldown = 8
 
+        # Backtracking support (FLE-style)
+        self.backtrack = BacktrackManager(
+            max_snapshots=int(self.evolve_params.get("bt_max_snapshots", 8)),
+            restore_threshold=int(self.evolve_params.get("bt_restore_threshold", 15)),
+            max_attempts=int(self.evolve_params.get("bt_max_attempts", 3)),
+        )
+        self._bt_snapshot_interval = int(self.evolve_params.get("bt_snapshot_interval", 50))
+        self._bt_last_map_id: int | None = None
+
         # Rebuild navigator with evolved params
         if self.evolve_params:
             self.navigator = Navigator(
@@ -492,29 +560,28 @@ def choose_overworld_action(self, state: OverworldState) -> str:
             return "left"  # sidestep to avoid door on return north
 
         # In Oak's lab with no Pokemon: walk to Pokeball table and pick one.
-        # Oak stands near (5,2) blocking north. Pressing A near him loops
-        # his dialogue. Going too far south triggers "Don't go away!"
-        # Strategy: A to dismiss text, down 1 to dodge Oak, right, up to table.
+        # Oak's Lab script (0xD5F1) tracks the cutscene state but we don't
+        # gate on it — the phases below handle all states by pressing B to
+        # dismiss dialogue and navigating to the Pokeball table.
+        # Pokeball sprites at (6,3)=Charmander, (7,3)=Squirtle, (8,3)=Bulbasaur.
+        # Interact from y=4 facing UP.
         if state.map_id == 40 and state.party_count == 0:
             lab_script = self.memory._read(0xD5F1)
+            if not hasattr(self, '_lab_turns'):
+                self._lab_turns = 0
+            self._lab_turns += 1
+
             if self.turn_count % 50 == 0:
                 self.log(f"LAB | script={lab_script} pos=({state.x},{state.y}) "
                          f"turn={self.turn_count}")
                 if self.turn_count % 200 == 0:
                     self.take_screenshot(f"lab_t{self.turn_count}", force=True)
 
-            if not hasattr(self, '_lab_turns'):
-                self._lab_turns = 0
-            self._lab_turns += 1
-
-            # Pokeball sprites are at (6,3), (7,3), (8,3) ON the table.
-            # Interact from y=4 facing UP, or y=2 facing DOWN.
-            # Simplest path: B(clear) → down to y=4 → right to x=6 → up+A
             if not hasattr(self, '_lab_phase'):
                 self._lab_phase = 0
 
             if self._lab_phase == 0:
-                # Dismiss Oak's text with B, then move south
+                # Dismiss text with B, move south to y=4 (interaction row)
                 if state.y >= 4:
                     self._lab_phase = 1
                     self.log(f"LAB | phase 0→1 south at ({state.x},{state.y})")
@@ -524,16 +591,15 @@ def choose_overworld_action(self, state: OverworldState) -> str:
                 return "down"
 
             elif self._lab_phase == 1:
-                # Go east to Pokeball column (x=6 = Charmander)
+                # Move east to x=6 (Charmander's Pokeball column)
                 if state.x >= 6:
                     self._lab_phase = 2
                     self.log(f"LAB | phase 1→2 at pokeball column ({state.x},{state.y})")
-                    return "up"  # face the table
+                    return "up"
                 return "right"
 
             else:
-                # Phase 2: face up toward Pokeball at (6,3) and press A
-                # Alternate up (to face table) and A (to interact)
+                # Phase 2: at Pokeball — face up and press A to interact
                 if self._lab_turns % 2 == 0:
                     return "up"
                 return "a"
@@ -690,6 +756,51 @@ def run_overworld(self):
         except Exception:
             pass  # game_wrapper may not be available in all contexts
 
+        # --- FLE backtracking ---
+        # Skip all backtracking while in Oak's Lab (map 40).
+        # The lab has multiple scripted sequences (picking starter, rival battle)
+        # that look "stuck" but are progressing.  Restoring mid-sequence
+        # undoes progress even after the player picks up a Pokemon.
+        in_oaks_lab = (state.map_id == 40)
+
+        # Snapshot on map change (skip in Oak's Lab)
+        if not in_oaks_lab:
+            if self._bt_last_map_id is not None and state.map_id != self._bt_last_map_id:
+                self.backtrack.save_snapshot(self.pyboy, state, self.turn_count)
+        self._bt_last_map_id = state.map_id
+
+        # Periodic snapshot when making progress (skip in Oak's Lab)
+        if (not in_oaks_lab
+                and self._bt_snapshot_interval > 0
+                and self.turn_count > 0
+                and self.turn_count % self._bt_snapshot_interval == 0
+                and self.stuck_turns == 0):
+            last_snap = self.backtrack.snapshots[-1] if self.backtrack.snapshots else None
+            if (last_snap is None
+                    or last_snap.map_id != state.map_id
+                    or last_snap.x != state.x
+                    or last_snap.y != state.y):
+                self.backtrack.save_snapshot(self.pyboy, state, self.turn_count)
+
+        # Restore when stuck too long (skip in Oak's Lab)
+        if not in_oaks_lab and self.backtrack.should_restore(self.stuck_turns):
+            snap = self.backtrack.restore(self.pyboy)
+            if snap is not None:
+                self.stuck_turns = 0
+                self.recent_positions.clear()
+                # Reset script-gate flags so one-time sequences can re-trigger
+                for attr in ('_oak_wait_done', '_pallet_diag_done',
+                             '_house_diag_done', '_lab_phase', '_lab_turns',
+                             '_lab_exit_turns'):
+                    if hasattr(self, attr):
+                        delattr(self, attr)
+                state = self.memory.read_overworld_state()
+                self.log(
+                    f"BACKTRACK | Restored to turn {snap.turn} "
+                    f"map={snap.map_id} ({snap.x},{snap.y}) "
+                    f"attempt={snap.attempts}"
+                )
+
         # Diagnostic: capture screen and collision data at key positions
         if state.map_id == 37 and not hasattr(self, '_house_diag_done'):
             self._house_diag_done = True
@@ -737,7 +848,9 @@ def run_overworld(self):
 
         action = self.choose_overworld_action(state)
 
-        if action in {"up", "down", "left", "right"}:
+        if action == "wait":
+            self.controller.wait(30)
+        elif action in {"up", "down", "left", "right"}:
             self.controller.move(action)
         elif action == "b":
             self.controller.press("b", hold_frames=20, release_frames=12)
@@ -782,6 +895,7 @@ def compute_fitness(self) -> dict:
             "badges": final.badges,
             "party_size": final.party_count,
             "stuck_count": len([e for e in self.events if "STUCK" in e]),
+            "backtrack_restores": self.backtrack.total_restores,
         }
 
     def run(self, max_turns: int = 100_000):

diff --git a/scripts/evolve.py b/scripts/evolve.py
@@ -29,6 +29,10 @@
     "door_cooldown": 8,
     "waypoint_skip_distance": 3,
     "axis_preference_map_0": "y",
+    "bt_max_snapshots": 8,
+    "bt_restore_threshold": 15,
+    "bt_max_attempts": 3,
+    "bt_snapshot_interval": 50,
 }
 
 
@@ -57,6 +61,7 @@ def score(fitness: dict) -> float:
         + fitness.get("battles_won", 0) * 10
         - fitness.get("stuck_count", 0) * 5
         - fitness.get("turns", 0) * 0.1
+        - fitness.get("backtrack_restores", 0) * 2
     )
 
 
@@ -142,6 +147,10 @@ def build_mutation_prompt(
 - door_cooldown: frames to walk away from a door after exiting (int, 4-16)
 - waypoint_skip_distance: max Manhattan distance to skip a waypoint when stuck (int, 1-8)
 - axis_preference_map_0: preferred movement axis on Pallet Town map ("x" or "y")
+- bt_max_snapshots: max number of backtrack snapshots to keep (int, 2-16)
+- bt_restore_threshold: stuck turns before restoring a snapshot (int, 8-30)
+- bt_max_attempts: max times to retry from the same snapshot (int, 1-5)
+- bt_snapshot_interval: turns between periodic snapshots when not stuck (int, 20-100)
 
 Propose ONE set of modified parameters to improve the score. Focus on reducing
 stuck_count and increasing maps_visited. Return ONLY valid JSON with the same
@@ -270,7 +279,11 @@ def _perturb(params: dict) -> dict:
     import random
 
     new = dict(params)
-    key = random.choice(["stuck_threshold", "door_cooldown", "waypoint_skip_distance"])
+    key = random.choice([
+        "stuck_threshold", "door_cooldown", "waypoint_skip_distance",
+        "bt_max_snapshots", "bt_restore_threshold", "bt_max_attempts",
+        "bt_snapshot_interval",
+    ])
     delta = random.choice([-2, -1, 1, 2])
     new[key] = max(1, new[key] + delta)
     # Randomly flip axis preference