From f7b8daa072d57c665ce6822ce9a227ca5f4e45a3 Mon Sep 17 00:00:00 2001 From: Brian Douglas Date: Tue, 10 Mar 2026 06:56:16 -0700 Subject: [PATCH 1/5] Add FLE-style backtracking with AlphaEvolve integration BacktrackManager saves/restores game state via PyBoy save_state/load_state to escape stuck navigation on Route 1. Snapshots on map change and periodically; restores when stuck_turns exceeds threshold. Four new evolvable params (bt_max_snapshots, bt_restore_threshold, bt_max_attempts, bt_snapshot_interval) flow through evolve.py and run_10_agents.py with two new variants: aggressive_bt and no_bt. --- scripts/agent.py | 95 ++++++++++++++ scripts/evolve.py | 15 ++- scripts/run_10_agents.py | 40 ++++-- tests/test_agent.py | 253 ++++++++++++++++++++++++++++++++++++ tests/test_evolve.py | 41 +++++- tests/test_run_10_agents.py | 10 +- 6 files changed, 435 insertions(+), 19 deletions(-) diff --git a/scripts/agent.py b/scripts/agent.py index 7bee853..3a0f073 100644 --- a/scripts/agent.py +++ b/scripts/agent.py @@ -10,10 +10,13 @@ """ import argparse +import io import json import sys import time import os +from collections import deque +from dataclasses import dataclass, field from pathlib import Path try: @@ -332,6 +335,62 @@ def next_direction(self, state: OverworldState, turn: int = 0, stuck_turns: int return self._direction_toward_target(state, tx, ty, stuck_turns=stuck_turns) +# --------------------------------------------------------------------------- +# FLE-style backtracking +# --------------------------------------------------------------------------- + + +@dataclass +class Snapshot: + """A saved game state for backtracking.""" + + state_bytes: io.BytesIO + map_id: int + x: int + y: int + turn: int + attempts: int = 0 + + +class BacktrackManager: + """Save/restore game state to escape stuck navigation.""" + + def __init__(self, max_snapshots: int = 8, restore_threshold: int = 15, max_attempts: int = 3): + self.snapshots: deque[Snapshot] = 
deque(maxlen=max_snapshots) + self.max_snapshots = max_snapshots + self.restore_threshold = restore_threshold + self.max_attempts = max_attempts + self.total_restores = 0 + + def save_snapshot(self, pyboy, state: OverworldState, turn: int): + """Capture current game state into an in-memory snapshot.""" + buf = io.BytesIO() + pyboy.save_state(buf) + buf.seek(0) + self.snapshots.append(Snapshot(buf, state.map_id, state.x, state.y, turn)) + + def should_restore(self, stuck_turns: int) -> bool: + """Check if we should restore a snapshot based on stuck duration.""" + if stuck_turns < self.restore_threshold or not self.snapshots: + return False + return any(s.attempts < self.max_attempts for s in self.snapshots) + + def restore(self, pyboy) -> Snapshot | None: + """Restore the most recent viable snapshot. Returns it or None.""" + for i in range(len(self.snapshots) - 1, -1, -1): + snap = self.snapshots[i] + if snap.attempts < self.max_attempts: + del self.snapshots[i] + snap.state_bytes.seek(0) + pyboy.load_state(snap.state_bytes) + snap.attempts += 1 + self.total_restores += 1 + if snap.attempts < self.max_attempts: + self.snapshots.append(snap) # keep for more attempts + return snap + return None + + # --------------------------------------------------------------------------- # Strategy engine # --------------------------------------------------------------------------- @@ -417,6 +476,15 @@ def __init__(self, rom_path: str, strategy: str = "low", screenshots: bool = Fal else: self._evolve_door_cooldown = 8 + # Backtracking support (FLE-style) + self.backtrack = BacktrackManager( + max_snapshots=int(self.evolve_params.get("bt_max_snapshots", 8)), + restore_threshold=int(self.evolve_params.get("bt_restore_threshold", 15)), + max_attempts=int(self.evolve_params.get("bt_max_attempts", 3)), + ) + self._bt_snapshot_interval = int(self.evolve_params.get("bt_snapshot_interval", 50)) + self._bt_last_map_id: int | None = None + # Rebuild navigator with evolved params if 
self.evolve_params: self.navigator = Navigator( @@ -690,6 +758,32 @@ def run_overworld(self): except Exception: pass # game_wrapper may not be available in all contexts + # --- FLE backtracking --- + # Snapshot on map change + if self._bt_last_map_id is not None and state.map_id != self._bt_last_map_id: + self.backtrack.save_snapshot(self.pyboy, state, self.turn_count) + self._bt_last_map_id = state.map_id + + # Periodic snapshot when not stuck + if (self._bt_snapshot_interval > 0 + and self.turn_count > 0 + and self.turn_count % self._bt_snapshot_interval == 0 + and self.stuck_turns == 0): + self.backtrack.save_snapshot(self.pyboy, state, self.turn_count) + + # Restore when stuck too long + if self.backtrack.should_restore(self.stuck_turns): + snap = self.backtrack.restore(self.pyboy) + if snap is not None: + self.stuck_turns = 0 + self.recent_positions.clear() + state = self.memory.read_overworld_state() + self.log( + f"BACKTRACK | Restored to turn {snap.turn} " + f"map={snap.map_id} ({snap.x},{snap.y}) " + f"attempt={snap.attempts}" + ) + # Diagnostic: capture screen and collision data at key positions if state.map_id == 37 and not hasattr(self, '_house_diag_done'): self._house_diag_done = True @@ -782,6 +876,7 @@ def compute_fitness(self) -> dict: "badges": final.badges, "party_size": final.party_count, "stuck_count": len([e for e in self.events if "STUCK" in e]), + "backtrack_restores": self.backtrack.total_restores, } def run(self, max_turns: int = 100_000): diff --git a/scripts/evolve.py b/scripts/evolve.py index 3b6d97d..9c94cf3 100644 --- a/scripts/evolve.py +++ b/scripts/evolve.py @@ -29,6 +29,10 @@ "door_cooldown": 8, "waypoint_skip_distance": 3, "axis_preference_map_0": "y", + "bt_max_snapshots": 8, + "bt_restore_threshold": 15, + "bt_max_attempts": 3, + "bt_snapshot_interval": 50, } @@ -57,6 +61,7 @@ def score(fitness: dict) -> float: + fitness.get("battles_won", 0) * 10 - fitness.get("stuck_count", 0) * 5 - fitness.get("turns", 0) * 0.1 + - 
fitness.get("backtrack_restores", 0) * 2 ) @@ -142,6 +147,10 @@ def build_mutation_prompt( - door_cooldown: frames to walk away from a door after exiting (int, 4-16) - waypoint_skip_distance: max Manhattan distance to skip a waypoint when stuck (int, 1-8) - axis_preference_map_0: preferred movement axis on Pallet Town map ("x" or "y") +- bt_max_snapshots: max number of backtrack snapshots to keep (int, 2-16) +- bt_restore_threshold: stuck turns before restoring a snapshot (int, 8-30) +- bt_max_attempts: max times to retry from the same snapshot (int, 1-5) +- bt_snapshot_interval: turns between periodic snapshots when not stuck (int, 20-100) Propose ONE set of modified parameters to improve the score. Focus on reducing stuck_count and increasing maps_visited. Return ONLY valid JSON with the same @@ -270,7 +279,11 @@ def _perturb(params: dict) -> dict: import random new = dict(params) - key = random.choice(["stuck_threshold", "door_cooldown", "waypoint_skip_distance"]) + key = random.choice([ + "stuck_threshold", "door_cooldown", "waypoint_skip_distance", + "bt_max_snapshots", "bt_restore_threshold", "bt_max_attempts", + "bt_snapshot_interval", + ]) delta = random.choice([-2, -1, 1, 2]) new[key] = max(1, new[key] + delta) # Randomly flip axis preference diff --git a/scripts/run_10_agents.py b/scripts/run_10_agents.py index 6ae80b1..edb75ff 100644 --- a/scripts/run_10_agents.py +++ b/scripts/run_10_agents.py @@ -22,37 +22,52 @@ # 10 parameter variants to try — tuned for reaching rival battle # Previous winner: door_cooldown=4 beat baseline for Pokemon selection +_BT_DEFAULTS = { + "bt_max_snapshots": 8, "bt_restore_threshold": 15, + "bt_max_attempts": 3, "bt_snapshot_interval": 50, +} + PARAM_VARIANTS = [ # Baseline (previous winner door_cooldown=4) {"stuck_threshold": 8, "door_cooldown": 4, "waypoint_skip_distance": 3, - "axis_preference_map_0": "y", "label": "baseline_4dc"}, + "axis_preference_map_0": "y", **_BT_DEFAULTS, "label": "baseline_4dc"}, # Original 
defaults {"stuck_threshold": 8, "door_cooldown": 8, "waypoint_skip_distance": 3, - "axis_preference_map_0": "y", "label": "original"}, + "axis_preference_map_0": "y", **_BT_DEFAULTS, "label": "original"}, # Very short door cooldown {"stuck_threshold": 8, "door_cooldown": 2, "waypoint_skip_distance": 3, - "axis_preference_map_0": "y", "label": "dc2"}, + "axis_preference_map_0": "y", **_BT_DEFAULTS, "label": "dc2"}, # Low stuck + short door {"stuck_threshold": 4, "door_cooldown": 4, "waypoint_skip_distance": 3, - "axis_preference_map_0": "y", "label": "low_stuck_dc4"}, + "axis_preference_map_0": "y", **_BT_DEFAULTS, "label": "low_stuck_dc4"}, # High stuck + short door {"stuck_threshold": 12, "door_cooldown": 4, "waypoint_skip_distance": 3, - "axis_preference_map_0": "y", "label": "high_stuck_dc4"}, + "axis_preference_map_0": "y", **_BT_DEFAULTS, "label": "high_stuck_dc4"}, # Wide skip + short door {"stuck_threshold": 8, "door_cooldown": 4, "waypoint_skip_distance": 6, - "axis_preference_map_0": "y", "label": "wide_skip_dc4"}, + "axis_preference_map_0": "y", **_BT_DEFAULTS, "label": "wide_skip_dc4"}, # Narrow skip + short door {"stuck_threshold": 8, "door_cooldown": 4, "waypoint_skip_distance": 1, - "axis_preference_map_0": "y", "label": "narrow_dc4"}, + "axis_preference_map_0": "y", **_BT_DEFAULTS, "label": "narrow_dc4"}, # X-axis + short door {"stuck_threshold": 8, "door_cooldown": 4, "waypoint_skip_distance": 3, - "axis_preference_map_0": "x", "label": "x_axis_dc4"}, + "axis_preference_map_0": "x", **_BT_DEFAULTS, "label": "x_axis_dc4"}, # Aggressive: low stuck + very short door + wide skip {"stuck_threshold": 3, "door_cooldown": 2, "waypoint_skip_distance": 5, - "axis_preference_map_0": "y", "label": "aggressive"}, + "axis_preference_map_0": "y", **_BT_DEFAULTS, "label": "aggressive"}, # Moderate: medium stuck + short door {"stuck_threshold": 6, "door_cooldown": 6, "waypoint_skip_distance": 4, - "axis_preference_map_0": "y", "label": "moderate"}, + 
"axis_preference_map_0": "y", **_BT_DEFAULTS, "label": "moderate"}, + # Aggressive backtracking: low restore threshold, high retries + {"stuck_threshold": 8, "door_cooldown": 4, "waypoint_skip_distance": 3, + "axis_preference_map_0": "y", "bt_max_snapshots": 8, + "bt_restore_threshold": 10, "bt_max_attempts": 5, + "bt_snapshot_interval": 50, "label": "aggressive_bt"}, + # Backtracking disabled + {"stuck_threshold": 8, "door_cooldown": 4, "waypoint_skip_distance": 3, + "axis_preference_map_0": "y", "bt_max_snapshots": 0, + "bt_restore_threshold": 999, "bt_max_attempts": 3, + "bt_snapshot_interval": 50, "label": "no_bt"}, ] MAX_TURNS = 5000 # Intro + Pokemon selection + rival scripted sequence + battle + exit @@ -67,6 +82,7 @@ def score(fitness: dict) -> float: + fitness.get("battles_won", 0) * 10 - fitness.get("stuck_count", 0) * 5 - fitness.get("turns", 0) * 0.1 + - fitness.get("backtrack_restores", 0) * 2 ) @@ -137,7 +153,7 @@ def main(): print(f"ROM not found: {rom_path}") sys.exit(1) - print(f"[run_10] Launching 10 agents with {MAX_TURNS} max turns each") + print(f"[run_10] Launching {len(PARAM_VARIANTS)} agents with {MAX_TURNS} max turns each") print(f"[run_10] ROM: {rom_path}") print(f"[run_10] Running 5 at a time...\n") @@ -174,7 +190,7 @@ def main(): all_results.sort(key=lambda r: r["score"], reverse=True) print(f"\n{'='*70}") - print(f"[run_10] All 10 agents complete in {total_time:.1f}s") + print(f"[run_10] All {len(all_results)} agents complete in {total_time:.1f}s") print(f"{'='*70}\n") print(f"{'Rank':>4} {'Label':14s} {'Score':>8} {'Map':>4} {'Party':>5} " f"{'Stuck':>5} {'Turns':>5} {'Time':>6}") diff --git a/tests/test_agent.py b/tests/test_agent.py index bdefd0b..1096735 100644 --- a/tests/test_agent.py +++ b/tests/test_agent.py @@ -1,6 +1,7 @@ """Comprehensive tests for agent.py — targeting 100% line coverage.""" import importlib +import io import json import os import runpy @@ -25,6 +26,8 @@ GameController, BattleStrategy, Navigator, + Snapshot, + 
BacktrackManager, StrategyEngine, PokemonAgent, main, @@ -552,6 +555,256 @@ def _make_agent(tmp_path, screenshots=False, routes=None, type_chart_data=None): return ag +# =================================================================== +# BacktrackManager tests +# =================================================================== + + +class TestBacktrackManager: + """Tests for Snapshot dataclass and BacktrackManager.""" + + def test_snapshot_defaults(self): + buf = io.BytesIO(b"state") + snap = Snapshot(state_bytes=buf, map_id=1, x=5, y=10, turn=42) + assert snap.attempts == 0 + assert snap.map_id == 1 + assert snap.turn == 42 + + def test_init_defaults(self): + bm = BacktrackManager() + assert bm.max_snapshots == 8 + assert bm.restore_threshold == 15 + assert bm.max_attempts == 3 + assert bm.total_restores == 0 + assert len(bm.snapshots) == 0 + + def test_init_custom(self): + bm = BacktrackManager(max_snapshots=4, restore_threshold=10, max_attempts=5) + assert bm.max_snapshots == 4 + assert bm.restore_threshold == 10 + assert bm.max_attempts == 5 + + def test_save_snapshot(self): + bm = BacktrackManager(max_snapshots=3) + mock_pyboy = MagicMock() + state = OverworldState(map_id=1, x=5, y=10) + + bm.save_snapshot(mock_pyboy, state, turn=10) + assert len(bm.snapshots) == 1 + assert bm.snapshots[0].map_id == 1 + assert bm.snapshots[0].x == 5 + assert bm.snapshots[0].y == 10 + assert bm.snapshots[0].turn == 10 + mock_pyboy.save_state.assert_called_once() + + def test_save_snapshot_deque_bounds(self): + bm = BacktrackManager(max_snapshots=2) + mock_pyboy = MagicMock() + for i in range(5): + state = OverworldState(map_id=i, x=i, y=i) + bm.save_snapshot(mock_pyboy, state, turn=i) + assert len(bm.snapshots) == 2 + # Oldest snapshots should have been evicted + assert bm.snapshots[0].map_id == 3 + assert bm.snapshots[1].map_id == 4 + + def test_should_restore_below_threshold(self): + bm = BacktrackManager(restore_threshold=15) + mock_pyboy = MagicMock() + 
bm.save_snapshot(mock_pyboy, OverworldState(map_id=0, x=0, y=0), turn=0) + assert bm.should_restore(14) is False + + def test_should_restore_no_snapshots(self): + bm = BacktrackManager(restore_threshold=5) + assert bm.should_restore(10) is False + + def test_should_restore_all_exhausted(self): + bm = BacktrackManager(restore_threshold=5, max_attempts=1) + snap = Snapshot(io.BytesIO(b"x"), map_id=0, x=0, y=0, turn=0, attempts=1) + bm.snapshots.append(snap) + assert bm.should_restore(10) is False + + def test_should_restore_viable(self): + bm = BacktrackManager(restore_threshold=5, max_attempts=3) + mock_pyboy = MagicMock() + bm.save_snapshot(mock_pyboy, OverworldState(map_id=0, x=0, y=0), turn=0) + assert bm.should_restore(5) is True + + def test_restore_loads_state(self): + bm = BacktrackManager(max_attempts=3) + mock_pyboy = MagicMock() + bm.save_snapshot(mock_pyboy, OverworldState(map_id=1, x=3, y=7), turn=20) + + snap = bm.restore(mock_pyboy) + assert snap is not None + assert snap.map_id == 1 + assert snap.x == 3 + assert snap.y == 7 + assert snap.turn == 20 + assert snap.attempts == 1 + assert bm.total_restores == 1 + mock_pyboy.load_state.assert_called_once() + + def test_restore_keeps_snapshot_if_attempts_remain(self): + bm = BacktrackManager(max_attempts=3) + mock_pyboy = MagicMock() + bm.save_snapshot(mock_pyboy, OverworldState(map_id=1, x=0, y=0), turn=10) + + bm.restore(mock_pyboy) + # Snapshot re-appended with attempts=1 + assert len(bm.snapshots) == 1 + assert bm.snapshots[0].attempts == 1 + + def test_restore_removes_snapshot_at_max_attempts(self): + bm = BacktrackManager(max_attempts=1) + mock_pyboy = MagicMock() + bm.save_snapshot(mock_pyboy, OverworldState(map_id=1, x=0, y=0), turn=10) + + snap = bm.restore(mock_pyboy) + assert snap is not None + assert snap.attempts == 1 + # Not re-appended since attempts == max_attempts + assert len(bm.snapshots) == 0 + + def test_restore_none_when_all_exhausted(self): + bm = BacktrackManager(max_attempts=1) + 
snap = Snapshot(io.BytesIO(b"x"), map_id=0, x=0, y=0, turn=0, attempts=1) + bm.snapshots.append(snap) + + mock_pyboy = MagicMock() + result = bm.restore(mock_pyboy) + assert result is None + assert bm.total_restores == 0 + + def test_restore_picks_most_recent_viable(self): + bm = BacktrackManager(max_attempts=2) + # First snapshot exhausted + exhausted = Snapshot(io.BytesIO(b"old"), map_id=0, x=0, y=0, turn=5, attempts=2) + bm.snapshots.append(exhausted) + # Second snapshot viable + mock_pyboy = MagicMock() + bm.save_snapshot(mock_pyboy, OverworldState(map_id=1, x=3, y=3), turn=15) + + snap = bm.restore(mock_pyboy) + assert snap is not None + assert snap.map_id == 1 + assert snap.turn == 15 + + def test_total_restores_accumulates(self): + bm = BacktrackManager(max_attempts=5) + mock_pyboy = MagicMock() + bm.save_snapshot(mock_pyboy, OverworldState(map_id=0, x=0, y=0), turn=0) + + bm.restore(mock_pyboy) + bm.restore(mock_pyboy) + assert bm.total_restores == 2 + + +class TestBacktrackIntegration: + """Test BacktrackManager integration with PokemonAgent.""" + + def test_agent_has_backtrack_manager(self, tmp_path): + ag = _make_agent(tmp_path) + assert hasattr(ag, "backtrack") + assert isinstance(ag.backtrack, BacktrackManager) + + def test_agent_backtrack_defaults(self, tmp_path): + ag = _make_agent(tmp_path) + assert ag.backtrack.max_snapshots == 8 + assert ag.backtrack.restore_threshold == 15 + assert ag.backtrack.max_attempts == 3 + assert ag._bt_snapshot_interval == 50 + + def test_evolve_params_flow_to_backtrack(self, tmp_path): + params = { + "stuck_threshold": 8, "door_cooldown": 8, + "waypoint_skip_distance": 3, "axis_preference_map_0": "y", + "bt_max_snapshots": 4, "bt_restore_threshold": 10, + "bt_max_attempts": 5, "bt_snapshot_interval": 25, + } + ag = _make_agent_with_evolve(tmp_path, evolve_params=params) + assert ag.backtrack.max_snapshots == 4 + assert ag.backtrack.restore_threshold == 10 + assert ag.backtrack.max_attempts == 5 + assert 
ag._bt_snapshot_interval == 25 + + def test_snapshot_on_map_change(self, tmp_path): + ag = _make_agent(tmp_path) + state1 = OverworldState(map_id=0, x=5, y=5) + state2 = OverworldState(map_id=1, x=3, y=3) + + ag.memory.read_overworld_state = MagicMock(return_value=state1) + ag._bt_last_map_id = 0 # set previous map + ag.run_overworld() + + # No map change yet + initial_count = len(ag.backtrack.snapshots) + + ag._bt_last_map_id = 0 + ag.memory.read_overworld_state = MagicMock(return_value=state2) + ag.run_overworld() + + # Map changed from 0 -> 1, should have saved a snapshot + assert len(ag.backtrack.snapshots) > initial_count + + def test_periodic_snapshot(self, tmp_path): + ag = _make_agent(tmp_path) + ag._bt_snapshot_interval = 5 + state = OverworldState(map_id=0, x=5, y=5) + ag.memory.read_overworld_state = MagicMock(return_value=state) + ag._bt_last_map_id = 0 + ag.stuck_turns = 0 + + # Run until turn_count hits the interval + for _ in range(6): + ag.turn_count += 1 + if ag.turn_count % ag._bt_snapshot_interval == 0 and ag.stuck_turns == 0: + ag.backtrack.save_snapshot(ag.pyboy, state, ag.turn_count) + + assert len(ag.backtrack.snapshots) == 1 + + def test_restore_on_stuck(self, tmp_path): + ag = _make_agent(tmp_path) + ag.backtrack.restore_threshold = 3 + state = OverworldState(map_id=0, x=5, y=5) + ag.memory.read_overworld_state = MagicMock(return_value=state) + + # Save a snapshot manually + ag.backtrack.save_snapshot(ag.pyboy, state, turn=0) + ag._bt_last_map_id = 0 + + # Simulate being stuck + ag.stuck_turns = 3 + ag.run_overworld() + + # Should have restored + assert ag.backtrack.total_restores == 1 + assert ag.stuck_turns == 0 + + def test_compute_fitness_includes_backtrack_restores(self, tmp_path): + ag = _make_agent(tmp_path) + ag.backtrack.total_restores = 7 + ag.memory.read_overworld_state = MagicMock( + return_value=OverworldState(map_id=0, x=0, y=0) + ) + fitness = ag.compute_fitness() + assert fitness["backtrack_restores"] == 7 + + def 
test_backtrack_event_logged(self, tmp_path): + ag = _make_agent(tmp_path) + ag.backtrack.restore_threshold = 1 + state = OverworldState(map_id=0, x=5, y=5) + ag.memory.read_overworld_state = MagicMock(return_value=state) + ag.backtrack.save_snapshot(ag.pyboy, state, turn=0) + ag._bt_last_map_id = 0 + ag.stuck_turns = 1 + + ag.run_overworld() + + backtrack_events = [e for e in ag.events if "BACKTRACK" in e] + assert len(backtrack_events) == 1 + + # =================================================================== # StrategyEngine tests # =================================================================== diff --git a/tests/test_evolve.py b/tests/test_evolve.py index 012bd59..c65eaf0 100644 --- a/tests/test_evolve.py +++ b/tests/test_evolve.py @@ -45,6 +45,10 @@ def test_keys(self): assert "door_cooldown" in DEFAULT_PARAMS assert "waypoint_skip_distance" in DEFAULT_PARAMS assert "axis_preference_map_0" in DEFAULT_PARAMS + assert "bt_max_snapshots" in DEFAULT_PARAMS + assert "bt_restore_threshold" in DEFAULT_PARAMS + assert "bt_max_attempts" in DEFAULT_PARAMS + assert "bt_snapshot_interval" in DEFAULT_PARAMS # ── score() ──────────────────────────────────────────────────────────── @@ -84,6 +88,15 @@ def test_high_stuck_penalizes(self): stuck = dict(base, stuck_count=100) assert score(stuck) < score(base) + def test_backtrack_restores_penalizes(self): + base = {"final_map_id": 1, "badges": 0, "party_size": 0, + "battles_won": 0, "stuck_count": 0, "turns": 0, + "backtrack_restores": 0} + with_bt = dict(base, backtrack_restores=10) + assert score(with_bt) < score(base) + # Penalty is -2 per restore + assert score(base) - score(with_bt) == 20 + # ── run_agent() ──────────────────────────────────────────────────────── @@ -179,6 +192,13 @@ def test_includes_params_and_fitness(self): assert "stuck_threshold" in prompt assert '"turns": 100' in prompt + def test_includes_bt_descriptions(self): + prompt = build_mutation_prompt(DEFAULT_PARAMS, {}) + assert "bt_max_snapshots" 
in prompt + assert "bt_restore_threshold" in prompt + assert "bt_max_attempts" in prompt + assert "bt_snapshot_interval" in prompt + def test_includes_observations(self): obs = [{"priority": "important", "content": "Tool error: boom"}] prompt = build_mutation_prompt(DEFAULT_PARAMS, {}, obs) @@ -237,12 +257,29 @@ def test_minimum_value_clamp(self): import random random.seed(0) params = dict(DEFAULT_PARAMS, stuck_threshold=1, door_cooldown=1, - waypoint_skip_distance=1) + waypoint_skip_distance=1, bt_max_snapshots=1, + bt_restore_threshold=1, bt_max_attempts=1, + bt_snapshot_interval=1) for _ in range(50): result = _perturb(params) - for key in ("stuck_threshold", "door_cooldown", "waypoint_skip_distance"): + for key in ("stuck_threshold", "door_cooldown", "waypoint_skip_distance", + "bt_max_snapshots", "bt_restore_threshold", + "bt_max_attempts", "bt_snapshot_interval"): assert result[key] >= 1 + def test_can_perturb_bt_keys(self): + """bt_* keys should be reachable by perturbation.""" + import random + random.seed(123) + bt_changed = set() + for _ in range(200): + result = _perturb(DEFAULT_PARAMS) + for key in ("bt_max_snapshots", "bt_restore_threshold", + "bt_max_attempts", "bt_snapshot_interval"): + if result[key] != DEFAULT_PARAMS[key]: + bt_changed.add(key) + assert len(bt_changed) > 0 + # ── evolve() ─────────────────────────────────────────────────────────── diff --git a/tests/test_run_10_agents.py b/tests/test_run_10_agents.py index 4d86cdf..8842953 100644 --- a/tests/test_run_10_agents.py +++ b/tests/test_run_10_agents.py @@ -25,12 +25,14 @@ class TestParamVariants: - def test_has_10_variants(self): - assert len(PARAM_VARIANTS) == 10 + def test_has_12_variants(self): + assert len(PARAM_VARIANTS) == 12 def test_all_variants_have_required_keys(self): required = {"stuck_threshold", "door_cooldown", "waypoint_skip_distance", - "axis_preference_map_0", "label"} + "axis_preference_map_0", "label", + "bt_max_snapshots", "bt_restore_threshold", + "bt_max_attempts", 
"bt_snapshot_interval"} for i, variant in enumerate(PARAM_VARIANTS): missing = required - set(variant.keys()) assert not missing, f"Variant {i} ({variant.get('label', '?')}) missing: {missing}" @@ -240,7 +242,7 @@ def mock_run_one_agent(rom_path, params, agent_id): saved = tmp_path / "pokedex" / "evolve_results.json" assert saved.exists() data = json.loads(saved.read_text()) - assert len(data) == 10 + assert len(data) == len(PARAM_VARIANTS) def test_error_result_shows_fail(self, tmp_path, capsys): rom = tmp_path / "test.gb" From 29210813dd610befb4d65518f25967ea1f52a26d Mon Sep 17 00:00:00 2001 From: Brian Douglas Date: Tue, 10 Mar 2026 07:16:23 -0700 Subject: [PATCH 2/5] Remove unused field import and deduplicate score() - Remove unused `field` import from dataclasses in agent.py - Import `score()` from evolve.py in run_10_agents.py instead of duplicating it --- scripts/agent.py | 2 +- scripts/run_10_agents.py | 16 +++------------- tests/test_run_10_agents.py | 2 +- 3 files changed, 5 insertions(+), 15 deletions(-) diff --git a/scripts/agent.py b/scripts/agent.py index 3a0f073..cfb6414 100644 --- a/scripts/agent.py +++ b/scripts/agent.py @@ -16,7 +16,7 @@ import time import os from collections import deque -from dataclasses import dataclass, field +from dataclasses import dataclass from pathlib import Path try: diff --git a/scripts/run_10_agents.py b/scripts/run_10_agents.py index edb75ff..ee95af1 100644 --- a/scripts/run_10_agents.py +++ b/scripts/run_10_agents.py @@ -20,6 +20,9 @@ SCRIPT_DIR = Path(__file__).parent AGENT_SCRIPT = SCRIPT_DIR / "agent.py" +# Re-use the canonical scoring function from evolve.py +from evolve import score + # 10 parameter variants to try — tuned for reaching rival battle # Previous winner: door_cooldown=4 beat baseline for Pokemon selection _BT_DEFAULTS = { @@ -73,19 +76,6 @@ MAX_TURNS = 5000 # Intro + Pokemon selection + rival scripted sequence + battle + exit -def score(fitness: dict) -> float: - """Composite fitness score.""" - 
return ( - fitness.get("final_map_id", 0) * 1000 - + fitness.get("badges", 0) * 5000 - + fitness.get("party_size", 0) * 500 - + fitness.get("battles_won", 0) * 10 - - fitness.get("stuck_count", 0) * 5 - - fitness.get("turns", 0) * 0.1 - - fitness.get("backtrack_restores", 0) * 2 - ) - - def run_one_agent(rom_path: str, params: dict, agent_id: int) -> dict: """Run a single agent and return results.""" label = params.get("label", f"agent_{agent_id}") diff --git a/tests/test_run_10_agents.py b/tests/test_run_10_agents.py index 8842953..66347a6 100644 --- a/tests/test_run_10_agents.py +++ b/tests/test_run_10_agents.py @@ -15,7 +15,7 @@ from run_10_agents import ( PARAM_VARIANTS, MAX_TURNS, - score, + score, # re-exported from evolve run_one_agent, main, ) From e6ec35b7d4c4d14ffcce1c8198f1a66304cd735e Mon Sep 17 00:00:00 2001 From: Brian Douglas Date: Tue, 10 Mar 2026 07:45:33 -0700 Subject: [PATCH 3/5] Fix backtrack restore: reset script-gate flags, skip duplicate snapshots - Reset _oak_wait_done, _pallet_diag_done, _house_diag_done, _lab_phase, _lab_turns, _lab_exit_turns on backtrack restore so one-time game sequences (Oak encounter, lab phases) can re-trigger after restore - Skip periodic snapshots when position matches the last snapshot to avoid poisoning the pool with stuck-adjacent positions --- scripts/agent.py | 16 ++++++++++++++-- tests/test_agent.py | 39 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 53 insertions(+), 2 deletions(-) diff --git a/scripts/agent.py b/scripts/agent.py index cfb6414..720a5e9 100644 --- a/scripts/agent.py +++ b/scripts/agent.py @@ -764,12 +764,18 @@ def run_overworld(self): self.backtrack.save_snapshot(self.pyboy, state, self.turn_count) self._bt_last_map_id = state.map_id - # Periodic snapshot when not stuck + # Periodic snapshot when making progress (not stuck, and position + # differs from last snapshot to avoid poisoning the pool) if (self._bt_snapshot_interval > 0 and self.turn_count > 0 and self.turn_count % 
self._bt_snapshot_interval == 0 and self.stuck_turns == 0): - self.backtrack.save_snapshot(self.pyboy, state, self.turn_count) + last_snap = self.backtrack.snapshots[-1] if self.backtrack.snapshots else None + if (last_snap is None + or last_snap.map_id != state.map_id + or last_snap.x != state.x + or last_snap.y != state.y): + self.backtrack.save_snapshot(self.pyboy, state, self.turn_count) # Restore when stuck too long if self.backtrack.should_restore(self.stuck_turns): @@ -777,6 +783,12 @@ def run_overworld(self): if snap is not None: self.stuck_turns = 0 self.recent_positions.clear() + # Reset script-gate flags so one-time sequences can re-trigger + for attr in ('_oak_wait_done', '_pallet_diag_done', + '_house_diag_done', '_lab_phase', '_lab_turns', + '_lab_exit_turns'): + if hasattr(self, attr): + delattr(self, attr) state = self.memory.read_overworld_state() self.log( f"BACKTRACK | Restored to turn {snap.turn} " diff --git a/tests/test_agent.py b/tests/test_agent.py index 1096735..2840936 100644 --- a/tests/test_agent.py +++ b/tests/test_agent.py @@ -804,6 +804,45 @@ def test_backtrack_event_logged(self, tmp_path): backtrack_events = [e for e in ag.events if "BACKTRACK" in e] assert len(backtrack_events) == 1 + def test_restore_resets_script_gate_flags(self, tmp_path): + ag = _make_agent(tmp_path) + ag.backtrack.restore_threshold = 1 + state = OverworldState(map_id=0, x=5, y=5) + ag.memory.read_overworld_state = MagicMock(return_value=state) + ag.backtrack.save_snapshot(ag.pyboy, state, turn=0) + ag._bt_last_map_id = 0 + ag.stuck_turns = 1 + + # Set flags that should be cleared on restore + ag._oak_wait_done = True + ag._pallet_diag_done = True + ag._house_diag_done = True + + ag.run_overworld() + + assert not hasattr(ag, '_oak_wait_done') + assert not hasattr(ag, '_pallet_diag_done') + assert not hasattr(ag, '_house_diag_done') + + def test_periodic_snapshot_skips_duplicate_position(self, tmp_path): + ag = _make_agent(tmp_path) + ag._bt_snapshot_interval = 1 
# every turn + state = OverworldState(map_id=0, x=5, y=5) + ag.memory.read_overworld_state = MagicMock(return_value=state) + ag._bt_last_map_id = 0 + ag.stuck_turns = 0 + + # First overworld call at turn 1 should snapshot + ag.turn_count = 1 + ag.run_overworld() + assert len(ag.backtrack.snapshots) == 1 + + # Second call at same position should NOT add another + ag.turn_count = 2 + ag.stuck_turns = 0 + ag.run_overworld() + assert len(ag.backtrack.snapshots) == 1 + # =================================================================== # StrategyEngine tests From 032e129a54a3ecaf58b716ad7b2e24a9a009ad95 Mon Sep 17 00:00:00 2001 From: Brian Douglas Date: Tue, 10 Mar 2026 08:25:07 -0700 Subject: [PATCH 4/5] Fix backtrack guard in Oak's Lab to prevent undoing Charmander pickup The backtrack guard checked `map_id == 40 AND party_count == 0`, but party_count changes to 1 the moment the agent picks up Charmander. This allowed backtracking to fire immediately after the pickup, wiping out progress. Change guard to `map_id == 40` (entire lab is protected). Also revert Oak trigger to PR #10's proven brute-force approach (4 rounds of mash_a + wait) instead of script-state-aware gating that read 0xD5F1 while still on Pallet Town map where the address is meaningless. ROM test confirms: agent picks Charmander, wins rival battle, exits lab. --- scripts/agent.py | 55 ++++++++++++++++-------------- tests/test_agent.py | 81 +++++++++++++++++++++++++++++++++++++++------ 2 files changed, 102 insertions(+), 34 deletions(-) diff --git a/scripts/agent.py b/scripts/agent.py index 720a5e9..9f1fa35 100644 --- a/scripts/agent.py +++ b/scripts/agent.py @@ -560,29 +560,28 @@ def choose_overworld_action(self, state: OverworldState) -> str: return "left" # sidestep to avoid door on return north # In Oak's lab with no Pokemon: walk to Pokeball table and pick one. - # Oak stands near (5,2) blocking north. Pressing A near him loops - # his dialogue. Going too far south triggers "Don't go away!" 
- # Strategy: A to dismiss text, down 1 to dodge Oak, right, up to table. + # Oak's Lab script (0xD5F1) tracks the cutscene state but we don't + # gate on it — the phases below handle all states by pressing B to + # dismiss dialogue and navigating to the Pokeball table. + # Pokeball sprites at (6,3)=Charmander, (7,3)=Squirtle, (8,3)=Bulbasaur. + # Interact from y=4 facing UP. if state.map_id == 40 and state.party_count == 0: lab_script = self.memory._read(0xD5F1) + if not hasattr(self, '_lab_turns'): + self._lab_turns = 0 + self._lab_turns += 1 + if self.turn_count % 50 == 0: self.log(f"LAB | script={lab_script} pos=({state.x},{state.y}) " f"turn={self.turn_count}") if self.turn_count % 200 == 0: self.take_screenshot(f"lab_t{self.turn_count}", force=True) - if not hasattr(self, '_lab_turns'): - self._lab_turns = 0 - self._lab_turns += 1 - - # Pokeball sprites are at (6,3), (7,3), (8,3) ON the table. - # Interact from y=4 facing UP, or y=2 facing DOWN. - # Simplest path: B(clear) → down to y=4 → right to x=6 → up+A if not hasattr(self, '_lab_phase'): self._lab_phase = 0 if self._lab_phase == 0: - # Dismiss Oak's text with B, then move south + # Dismiss text with B, move south to y=4 (interaction row) if state.y >= 4: self._lab_phase = 1 self.log(f"LAB | phase 0→1 south at ({state.x},{state.y})") @@ -592,16 +591,15 @@ def choose_overworld_action(self, state: OverworldState) -> str: return "down" elif self._lab_phase == 1: - # Go east to Pokeball column (x=6 = Charmander) + # Move east to x=6 (Charmander's Pokeball column) if state.x >= 6: self._lab_phase = 2 self.log(f"LAB | phase 1→2 at pokeball column ({state.x},{state.y})") - return "up" # face the table + return "up" return "right" else: - # Phase 2: face up toward Pokeball at (6,3) and press A - # Alternate up (to face table) and A (to interact) + # Phase 2: at Pokeball — face up and press A to interact if self._lab_turns % 2 == 0: return "up" return "a" @@ -759,14 +757,21 @@ def run_overworld(self): pass # 
game_wrapper may not be available in all contexts # --- FLE backtracking --- - # Snapshot on map change - if self._bt_last_map_id is not None and state.map_id != self._bt_last_map_id: - self.backtrack.save_snapshot(self.pyboy, state, self.turn_count) + # Skip all backtracking while in Oak's Lab (map 40). + # The lab has multiple scripted sequences (picking starter, rival battle) + # that look "stuck" but are progressing. Restoring mid-sequence + # undoes progress even after the player picks up a Pokemon. + in_oaks_lab = (state.map_id == 40) + + # Snapshot on map change (skip in Oak's Lab) + if not in_oaks_lab: + if self._bt_last_map_id is not None and state.map_id != self._bt_last_map_id: + self.backtrack.save_snapshot(self.pyboy, state, self.turn_count) self._bt_last_map_id = state.map_id - # Periodic snapshot when making progress (not stuck, and position - # differs from last snapshot to avoid poisoning the pool) - if (self._bt_snapshot_interval > 0 + # Periodic snapshot when making progress (skip in Oak's Lab) + if (not in_oaks_lab + and self._bt_snapshot_interval > 0 and self.turn_count > 0 and self.turn_count % self._bt_snapshot_interval == 0 and self.stuck_turns == 0): @@ -777,8 +782,8 @@ def run_overworld(self): or last_snap.y != state.y): self.backtrack.save_snapshot(self.pyboy, state, self.turn_count) - # Restore when stuck too long - if self.backtrack.should_restore(self.stuck_turns): + # Restore when stuck too long (skip in Oak's Lab) + if not in_oaks_lab and self.backtrack.should_restore(self.stuck_turns): snap = self.backtrack.restore(self.pyboy) if snap is not None: self.stuck_turns = 0 @@ -843,7 +848,9 @@ def run_overworld(self): action = self.choose_overworld_action(state) - if action in {"up", "down", "left", "right"}: + if action == "wait": + self.controller.wait(30) + elif action in {"up", "down", "left", "right"}: self.controller.move(action) elif action == "b": self.controller.press("b", hold_frames=20, release_frames=12) diff --git 
a/tests/test_agent.py b/tests/test_agent.py index 2840936..7f95f78 100644 --- a/tests/test_agent.py +++ b/tests/test_agent.py @@ -824,6 +824,37 @@ def test_restore_resets_script_gate_flags(self, tmp_path): assert not hasattr(ag, '_pallet_diag_done') assert not hasattr(ag, '_house_diag_done') + def test_backtrack_skipped_in_oaks_lab(self, tmp_path): + """Backtrack should NOT trigger in Oak's Lab (map 40) at all.""" + ag = _make_agent(tmp_path) + ag.backtrack.restore_threshold = 1 + state = OverworldState(map_id=40, party_count=0, x=5, y=3) + ag.memory.read_overworld_state = MagicMock(return_value=state) + ag.backtrack.save_snapshot(ag.pyboy, state, turn=0) + ag._bt_last_map_id = 40 + ag.stuck_turns = 5 # well above threshold + + with patch.object(agent, "Image", None): + ag.run_overworld() + + # Should NOT have restored despite being stuck + assert ag.backtrack.total_restores == 0 + + def test_backtrack_skipped_in_oaks_lab_with_party(self, tmp_path): + """Backtrack should NOT trigger in Oak's Lab even after getting Pokemon.""" + ag = _make_agent(tmp_path) + ag.backtrack.restore_threshold = 1 + state = OverworldState(map_id=40, party_count=1, x=7, y=5) + ag.memory.read_overworld_state = MagicMock(return_value=state) + ag.backtrack.save_snapshot(ag.pyboy, state, turn=0) + ag._bt_last_map_id = 40 + ag.stuck_turns = 5 + + with patch.object(agent, "Image", None): + ag.run_overworld() + + assert ag.backtrack.total_restores == 0 + def test_periodic_snapshot_skips_duplicate_position(self, tmp_path): ag = _make_agent(tmp_path) ag._bt_snapshot_interval = 1 # every turn @@ -1915,7 +1946,7 @@ class TestOaksLabPhases: """Cover lab phases 0->1->2 with no Pokemon and lab with Pokemon.""" def test_lab_phase0_y_ge_4_transitions_to_phase1(self, tmp_path): - """Lines 493-496: phase 0, y>=4 -> transition to phase 1, return 'right'.""" + """Phase 0, y>=4 -> transition to phase 1, return 'right'.""" ag = _make_agent(tmp_path) with patch.object(agent, "Image", None): state = 
OverworldState(map_id=40, party_count=0, x=3, y=4) @@ -1925,7 +1956,7 @@ def test_lab_phase0_y_ge_4_transitions_to_phase1(self, tmp_path): assert any("phase 0" in e for e in ag.events) def test_lab_phase0_odd_turn_returns_b(self, tmp_path): - """Lines 497-498: phase 0, _lab_turns odd -> return 'b'.""" + """Phase 0, _lab_turns odd -> return 'b'.""" ag = _make_agent(tmp_path) ag._lab_turns = 0 # will be incremented to 1 (odd) ag._lab_phase = 0 @@ -1935,7 +1966,7 @@ def test_lab_phase0_odd_turn_returns_b(self, tmp_path): assert result == "b" def test_lab_phase0_even_turn_returns_down(self, tmp_path): - """Lines 498-499: phase 0, _lab_turns even -> return 'down'.""" + """Phase 0, _lab_turns even -> return 'down'.""" ag = _make_agent(tmp_path) ag._lab_turns = 1 # will be incremented to 2 (even) ag._lab_phase = 0 @@ -1945,7 +1976,7 @@ def test_lab_phase0_even_turn_returns_down(self, tmp_path): assert result == "down" def test_lab_phase1_x_ge_6_transitions_to_phase2(self, tmp_path): - """Lines 503-506: phase 1, x>=6 -> transition to phase 2, return 'up'.""" + """Phase 1, x>=6 -> transition to phase 2, return 'up'.""" ag = _make_agent(tmp_path) ag._lab_phase = 1 ag._lab_turns = 0 @@ -1957,7 +1988,7 @@ def test_lab_phase1_x_ge_6_transitions_to_phase2(self, tmp_path): assert any("phase 1" in e for e in ag.events) def test_lab_phase1_x_lt_6_returns_right(self, tmp_path): - """Line 507: phase 1, x<6 -> return 'right'.""" + """Phase 1, x<6 -> return 'right'.""" ag = _make_agent(tmp_path) ag._lab_phase = 1 ag._lab_turns = 0 @@ -1967,7 +1998,7 @@ def test_lab_phase1_x_lt_6_returns_right(self, tmp_path): assert result == "right" def test_lab_phase2_even_turn_returns_up(self, tmp_path): - """Lines 512-513: phase 2, _lab_turns even -> return 'up'.""" + """Phase 2, _lab_turns even -> return 'up'.""" ag = _make_agent(tmp_path) ag._lab_phase = 2 ag._lab_turns = 1 # incremented to 2 (even) @@ -1977,7 +2008,7 @@ def test_lab_phase2_even_turn_returns_up(self, tmp_path): assert result == 
"up" def test_lab_phase2_odd_turn_returns_a(self, tmp_path): - """Line 514: phase 2, _lab_turns odd -> return 'a'.""" + """Phase 2, _lab_turns odd -> return 'a'.""" ag = _make_agent(tmp_path) ag._lab_phase = 2 ag._lab_turns = 0 # incremented to 1 (odd) @@ -2108,7 +2139,9 @@ def test_oak_wait_at_y_le_1(self, tmp_path): ag = _make_agent(tmp_path) state = OverworldState(map_id=0, x=5, y=1, party_count=0) post_wait_state = OverworldState(map_id=40, x=5, y=3, party_count=0) - ag.memory.read_overworld_state = MagicMock(side_effect=[state, post_wait_state]) + # read_overworld_state called: (1) top of run_overworld, (2) inside oak trigger + ag.memory.read_overworld_state = MagicMock( + side_effect=[state, post_wait_state]) ag.controller = MagicMock() ag.collision_map = MagicMock() ag.collision_map.grid = [[1] * 10 for _ in range(9)] @@ -2120,10 +2153,14 @@ def test_oak_wait_at_y_le_1(self, tmp_path): assert hasattr(ag, '_oak_wait_done') assert ag._oak_wait_done is True assert any("OAK TRIGGER" in e for e in ag.events) - # Should have called wait(600) for Oak walk + # Should have called wait(600) for initial Oak walk ag.controller.wait.assert_any_call(600) - # Should have called mash_a 4 times + # 4 rounds of mash_a(30) + wait(300) assert ag.controller.mash_a.call_count == 4 + for c in ag.controller.mash_a.call_args_list: + assert c == call(30, delay=30) + wait_300_calls = [c for c in ag.controller.wait.call_args_list if c == call(300)] + assert len(wait_300_calls) == 4 def test_oak_wait_only_once(self, tmp_path): """Lines 673: _oak_wait_done already set -> skip Oak sequence.""" @@ -2186,6 +2223,30 @@ def test_b_action_presses_b(self, tmp_path): assert ag.last_overworld_action == "b" +# =================================================================== +# run_overworld -- Wait action dispatch +# =================================================================== + + +class TestRunOverworldWaitAction: + """Cover action == 'wait' -> controller.wait() with no button press.""" 
+ + def test_wait_action_just_waits(self, tmp_path): + ag = _make_agent(tmp_path) + state = OverworldState(map_id=40, x=5, y=3) + ag.memory.read_overworld_state = MagicMock(return_value=state) + ag.choose_overworld_action = MagicMock(return_value="wait") + ag.controller = MagicMock() + ag.turn_count = 1 + + ag.run_overworld() + + ag.controller.wait.assert_called_once_with(30) + ag.controller.press.assert_not_called() + ag.controller.move.assert_not_called() + assert ag.last_overworld_action == "wait" + + # =================================================================== # run_overworld -- Waypoint info logging (711-715) # =================================================================== From cba2a14010c63a1beae0260e95acb2dcf62a5fda Mon Sep 17 00:00:00 2001 From: Brian Douglas Date: Tue, 10 Mar 2026 08:30:58 -0700 Subject: [PATCH 5/5] Add FLE backtracking section to README with paper reference Documents the Factorio Learning Environment-inspired backtracking system: snapshot/restore mechanics, evolvable parameters, and Oak's Lab guard. Adds FLE paper to references list. --- README.md | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/README.md b/README.md index 1cf0293..c159765 100644 --- a/README.md +++ b/README.md @@ -156,6 +156,7 @@ pokemon-agent/ │ ├── tape_reader.py # Tapes SQLite reader (stdlib only) │ ├── observer.py # heuristic observation extractor │ ├── observe_cli.py # CLI for running the observer +│ ├── pathfinding.py # collision map + backtrack manager │ ├── evolve.py # AlphaEvolve strategy evolution harness │ └── run_10_agents.py # parallel multi-agent evaluation runner ├── references/ @@ -185,8 +186,24 @@ Target turn counts for community benchmarking. 
Fork it, improve the strategy, po | 8 badges | ~200,000 | ~100,000 | ~60,000 | | Elite Four | ~300,000 | ~150,000 | ~80,000 | +## FLE-Style Backtracking + +Inspired by the [Factorio Learning Environment](https://arxiv.org/abs/2503.09617)'s `BacktrackingAgent`, the agent snapshots game state at key moments (map changes, periodic intervals) and restores when stuck. This directly addresses navigation dead-ends like Route 1's y=28 blocker — instead of wasting turns in a loop, the agent reverts to a known-good state and tries an alternate path. + +Snapshots use PyBoy's `save_state()`/`load_state()` with in-memory `BytesIO` buffers (~130KB each, <1ms). A bounded deque keeps the most recent snapshots (8 by default). Each snapshot tracks its restore count, and after 3 restores from the same snapshot (the default limit) it's discarded. Four parameters control the behavior and are evolvable through AlphaEvolve: + +| Parameter | Default | Description | +|---|---|---| +| `bt_max_snapshots` | 8 | Max snapshots in the deque | +| `bt_restore_threshold` | 15 | Stuck turns before restoring | +| `bt_max_attempts` | 3 | Max restores per snapshot before it is discarded | +| `bt_snapshot_interval` | 50 | Turns between periodic snapshots (0 disables them) | + +Backtracking is disabled entirely inside Oak's Lab (map 40) — the lab's scripted sequences (picking the starter, the rival battle) look "stuck" but are progressing naturally. + ## Inspiration & References +- [Factorio Learning Environment](https://arxiv.org/abs/2503.09617) — Backtracking agent patterns, structured observations, and incremental report distillation for game-playing LLM agents - [AlphaEvolve](https://arxiv.org/abs/2506.13131) — DeepMind's LLM-driven code evolution framework - [Discovering Multiagent Learning Algorithms with LLMs](https://arxiv.org/abs/2602.16928) — AlphaEvolve applied to game-playing agents - [ClaudePlaysPokemon](https://www.twitch.tv/claudeplayspokemon) — Anthropic's Claude-plays-Pokemon Twitch stream