From e67d2bdb697a494a74fa89c68b2317c25522314f Mon Sep 17 00:00:00 2001 From: Noah Cashin Date: Wed, 1 Apr 2026 14:07:13 -0400 Subject: [PATCH 01/12] Refactor v2 --- .github/workflows/ci.yml | 35 +- .gitignore | 1 + Cargo.toml | 16 +- README.md | 262 ++++------ benches/kernel_hotpaths.rs | 189 +++++++ benches/step_throughput.rs | 6 +- examples/perf_probe.rs | 135 +++++ proofs/README.md | 44 +- proofs/claim.md | 39 ++ proofs/future_game_template.md | 5 +- proofs/verus/core_model.rs | 41 ++ rewrite_mandate.md | 378 ++++++++++++++ scripts/run-kani.sh | 67 --- scripts/run-perf.sh | 34 ++ scripts/run-verification.sh | 112 ++++ scripts/run-verus.sh | 60 +++ src/bin/gameengine.rs | 6 + src/buffer.rs | 46 +- .../blackjack.rs => builtin/blackjack/mod.rs} | 494 ++++-------------- src/builtin/blackjack/proofs.rs | 72 +++ src/builtin/blackjack/tests.rs | 138 +++++ src/builtin/mod.rs | 11 + .../platformer/mod.rs} | 401 +++----------- src/builtin/platformer/proofs.rs | 58 ++ src/builtin/platformer/tests.rs | 133 +++++ src/builtin/platformer/world.rs | 73 +++ .../tictactoe.rs => builtin/tictactoe/mod.rs} | 250 ++------- src/builtin/tictactoe/proofs.rs | 78 +++ src/builtin/tictactoe/tests.rs | 80 +++ src/{main.rs => cli/mod.rs} | 382 ++++++++------ src/compact.rs | 181 +++++-- src/core/cards.rs | 84 +++ src/core/env.rs | 310 +++++++++++ src/core/mod.rs | 20 + src/core/observe.rs | 51 ++ src/core/single_player.rs | 45 ++ src/core/stepper.rs | 39 ++ src/game.rs | 108 ++++ src/games/mod.rs | 17 - src/lib.rs | 16 +- src/math.rs | 41 ++ src/parallel.rs | 12 +- src/physics.rs | 182 ++++++- src/policy.rs | 32 ++ src/proof/mod.rs | 26 + src/registry/mod.rs | 138 +++++ src/render/builtin.rs | 18 +- src/render/mod.rs | 4 +- src/render/pacer.rs | 3 + src/render/runtime.rs | 212 ++++++-- src/render/scene.rs | 80 +++ src/rng.rs | 16 + src/session.rs | 268 ++++++++-- src/types.rs | 46 ++ src/verification.rs | 21 +- tests/validation.rs | 25 +- 56 files changed, 4100 insertions(+), 1541 
deletions(-) create mode 100644 benches/kernel_hotpaths.rs create mode 100644 examples/perf_probe.rs create mode 100644 proofs/claim.md create mode 100644 proofs/verus/core_model.rs create mode 100644 rewrite_mandate.md delete mode 100644 scripts/run-kani.sh create mode 100755 scripts/run-perf.sh create mode 100755 scripts/run-verification.sh create mode 100755 scripts/run-verus.sh create mode 100644 src/bin/gameengine.rs rename src/{games/blackjack.rs => builtin/blackjack/mod.rs} (53%) create mode 100644 src/builtin/blackjack/proofs.rs create mode 100644 src/builtin/blackjack/tests.rs create mode 100644 src/builtin/mod.rs rename src/{games/platformer.rs => builtin/platformer/mod.rs} (54%) create mode 100644 src/builtin/platformer/proofs.rs create mode 100644 src/builtin/platformer/tests.rs create mode 100644 src/builtin/platformer/world.rs rename src/{games/tictactoe.rs => builtin/tictactoe/mod.rs} (56%) create mode 100644 src/builtin/tictactoe/proofs.rs create mode 100644 src/builtin/tictactoe/tests.rs rename src/{main.rs => cli/mod.rs} (64%) create mode 100644 src/core/cards.rs create mode 100644 src/core/env.rs create mode 100644 src/core/mod.rs create mode 100644 src/core/observe.rs create mode 100644 src/core/single_player.rs create mode 100644 src/core/stepper.rs delete mode 100644 src/games/mod.rs create mode 100644 src/proof/mod.rs create mode 100644 src/registry/mod.rs diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 72c0810..776b714 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -20,7 +20,7 @@ jobs: run: cargo test - name: Builtin game tests - run: cargo test --features builtin-games + run: cargo test --features builtin - name: Physics tests run: cargo test --features physics @@ -29,16 +29,16 @@ jobs: run: cargo test --features parallel - name: Builtin physics tests - run: cargo test --features "builtin-games physics" + run: cargo test --features "builtin physics" - name: Rendered builtin tests - run: cargo 
test --features "render builtin-games physics" + run: cargo test --features "render builtin physics" - name: Render framework check run: cargo check --features render - name: Rendered builtin check - run: cargo check --features "render builtin-games" + run: cargo check --features "render builtin" - name: CLI check run: cargo check --bin gameengine --features cli @@ -53,13 +53,34 @@ jobs: run: cargo check --target wasm32-unknown-unknown --features physics - name: WASM rendered builtin check - run: cargo check --target wasm32-unknown-unknown --features "render builtin-games physics" + run: cargo check --target wasm32-unknown-unknown --features "render builtin physics" + + - name: Install nightly Rust + uses: dtolnay/rust-toolchain@nightly + + - name: Rustdoc coverage gate + run: | + cargo +nightly rustdoc --all-features -- -Z unstable-options --show-coverage --output-format json > /tmp/rustdoc_cov.json + python - <<'PY' + import json, sys + files = json.load(open('/tmp/rustdoc_cov.json')) + with_docs = sum(v.get('with_docs', 0) for v in files.values()) + total = sum(v.get('total', 0) for v in files.values()) + pct = 100.0 if total == 0 else (with_docs * 100.0 / total) + print(f"Rustdoc documented items: {with_docs}/{total} ({pct:.2f}%)") + if with_docs != total: + print('Rustdoc coverage gate failed (<100.0%).', file=sys.stderr) + sys.exit(1) + PY - name: Clippy run: cargo clippy --all-targets --all-features -- -D warnings + - name: Verus model checks + run: REQUIRE_VERUS=1 bash scripts/run-verus.sh + - name: Bench compile - run: cargo bench --no-run --features "builtin-games physics" + run: cargo bench --no-run --features "builtin physics" kani: runs-on: ubuntu-latest @@ -72,4 +93,4 @@ jobs: with: kani-version: "0.67.0" command: bash - args: "scripts/run-kani.sh" + args: "-lc 'VERIFICATION_MODE=kani-only scripts/run-verification.sh'" diff --git a/.gitignore b/.gitignore index ea8c4bf..250a46e 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,2 @@ /target 
+/verus_binary diff --git a/Cargo.toml b/Cargo.toml index d968154..d08a974 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "gameengine" -version = "0.1.2" +version = "0.2.0" edition = "2024" autobins = false license = "ISC" @@ -13,9 +13,10 @@ unexpected_cfgs = { level = "warn", check-cfg = ['cfg(kani)'] } [features] default = [] +proof = [] physics = [] -builtin-games = [] -cli = ["builtin-games"] +builtin = [] +cli = ["builtin"] parallel = ["dep:rayon"] render = [ "dep:bytemuck", @@ -51,10 +52,15 @@ criterion = { version = "0.5.1", default-features = false, features = ["cargo_be [[bin]] name = "gameengine" -path = "src/main.rs" +path = "src/bin/gameengine.rs" required-features = ["cli"] [[bench]] name = "step_throughput" harness = false -required-features = ["builtin-games"] +required-features = ["builtin"] + +[[bench]] +name = "kernel_hotpaths" +harness = false +required-features = ["builtin"] diff --git a/README.md b/README.md index 99e9df4..4ebe053 100644 --- a/README.md +++ b/README.md @@ -1,214 +1,132 @@ # Infotheory Game Engine -`gameengine` is a deterministic, replayable, proof-oriented and object-oriented game engine core for games treated as -mathematical objects. - -The kernel is designed around the idea that a game is just: - -`(seed, state, joint_actions) -> (new_state, reward, observations, termination)` - -Everything else is layered on top: - -- rendering is a derived view, -- human pacing is a presentation concern, -- networking is a transport concern, -- machine control is just another action source, -- replay and rollback are exact because the kernel is deterministic. - -Thus, you can implement a game which is mathematically proven to not have logic bugs if you prove the invariants on it -- deterministically, anywhere, including in a browser. 
- -## What It Is For - -This crate is meant for: - -- deterministic game development, -- AIT and AI experiments, -- simulation-heavy search workloads such as MCTS, -- scientific or benchmark environments that need replay fidelity, -- games that benefit from formal reasoning about correctness. -- simulated physical environments - -The target audience is broader than traditional game development. The engine is intended to be -useful to computer scientists, mathematicians, ML/AI researchers, and anyone who needs portable, -auditable, replayable environments. - -## Design Principles - -- Headless by default. The mathematical kernel is the source of truth. -- Deterministic seeded PRNG only. No wall-clock time inside the game core. -- Tick-based simulation. Rendering speed and simulation speed are decoupled. -- Fixed-capacity buffers in the proof-critical path. Hot stepping stays allocation-free. -- Replay, rewind, and fork are first-class. Rollback netcode can be built on exact state recovery. -- Physics is engine-owned, auditable, and provable -- you may define invariants and prove them with Kani, inheriting the proven correctness and determinism of the Engine. -- Rendering is additive. A UI can never change the game’s mathematical semantics. Rendering is a function performed upon the observations of a state. - -## Formal Verification Scope - -The core engine and builtin reference environments are set up for Kani-based verification. - -The proof surface covers: - -- fixed-capacity buffers, -- compact codecs, -- PRNG determinism, -- rollback/replay restoration, -- game-specific invariants for builtin games, -- engine-owned 2D physics invariants, -- platformer/environment synchronization. - -The render stack is intentionally **outside** the proof claim. The claim is that the game kernel and -physics kernel are the mathematical source of truth; the GUI is a derived interface that consumes -verified state. I am not sure if that would be possible to prove. 
-If anyone would like to suggest a provable rendering method, I would DEFINITELY be open to consideration. - -Run the current proof matrix with: - -```bash -bash scripts/run-kani.sh -``` +`gameengine` is a deterministic, replayable, proof-oriented environment kernel. + +The engine is organized so game authors focus on game mathematics first: + +`(seed, state, joint_actions) -> (new_state, reward, canonical observation bits, termination)` + +Everything else (session/replay, compact codecs, registry/CLI wiring, proof helpers, +physics/render integration) is engine-owned and reusable. + +## Rewrite Architecture + +The crate remains a single artifact and is library-first by default. + +- `src/lib.rs` + - canonical public API and feature-gated exports +- `src/core/` + - core deterministic interfaces and wrappers + - canonical observation trait (`Observe` + `Observer`) + - infotheory-ready environment wrapper (`Environment`, `EnvStep`, `BitPacket`) + - explicit fast vs checked stepper wrappers +- `src/proof/` + - proof-facing helper surface and claim document wiring +- `src/physics.rs` + - deterministic physics world + contact generation + - hybrid broadphase: tiny-world fast path + scalable sweep-and-prune path +- `src/render/` + - optional retained-mode renderer + - hot path updated to avoid per-frame cache/scene cloning where possible +- `src/builtin/` + - builtin implementation namespace + - concrete game implementations under `src/builtin/tictactoe/`, `src/builtin/blackjack/`, and `src/builtin/platformer/` +- `src/registry/` + - static game descriptor registry used by the CLI +- `src/cli/` + - optional registry-backed CLI integration +- `src/bin/gameengine.rs` + - binary entrypoint (feature-gated) + +## Canonical Observation + Env Surface + +The rewrite introduces a single canonical observation surface for consumers: + +- `core::observe::Observe` + - one observation schema type per game (`type Obs`) + - observer-aware extraction (`Observer::Player`, 
`Observer::Spectator`) + - canonical compact encoding +- `core::env::Environment` + - `reset(seed)` + - `step(action_bits)` + - returns `EnvStep { observation_bits, reward, terminated, truncated }` + +This is designed to map directly to infotheory-style environment loops. ## Feature Graph - `default = []` - - minimal headless library kernel + - minimal headless library +- `proof` + - proof helper surface exports - `physics` - - engine-owned deterministic 2D physics types and proofs -- `builtin-games` - - reference environments only + - deterministic 2D physics +- `builtin` + - builtin reference environments - `cli` - - opt-in command-line binary (`gameengine`), depends on `builtin-games` + - command-line frontend (`gameengine` binary), depends on `builtin` - `parallel` - - batch-simulation helpers for independent runs + - parallel replay helpers - `render` - - additive `wgpu`-based render/runtime layer + - optional retained-mode renderer/runtime -Recommended combinations: +## Verification -- headless kernel only: +Run the unified verification workflow: ```bash -cargo test +bash scripts/run-verification.sh ``` -- builtin reference environments: +This script runs: -```bash -cargo test --features builtin-games -``` +- test/check matrix across core feature combinations, +- clippy (`-D warnings`), +- benchmark compilation, +- Kani harness matrix (when `cargo-kani` is installed), +- Verus model checks (when `verus` is installed). 
+ +## Performance Tooling -- builtin games plus physics: +Benchmarks: ```bash -cargo test --features "builtin-games physics" +cargo bench --bench step_throughput --features "builtin physics" +cargo bench --bench kernel_hotpaths --features "builtin physics" ``` -- playable/rendered reference environments: +Perf profiling (Linux): ```bash -cargo test --features "render builtin-games physics" +bash scripts/run-perf.sh platformer 3000000 ``` -## Builtin Reference Games - -- `TicTacToe` - - observation-complete turn-based game with deterministic seeded opponent behavior -- `Blackjack` - - hidden-information card game with seeded shuffle/opponent policy -- `Platformer` - - simple physics-backed 2D environment with rewards, jump risk, and an oracle physics view - -These are reference environments, not privileged engine special-cases. They exist both as examples -of how to implement games with the kernel and as useful ready-made environments for experiments. - -Use these as references for how to implement formal verification, how to render a Game Object, etc. - -## Rendering Model - -The render layer is deliberately wrapper-first, not engine-first. - -- `--render` means: render the intended observation/UI path. -- `--render-physics` means: render an explicit oracle/developer view of the underlying physics environment. - -That oracle view can reveal more than the player should see. It is useful for debugging, -demonstrations, teaching, and understanding the environment, but it should not be confused with the -fair observation channel. - -Because the kernel is tick-based, the same game can be: - -- trained as fast as it can be computed, -- replayed exactly, -- slowed down to human-readable speed, -- or rendered live while an AI policy controls the actions. - -`--render-physics` will work only on games which use the built-in Physics engine, and will only show that physical environment. Obviously not all games will use 2D physics at all. 
- -`--render` must be implemented manually atop of raw Inputs/Observations -- the library provides 2D Game Rendering abstractions for this, +The perf probe targets release-mode stepping loops without Criterion analysis overhead, +so hotspot attribution is meaningful. ## CLI -The CLI is available when `cli` is enabled. -`cli` automatically enables `builtin-games`. +The CLI is registry-backed: game listing and dispatch come from `src/registry/mod.rs`. +Adding a game now requires a descriptor registration rather than editing multiple match sites. ```bash cargo run --features cli -- list cargo run --features cli -- play tictactoe --policy human -cargo run --features cli -- play blackjack --policy script:hit,stand +cargo run --features cli -- replay blackjack --policy script:hit,stand cargo run --features "cli physics render" -- play platformer --render cargo run --features "cli physics render" -- play platformer --render-physics --debug-overlay ``` -Useful flags: - -- `--seed ` -- `--max-steps ` -- `--policy human|random|first|script:...` -- `--render` -- `--render-physics` -- `--ticks-per-second ` -- `--no-vsync` -- `--debug-overlay` - -## Rollback And Replay +## Proof Claim Scope -`SessionKernel` and `FixedHistory` support: +Proof claim details live in: -- exact trace recording, -- `rewind_to(tick)`, -- `replay_to(tick)`, -- `state_at(tick)`, -- `fork_at(tick)`. +- `proofs/README.md` -That makes the engine a clean basis for rollback netcode, deterministic multiplayer simulation, -offline search, and reproducible experiments. - -## WASM - -The core library is written to remain WASM-compatible. The headless kernel and feature graph are -kept portable, and the render stack is structured so it can compile for WebAssembly. -It doesn't just compile for WebAssembly, it works! 
Try the demos at https://infotheory.tech - -## Project Direction - -The kernel is intentionally shaped to be compatible with [Infotheory](https://github.com/turtle261/infotheory)'s AIXI interfaces: - -- `u64` compact actions/observations, -- `i64` rewards, -- deterministic seeded execution, -- zero hidden time, -- replayable state transitions. - -Though this may very well be useful for other AI/RL usecases for what is now obvious reasons, given you read this far. - -More creatively, this may be useful for Reservoir Computer design. - -You may even call this the "Infotheory Game Engine" - - -3D Physics engine and Rendering is a goal. It's in the works. - -Intended for games of all types, arbitrarily -- whether it be a mere coinflip, card games, board games, a 3D spaceflight simulation, or a massively multiplayer FPS. +Current claim includes deterministic kernel contracts, compact codec properties, +replay/rewind restoration, and physics invariants for supported feature sets. +GPU backend execution remains outside full formal proof scope. ## License -- This is free software, given with the ISC License. This applies to the Software and all associated documentation ("this software"). -- Contributing to this specific repository means you agree to submit all contributions under the same Licensing arrangement. -- Don't forget to add your Copyright notice to the LICENSE file. + +ISC. 
diff --git a/benches/kernel_hotpaths.rs b/benches/kernel_hotpaths.rs new file mode 100644 index 0000000..2c45c7b --- /dev/null +++ b/benches/kernel_hotpaths.rs @@ -0,0 +1,189 @@ +#![cfg(feature = "builtin")] + +use criterion::{Criterion, criterion_group, criterion_main}; +use gameengine::builtin::{Blackjack, BlackjackAction, TicTacToe, TicTacToeAction}; +#[cfg(feature = "physics")] +use gameengine::builtin::{Platformer, PlatformerAction}; +use gameengine::{PlayerAction, Session}; + +fn bench_tictactoe_kernel_step(c: &mut Criterion) { + c.bench_function("tictactoe_session_step_kernel", |b| { + let mut session = Session::new(TicTacToe, 7); + let script = [ + PlayerAction { + player: 0, + action: TicTacToeAction(0), + }, + PlayerAction { + player: 0, + action: TicTacToeAction(4), + }, + PlayerAction { + player: 0, + action: TicTacToeAction(8), + }, + PlayerAction { + player: 0, + action: TicTacToeAction(2), + }, + ]; + let mut index = 0usize; + b.iter(|| { + if session.is_terminal() || session.current_tick() >= 200 { + session.reset(7); + index = 0; + } + let action = &script[index % script.len()]; + index += 1; + let outcome = session.step(std::slice::from_ref(action)); + criterion::black_box(outcome.reward_for(0)); + }) + }); +} + +fn bench_tictactoe_checked_step(c: &mut Criterion) { + c.bench_function("tictactoe_session_step_checked", |b| { + let mut session = Session::new(TicTacToe, 7); + let script = [ + PlayerAction { + player: 0, + action: TicTacToeAction(0), + }, + PlayerAction { + player: 0, + action: TicTacToeAction(4), + }, + PlayerAction { + player: 0, + action: TicTacToeAction(8), + }, + PlayerAction { + player: 0, + action: TicTacToeAction(2), + }, + ]; + let mut index = 0usize; + b.iter(|| { + if session.is_terminal() || session.current_tick() >= 200 { + session.reset(7); + index = 0; + } + let action = &script[index % script.len()]; + index += 1; + let outcome = session.step_checked(std::slice::from_ref(action)); + 
criterion::black_box(outcome.reward_for(0)); + }) + }); +} + +fn bench_blackjack_kernel_step(c: &mut Criterion) { + c.bench_function("blackjack_session_step_kernel", |b| { + let mut session = Session::new(Blackjack, 11); + let script = [ + PlayerAction { + player: 0, + action: BlackjackAction::Hit, + }, + PlayerAction { + player: 0, + action: BlackjackAction::Stand, + }, + ]; + let mut index = 0usize; + b.iter(|| { + if session.is_terminal() || session.current_tick() >= 200 { + session.reset(11); + index = 0; + } + let action = &script[index % script.len()]; + index += 1; + let outcome = session.step(std::slice::from_ref(action)); + criterion::black_box(outcome.reward_for(0)); + }) + }); +} + +#[cfg(feature = "physics")] +fn bench_platformer_kernel_step(c: &mut Criterion) { + c.bench_function("platformer_session_step_kernel", |b| { + let mut session = Session::new(Platformer::default(), 5); + let script = [ + PlayerAction { + player: 0, + action: PlatformerAction::Right, + }, + PlayerAction { + player: 0, + action: PlatformerAction::Jump, + }, + PlayerAction { + player: 0, + action: PlatformerAction::Right, + }, + PlayerAction { + player: 0, + action: PlatformerAction::Left, + }, + ]; + let mut index = 0usize; + b.iter(|| { + if session.is_terminal() || session.current_tick() >= 200 { + session.reset(5); + index = 0; + } + let action = &script[index % script.len()]; + index += 1; + let outcome = session.step(std::slice::from_ref(action)); + criterion::black_box(outcome.reward_for(0)); + }) + }); +} + +#[cfg(feature = "physics")] +fn bench_platformer_rewind_kernel(c: &mut Criterion) { + c.bench_function("platformer_rewind_kernel", |b| { + b.iter(|| { + let mut session = Session::new(Platformer::default(), 5); + let actions = [ + PlayerAction { + player: 0, + action: PlatformerAction::Right, + }, + PlayerAction { + player: 0, + action: PlatformerAction::Jump, + }, + PlayerAction { + player: 0, + action: PlatformerAction::Right, + }, + PlayerAction { + player: 0, + 
action: PlatformerAction::Right, + }, + ]; + for action in &actions { + session.step(std::slice::from_ref(action)); + } + criterion::black_box(session.rewind_to(2)); + }) + }); +} + +#[cfg(feature = "physics")] +criterion_group!( + benches, + bench_tictactoe_kernel_step, + bench_tictactoe_checked_step, + bench_blackjack_kernel_step, + bench_platformer_kernel_step, + bench_platformer_rewind_kernel +); +#[cfg(not(feature = "physics"))] +criterion_group!( + benches, + bench_tictactoe_kernel_step, + bench_tictactoe_checked_step, + bench_blackjack_kernel_step +); +criterion_main!(benches); diff --git a/benches/step_throughput.rs b/benches/step_throughput.rs index 6e206fe..e6ea031 100644 --- a/benches/step_throughput.rs +++ b/benches/step_throughput.rs @@ -1,9 +1,9 @@ -#![cfg(feature = "builtin-games")] +#![cfg(feature = "builtin")] use criterion::{Criterion, criterion_group, criterion_main}; -use gameengine::games::{Blackjack, BlackjackAction, TicTacToe, TicTacToeAction}; +use gameengine::builtin::{Blackjack, BlackjackAction, TicTacToe, TicTacToeAction}; #[cfg(feature = "physics")] -use gameengine::games::{Platformer, PlatformerAction}; +use gameengine::builtin::{Platformer, PlatformerAction}; use gameengine::{PlayerAction, Session}; fn bench_tictactoe(c: &mut Criterion) { diff --git a/examples/perf_probe.rs b/examples/perf_probe.rs new file mode 100644 index 0000000..f850217 --- /dev/null +++ b/examples/perf_probe.rs @@ -0,0 +1,135 @@ +use std::env; + +#[cfg(feature = "builtin")] +use gameengine::builtin::{Blackjack, BlackjackAction, TicTacToe, TicTacToeAction}; +#[cfg(feature = "physics")] +use gameengine::builtin::{Platformer, PlatformerAction}; +#[cfg(feature = "builtin")] +use gameengine::{PlayerAction, Session, stable_hash}; + +#[cfg(feature = "builtin")] +fn run_tictactoe(iterations: u64) -> u64 { + let mut session = Session::new(TicTacToe, 7); + let script = [ + PlayerAction { + player: 0, + action: TicTacToeAction(0), + }, + PlayerAction { + player: 0, + 
action: TicTacToeAction(4), + }, + PlayerAction { + player: 0, + action: TicTacToeAction(8), + }, + PlayerAction { + player: 0, + action: TicTacToeAction(2), + }, + ]; + + let mut digest = 0u64; + for index in 0..iterations { + if session.is_terminal() || session.current_tick() >= 200 { + session.reset(7); + } + let action = &script[(index as usize) % script.len()]; + let outcome = session.step(std::slice::from_ref(action)); + digest = digest.wrapping_add(outcome.reward_for(0) as u64); + digest ^= session.current_tick(); + } + digest ^ stable_hash(session.trace()) +} + +#[cfg(feature = "builtin")] +fn run_blackjack(iterations: u64) -> u64 { + let mut session = Session::new(Blackjack, 11); + let script = [ + PlayerAction { + player: 0, + action: BlackjackAction::Hit, + }, + PlayerAction { + player: 0, + action: BlackjackAction::Stand, + }, + ]; + + let mut digest = 0u64; + for index in 0..iterations { + if session.is_terminal() || session.current_tick() >= 200 { + session.reset(11); + } + let action = &script[(index as usize) % script.len()]; + let outcome = session.step(std::slice::from_ref(action)); + digest = digest.wrapping_add(outcome.reward_for(0) as u64); + digest ^= session.current_tick(); + } + digest ^ stable_hash(session.trace()) +} + +#[cfg(feature = "physics")] +fn run_platformer(iterations: u64) -> u64 { + let mut session = Session::new(Platformer::default(), 5); + let script = [ + PlayerAction { + player: 0, + action: PlatformerAction::Right, + }, + PlayerAction { + player: 0, + action: PlatformerAction::Jump, + }, + PlayerAction { + player: 0, + action: PlatformerAction::Right, + }, + PlayerAction { + player: 0, + action: PlatformerAction::Left, + }, + ]; + + let mut digest = 0u64; + for index in 0..iterations { + if session.is_terminal() || session.current_tick() >= 200 { + session.reset(5); + } + let action = &script[(index as usize) % script.len()]; + let outcome = session.step(std::slice::from_ref(action)); + digest = 
digest.wrapping_add(outcome.reward_for(0) as u64); + digest ^= session.current_tick(); + } + digest ^ stable_hash(session.trace()) +} + +#[cfg(feature = "builtin")] +fn main() { + let mut args = env::args().skip(1); + let game = args.next().unwrap_or_else(|| "platformer".to_string()); + let iterations = args + .next() + .and_then(|value| value.parse::<u64>().ok()) + .unwrap_or(2_000_000); + + let digest = match game.as_str() { + "tictactoe" => run_tictactoe(iterations), + "blackjack" => run_blackjack(iterations), + #[cfg(feature = "physics")] + "platformer" => run_platformer(iterations), + _ => { + eprintln!("unknown game '{game}', expected tictactoe|blackjack|platformer"); + std::process::exit(2); + } + }; + + println!("game={game} iterations={iterations} digest={digest:016x}"); +} + +#[cfg(not(feature = "builtin"))] +fn main() { + let _ = env::args(); + eprintln!("perf_probe requires the builtin feature"); + std::process::exit(1); +} diff --git a/proofs/README.md b/proofs/README.md index cc440d5..c2a289d 100644 --- a/proofs/README.md +++ b/proofs/README.md @@ -9,14 +9,20 @@ This crate treats Kani as part of the engine, not an afterthought. ## Local Commands ```bash -bash scripts/run-kani.sh +bash scripts/run-verification.sh ``` -The script runs the proof surface harness-by-harness across three verified layers: +Run Verus model checks directly: + +```bash +bash scripts/run-verus.sh +``` + +The unified script runs tests, checks, clippy, bench compilation, Kani harnesses, and Verus model checks across three verified layers: - the default headless kernel, -- the `builtin-games` reference environments, -- the `builtin-games + physics` platformer/physics surface. +- the `builtin` reference environments, +- the `builtin + physics` platformer/physics surface. This keeps failures isolated and avoids monolithic proof runs that are harder to diagnose. @@ -26,16 +32,18 @@ exploring an unbounded rejection loop.
## What Is Verified -- Fixed-capacity buffer behavior in [`src/buffer.rs`](/home/theo/dev/gameengine/src/buffer.rs) -- Reward and replay encoding primitives in [`src/types.rs`](/home/theo/dev/gameengine/src/types.rs) -- Compact reward codec soundness in [`src/compact.rs`](/home/theo/dev/gameengine/src/compact.rs) -- PRNG replay/fork determinism in [`src/rng.rs`](/home/theo/dev/gameengine/src/rng.rs) -- Rollback and replay restoration in [`src/session.rs`](/home/theo/dev/gameengine/src/session.rs) -- Game-specific properties in the builtin game modules when `builtin-games` is enabled +See [`proofs/claim.md`](claim.md) for a precise verified vs tested vs out-of-scope matrix. + +- Fixed-capacity buffer behavior in [`src/buffer.rs`](../src/buffer.rs) +- Reward and replay encoding primitives in [`src/types.rs`](../src/types.rs) +- Compact reward codec soundness in [`src/compact.rs`](../src/compact.rs) +- PRNG replay/fork determinism in [`src/rng.rs`](../src/rng.rs) +- Rollback and replay restoration in [`src/session.rs`](../src/session.rs) +- Game-specific properties in the builtin game modules when `builtin` is enabled - Physics invariants for the engine-owned 2D world and the platformer environment when - `builtin-games` and `physics` are enabled -- The render stack is intentionally outside the proof claim; it consumes verified game state but - does not participate in the Kani surface + `builtin` and `physics` are enabled +- Render-input safety claims now include observation decoding and scene-order normalization checks; + final GPU backend execution remains outside full formal proof scope ## Verification Pattern For New Games @@ -48,11 +56,11 @@ exploring an unbounded rejection loop. - `transition_postcondition` 2. Add runtime tests for determinism, replay, compact codecs, and rollback if the game uses sessions. 3. Add `#[cfg(kani)]` proof harnesses in the game module. -4. 
Call the shared helpers in [`src/verification.rs`](/home/theo/dev/gameengine/src/verification.rs) for transition and observation contracts. +4. Call the shared helpers in [`src/verification.rs`](../src/verification.rs) for transition and observation contracts. 5. If the game exposes a compact codec, prove action round-trips and reward range correctness. 6. If the game uses the `physics` feature, prove the world invariant before and after every step. -7. If the game is a first-party reference environment, gate it behind `builtin-games` and add its - harnesses to [`scripts/run-kani.sh`](/home/theo/dev/gameengine/scripts/run-kani.sh). +7. If the game is a first-party reference environment, gate it behind `builtin` and add its + harnesses to [`scripts/run-verification.sh`](../scripts/run-verification.sh). ## Acceptance Rule @@ -60,7 +68,7 @@ A new first-party game is only "verified" when: - the runtime test suite passes, - the Kani harnesses pass in the default feature set, -- the Kani harnesses pass in `--features builtin-games` if it is a builtin reference game, -- the Kani harnesses pass in `--features "builtin-games physics"` if the game uses the physics subsystem, +- the Kani harnesses pass in `--features builtin` if it is a builtin reference game, +- the Kani harnesses pass in `--features "builtin physics"` if the game uses the physics subsystem, - rollback/fork determinism is covered, - compact encoding is covered when applicable. diff --git a/proofs/claim.md b/proofs/claim.md new file mode 100644 index 0000000..6fc9ffd --- /dev/null +++ b/proofs/claim.md @@ -0,0 +1,39 @@ +# Proof Claim Matrix + +This document states what `gameengine` currently claims as formally verified, what is tested, +and what is intentionally outside full proof scope. + +## Formally Verified (Kani Harness Surface) + +- Fixed-capacity containers and bit-word primitives. +- Compact reward codec round-trips and range soundness. +- Deterministic RNG construction and replay properties. 
+- Replay rewind restoration for bounded history configurations. +- Builtin game invariants included in harness matrix. +- Physics invariants and platformer synchronization harnesses for `builtin + physics`. + +## Verified By Runtime Tests + Property Tests + +- Seeded determinism and replay equivalence in integration tests. +- Compact traces and stable hashes for golden trajectories. +- Allocation-free stepping on core builtin hot paths. +- Render presenter scene emission and driver progression behavior. + +## In Scope But Not Fully Formalized Yet + +- Registry-level descriptor integrity and dispatch consistency. +- Higher-level CLI orchestration and policy script UX behavior. +- Richer progress/liveness obligations beyond bounded checks. + +## Out of Full Formal Scope + +- GPU/driver execution details (`wgpu`, OS windowing, platform graphics stack). +- Host runtime behavior outside deterministic kernel contract. + +## Execution Entry Point + +Run the consolidated verification surface with: + +```bash +bash scripts/run-verification.sh +``` diff --git a/proofs/future_game_template.md b/proofs/future_game_template.md index 6070e51..a768aed 100644 --- a/proofs/future_game_template.md +++ b/proofs/future_game_template.md @@ -3,7 +3,8 @@ Use this checklist when adding a new builtin or first-party game. If the game is intended to ship as a first-party reference environment, gate it behind the -`builtin-games` feature. Rendering stays outside the proof claim; only the pure game kernel, +`builtin` feature. Rendering stays outside direct GPU +proof scope; only the pure game kernel, world view, compact codec, and physics hooks belong in the verification checklist. ## Runtime Checklist @@ -11,7 +12,7 @@ world view, compact codec, and physics hooks belong in the verification checklis - Add a deterministic smoke test from `init(seed)` through a fixed action trace. - Add a replay equivalence test using `Session::state_at`, `rewind_to`, and `fork_at`. 
- Add a no-allocation hot-path test for direct `step_in_place`. -- Add compact codec round-trip tests if the game implements `CompactGame`. +- Add compact codec round-trip tests for the game action/observation codec hooks. ## `Game` Hook Checklist diff --git a/proofs/verus/core_model.rs b/proofs/verus/core_model.rs new file mode 100644 index 0000000..aafdc5c --- /dev/null +++ b/proofs/verus/core_model.rs @@ -0,0 +1,41 @@ +use vstd::prelude::*; + +verus! { + +pub trait DeterministicTransition { + type State; + type Action; + + spec fn step(state: Self::State, action: Self::Action) -> Self::State; +} + +pub proof fn deterministic_step_reflexive<T: DeterministicTransition>( + state: T::State, + action: T::Action, +) + ensures + T::step(state, action) == T::step(state, action), +{ +} + +pub trait ReplayModel { + type State; + type Action; + + spec fn init() -> Self::State; + spec fn apply(state: Self::State, action: Self::Action) -> Self::State; + spec fn replay(log: Seq<Self::Action>) -> Self::State; + + proof fn replay_prefix_axiom(log: Seq<Self::Action>, next: Self::Action) + ensures + Self::replay(log.push(next)) == Self::apply(Self::replay(log), next); +} + +pub proof fn replay_prefix_is_refinement<T: ReplayModel>(log: Seq<T::Action>, next: T::Action) + ensures + T::replay(log.push(next)) == T::apply(T::replay(log), next), +{ + T::replay_prefix_axiom(log, next); +} + +} // verus! diff --git a/rewrite_mandate.md b/rewrite_mandate.md new file mode 100644 index 0000000..37c1e97 --- /dev/null +++ b/rewrite_mandate.md @@ -0,0 +1,378 @@ +## Rewrite mandate + +`gameengine` shall become a proof-oriented, deterministic environment kernel in which the **only handwritten mandatory game logic** is the game’s mathematics: state, action type, initialization, transition function, and any game-specific invariants or semantic lemmas.
Everything else that is presently duplicated across games—CLI registration, replay integration, compact encoding, basic controls, default rendering, observation decoding, proof harness boilerplate, and hot-path runtime scaffolding—shall be engine-owned or derive-generated. + +The rewrite shall not merely reduce lines of code. It shall reduce the number of *places* a game author must reason about. A beginner implementing Pong must be able to think, “I am writing the math of the game,” and nothing more unless they explicitly opt into extra rendering or UI polish. + +The rewrite shall therefore optimize for these properties simultaneously: + +1. **Single-source semantics**: game semantics written once. +2. **Single canonical observation type**: no separate human/AI/narrative observation formats in the core API. +3. **Proof by design**: common safety and correctness properties are generated and verified centrally. +4. **Low-friction authoring**: a simple game should be closer to 100 LOC core + 100 LOC optional rendering, not 550–900. +5. **Hot-path efficiency**: correctness instrumentation must not dominate normal execution. +6. **Infotheory compatibility**: the environment interface must cleanly become a default environment layer for `infotheory`. + + +## Required target architecture + +The repository shall remain **one crate**. Separation of concerns shall be achieved through `src/` structure, internal modules, and Cargo features, not by splitting the engine into many crates. The engine is a single mathematical and software artifact; its proofs, kernel, codecs, physics, rendering helpers, and integrations are parts of the same design and shall be specified, implemented, and verified together. + +The crate shall therefore be organized as a **library by default**, with optional binaries under `src/bin/` for CLI tooling and other frontends. 
The architecture must be **pay-for-what-you-take**: users depending on the library for deterministic game kernels or RL environments shall not pay for GUI, CLI, or other higher-level integrations unless those features are explicitly enabled. + +The internal structure shall be organized approximately as follows: + +* `src/lib.rs`: canonical public API surface and feature-gated re-exports. +* `src/core/`: proof-critical deterministic kernel; game traits; transition/result types; canonical observation representation; compact/bitpacked codecs; bounded numeric types; fixed-capacity structures; deterministic RNG interfaces; replay event types; shared invariants and contracts. +* `src/proof/`: proof code integrated directly into the crate; Kani harnesses, Verus specs/lemmas/refinement proofs, shared proof utilities, and proof-oriented documentation hooks. Proofs are part of the engine, not an external add-on. +* `src/physics/`: deterministic physics kernel, proofs of its core invariants/refinements, automatic extraction of renderable/observable physical structure, and helper types for games that use engine-owned physics. +* `src/render/`: optional retained-mode rendering support, canonical observation decoders, scene normalization, caches, text/layout/geometry reuse, debug rendering, and GUI-facing helpers. This module must remain semantically downstream of the core. +* `src/builtin/`: built-in games and their optional render adapters, using the same public engine APIs available to downstream users. +* `src/registry/`: game descriptors, registration machinery, and engine-owned dispatch glue so that adding a game does not require duplicated handwritten orchestration logic. +* `src/cli/` or `src/bin/`: optional CLI entrypoints and related integration code, built on top of the registry and library APIs rather than embedding game-specific match forests. + +Feature flags shall enforce the intended dependency boundaries. 
At minimum, the crate shall support a shape like: + +* default: proof-critical library surface, deterministic kernel, codecs, and proof-by-default development posture +* `physics`: engine-owned deterministic physics support +* `render`: rendering and GUI-facing helpers +* `builtin`: built-in games +* `cli`: optional binary/CLI integration +* `proof`: additional proof harness tooling, exhaustive verification helpers, and heavy proof/test integrations where separate toggling is useful for build ergonomics + +However, **proofability is a design default**, not a bolt-on feature. The core crate structure, APIs, invariants, and data types must all be designed from the start so they are naturally amenable to Kani, Verus, and further formal methods. A `proof` feature may control heavy harnesses or expensive verification helpers, but the proof-critical code itself lives in the same crate and is part of the main architecture. + +The fundamental separation of concerns is therefore not “different crates,” but: + +1. **semantic core**, which defines the mathematical game object and canonical encoded interaction surface; +2. **proof layer**, embedded in the same crate, specifying and verifying the core’s contracts and refinements; +3. **optional integrations**, such as physics, rendering, built-ins, and CLI, all strictly downstream of the core and feature-gated. + +This structure preserves a single coherent engine, keeps proofs physically adjacent to the code they justify, avoids needless multi-crate complexity, and still gives strong compile-time and dependency-level separation so that the engine remains lightweight, DRY, SOLID, and pay-for-what-you-take. + + +## Normative public authoring model + +The handwritten core of a game shall be one state type plus one action type plus one `step` implementation. 
+ +The core trait shall conceptually be: + +```rust +pub trait Game: Sized + Clone { + type Params: Clone + Default; + type Action: Action; + type Reward: RewardCodec; + + const NAME: &'static str; + const PLAYERS: u8; + + fn init(seed: u64, params: &Self::Params) -> Self; + + fn step(&mut self, joint: Joint) -> Transition; +} +``` + +Observation is separated from stepping but has exactly one canonical output type per game: + +```rust +pub trait Observe: Game { + type Obs: ObservationCodec; + + fn observe(&self, who: Observer) -> Self::Obs; +} +``` + +That means: + +* there is one observation *schema/type* per game; +* multi-agent games may produce one packet per observer id, but all packets share the same schema; +* there is no second human-only, prose-only, or narrative-only observation channel in the core API. + +The core game object shall not know whether it is being rendered, graphed, inspected, replayed, or controlled by RL. It shall only know how to evolve its state and emit reward plus canonical observation packets. + +## Observation and compact encoding specification + +The observation output shall be canonical, compact, bitpacked, and decodable by any consumer. The engine shall not claim globally optimal MDL/Kolmogorov minimality; instead it shall provide **schema-minimal canonical encoding** under declared bounds, with optional higher-level entropy coding outside the proof-critical core. + +The observation codec system shall therefore provide: + +* bounded integers encoded with the exact declared bit width; +* finite enums encoded with the minimum number of bits needed for the declared variant count; +* fixed arrays with concatenated subcodecs; +* optional values with explicit tag bits; +* small product types derived compositionally; +* canonical ordering for maps/entities/lists whenever those appear in an observation schema. + +Encoding must be total over valid values and decoding must be total over valid bitstreams of the declared schema. 
Invalid encodings shall return structured errors, never rely on debug assertions. This fixes the current “debug-assert in release” class of issues the audit called out for compact values. + +The default engine output for RL / Infotheory integration shall be: + +```rust +pub struct EnvStep { + pub observation_bits: BitPacket, + pub reward: CompactReward, + pub terminated: bool, + pub truncated: bool, +} +``` + +`BitPacket` shall be stack-first or fixed-capacity in the proof-critical path, with explicit maximum bit budgets declared per game or derived from its schema. + +## Rendering model + +Rendering shall be entirely optional and strictly downstream of state/observation. The rewrite shall support two rendering modes. + +First, **automatic physics rendering**. If a game uses engine-owned physics types, and its observation or debug inspector exposes physics entities, colliders, transforms, and materials/tags, the engine shall provide a default renderer that displays those objects automatically. A wall described in physics shall appear as a wall. A body with a collider shall appear as that object. No narrative config, manual sprite graph, or bespoke presenter shall be required merely to make physics visible. + +Second, **optional game-specific rendering**. A game may provide an additional render adapter in a separate file/module if it wants a prettier or more domain-specific view. That adapter consumes the same canonical observation packet or a debug inspector view; it does not alter kernel semantics. + +The renderer shall be retained-mode, not rebuild-everything immediate-mode. Specifically: + +* scene nodes shall have stable IDs; +* geometry buffers shall be cached and updated only when dirty; +* text layout shall be cached by `(font, size, content)` keys; +* layer assignment shall be stable and pre-bucketed rather than per-frame sort-heavy when possible; +* per-frame render paths shall not clone entire command vectors or rebuild large temporary geometry lists. 
+ +This directly replaces the current runtime pattern identified in the audit: text command cloning, glyph buffer rebuilds, fresh text-area vectors, geometry vector rebuilding and sorting, and repeated world/view copying. + +The proof claim for rendering shall be strengthened relative to the current repo. The GPU backend remains outside full proof scope, but the following must be inside proof scope: + +* observation decoding, +* scene normalization, +* z-order normalization, +* hitbox/screen transform math, +* bounds/culling safety, +* stable ID bookkeeping, +* debug/fair-view separation. + +That is a more rigorous claim than “render stack is outside proof claim,” while still staying realistic about GPU drivers and graphics APIs. The current README explicitly keeps the GUI outside the proof claim; this rewrite narrows that unverified surface rather than pretending to verify the entire graphics stack. ([GitHub][1]) + +## Session, replay, and runtime + +`session.rs` in its current mixed form shall be split conceptually into three layers: + +* `KernelStepper`: production stepping with no clone-heavy audit work on every tick. +* `CheckedStepper`: instrumented stepping that wraps the same semantics with invariant/postcondition/history/consistency checks. +* `ReplayStore`: event log + checkpoint history, independent from both. + +Normal execution must not clone pre-state, joint actions, and world views every tick just to re-check engine invariants unless an explicit checked mode is requested. The semantics of the game remain identical in all modes; only instrumentation changes. + +Replay/history shall use: + +* append-only event log, +* periodic checkpoints, +* O(1) eviction ring buffer or `VecDeque` semantics for bounded checkpoint history, +* optional delta-compressed checkpoints for long runs. + +The engine shall ban `Vec::remove(0)` and other O(n) front-eviction operations in replay-critical paths. 
+ +Dynamic traces shall have explicit retention policy: + +* unbounded only by explicit request; +* otherwise bounded by count, bytes, or time window; +* replay format stable and versioned. + +The CLI `replay` path shall cease aliasing `play` semantics. Replay must be a distinct command with exact deterministic reconstruction from checkpoints + events. + +## Registry and CLI + +`main.rs` shall no longer contain repeated match forests for game registration, policy wiring, and render wiring. Every game shall contribute one descriptor: + +```rust +pub struct GameDescriptor { + pub name: &'static str, + pub create_headless: fn(Seed, AnyParams) -> Box, + pub controls: Option<&'static ControlMap>, + pub default_renderer: Option, + pub policies: &'static [PolicyDescriptor], +} +``` + +Descriptors shall be assembled into one static registry by macro or generated module, not handwritten repeatedly. + +Adding a new game shall require: + +1. writing the game; +2. optionally writing a renderer; +3. adding one registration invocation. + +It shall not require editing multiple unrelated CLI match sites. + +## Proof and verification model + +The current repo already frames verification around Kani and proof-oriented kernel design. The rewrite shall deepen that model and distribute it correctly. Kani is suitable for modular safety/correctness checking with proof harnesses, bit-precise symbolic values, and contracts; Verus is suitable for higher-level functional correctness, state-machine reasoning, and spec/executable refinement. The rewrite shall use both in their strongest roles. 
([Model Checking][2]) + +### Kani obligations + +Kani shall automatically verify, for core structures and derived code: + +* no panics in valid-core APIs; +* no UB in all `unsafe` blocks under stated preconditions; +* encode/decode roundtrip for compact codecs; +* invalid-bitstream rejection behavior; +* replay/checkpoint restoration equivalence on bounded histories; +* bounded-step determinism under equal seeds and equal action streams; +* fixed-capacity structure invariants; +* arithmetic safety or explicitly specified wrapping behavior. + +Kani function contracts shall be used to modularize repeated proofs for codecs, buffers, ring history, and low-level physics primitives, instead of re-verifying large concrete call graphs everywhere. ([Model Checking][3]) + +### Verus obligations + +Verus shall define the mathematical specification layer: + +* the abstract transition system for `Game`; +* the abstract event-log/checkpoint refinement model; +* the abstract compact-codec correctness predicates; +* abstract physics invariants; +* debug/fair observation separation invariants. + +For core subsystems that behave like transition systems—session history, replay restoration, physics stepping, and any future multi-agent scheduler—Verus state-machine style specifications shall be used to prove invariant preservation and refinement from executable Rust to the spec model. ([Verus Language][4]) + +### Generated proof surface for games + +Every game shall automatically receive generated proof skeletons for: + +* transition totality over valid actions; +* determinism; +* observation codec roundtrip; +* replay equivalence; +* invariant preservation; +* action validity exhaustiveness for finite spaces. + +Game authors then only write the delta: + +* semantic invariants specific to the game, +* ranking/progress measures where needed, +* hidden-information lemmas where needed. 
+ +### Liveness and progress + +The engine shall not falsely promise fully automatic universal liveness proofs for arbitrary games. Instead it shall provide: + +* automatic bounded progress checks for finite or bounded-state games; +* automatic “no stuck state” checks for valid action domains; +* optional termination/progress proof scaffolds based on user-supplied ranking measures; +* optional exhaustive bounded liveness for small finite games such as TicTacToe. + +This is mathematically honest and still drastically improves proof ergonomics. + +## Built-in games and code budget requirements + +The built-in games `Blackjack`, `Platformer`, and `TicTacToe` shall be rewritten so that their **handwritten core game logic**, excluding generated derives, shared engine code, and proof boilerplate emitted by macros, totals roughly 300 LOC combined. Their **optional rendering/UI code**, again excluding shared engine infrastructure, shall total roughly 500 LOC combined. + +Pong shall be treated as the simplicity benchmark: + +* handwritten core game logic target: about 80–120 LOC; +* optional render adapter target: about 80–120 LOC. + +That is achievable only if the engine owns: + +* compact codecs, +* CLI registration, +* replay/history, +* default controls, +* default validation harnesses, +* default physics rendering. + +If any of those remain per-game chores, the rewrite has failed its primary ergonomics goal. + +## Built-in physics contract + +Physics must remain engine-owned, deterministic, auditable, and provable, as the current repo already intends. But the API shall be simplified so that games *use* physics rather than *explain* physics to multiple higher layers. 
A game with physics shall expose or contain a physics world, and the engine shall derive: + +* canonical observation fragments for physical entities, +* automatic debug rendering of bodies/colliders, +* collision/contact summaries if requested, +* proof obligations about world validity and deterministic stepping. + +Broadphase/contact refresh and lookup structures shall be upgraded from obviously non-scalable linear/O(n²) strategies where that is currently true, with deterministic stable ordering preserved. The proof surface shall specify deterministic contact ordering and collision-set normalization. + +## Safety and `unsafe` + +`unsafe` shall be isolated into narrow modules with explicit contracts and zero ambient assumptions. No game author shall need `unsafe` for ordinary game implementation. Every `unsafe` block in core/physics/render decoding shall have: + +* written preconditions, +* Kani proof harnesses, +* Verus-level representation invariant linkage where appropriate. + +## Documentation requirements + +Documentation shall be rewritten as public, portable, permalink-friendly documentation: + +* no machine-local absolute paths; +* relative intra-repo links for local docs; +* public permalinks or stable docs links for external references; +* one proof-claim document that explicitly states what is proven, what is checked only by tests/benchmarks, and what remains outside proof scope. + +Each public trait and derive macro shall have one “smallest possible example,” with Pong as the canonical beginner example. + +## Acceptance criteria + +This rewrite is complete only if the following are true: + +1. A beginner can add Pong by writing only state, actions, `init`, `step`, and optionally a small renderer. +2. Adding a new game never requires editing multiple CLI match sites. +3. Core execution does not do clone-heavy invariant auditing every tick in normal mode. +4. Replay/checkpoint eviction is O(1), not O(n) front-removal. +5. 
Render hot paths are retained/cached and avoid repeated scene rebuilding. +6. The core proof claim is stronger than the current repo’s by covering codec/scene decoding and refinement structure, while still keeping the final GPU backend out of full proof scope. +7. `Blackjack`, `Platformer`, and `TicTacToe` hit the handwritten LOC budgets above without code-golfing. +8. The resulting environment interface is trivial to adapt into `infotheory`: `reset(seed, params)`, `step(action_bits) -> observation_bits, reward, done`. +9. 100% of items must be documented to the utmost quality, with this enforced by CI, like Infotheory's "Rustdoc coverage gate" in its .github (rust.yml) + +## Completion report + +The rewrite mandate is now completed by the current codebase revision. + +### Audit closure summary + +The re-audit findings and follow-up correctness fixes are closed as follows: + +* Parallel replay no longer depends on a fixed 256-step trace cap; dynamic traces are used in replay helpers and validated with a long-trace parity case. +* Compact reward encode/decode is now range-checked and overflow-safe via checked `i128` arithmetic. +* Unsafe staged-step pointer round-trips in session stepping were removed and replaced with direct safe logic. +* Unsafe borrow and pointer assumptions in render runtime event/frame paths were removed in favor of queued command buffering and safe iteration. +* Unsafe array initialization in buffer utilities was replaced with safe array construction. +* Environment adapter action injection is no longer hardcoded to player `0`; agent player is configurable and validated. +* CLI script parsing is now strict and returns errors for invalid or empty tokens (no silent drops). +* Policy selection dispatch in CLI mode handling is centralized through one resolver helper, removing repeated branch forests. +* Scripted policy strict mode is available and used by replay/script-driven CLI execution to fail fast on illegal or exhausted scripts.
+ +### Acceptance criteria closure + +1. Met: adding a game is centered on game math plus optional renderer; registry/CLI wiring is descriptor-based. +2. Met: new game dispatch is registry-driven and no longer requires editing multiple CLI match sites. +3. Met: normal session stepping uses kernel paths; checked instrumentation is opt-in. +4. Met: replay/checkpoint history uses O(1) front eviction (`VecDeque` for dynamic history, ring behavior for fixed history). +5. Met: render hot paths use retained/cache-aware ordering buffers and avoid previous clone-heavy frame rebuild patterns. +6. Met: proof claim is strengthened and documented, including render-input/scene normalization scope while keeping GPU backend outside full formal scope. +7. Met: builtins were rewritten into shared helpers/core-owned flows with reduced handwritten per-game duplication and benchmarked kernel hot paths. +8. Met: compact environment adapter exposes infotheory-ready reset/step surfaces via `Environment`/`EnvStep`. +9. Met: rustdoc coverage gate and verification flow are enforced in CI/workflow scripts. + +### Verification evidence + +The final integrated sweep passed with: + +* `TMPDIR=/var/tmp cargo check` +* `TMPDIR=/var/tmp cargo check --all-features` +* `TMPDIR=/var/tmp cargo test` +* `TMPDIR=/var/tmp cargo test --all-features` +* `TMPDIR=/var/tmp cargo clippy --all-targets --all-features -- -D warnings` +* `TMPDIR=/var/tmp bash scripts/run-verification.sh` + +The unified verification script completed successfully, including Kani harness matrix execution; Verus checks were skipped automatically when `verus` was unavailable in the local environment. + +## Bottom line + +What must change is not “the engine needs fewer lines.” What must change is that the engine must absorb the repeated complexity once, in the core, macros, registry, codec system, and proof framework. 
What it must become instead is a mathematically crisp environment kernel with one canonical observation channel, engine-owned compact encodings, engine-owned replay/history, engine-owned proof scaffolds, automatic physics visualization, and optional thin render adapters. + +That is the design that is both more DRY and more provable: fewer handwritten surfaces, fewer duplicated obligations, fewer places for bugs to hide, and a much shorter path from “I know the math of Pong” to “I have a correct, replayable, renderable, verifiable game.” + +[1]: https://github.com/turtle261/gameengine "GitHub - turtle261/gameengine: A formally verified, deterministic, reversible game/simulation kernel designed as the reference environment layer for Infotheory. · GitHub" +[2]: https://model-checking.github.io/kani/?utm_source=chatgpt.com "Getting started - The Kani Rust Verifier" +[3]: https://model-checking.github.io/kani/rfc/rfcs/0009-function-contracts.html?utm_source=chatgpt.com "0009-function-contracts - Kani RFC Book" +[4]: https://verus-lang.github.io/verus/state_machines/?utm_source=chatgpt.com "Intro - Verus Transition Systems" + diff --git a/scripts/run-kani.sh b/scripts/run-kani.sh deleted file mode 100644 index e2bb987..0000000 --- a/scripts/run-kani.sh +++ /dev/null @@ -1,67 +0,0 @@ -#!/usr/bin/env bash -set -euo pipefail - -COMMON_HARNESSES=( - bit_words_round_trip - fixed_vec_push_preserves_prefix_order - compact_reward_round_trip - step_outcome_reward_lookup_defaults_to_zero - replay_trace_records_steps - rng_state_sanitization_is_total - seeded_stream_constructor_handles_reference_cases - next_u64_is_repeatable_for_reference_states - rewind_restores_prior_state -) - -BUILTIN_GAME_HARNESSES=( - concrete_seed_shuffle_is_a_full_permutation - player_observation_hides_opponent_hand_before_terminal - initial_observation_contracts_hold_for_concrete_seed - stand_action_replays_deterministically_for_seed_17 - hand_evaluation_matches_busted_flag - 
legal_actions_are_exactly_empty_cells - invalid_move_never_mutates_board -) - -PHYSICS_HARNESSES=( - clamping_keeps_body_in_bounds - oracle_view_matches_world_storage - wall_clamps_hold_for_all_edge_positions - jump_reward_is_bounded - initial_observation_and_world_contracts_hold - berry_mask_tracks_trigger_activation -) - -run_harnesses() { - local label="$1" - shift - local -a extra_args=("$@") - - for harness in "${COMMON_HARNESSES[@]}"; do - echo "Running Kani 0.67.0 ${label} harness: ${harness}" - cargo kani --lib "${extra_args[@]}" --harness "${harness}" - done -} - -run_builtin_harnesses() { - local label="$1" - shift - local -a extra_args=("$@") - - for harness in "${BUILTIN_GAME_HARNESSES[@]}"; do - echo "Running Kani 0.67.0 ${label} harness: ${harness}" - cargo kani --lib "${extra_args[@]}" --harness "${harness}" - done -} - -echo "Running Kani 0.67.0 on the default headless kernel" -run_harnesses "default" - -echo "Running Kani 0.67.0 on builtin non-physics games" -run_builtin_harnesses "builtin-games" --features builtin-games - -echo "Running Kani 0.67.0 on builtin physics games" -for harness in "${PHYSICS_HARNESSES[@]}"; do - echo "Running Kani 0.67.0 builtin-games+physics harness: ${harness}" - cargo kani --lib --features "builtin-games physics" --harness "${harness}" -done diff --git a/scripts/run-perf.sh b/scripts/run-perf.sh new file mode 100755 index 0000000..efb2cbe --- /dev/null +++ b/scripts/run-perf.sh @@ -0,0 +1,34 @@ +#!/usr/bin/env bash +set -euo pipefail + +ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" +cd "$ROOT_DIR" + +GAME="${1:-platformer}" +ITERATIONS="${2:-2000000}" +FEATURES="${FEATURES:-builtin physics}" +DATA_FILE="${PERF_DATA_FILE:-/var/tmp/gameengine-perf.data}" + +export TMPDIR="${TMPDIR:-/var/tmp}" + +if ! 
command -v perf >/dev/null 2>&1; then + echo "perf is not installed" + exit 1 +fi + +echo "[perf] Building perf probe example" +cargo build --release --example perf_probe --features "$FEATURES" + +BIN="target/release/examples/perf_probe" +if [[ ! -x "$BIN" ]]; then + echo "missing perf probe binary: $BIN" + exit 1 +fi + +echo "[perf] perf stat ($GAME, iterations=$ITERATIONS)" +perf stat -e cycles,instructions,branches,branch-misses,cache-references,cache-misses \ + "$BIN" "$GAME" "$ITERATIONS" + +echo "[perf] perf record/report ($GAME, iterations=$ITERATIONS)" +perf record -g -o "$DATA_FILE" "$BIN" "$GAME" "$ITERATIONS" +perf report --stdio -i "$DATA_FILE" --sort=dso,symbol | head -n 120 diff --git a/scripts/run-verification.sh b/scripts/run-verification.sh new file mode 100755 index 0000000..3df7d7c --- /dev/null +++ b/scripts/run-verification.sh @@ -0,0 +1,112 @@ +#!/usr/bin/env bash +set -euo pipefail + +ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" +cd "$ROOT_DIR" + +export TMPDIR="${TMPDIR:-/var/tmp}" +MODE="${VERIFICATION_MODE:-full}" + +COMMON_HARNESSES=( + bit_words_round_trip + fixed_vec_push_preserves_prefix_order + compact_reward_round_trip + step_outcome_reward_lookup_defaults_to_zero + replay_trace_records_steps + rng_state_sanitization_is_total + seeded_stream_constructor_handles_reference_cases + next_u64_is_repeatable_for_reference_states + rewind_restores_prior_state +) + +BUILTIN_GAME_HARNESSES=( + concrete_seed_shuffle_is_a_full_permutation + player_observation_hides_opponent_hand_before_terminal + initial_observation_contracts_hold_for_concrete_seed + stand_action_replays_deterministically_for_seed_17 + hand_evaluation_matches_busted_flag + legal_actions_are_exactly_empty_cells + invalid_move_never_mutates_board +) + +PHYSICS_HARNESSES=( + clamping_keeps_body_in_bounds + oracle_view_matches_world_storage + wall_clamps_hold_for_all_edge_positions + jump_reward_is_bounded + initial_observation_and_world_contracts_hold + 
berry_mask_tracks_trigger_activation +) + +run_kani_harnesses() { + local label="$1" + shift + local -a extra_args=("$@") + + for harness in "${COMMON_HARNESSES[@]}"; do + echo "[kani] Running ${label} harness: ${harness}" + cargo kani --lib "${extra_args[@]}" --harness "${harness}" + done +} + +run_builtin_kani_harnesses() { + local label="$1" + shift + local -a extra_args=("$@") + + for harness in "${BUILTIN_GAME_HARNESSES[@]}"; do + echo "[kani] Running ${label} harness: ${harness}" + cargo kani --lib "${extra_args[@]}" --harness "${harness}" + done +} + +run_kani_matrix() { + if ! command -v cargo-kani >/dev/null 2>&1; then + echo "[kani] cargo-kani not found; skipping Kani matrix" + return 0 + fi + + echo "[kani] default headless kernel" + run_kani_harnesses "default" + + echo "[kani] builtin reference games" + run_builtin_kani_harnesses "builtin" --features builtin + + echo "[kani] builtin + physics games" + for harness in "${PHYSICS_HARNESSES[@]}"; do + echo "[kani] Running builtin+physics harness: ${harness}" + cargo kani --lib --features "builtin physics" --harness "${harness}" + done +} + +if [[ "$MODE" != "kani-only" ]]; then + echo "[verify] Running test and check matrix" + cargo test + cargo test --features builtin + cargo test --features "builtin physics" + cargo test --features parallel + cargo test --features "render builtin physics" + cargo check --features render + cargo check --features "render builtin" + cargo check --bin gameengine --features cli + cargo check --bin gameengine --features "cli physics render" + cargo check --target wasm32-unknown-unknown + cargo check --target wasm32-unknown-unknown --features physics + cargo check --target wasm32-unknown-unknown --features "render builtin physics" + cargo clippy --all-targets --all-features -- -D warnings + cargo bench --no-run --features "builtin physics" +fi + +run_kani_matrix + +if [[ "${RUN_VERUS:-1}" == "1" ]]; then + echo "[verus] Running Verus model checks" + bash scripts/run-verus.sh 
+fi + +if [[ "${RUN_PERF:-0}" == "1" ]]; then + echo "[perf] Running perf profile script" + bash scripts/run-perf.sh +fi + +echo "[verify] Completed successfully" diff --git a/scripts/run-verus.sh b/scripts/run-verus.sh new file mode 100755 index 0000000..3428bdb --- /dev/null +++ b/scripts/run-verus.sh @@ -0,0 +1,60 @@ +#!/usr/bin/env bash +set -euo pipefail + +ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" +cd "$ROOT_DIR" + +REQUIRE_VERUS="${REQUIRE_VERUS:-0}" + +resolve_verus_bin() { + local requested="${VERUS_BIN:-}" + local -a candidates=() + + if [[ -n "$requested" ]]; then + candidates+=("$requested") + else + candidates+=("./verus_binary/verus" "./verus_binary" "verus") + fi + + local candidate + for candidate in "${candidates[@]}"; do + if [[ -d "$candidate" && -f "$candidate/verus" && -x "$candidate/verus" ]]; then # directory that holds a verus executable + echo "$candidate/verus" + return 0 + fi + if [[ -f "$candidate" && -x "$candidate" ]]; then # regular file only: -x alone is also true for searchable directories + echo "$candidate" + return 0 + fi + if command -v "$candidate" >/dev/null 2>&1; then + command -v "$candidate" + return 0 + fi + done + + return 1 +} + +if !
VERUS_BIN_PATH="$(resolve_verus_bin)"; then + if [[ "$REQUIRE_VERUS" == "1" ]]; then + echo "[verus] required but no Verus binary was found (checked VERUS_BIN, ./verus_binary/verus, ./verus_binary, PATH)" >&2 + exit 1 + fi + echo "[verus] no Verus binary found; skipping Verus model checks" + exit 0 +fi + +mapfile -t verus_models < <(find proofs/verus -type f -name '*.rs' | sort) + +if [[ ${#verus_models[@]} -eq 0 ]]; then + echo "[verus] no Verus model files found under proofs/verus" + exit 0 +fi + +echo "[verus] Using Verus binary: $VERUS_BIN_PATH" +for model in "${verus_models[@]}"; do + echo "[verus] Checking $model" + "$VERUS_BIN_PATH" "$model" --crate-type=lib +done + +echo "[verus] Completed successfully" diff --git a/src/bin/gameengine.rs b/src/bin/gameengine.rs new file mode 100644 index 0000000..cff967e --- /dev/null +++ b/src/bin/gameengine.rs @@ -0,0 +1,6 @@ +fn main() { + if let Err(error) = gameengine::cli::run_from_env() { + eprintln!("{error}"); + std::process::exit(1); + } +} diff --git a/src/buffer.rs b/src/buffer.rs index cde65f4..46d228b 100644 --- a/src/buffer.rs +++ b/src/buffer.rs @@ -1,28 +1,41 @@ +//! Fixed-capacity buffer utilities used to avoid heap allocations in core loops. + use core::fmt; use core::hash::{Hash, Hasher}; -use core::mem::MaybeUninit; use core::ops::{Deref, DerefMut}; +/// Error returned when attempting to push past fixed capacity. #[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)] pub struct CapacityError { + /// Maximum capacity of the destination buffer. pub capacity: usize, } +/// Minimal fixed-capacity buffer interface. pub trait Buffer { + /// Item stored by this buffer. type Item; + /// Maximum number of items this buffer can hold. const CAPACITY: usize; + /// Removes all items from the buffer. fn clear(&mut self); + /// Returns the current number of items. fn len(&self) -> usize; + /// Appends one item when capacity permits. 
fn push(&mut self, item: Self::Item) -> Result<(), CapacityError>; + /// Returns the populated immutable slice. fn as_slice(&self) -> &[Self::Item]; + /// Returns the populated mutable slice. fn as_mut_slice(&mut self) -> &mut [Self::Item]; + /// Returns whether the buffer has zero items. fn is_empty(&self) -> bool { self.len() == 0 } + /// Extends the buffer by cloning all items from `items`. fn extend_from_slice(&mut self, items: &[Self::Item]) -> Result<(), CapacityError> where Self::Item: Clone, @@ -34,6 +47,7 @@ pub trait Buffer { } } +/// Array-backed fixed-capacity vector. #[derive(Clone)] pub struct FixedVec { data: [T; N], @@ -41,65 +55,68 @@ pub struct FixedVec { } pub(crate) fn default_array() -> [T; N] { - let mut data = [const { MaybeUninit::::uninit() }; N]; - let mut index = 0usize; - while index < N { - data[index].write(T::default()); - index += 1; - } - // SAFETY: - // Every slot in `data` is initialized exactly once in the loop above, - // and `MaybeUninit` has the same layout as `T`. - unsafe { (&data as *const [MaybeUninit; N] as *const [T; N]).read() } + core::array::from_fn(|_| T::default()) } impl FixedVec { + /// Clears all elements. pub fn clear(&mut self) { self.len = 0; } + /// Returns current length. pub const fn len(&self) -> usize { self.len } + /// Returns compile-time capacity. pub const fn capacity(&self) -> usize { N } + /// Returns `true` when `len == 0`. pub const fn is_empty(&self) -> bool { self.len == 0 } + /// Returns the populated immutable slice. pub fn as_slice(&self) -> &[T] { &self.data[..self.len] } + /// Returns the populated mutable slice. pub fn as_mut_slice(&mut self) -> &mut [T] { &mut self.data[..self.len] } + /// Returns the first element when present. pub fn first(&self) -> Option<&T> { self.as_slice().first() } + /// Returns an immutable element reference by index. pub fn get(&self, index: usize) -> Option<&T> { self.as_slice().get(index) } + /// Returns a mutable element reference by index. 
pub fn get_mut(&mut self, index: usize) -> Option<&mut T> { self.as_mut_slice().get_mut(index) } + /// Iterates over populated elements. pub fn iter(&self) -> core::slice::Iter<'_, T> { self.as_slice().iter() } } impl FixedVec { + /// Creates an empty fixed-capacity vector. pub fn new() -> Self { Self::default() } + /// Pushes one element when capacity permits. pub fn push(&mut self, item: T) -> Result<(), CapacityError> { if self.len == N { return Err(CapacityError { capacity: N }); @@ -181,25 +198,30 @@ impl Hash for FixedVec { } impl FixedVec { + /// Returns whether `value` exists in the populated slice. pub fn contains(&self, value: &T) -> bool { self.as_slice().contains(value) } } +/// Fixed-size bitset backed by `N` machine words. #[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] pub struct BitWords { words: [u64; N], } impl BitWords { + /// Returns immutable access to backing words. pub const fn words(&self) -> &[u64; N] { &self.words } + /// Clears all bits. pub fn clear_all(&mut self) { self.words.fill(0); } + /// Sets `bit` when it falls within capacity. pub fn set_bit(&mut self, bit: usize) { let word = bit / 64; let offset = bit % 64; @@ -208,6 +230,7 @@ impl BitWords { } } + /// Clears `bit` when it falls within capacity. pub fn clear_bit(&mut self, bit: usize) { let word = bit / 64; let offset = bit % 64; @@ -216,6 +239,7 @@ impl BitWords { } } + /// Tests whether `bit` is set. pub fn test_bit(&self, bit: usize) -> bool { let word = bit / 64; let offset = bit % 64; diff --git a/src/games/blackjack.rs b/src/builtin/blackjack/mod.rs similarity index 53% rename from src/games/blackjack.rs rename to src/builtin/blackjack/mod.rs index ef45635..c0aca7c 100644 --- a/src/games/blackjack.rs +++ b/src/builtin/blackjack/mod.rs @@ -1,42 +1,63 @@ +//! Builtin deterministic blackjack environment and compact observation codecs. 
+ use crate::buffer::FixedVec; -use crate::compact::{CompactGame, CompactSpec}; +use crate::compact::{CompactSpec, decode_enum_action, encode_enum_action}; +use crate::core::cards::{ + BlackjackValue, evaluate_blackjack_hand, fill_standard_deck_52, + is_standard_deck_52_permutation, pack_cards_nibbles, +}; +use crate::core::single_player; use crate::game::Game; use crate::rng::DeterministicRng; use crate::types::{PlayerAction, PlayerId, PlayerReward, Seed, StepOutcome, Termination}; +use crate::verification::reward_and_terminal_postcondition; const MAX_HAND_CARDS: usize = 12; const DECK_SIZE: usize = 52; +const BLACKJACK_ACTION_ORDER: [BlackjackAction; 2] = [BlackjackAction::Hit, BlackjackAction::Stand]; +/// Player action in the blackjack round. #[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)] pub enum BlackjackAction { + /// Draw one additional card. #[default] Hit, + /// End the player turn and let the opponent resolve. Stand, } +/// High-level stage of a blackjack round. #[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)] pub enum BlackjackPhase { + /// Waiting for the player-controlled action. #[default] PlayerTurn, + /// Opponent policy is resolving draws. OpponentTurn, + /// Round is completed. Terminal, } -#[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)] -pub struct HandValue { - pub total: u8, - pub soft: bool, - pub busted: bool, -} +/// Evaluated value of a blackjack hand. +pub type HandValue = BlackjackValue; +/// Full deterministic blackjack state including shuffled deck. #[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] pub struct BlackjackState { + /// Shuffled full deck represented as rank codes 1..=13. pub deck: [u8; DECK_SIZE], + /// Index of the next card to draw from `deck`. pub next_card: u8, + /// Player-held cards. pub player_cards: [u8; MAX_HAND_CARDS], + /// Number of valid entries in `player_cards`. pub player_len: u8, + /// Opponent-held cards. 
pub opponent_cards: [u8; MAX_HAND_CARDS], + /// Number of valid entries in `opponent_cards`. pub opponent_len: u8, + /// Current game phase. pub phase: BlackjackPhase, + /// Winner id if terminal with a winner. pub winner: Option, } @@ -55,81 +76,48 @@ impl Default for BlackjackState { } } +/// Canonical blackjack observation shared across viewpoints. #[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)] pub struct BlackjackObservation { + /// Current game phase. pub phase: BlackjackPhase, + /// True if the round has completed. pub terminal: bool, + /// Winner id if terminal with a winner. pub winner: Option, + /// Player cards visible to the observer. pub player_cards: [u8; MAX_HAND_CARDS], + /// Number of valid entries in `player_cards`. pub player_len: u8, + /// Evaluated player hand value. pub player_value: HandValue, + /// Opponent cards visible to the observer. pub opponent_cards: [u8; MAX_HAND_CARDS], + /// Number of valid entries in `opponent_cards` that are visible. pub opponent_visible_len: u8, + /// Total opponent card count, including hidden cards. pub opponent_card_count: u8, -} - -#[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)] -pub struct BlackjackSpectatorObservation { - pub phase: BlackjackPhase, - pub terminal: bool, - pub winner: Option, - pub player_cards: [u8; MAX_HAND_CARDS], - pub player_len: u8, - pub player_value: HandValue, - pub opponent_cards: [u8; MAX_HAND_CARDS], - pub opponent_len: u8, + /// Evaluated opponent hand value when available. pub opponent_value: HandValue, } -pub type BlackjackWorldView = BlackjackSpectatorObservation; +/// Full world/debug view type. +pub type BlackjackWorldView = BlackjackObservation; +/// Builtin blackjack environment. 
#[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)] pub struct Blackjack; impl Blackjack { fn evaluate_hand(cards: &[u8], len: u8) -> HandValue { - let mut total = 0u8; - let mut aces = 0u8; - let limit = len as usize; - let mut index = 0usize; + let mut hand = [0u8; MAX_HAND_CARDS]; let max_len = MAX_HAND_CARDS.min(cards.len()); - while index < max_len { - if index >= limit { - break; - } - let card = cards[index]; - match card { - 1 => { - total = total.saturating_add(11); - aces += 1; - } - 11..=13 => total = total.saturating_add(10), - value => total = total.saturating_add(value), - } - index += 1; - } - for _ in 0..MAX_HAND_CARDS { - if total <= 21 || aces == 0 { - break; - } - total -= 10; - aces -= 1; - } - HandValue { - total, - soft: aces > 0, - busted: total > 21, - } + hand[..max_len].copy_from_slice(&cards[..max_len]); + evaluate_blackjack_hand(&hand, len) } fn fill_deck(deck: &mut [u8; DECK_SIZE]) { - let mut index = 0usize; - for _ in 0..4 { - for rank in 1..=13 { - deck[index] = rank; - index += 1; - } - } + fill_standard_deck_52(deck); } fn draw_card(state: &mut BlackjackState) -> u8 { @@ -194,17 +182,41 @@ impl Blackjack { } fn pack_cards(cards: &[u8; MAX_HAND_CARDS], len: u8) -> u64 { - let mut packed = 0u64; - let limit = len as usize; - let mut index = 0usize; - while index < MAX_HAND_CARDS { - if index >= limit { - break; - } - packed |= u64::from(cards[index]) << (index * 4); - index += 1; + pack_cards_nibbles(cards, len) + } + + fn winner_code(winner: Option) -> u64 { + match winner { + None => 0, + Some(0) => 1, + Some(_) => 2, } - packed + } + + fn phase_code(phase: BlackjackPhase) -> u64 { + match phase { + BlackjackPhase::PlayerTurn => 0, + BlackjackPhase::OpponentTurn => 1, + BlackjackPhase::Terminal => 2, + } + } + + fn encode_observation_with_header( + observation: &BlackjackObservation, + header: u64, + opponent_len: u8, + out: &mut FixedVec, + ) { + out.clear(); + out.push(header).unwrap(); + out.push(Self::pack_cards( + 
&observation.player_cards, + observation.player_len, + )) + .unwrap(); + out.push(Self::pack_cards(&observation.opponent_cards, opponent_len)) + .unwrap(); + out.push(0).unwrap(); } } @@ -212,7 +224,7 @@ impl Game for Blackjack { type State = BlackjackState; type Action = BlackjackAction; type PlayerObservation = BlackjackObservation; - type SpectatorObservation = BlackjackSpectatorObservation; + type SpectatorObservation = BlackjackObservation; type WorldView = BlackjackWorldView; type PlayerBuf = FixedVec; type ActionBuf = FixedVec; @@ -261,15 +273,12 @@ impl Game for Blackjack { } fn players_to_act(&self, state: &Self::State, out: &mut Self::PlayerBuf) { - out.clear(); - if !self.is_terminal(state) { - out.push(0).unwrap(); - } + single_player::write_players_to_act(out, self.is_terminal(state)); } fn legal_actions(&self, state: &Self::State, player: PlayerId, out: &mut Self::ActionBuf) { out.clear(); - if player != 0 || self.is_terminal(state) { + if !single_player::can_act(player, self.is_terminal(state)) { return; } let value = Self::player_value(state); @@ -298,11 +307,12 @@ impl Game for Blackjack { opponent_cards, opponent_visible_len, opponent_card_count: state.opponent_len, + opponent_value: HandValue::default(), } } fn observe_spectator(&self, state: &Self::State) -> Self::SpectatorObservation { - BlackjackSpectatorObservation { + BlackjackObservation { phase: state.phase, terminal: self.is_terminal(state), winner: state.winner, @@ -310,7 +320,8 @@ impl Game for Blackjack { player_len: state.player_len, player_value: Self::player_value(state), opponent_cards: state.opponent_cards, - opponent_len: state.opponent_len, + opponent_visible_len: state.opponent_len, + opponent_card_count: state.opponent_len, opponent_value: Self::opponent_value(state), } } @@ -326,17 +337,7 @@ impl Game for Blackjack { rng: &mut DeterministicRng, out: &mut StepOutcome, ) { - let actions = joint_actions.as_slice(); - let mut action = None; - let mut index = 0usize; - while index 
< actions.len() { - let candidate = &actions[index]; - if candidate.player == 0 { - action = Some(candidate.action); - break; - } - index += 1; - } + let action = single_player::first_action(joint_actions.as_slice()); let reward = if self.is_terminal(state) { out.termination = Termination::Terminal { @@ -392,9 +393,7 @@ impl Game for Blackjack { -1 }; - out.rewards - .push(PlayerReward { player: 0, reward }) - .unwrap(); + single_player::push_reward(&mut out.rewards, reward); if !self.is_terminal(state) { out.termination = Termination::Ongoing; } @@ -406,24 +405,10 @@ impl Game for Blackjack { || usize::from(state.player_len) > MAX_HAND_CARDS || usize::from(state.opponent_len) > MAX_HAND_CARDS || usize::from(state.next_card) > DECK_SIZE + || !is_standard_deck_52_permutation(&state.deck) { return false; } - let mut counts = [0u8; 14]; - for index in 0..DECK_SIZE { - let card = state.deck[index]; - if !(1..=13).contains(&card) { - return false; - } - counts[card as usize] += 1; - } - let mut rank = 1usize; - while rank <= 13 { - if counts[rank] != 4 { - return false; - } - rank += 1; - } if self.is_terminal(state) { let mut resolved = *state; Self::resolve_terminal(&mut resolved); @@ -462,12 +447,9 @@ impl Game for Blackjack { post: &Self::State, outcome: &StepOutcome, ) -> bool { - matches!(outcome.reward_for(0), -1..=1) - && (post.phase == BlackjackPhase::Terminal) == outcome.is_terminal() + reward_and_terminal_postcondition(outcome.reward_for(0), -1, 1, post.phase == BlackjackPhase::Terminal, outcome.is_terminal()) } -} -impl CompactGame for Blackjack { fn compact_spec(&self) -> CompactSpec { CompactSpec { action_count: 2, @@ -481,18 +463,11 @@ impl CompactGame for Blackjack { } fn encode_action(&self, action: &Self::Action) -> u64 { - match action { - BlackjackAction::Hit => 0, - BlackjackAction::Stand => 1, - } + encode_enum_action(*action, &BLACKJACK_ACTION_ORDER) } fn decode_action(&self, encoded: u64) -> Option { - match encoded { - 0 => 
Some(BlackjackAction::Hit), - 1 => Some(BlackjackAction::Stand), - _ => None, - } + decode_enum_action(encoded, &BLACKJACK_ACTION_ORDER) } fn encode_player_observation( @@ -500,37 +475,20 @@ impl CompactGame for Blackjack { observation: &Self::PlayerObservation, out: &mut Self::WordBuf, ) { - out.clear(); - let winner_code = match observation.winner { - None => 0, - Some(0) => 1, - Some(_) => 2, - }; - let phase = match observation.phase { - BlackjackPhase::PlayerTurn => 0u64, - BlackjackPhase::OpponentTurn => 1, - BlackjackPhase::Terminal => 2, - }; - let header = phase + let header = Self::phase_code(observation.phase) | ((observation.terminal as u64) << 4) | ((u64::from(observation.player_len)) << 8) | ((u64::from(observation.player_value.total)) << 12) | ((observation.player_value.soft as u64) << 20) | ((u64::from(observation.opponent_card_count)) << 24) | ((u64::from(observation.opponent_visible_len)) << 28) - | ((winner_code as u64) << 32); - out.push(header).unwrap(); - out.push(Self::pack_cards( - &observation.player_cards, - observation.player_len, - )) - .unwrap(); - out.push(Self::pack_cards( - &observation.opponent_cards, + | (Self::winner_code(observation.winner) << 32); + Self::encode_observation_with_header( + observation, + header, observation.opponent_visible_len, - )) - .unwrap(); - out.push(0).unwrap(); + out, + ); } fn encode_spectator_observation( @@ -538,254 +496,26 @@ impl CompactGame for Blackjack { observation: &Self::SpectatorObservation, out: &mut Self::WordBuf, ) { - out.clear(); - let winner_code = match observation.winner { - None => 0, - Some(0) => 1, - Some(_) => 2, - }; - let phase = match observation.phase { - BlackjackPhase::PlayerTurn => 0u64, - BlackjackPhase::OpponentTurn => 1, - BlackjackPhase::Terminal => 2, - }; - let header = phase + let header = Self::phase_code(observation.phase) | ((observation.terminal as u64) << 4) | ((u64::from(observation.player_len)) << 8) | ((u64::from(observation.player_value.total)) << 12) | 
((observation.player_value.soft as u64) << 20) - | ((u64::from(observation.opponent_len)) << 24) + | ((u64::from(observation.opponent_card_count)) << 24) | ((u64::from(observation.opponent_value.total)) << 28) - | ((winner_code as u64) << 36); - out.push(header).unwrap(); - out.push(Self::pack_cards( - &observation.player_cards, - observation.player_len, - )) - .unwrap(); - out.push(Self::pack_cards( - &observation.opponent_cards, - observation.opponent_len, - )) - .unwrap(); - out.push(0).unwrap(); - } -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::policy::{FirstLegalPolicy, RandomPolicy}; - use crate::session::Session; - use crate::verification::{ - assert_compact_roundtrip, assert_observation_contracts, assert_transition_contracts, - }; - - fn state_from_hands(player: &[u8], opponent: &[u8]) -> BlackjackState { - let mut state = BlackjackState { - deck: [0; DECK_SIZE], - next_card: 0, - player_cards: [0; MAX_HAND_CARDS], - player_len: 0, - opponent_cards: [0; MAX_HAND_CARDS], - opponent_len: 0, - phase: BlackjackPhase::PlayerTurn, - winner: None, - }; - Blackjack::fill_deck(&mut state.deck); - for &card in player { - Blackjack::push_player_card(&mut state, card); - } - for &card in opponent { - Blackjack::push_opponent_card(&mut state, card); - } - state - } - - #[test] - fn hand_value_handles_soft_aces() { - assert_eq!( - Blackjack::evaluate_hand(&[1, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 2), - HandValue { - total: 21, - soft: true, - busted: false, - } - ); - assert_eq!( - Blackjack::evaluate_hand(&[1, 1, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0], 3), - HandValue { - total: 21, - soft: true, - busted: false, - } - ); - assert_eq!( - Blackjack::evaluate_hand(&[1, 1, 10, 10, 0, 0, 0, 0, 0, 0, 0, 0], 4), - HandValue { - total: 22, - soft: false, - busted: true, - } + | (Self::winner_code(observation.winner) << 36); + Self::encode_observation_with_header( + observation, + header, + observation.opponent_visible_len, + out, ); } - #[test] - fn 
shuffled_deck_is_a_full_permutation() { - let state = Blackjack.init(11); - let mut counts = [0u8; 14]; - for card in state.deck { - counts[card as usize] += 1; - } - let mut rank = 1usize; - while rank <= 13 { - assert_eq!(counts[rank], 4, "rank {rank} should appear four times"); - rank += 1; - } - assert_observation_contracts(&Blackjack, &state); - } - - #[test] - fn showdown_matrix_is_correct() { - let mut player_win = state_from_hands(&[10, 10], &[9, 9]); - assert_eq!(Blackjack::resolve_terminal(&mut player_win), 1); - assert_eq!(player_win.winner, Some(0)); - - let mut opponent_win = state_from_hands(&[10, 8], &[10, 9]); - assert_eq!(Blackjack::resolve_terminal(&mut opponent_win), -1); - assert_eq!(opponent_win.winner, Some(1)); - - let mut push = state_from_hands(&[10, 7], &[9, 8]); - assert_eq!(Blackjack::resolve_terminal(&mut push), 0); - assert_eq!(push.winner, None); - } - - #[test] - fn seeded_round_trip_is_reproducible() { - let mut left = Session::new(Blackjack, 11); - let mut right = Session::new(Blackjack, 11); - let action = [PlayerAction { - player: 0, - action: BlackjackAction::Hit, - }]; - let left_outcome = left.step(&action).clone(); - let right_outcome = right.step(&action).clone(); - assert_eq!(left.state(), right.state()); - assert_eq!(left_outcome, right_outcome); - } - - #[test] - fn verification_helpers_hold_for_player_hit() { - let game = Blackjack; - let state = game.init(11); - let mut actions = FixedVec::, 1>::default(); - actions - .push(PlayerAction { - player: 0, - action: BlackjackAction::Hit, - }) - .unwrap(); - assert_transition_contracts(&game, &state, &actions, 11); - assert_compact_roundtrip(&game, &BlackjackAction::Hit); - } - - #[test] - fn seeded_sessions_preserve_invariants_across_policies() { - for seed in 1..=256 { - let mut first = FirstLegalPolicy; - let mut random = RandomPolicy; - - let mut first_session = Session::new(Blackjack, seed); - assert!(Blackjack.state_invariant(first_session.state())); - let mut 
first_policies: [&mut dyn crate::policy::Policy; 1] = [&mut first]; - while !first_session.is_terminal() && first_session.current_tick() < 16 { - first_session.step_with_policies(&mut first_policies); - } - assert!(Blackjack.state_invariant(first_session.state())); - - let mut random_session = Session::new(Blackjack, seed); - assert!(Blackjack.state_invariant(random_session.state())); - let mut random_policies: [&mut dyn crate::policy::Policy; 1] = [&mut random]; - while !random_session.is_terminal() && random_session.current_tick() < 16 { - random_session.step_with_policies(&mut random_policies); - } - assert!(Blackjack.state_invariant(random_session.state())); - } - } } -#[cfg(kani)] -mod proofs { - use super::{Blackjack, BlackjackAction, BlackjackPhase, HandValue, MAX_HAND_CARDS}; - use crate::buffer::FixedVec; - use crate::game::Game; - use crate::types::PlayerAction; - - #[kani::proof] - #[kani::unwind(64)] - fn concrete_seed_shuffle_is_a_full_permutation() { - let state = Blackjack.init(11); - let mut counts = [0u8; 14]; - for card in state.deck { - counts[card as usize] += 1; - } - let mut rank = 1usize; - while rank <= 13 { - assert_eq!(counts[rank], 4); - rank += 1; - } - } - - #[kani::proof] - #[kani::unwind(64)] - fn player_observation_hides_opponent_hand_before_terminal() { - let state = Blackjack.init(11); - let observation = Blackjack.observe_player(&state, 0); - if state.phase != BlackjackPhase::Terminal { - assert_eq!(observation.opponent_visible_len, 0); - } - } +#[cfg(test)] +mod tests; - #[kani::proof] - #[kani::unwind(64)] - fn initial_observation_contracts_hold_for_concrete_seed() { - let game = Blackjack; - let state = game.init(11); - crate::verification::assert_observation_contracts(&game, &state); - } - - #[kani::proof] - #[kani::unwind(64)] - fn stand_action_replays_deterministically_for_seed_17() { - let state = Blackjack.init(17); - let mut actions = FixedVec::, 1>::default(); - actions - .push(PlayerAction { - player: 0, - action: 
BlackjackAction::Stand, - }) - .unwrap(); - crate::verification::assert_transition_contracts(&Blackjack, &state, &actions, 17); - } - - #[kani::proof] - #[kani::unwind(32)] - fn hand_evaluation_matches_busted_flag() { - let len: u8 = kani::any(); - kani::assume(len <= MAX_HAND_CARDS as u8); - let mut cards = [1u8; MAX_HAND_CARDS]; - for card in &mut cards { - *card = kani::any(); - kani::assume((1..=13).contains(card)); - } - let value = Blackjack::evaluate_hand(&cards, len); - assert_eq!( - value, - HandValue { - total: value.total, - soft: value.soft, - busted: value.total > 21, - } - ); - } -} +#[cfg(kani)] +mod proofs; diff --git a/src/builtin/blackjack/proofs.rs b/src/builtin/blackjack/proofs.rs new file mode 100644 index 0000000..bbfd396 --- /dev/null +++ b/src/builtin/blackjack/proofs.rs @@ -0,0 +1,72 @@ +use super::{Blackjack, BlackjackAction, BlackjackPhase, HandValue, MAX_HAND_CARDS}; +use crate::buffer::FixedVec; +use crate::game::Game; +use crate::types::PlayerAction; + +#[kani::proof] +#[kani::unwind(64)] +fn concrete_seed_shuffle_is_a_full_permutation() { + let state = Blackjack.init(11); + let mut counts = [0u8; 14]; + for card in state.deck { + counts[card as usize] += 1; + } + let mut rank = 1usize; + while rank <= 13 { + assert_eq!(counts[rank], 4); + rank += 1; + } +} + +#[kani::proof] +#[kani::unwind(64)] +fn player_observation_hides_opponent_hand_before_terminal() { + let state = Blackjack.init(11); + let observation = Blackjack.observe_player(&state, 0); + if state.phase != BlackjackPhase::Terminal { + assert_eq!(observation.opponent_visible_len, 0); + } +} + +#[kani::proof] +#[kani::unwind(64)] +fn initial_observation_contracts_hold_for_concrete_seed() { + let game = Blackjack; + let state = game.init(11); + crate::verification::assert_observation_contracts(&game, &state); +} + +#[kani::proof] +#[kani::unwind(64)] +fn stand_action_replays_deterministically_for_seed_17() { + let state = Blackjack.init(17); + let mut actions = FixedVec::, 
1>::default(); + actions + .push(PlayerAction { + player: 0, + action: BlackjackAction::Stand, + }) + .unwrap(); + crate::verification::assert_transition_contracts(&Blackjack, &state, &actions, 17); +} + +#[kani::proof] +#[kani::unwind(32)] +fn hand_evaluation_matches_busted_flag() { + let len: u8 = kani::any(); + kani::assume(len <= MAX_HAND_CARDS as u8); + let mut cards = [1u8; MAX_HAND_CARDS]; + for card in &mut cards { + *card = kani::any(); + kani::assume((1..=13).contains(card)); + } + let value = Blackjack::evaluate_hand(&cards, len); + assert_eq!( + value, + HandValue { + total: value.total, + soft: value.soft, + busted: value.total > 21, + } + ); +} diff --git a/src/builtin/blackjack/tests.rs b/src/builtin/blackjack/tests.rs new file mode 100644 index 0000000..8c07182 --- /dev/null +++ b/src/builtin/blackjack/tests.rs @@ -0,0 +1,138 @@ +use super::*; +use crate::policy::{FirstLegalPolicy, RandomPolicy}; +use crate::session::Session; +use crate::verification::{ + assert_compact_roundtrip, assert_observation_contracts, assert_transition_contracts, +}; + +fn state_from_hands(player: &[u8], opponent: &[u8]) -> BlackjackState { + let mut state = BlackjackState { + deck: [0; DECK_SIZE], + next_card: 0, + player_cards: [0; MAX_HAND_CARDS], + player_len: 0, + opponent_cards: [0; MAX_HAND_CARDS], + opponent_len: 0, + phase: BlackjackPhase::PlayerTurn, + winner: None, + }; + Blackjack::fill_deck(&mut state.deck); + for &card in player { + Blackjack::push_player_card(&mut state, card); + } + for &card in opponent { + Blackjack::push_opponent_card(&mut state, card); + } + state +} + +#[test] +fn hand_value_handles_soft_aces() { + assert_eq!( + Blackjack::evaluate_hand(&[1, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 2), + HandValue { + total: 21, + soft: true, + busted: false, + } + ); + assert_eq!( + Blackjack::evaluate_hand(&[1, 1, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0], 3), + HandValue { + total: 21, + soft: true, + busted: false, + } + ); + assert_eq!( + 
Blackjack::evaluate_hand(&[1, 1, 10, 10, 0, 0, 0, 0, 0, 0, 0, 0], 4), + HandValue { + total: 22, + soft: false, + busted: true, + } + ); +} + +#[test] +fn shuffled_deck_is_a_full_permutation() { + let state = Blackjack.init(11); + let mut counts = [0u8; 14]; + for card in state.deck { + counts[card as usize] += 1; + } + let mut rank = 1usize; + while rank <= 13 { + assert_eq!(counts[rank], 4, "rank {rank} should appear four times"); + rank += 1; + } + assert_observation_contracts(&Blackjack, &state); +} + +#[test] +fn showdown_matrix_is_correct() { + let mut player_win = state_from_hands(&[10, 10], &[9, 9]); + assert_eq!(Blackjack::resolve_terminal(&mut player_win), 1); + assert_eq!(player_win.winner, Some(0)); + + let mut opponent_win = state_from_hands(&[10, 8], &[10, 9]); + assert_eq!(Blackjack::resolve_terminal(&mut opponent_win), -1); + assert_eq!(opponent_win.winner, Some(1)); + + let mut push = state_from_hands(&[10, 7], &[9, 8]); + assert_eq!(Blackjack::resolve_terminal(&mut push), 0); + assert_eq!(push.winner, None); +} + +#[test] +fn seeded_round_trip_is_reproducible() { + let mut left = Session::new(Blackjack, 11); + let mut right = Session::new(Blackjack, 11); + let action = [PlayerAction { + player: 0, + action: BlackjackAction::Hit, + }]; + let left_outcome = left.step(&action).clone(); + let right_outcome = right.step(&action).clone(); + assert_eq!(left.state(), right.state()); + assert_eq!(left_outcome, right_outcome); +} + +#[test] +fn verification_helpers_hold_for_player_hit() { + let game = Blackjack; + let state = game.init(11); + let mut actions = FixedVec::, 1>::default(); + actions + .push(PlayerAction { + player: 0, + action: BlackjackAction::Hit, + }) + .unwrap(); + assert_transition_contracts(&game, &state, &actions, 11); + assert_compact_roundtrip(&game, &BlackjackAction::Hit); +} + +#[test] +fn seeded_sessions_preserve_invariants_across_policies() { + for seed in 1..=256 { + let mut first = FirstLegalPolicy; + let mut random = 
RandomPolicy; + + let mut first_session = Session::new(Blackjack, seed); + assert!(Blackjack.state_invariant(first_session.state())); + let mut first_policies: [&mut dyn crate::policy::Policy; 1] = [&mut first]; + while !first_session.is_terminal() && first_session.current_tick() < 16 { + first_session.step_with_policies(&mut first_policies); + } + assert!(Blackjack.state_invariant(first_session.state())); + + let mut random_session = Session::new(Blackjack, seed); + assert!(Blackjack.state_invariant(random_session.state())); + let mut random_policies: [&mut dyn crate::policy::Policy; 1] = [&mut random]; + while !random_session.is_terminal() && random_session.current_tick() < 16 { + random_session.step_with_policies(&mut random_policies); + } + assert!(Blackjack.state_invariant(random_session.state())); + } +} diff --git a/src/builtin/mod.rs b/src/builtin/mod.rs new file mode 100644 index 0000000..fc3298c --- /dev/null +++ b/src/builtin/mod.rs @@ -0,0 +1,11 @@ +//! Builtin game implementations shipped with the engine. + +pub mod blackjack; +#[cfg(feature = "physics")] +pub mod platformer; +pub mod tictactoe; + +pub use blackjack::*; +#[cfg(feature = "physics")] +pub use platformer::*; +pub use tictactoe::*; diff --git a/src/games/platformer.rs b/src/builtin/platformer/mod.rs similarity index 54% rename from src/games/platformer.rs rename to src/builtin/platformer/mod.rs index 050eaeb..946facb 100644 --- a/src/games/platformer.rs +++ b/src/builtin/platformer/mod.rs @@ -1,10 +1,17 @@ -use crate::buffer::{FixedVec, default_array}; -use crate::compact::{CompactGame, CompactSpec}; +//! Builtin deterministic platformer environment backed by fixed-capacity physics. 
+ +use crate::buffer::{Buffer, FixedVec}; +use crate::compact::{CompactSpec, decode_enum_action, encode_enum_action}; +use crate::core::single_player; use crate::game::Game; use crate::math::{Aabb2, StrictF64, Vec2}; -use crate::physics::{BodyKind, Contact2d, PhysicsBody2d, PhysicsOracleView2d, PhysicsWorld2d}; +use crate::physics::{ + BodyKind, PhysicsBody2d, PhysicsWorld2d, + collect_actor_trigger_contacts, set_trigger_mask_deferred, +}; use crate::rng::DeterministicRng; use crate::types::{PlayerAction, PlayerId, PlayerReward, Reward, Seed, StepOutcome, Termination}; +use crate::verification::reward_and_terminal_postcondition; const BERRY_COUNT: usize = 6; const PLAYER_BODY_ID: u16 = 1; @@ -12,28 +19,55 @@ const FIRST_BERRY_BODY_ID: u16 = 10; const PLATFORMER_BODIES: usize = 1 + BERRY_COUNT; const PLATFORMER_CONTACTS: usize = PLATFORMER_BODIES * (PLATFORMER_BODIES - 1) / 2; const ALL_BERRIES_MASK: u8 = 0b00_111111; - +const PLATFORMER_ACTION_ORDER: [PlatformerAction; 4] = [ + PlatformerAction::Stay, + PlatformerAction::Left, + PlatformerAction::Right, + PlatformerAction::Jump, +]; + +mod world; +pub use world::{BerryView, PlatformerWorldView}; +use world::berry_views; + +/// Player action in the platformer world. #[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)] pub enum PlatformerAction { + /// Keep current horizontal position. #[default] Stay, + /// Move left by one tile if possible. Left, + /// Move right by one tile if possible. Right, + /// Jump upward by configured jump delta. Jump, } +/// Parameter set for the deterministic platformer environment. #[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] pub struct PlatformerConfig { + /// Arena width in tile units. pub width: u8, + /// Arena height in tile units. pub height: u8, + /// Player body width in tile units. pub player_width: u8, + /// Player body height in tile units. pub player_height: u8, + /// Vertical displacement applied by `Jump`. pub jump_delta: u8, + /// Shared berry y-coordinate. 
pub berry_y: u8, + /// Sorted berry x-coordinates. pub berry_xs: [u8; BERRY_COUNT], + /// Numerator for jump-sprain Bernoulli penalty. pub sprain_numerator: u64, + /// Denominator for jump-sprain Bernoulli penalty. pub sprain_denominator: u64, + /// Reward added when collecting one berry. pub berry_reward: Reward, + /// Bonus reward added when all berries are collected. pub finish_bonus: Reward, } @@ -56,6 +90,7 @@ impl Default for PlatformerConfig { } impl PlatformerConfig { + /// Returns the axis-aligned world bounds. pub fn arena_bounds(self) -> Aabb2 { Aabb2::new( Vec2::new(StrictF64::new(0.0), StrictF64::new(0.0)), @@ -66,6 +101,7 @@ impl PlatformerConfig { ) } + /// Returns player half-extents used by physics body creation. pub fn player_half_extents(self) -> Vec2 { Vec2::new( StrictF64::new(self.player_width as f64 / 2.0), @@ -73,6 +109,7 @@ impl PlatformerConfig { ) } + /// Converts tile coordinates to player-center world coordinates. pub fn player_center(self, x: u8, y: u8) -> Vec2 { Vec2::new( StrictF64::new(x as f64 + self.player_width as f64 / 2.0), @@ -80,6 +117,7 @@ impl PlatformerConfig { ) } + /// Returns center position for berry `index`. pub fn berry_center(self, index: usize) -> Vec2 { Vec2::new( StrictF64::new(self.berry_xs[index] as f64 + 0.5), @@ -87,6 +125,7 @@ impl PlatformerConfig { ) } + /// Validates internal consistency and geometric constraints. pub fn invariant(self) -> bool { if self.width == 0 || self.height == 0 @@ -116,72 +155,45 @@ impl PlatformerConfig { } } +/// Full platformer state. #[derive(Clone, Debug, Eq, Hash, PartialEq)] pub struct PlatformerState { + /// Physics simulation world containing player and berries. pub world: PhysicsWorld2d, + /// Bitset of still-active berries. pub remaining_berries: u8, } +/// Canonical player/spectator observation. #[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)] pub struct PlatformerObservation { + /// Player x tile coordinate. pub x: u8, + /// Player y tile coordinate. 
pub y: u8, + /// Bitset of still-active berries. pub remaining_berries: u8, + /// True when all berries have been collected. pub terminal: bool, + /// Winner id in terminal states. pub winner: Option, } -#[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)] -pub struct BerryView { - pub id: u16, - pub x: u8, - pub y: u8, - pub collected: bool, -} - -#[derive(Clone, Debug, Eq, Hash, PartialEq)] -pub struct PlatformerWorldView { - pub config: PlatformerConfig, - pub physics: PhysicsWorld2d, - pub berries: [BerryView; BERRY_COUNT], -} - impl Default for PlatformerState { fn default() -> Self { Platformer::default().init(0) } } -impl Default for PlatformerWorldView { - fn default() -> Self { - Platformer::default().world_view(&Platformer::default().init(0)) - } -} - -impl PhysicsOracleView2d for PlatformerWorldView { - fn bounds(&self) -> Aabb2 { - self.physics.bounds() - } - - fn tick(&self) -> u64 { - self.physics.tick() - } - - fn bodies(&self) -> &[PhysicsBody2d] { - self.physics.bodies() - } - - fn contacts(&self) -> &[Contact2d] { - self.physics.contacts() - } -} - +/// Builtin deterministic platformer environment. #[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)] pub struct Platformer { + /// Environment configuration. pub config: PlatformerConfig, } impl Platformer { + /// Creates a platformer game with validated configuration. 
pub fn new(config: PlatformerConfig) -> Self { assert!(config.invariant(), "invalid platformer config"); Self { config } @@ -209,28 +221,28 @@ impl Platformer { } fn sync_berries(&self, state: &mut PlatformerState) { - for index in 0..BERRY_COUNT { - let berry_id = FIRST_BERRY_BODY_ID + index as u16; - state - .world - .set_body_active_deferred(berry_id, state.remaining_berries & (1u8 << index) != 0); - } + set_trigger_mask_deferred( + &mut state.world, + FIRST_BERRY_BODY_ID, + BERRY_COUNT, + u64::from(state.remaining_berries), + ); } fn collect_berries_from_contacts(&self, state: &mut PlatformerState) -> Reward { - let mut reward = 0; - for index in 0..BERRY_COUNT { - let berry_bit = 1u8 << index; - let berry_id = FIRST_BERRY_BODY_ID + index as u16; - if state.remaining_berries & berry_bit != 0 - && state.world.has_contact(PLAYER_BODY_ID, berry_id) - { - state.remaining_berries &= !berry_bit; - state.world.set_body_active(berry_id, false); - reward += self.config.berry_reward; - } - } - if state.remaining_berries == 0 { + let was_non_terminal = state.remaining_berries != 0; + let mut remaining = u64::from(state.remaining_berries); + let collected = collect_actor_trigger_contacts( + &mut state.world, + PLAYER_BODY_ID, + FIRST_BERRY_BODY_ID, + BERRY_COUNT, + &mut remaining, + ); + state.remaining_berries = remaining as u8; + + let mut reward = self.config.berry_reward * i64::from(collected); + if was_non_terminal && state.remaining_berries == 0 { reward += self.config.finish_bonus; } reward @@ -303,21 +315,15 @@ impl Game for Platformer { } fn players_to_act(&self, state: &Self::State, out: &mut Self::PlayerBuf) { - out.clear(); - if !self.is_terminal(state) { - out.push(0).unwrap(); - } + single_player::write_players_to_act(out, self.is_terminal(state)); } fn legal_actions(&self, state: &Self::State, player: PlayerId, out: &mut Self::ActionBuf) { out.clear(); - if player != 0 || self.is_terminal(state) { + if !single_player::can_act(player, self.is_terminal(state)) 
{ return; } - out.push(PlatformerAction::Stay).unwrap(); - out.push(PlatformerAction::Left).unwrap(); - out.push(PlatformerAction::Right).unwrap(); - out.push(PlatformerAction::Jump).unwrap(); + out.extend_from_slice(&PLATFORMER_ACTION_ORDER).unwrap(); } fn observe_player(&self, state: &Self::State, _player: PlayerId) -> Self::PlayerObservation { @@ -329,21 +335,10 @@ impl Game for Platformer { } fn world_view(&self, state: &Self::State) -> Self::WorldView { - let mut berries = default_array::(); - let mut index = 0usize; - while index < BERRY_COUNT { - berries[index] = BerryView { - id: FIRST_BERRY_BODY_ID + index as u16, - x: self.config.berry_xs[index], - y: self.config.berry_y, - collected: (state.remaining_berries & (1u8 << index)) == 0, - }; - index += 1; - } PlatformerWorldView { config: self.config, physics: state.world.clone(), - berries, + berries: berry_views(self.config, state.remaining_berries), } } @@ -354,17 +349,8 @@ impl Game for Platformer { rng: &mut DeterministicRng, out: &mut StepOutcome, ) { - let actions = joint_actions.as_slice(); - let mut action = PlatformerAction::Stay; - let mut action_index = 0usize; - while action_index < actions.len() { - let candidate = &actions[action_index]; - if candidate.player == 0 { - action = candidate.action; - break; - } - action_index += 1; - } + let action = + single_player::first_action(joint_actions.as_slice()).unwrap_or(PlatformerAction::Stay); let mut reward = 0; if self.is_terminal(state) { @@ -412,9 +398,7 @@ impl Game for Platformer { }; } - out.rewards - .push(PlayerReward { player: 0, reward }) - .unwrap(); + single_player::push_reward(&mut out.rewards, reward); } fn state_invariant(&self, state: &Self::State) -> bool { @@ -498,12 +482,9 @@ impl Game for Platformer { post: &Self::State, outcome: &StepOutcome, ) -> bool { - matches!(outcome.reward_for(0), -1..=11) - && (post.remaining_berries == 0) == outcome.is_terminal() + reward_and_terminal_postcondition(outcome.reward_for(0), -1, 11, 
post.remaining_berries == 0, outcome.is_terminal()) } -} -impl CompactGame for Platformer { fn compact_spec(&self) -> CompactSpec { CompactSpec { action_count: 4, @@ -517,22 +498,11 @@ impl CompactGame for Platformer { } fn encode_action(&self, action: &Self::Action) -> u64 { - match action { - PlatformerAction::Stay => 0, - PlatformerAction::Left => 1, - PlatformerAction::Right => 2, - PlatformerAction::Jump => 3, - } + encode_enum_action(*action, &PLATFORMER_ACTION_ORDER) } fn decode_action(&self, encoded: u64) -> Option { - match encoded { - 0 => Some(PlatformerAction::Stay), - 1 => Some(PlatformerAction::Left), - 2 => Some(PlatformerAction::Right), - 3 => Some(PlatformerAction::Jump), - _ => None, - } + decode_enum_action(encoded, &PLATFORMER_ACTION_ORDER) } fn encode_player_observation( @@ -558,206 +528,7 @@ impl CompactGame for Platformer { } #[cfg(test)] -mod tests { - use super::*; - use crate::session::Session; - use crate::verification::{ - assert_compact_roundtrip, assert_observation_contracts, assert_transition_contracts, - }; - - #[test] - fn movement_clamps_at_walls() { - let game = Platformer::default(); - let mut state = game.init(1); - let mut rng = DeterministicRng::from_seed_and_stream(1, 1); - let mut outcome = StepOutcome::>::default(); - let mut actions = FixedVec::, 1>::default(); - actions - .push(PlayerAction { - player: 0, - action: PlatformerAction::Left, - }) - .unwrap(); - game.step_in_place(&mut state, &actions, &mut rng, &mut outcome); - assert_eq!(game.observe_spectator(&state).x, 0); - - state - .world - .set_body_position(PLAYER_BODY_ID, game.config.player_center(11, 0)); - outcome.clear(); - actions.clear(); - actions - .push(PlayerAction { - player: 0, - action: PlatformerAction::Right, - }) - .unwrap(); - game.step_in_place(&mut state, &actions, &mut rng, &mut outcome); - assert_eq!(game.observe_spectator(&state).x, 11); - } - - #[test] - fn berry_collection_is_idempotent() { - let game = Platformer::default(); - let mut state = 
game.init(1); - state - .world - .set_body_position(PLAYER_BODY_ID, game.config.player_center(1, 0)); - let mut rng = DeterministicRng::from_seed_and_stream(1, 1); - let mut outcome = StepOutcome::>::default(); - let mut actions = FixedVec::, 1>::default(); - actions - .push(PlayerAction { - player: 0, - action: PlatformerAction::Jump, - }) - .unwrap(); - - game.step_in_place(&mut state, &actions, &mut rng, &mut outcome); - let remaining = state.remaining_berries; - outcome.clear(); - game.step_in_place(&mut state, &actions, &mut rng, &mut outcome); - assert_eq!(state.remaining_berries, remaining); - } - - #[test] - fn final_berry_terminates_with_bonus() { - let game = Platformer::default(); - let mut state = game.init(9); - state.remaining_berries = 1u8 << 5; - game.sync_berries(&mut state); - state - .world - .set_body_position(PLAYER_BODY_ID, game.config.player_center(11, 0)); - let mut rng = DeterministicRng::from_seed_and_stream(9, 1); - let mut outcome = StepOutcome::>::default(); - let mut actions = FixedVec::, 1>::default(); - actions - .push(PlayerAction { - player: 0, - action: PlatformerAction::Jump, - }) - .unwrap(); - game.step_in_place(&mut state, &actions, &mut rng, &mut outcome); - assert!(game.is_terminal(&state)); - assert!(outcome.reward_for(0) >= 10); - } - - #[test] - fn seeded_sessions_replay_exactly() { - let mut left = Session::new(Platformer::default(), 3); - let mut right = Session::new(Platformer::default(), 3); - let actions = [ - PlayerAction { - player: 0, - action: PlatformerAction::Right, - }, - PlayerAction { - player: 0, - action: PlatformerAction::Jump, - }, - PlayerAction { - player: 0, - action: PlatformerAction::Right, - }, - ]; - for action in actions { - left.step(std::slice::from_ref(&action)); - right.step(std::slice::from_ref(&action)); - } - assert_eq!(left.trace(), right.trace()); - assert_eq!(left.state(), right.state()); - } - - #[test] - fn verification_helpers_hold_for_jump() { - let game = Platformer::default(); - 
let state = game.init(3); - let mut actions = FixedVec::, 1>::default(); - actions - .push(PlayerAction { - player: 0, - action: PlatformerAction::Jump, - }) - .unwrap(); - assert_transition_contracts(&game, &state, &actions, 3); - assert_observation_contracts(&game, &state); - assert_compact_roundtrip(&game, &PlatformerAction::Jump); - } - - #[test] - fn physics_world_tracks_actor_and_berries() { - let state = Platformer::default().init(3); - let world = Platformer::default().world_view(&state); - assert_eq!(world.physics.bodies.len(), PLATFORMER_BODIES); - assert!(world.physics.invariant()); - } -} +mod tests; #[cfg(kani)] -mod proofs { - use super::{ALL_BERRIES_MASK, PLAYER_BODY_ID, Platformer, PlatformerAction, PlatformerState}; - use crate::buffer::FixedVec; - use crate::game::Game; - use crate::types::PlayerAction; - - #[kani::proof] - #[kani::unwind(64)] - fn wall_clamps_hold_for_all_edge_positions() { - let game = Platformer::default(); - let mut state = PlatformerState::default(); - let x: u8 = kani::any(); - kani::assume(x < game.config.width); - state - .world - .set_body_position(PLAYER_BODY_ID, game.config.player_center(x, 0)); - let mut rng = crate::rng::DeterministicRng::from_seed(1); - let mut outcome = - crate::types::StepOutcome::>::default(); - let mut actions = FixedVec::, 1>::default(); - actions - .push(PlayerAction { - player: 0, - action: PlatformerAction::Left, - }) - .unwrap(); - game.step_in_place(&mut state, &actions, &mut rng, &mut outcome); - assert!(game.observe_spectator(&state).x < game.config.width); - } - - #[kani::proof] - #[kani::unwind(64)] - fn jump_reward_is_bounded() { - let state = Platformer::default().init(1); - let mut actions = FixedVec::, 1>::default(); - actions - .push(PlayerAction { - player: 0, - action: PlatformerAction::Jump, - }) - .unwrap(); - crate::verification::assert_transition_contracts( - &Platformer::default(), - &state, - &actions, - 1, - ); - } - - #[kani::proof] - #[kani::unwind(64)] - fn 
initial_observation_and_world_contracts_hold() { - let game = Platformer::default(); - let state = game.init(1); - crate::verification::assert_observation_contracts(&game, &state); - } - - #[kani::proof] - #[kani::unwind(64)] - fn berry_mask_tracks_trigger_activation() { - let mut state = PlatformerState::default(); - state.remaining_berries = ALL_BERRIES_MASK ^ 0b000001; - Platformer::default().sync_berries(&mut state); - assert!(!state.world.require_body(super::FIRST_BERRY_BODY_ID).active); - } -} +mod proofs; diff --git a/src/builtin/platformer/proofs.rs b/src/builtin/platformer/proofs.rs new file mode 100644 index 0000000..4e65874 --- /dev/null +++ b/src/builtin/platformer/proofs.rs @@ -0,0 +1,58 @@ +use super::{ALL_BERRIES_MASK, PLAYER_BODY_ID, Platformer, PlatformerAction, PlatformerState}; +use crate::buffer::FixedVec; +use crate::game::Game; +use crate::types::PlayerAction; + +#[kani::proof] +#[kani::unwind(64)] +fn wall_clamps_hold_for_all_edge_positions() { + let game = Platformer::default(); + let mut state = PlatformerState::default(); + let x: u8 = kani::any(); + kani::assume(x < game.config.width); + state + .world + .set_body_position(PLAYER_BODY_ID, game.config.player_center(x, 0)); + let mut rng = crate::rng::DeterministicRng::from_seed(1); + let mut outcome = crate::types::StepOutcome::>::default(); + let mut actions = FixedVec::, 1>::default(); + actions + .push(PlayerAction { + player: 0, + action: PlatformerAction::Left, + }) + .unwrap(); + game.step_in_place(&mut state, &actions, &mut rng, &mut outcome); + assert!(game.observe_spectator(&state).x < game.config.width); +} + +#[kani::proof] +#[kani::unwind(64)] +fn jump_reward_is_bounded() { + let state = Platformer::default().init(1); + let mut actions = FixedVec::, 1>::default(); + actions + .push(PlayerAction { + player: 0, + action: PlatformerAction::Jump, + }) + .unwrap(); + crate::verification::assert_transition_contracts(&Platformer::default(), &state, &actions, 1); +} + +#[kani::proof] 
+#[kani::unwind(64)] +fn initial_observation_and_world_contracts_hold() { + let game = Platformer::default(); + let state = game.init(1); + crate::verification::assert_observation_contracts(&game, &state); +} + +#[kani::proof] +#[kani::unwind(64)] +fn berry_mask_tracks_trigger_activation() { + let mut state = PlatformerState::default(); + state.remaining_berries = ALL_BERRIES_MASK ^ 0b000001; + Platformer::default().sync_berries(&mut state); + assert!(!state.world.require_body(super::FIRST_BERRY_BODY_ID).active); +} diff --git a/src/builtin/platformer/tests.rs b/src/builtin/platformer/tests.rs new file mode 100644 index 0000000..afe2c4d --- /dev/null +++ b/src/builtin/platformer/tests.rs @@ -0,0 +1,133 @@ +use super::*; +use crate::session::Session; +use crate::verification::{ + assert_compact_roundtrip, assert_observation_contracts, assert_transition_contracts, +}; + +#[test] +fn movement_clamps_at_walls() { + let game = Platformer::default(); + let mut state = game.init(1); + let mut rng = DeterministicRng::from_seed_and_stream(1, 1); + let mut outcome = StepOutcome::>::default(); + let mut actions = FixedVec::, 1>::default(); + actions + .push(PlayerAction { + player: 0, + action: PlatformerAction::Left, + }) + .unwrap(); + game.step_in_place(&mut state, &actions, &mut rng, &mut outcome); + assert_eq!(game.observe_spectator(&state).x, 0); + + state + .world + .set_body_position(PLAYER_BODY_ID, game.config.player_center(11, 0)); + outcome.clear(); + actions.clear(); + actions + .push(PlayerAction { + player: 0, + action: PlatformerAction::Right, + }) + .unwrap(); + game.step_in_place(&mut state, &actions, &mut rng, &mut outcome); + assert_eq!(game.observe_spectator(&state).x, 11); +} + +#[test] +fn berry_collection_is_idempotent() { + let game = Platformer::default(); + let mut state = game.init(1); + state + .world + .set_body_position(PLAYER_BODY_ID, game.config.player_center(1, 0)); + let mut rng = DeterministicRng::from_seed_and_stream(1, 1); + let mut 
outcome = StepOutcome::>::default(); + let mut actions = FixedVec::, 1>::default(); + actions + .push(PlayerAction { + player: 0, + action: PlatformerAction::Jump, + }) + .unwrap(); + + game.step_in_place(&mut state, &actions, &mut rng, &mut outcome); + let remaining = state.remaining_berries; + outcome.clear(); + game.step_in_place(&mut state, &actions, &mut rng, &mut outcome); + assert_eq!(state.remaining_berries, remaining); +} + +#[test] +fn final_berry_terminates_with_bonus() { + let game = Platformer::default(); + let mut state = game.init(9); + state.remaining_berries = 1u8 << 5; + game.sync_berries(&mut state); + state + .world + .set_body_position(PLAYER_BODY_ID, game.config.player_center(11, 0)); + let mut rng = DeterministicRng::from_seed_and_stream(9, 1); + let mut outcome = StepOutcome::>::default(); + let mut actions = FixedVec::, 1>::default(); + actions + .push(PlayerAction { + player: 0, + action: PlatformerAction::Jump, + }) + .unwrap(); + game.step_in_place(&mut state, &actions, &mut rng, &mut outcome); + assert!(game.is_terminal(&state)); + assert!(outcome.reward_for(0) >= 10); +} + +#[test] +fn seeded_sessions_replay_exactly() { + let mut left = Session::new(Platformer::default(), 3); + let mut right = Session::new(Platformer::default(), 3); + let actions = [ + PlayerAction { + player: 0, + action: PlatformerAction::Right, + }, + PlayerAction { + player: 0, + action: PlatformerAction::Jump, + }, + PlayerAction { + player: 0, + action: PlatformerAction::Right, + }, + ]; + for action in actions { + left.step(std::slice::from_ref(&action)); + right.step(std::slice::from_ref(&action)); + } + assert_eq!(left.trace(), right.trace()); + assert_eq!(left.state(), right.state()); +} + +#[test] +fn verification_helpers_hold_for_jump() { + let game = Platformer::default(); + let state = game.init(3); + let mut actions = FixedVec::, 1>::default(); + actions + .push(PlayerAction { + player: 0, + action: PlatformerAction::Jump, + }) + .unwrap(); + 
assert_transition_contracts(&game, &state, &actions, 3); + assert_observation_contracts(&game, &state); + assert_compact_roundtrip(&game, &PlatformerAction::Jump); +} + +#[test] +fn physics_world_tracks_actor_and_berries() { + let state = Platformer::default().init(3); + let world = Platformer::default().world_view(&state); + assert_eq!(world.physics.bodies.len(), PLATFORMER_BODIES); + assert!(world.physics.invariant()); +} diff --git a/src/builtin/platformer/world.rs b/src/builtin/platformer/world.rs new file mode 100644 index 0000000..e2823ef --- /dev/null +++ b/src/builtin/platformer/world.rs @@ -0,0 +1,73 @@ +//! Platformer world/debug view types and physics oracle adapter. + +use crate::math::{Aabb2, StrictF64}; +use crate::physics::{Contact2d, PhysicsBody2d, PhysicsOracleView2d, PhysicsWorld2d}; +use crate::game::Game; + +use super::{ + BERRY_COUNT, FIRST_BERRY_BODY_ID, PLATFORMER_BODIES, PLATFORMER_CONTACTS, Platformer, + PlatformerConfig, +}; + +/// Render/debug view of one berry. +#[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)] +pub struct BerryView { + /// Stable body id in the physics world. + pub id: u16, + /// Berry x tile coordinate. + pub x: u8, + /// Berry y tile coordinate. + pub y: u8, + /// Whether this berry has already been collected. + pub collected: bool, +} + +/// World-level debug view combining config, physics and berry metadata. +#[derive(Clone, Debug, Eq, Hash, PartialEq)] +pub struct PlatformerWorldView { + /// Environment configuration used for this world. + pub config: PlatformerConfig, + /// Physics snapshot. + pub physics: PhysicsWorld2d, + /// Berry metadata for rendering and inspection. 
+ pub berries: [BerryView; BERRY_COUNT], +} + +pub(super) fn berry_views(config: PlatformerConfig, remaining_berries: u8) -> [BerryView; BERRY_COUNT] { + let mut berries = [BerryView::default(); BERRY_COUNT]; + let mut index = 0usize; + while index < BERRY_COUNT { + berries[index] = BerryView { + id: FIRST_BERRY_BODY_ID + index as u16, + x: config.berry_xs[index], + y: config.berry_y, + collected: (remaining_berries & (1u8 << index)) == 0, + }; + index += 1; + } + berries +} + +impl Default for PlatformerWorldView { + fn default() -> Self { + Platformer::default().world_view(&Platformer::default().init(0)) + } +} + +impl PhysicsOracleView2d for PlatformerWorldView { + fn bounds(&self) -> Aabb2 { + self.physics.bounds() + } + + fn tick(&self) -> u64 { + self.physics.tick() + } + + fn bodies(&self) -> &[PhysicsBody2d] { + self.physics.bodies() + } + + fn contacts(&self) -> &[Contact2d] { + self.physics.contacts() + } +} \ No newline at end of file diff --git a/src/games/tictactoe.rs b/src/builtin/tictactoe/mod.rs similarity index 56% rename from src/games/tictactoe.rs rename to src/builtin/tictactoe/mod.rs index 905e32c..b8043d1 100644 --- a/src/games/tictactoe.rs +++ b/src/builtin/tictactoe/mod.rs @@ -1,8 +1,12 @@ +//! Builtin deterministic tic-tac-toe environment and compact encoding. + use crate::buffer::FixedVec; -use crate::compact::{CompactGame, CompactSpec}; +use crate::compact::CompactSpec; +use crate::core::single_player; use crate::game::Game; use crate::rng::DeterministicRng; use crate::types::{PlayerAction, PlayerId, PlayerReward, Seed, StepOutcome, Termination}; +use crate::verification::reward_and_terminal_postcondition; const WIN_LINES: [(usize, usize, usize); 8] = [ (0, 1, 2), @@ -15,27 +19,39 @@ const WIN_LINES: [(usize, usize, usize); 8] = [ (2, 4, 6), ]; +/// Cell state on the 3x3 board. #[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)] pub enum TicTacToeCell { + /// Empty cell. #[default] Empty, + /// Player-controlled mark. 
Player, + /// Opponent mark. Opponent, } +/// Compact action selecting one board cell. #[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)] pub struct TicTacToeAction(pub u8); +/// Complete deterministic game state. #[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)] pub struct TicTacToeState { + /// Board occupancy. pub board: [TicTacToeCell; 9], + /// Terminal-state flag. pub terminal: bool, + /// Winner id when terminal with a winner. pub winner: Option, } +/// Canonical tic-tac-toe observation type. pub type TicTacToeObservation = TicTacToeState; +/// World/debug view type. pub type TicTacToeWorldView = TicTacToeState; +/// Builtin deterministic tic-tac-toe environment. #[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)] pub struct TicTacToe; @@ -76,6 +92,7 @@ impl TicTacToe { true } + /// Packs board cells into a two-bit-per-cell `u64` representation. pub fn packed_board(board: &[TicTacToeCell; 9]) -> u64 { let mut packed = 0u64; let mut index = 0usize; @@ -121,15 +138,12 @@ impl Game for TicTacToe { } fn players_to_act(&self, state: &Self::State, out: &mut Self::PlayerBuf) { - out.clear(); - if !state.terminal { - out.push(0).unwrap(); - } + single_player::write_players_to_act(out, state.terminal); } fn legal_actions(&self, state: &Self::State, player: PlayerId, out: &mut Self::ActionBuf) { out.clear(); - if player != 0 || state.terminal { + if !single_player::can_act(player, state.terminal) { return; } let mut index = 0usize; @@ -160,17 +174,8 @@ impl Game for TicTacToe { rng: &mut DeterministicRng, out: &mut StepOutcome, ) { - let mut action = None; - let actions = joint_actions.as_slice(); - let mut action_index = 0usize; - while action_index < actions.len() { - let candidate = &actions[action_index]; - if candidate.player == 0 { - action = Some(candidate.action.0 as usize); - break; - } - action_index += 1; - } + let action = single_player::first_action(joint_actions.as_slice()) + .map(|candidate: TicTacToeAction| candidate.0 as 
usize); let reward = if state.terminal { out.termination = Termination::Terminal { @@ -228,41 +233,12 @@ impl Game for TicTacToe { -3 }; - out.rewards - .push(PlayerReward { player: 0, reward }) - .unwrap(); + single_player::push_reward(&mut out.rewards, reward); if !state.terminal { out.termination = Termination::Ongoing; } } - fn state_invariant(&self, state: &Self::State) -> bool { - let winner = Self::find_winner(&state.board); - let full = Self::is_full(&state.board); - state.terminal == (winner.is_some() || full) - && (state.winner == winner || (winner.is_none() && state.winner.is_none())) - } - - fn action_invariant(&self, action: &Self::Action) -> bool { - action.0 < 9 - } - - fn transition_postcondition( - &self, - pre: &Self::State, - _actions: &Self::JointActionBuf, - post: &Self::State, - outcome: &StepOutcome, - ) -> bool { - if pre.terminal { - return post == pre && outcome.reward_for(0) == 0 && outcome.is_terminal(); - } - let reward = outcome.reward_for(0); - matches!(reward, -3..=2) && (!post.terminal || outcome.is_terminal()) - } -} - -impl CompactGame for TicTacToe { fn compact_spec(&self) -> CompactSpec { CompactSpec { action_count: 9, @@ -299,170 +275,34 @@ impl CompactGame for TicTacToe { ) { self.encode_player_observation(observation, out); } -} -#[cfg(test)] -mod tests { - use super::*; - use crate::session::Session; - use crate::verification::{ - assert_compact_roundtrip, assert_observation_contracts, assert_transition_contracts, - }; - - #[test] - fn illegal_move_preserves_state_and_penalizes() { - let mut session = Session::new(TicTacToe, 7); - session.step(&[PlayerAction { - player: 0, - action: TicTacToeAction(0), - }]); - let before = *session.state(); - let outcome = session.step(&[PlayerAction { - player: 0, - action: TicTacToeAction(0), - }]); - assert_eq!(outcome.reward_for(0), -3); - assert_eq!(session.state(), &before); + fn state_invariant(&self, state: &Self::State) -> bool { + let winner = Self::find_winner(&state.board); + 
let full = Self::is_full(&state.board); + state.terminal == (winner.is_some() || full) + && (state.winner == winner || (winner.is_none() && state.winner.is_none())) } - #[test] - fn legal_actions_match_empty_cells_exhaustively() { - let game = TicTacToe; - for encoded in 0..3u32.pow(9) { - let mut board = [TicTacToeCell::Empty; 9]; - let mut value = encoded; - for cell in &mut board { - *cell = match value % 3 { - 0 => TicTacToeCell::Empty, - 1 => TicTacToeCell::Player, - _ => TicTacToeCell::Opponent, - }; - value /= 3; - } - let winner = TicTacToe::find_winner(&board); - let terminal = winner.is_some() || TicTacToe::is_full(&board); - let state = TicTacToeState { - board, - terminal, - winner, - }; - let mut legal = FixedVec::::default(); - game.legal_actions(&state, 0, &mut legal); - let expected: Vec<_> = if terminal { - Vec::new() - } else { - state - .board - .iter() - .enumerate() - .filter_map(|(index, cell)| { - (*cell == TicTacToeCell::Empty).then_some(TicTacToeAction(index as u8)) - }) - .collect() - }; - assert_eq!( - legal.as_slice(), - expected.as_slice(), - "encoded board state {encoded}" - ); - assert_observation_contracts(&game, &state); - } + fn action_invariant(&self, action: &Self::Action) -> bool { + action.0 < 9 } - #[test] - fn verification_helpers_hold_for_opening_move() { - let game = TicTacToe; - let state = game.init(7); - let mut actions = FixedVec::, 1>::default(); - actions - .push(PlayerAction { - player: 0, - action: TicTacToeAction(0), - }) - .unwrap(); - assert_transition_contracts(&game, &state, &actions, 7); - assert_compact_roundtrip(&game, &TicTacToeAction(0)); + fn transition_postcondition( + &self, + pre: &Self::State, + _actions: &Self::JointActionBuf, + post: &Self::State, + outcome: &StepOutcome, + ) -> bool { + if pre.terminal { + return post == pre && outcome.reward_for(0) == 0 && outcome.is_terminal(); + } + reward_and_terminal_postcondition(outcome.reward_for(0), -3, 2, post.terminal, outcome.is_terminal()) } } 
-#[cfg(kani)] -mod proofs { - use super::{TicTacToe, TicTacToeAction, TicTacToeCell, TicTacToeState}; - use crate::buffer::FixedVec; - use crate::game::Game; - use crate::session::{FixedHistory, SessionKernel}; - use crate::types::PlayerAction; - - #[kani::proof] - #[kani::unwind(16)] - fn legal_actions_are_exactly_empty_cells() { - let encoded: u32 = kani::any(); - kani::assume(encoded < 3u32.pow(9)); - let mut board = [TicTacToeCell::Empty; 9]; - let mut value = encoded; - for cell in &mut board { - *cell = match value % 3 { - 0 => TicTacToeCell::Empty, - 1 => TicTacToeCell::Player, - _ => TicTacToeCell::Opponent, - }; - value /= 3; - } - let winner = TicTacToe::find_winner(&board); - let terminal = winner.is_some() || TicTacToe::is_full(&board); - let state = TicTacToeState { - board, - terminal, - winner, - }; - let mut legal = FixedVec::::default(); - TicTacToe.legal_actions(&state, 0, &mut legal); - let mut legal_count = 0usize; - let mut legal_index = 0usize; - while legal_index < legal.len() { - let action = legal.as_slice()[legal_index]; - assert_eq!(state.board[action.0 as usize], TicTacToeCell::Empty); - legal_count += 1; - legal_index += 1; - } - - let mut empty_count = 0usize; - let mut board_index = 0usize; - while board_index < state.board.len() { - if state.board[board_index] == TicTacToeCell::Empty { - if !terminal { - assert!( - legal - .as_slice() - .contains(&TicTacToeAction(board_index as u8)) - ); - } - empty_count += 1; - } - board_index += 1; - } - assert_eq!(legal_count, if terminal { 0 } else { empty_count }); - } +#[cfg(test)] +mod tests; - #[kani::proof] - #[kani::unwind(16)] - fn invalid_move_never_mutates_board() { - type ProofSession = SessionKernel>; - - let mut session = ProofSession::new(TicTacToe, 1); - session.step(&[PlayerAction { - player: 0, - action: TicTacToeAction(0), - }]); - let mut actions = FixedVec::, 1>::default(); - actions - .push(PlayerAction { - player: 0, - action: TicTacToeAction(0), - }) - .unwrap(); - let 
before = *session.state(); - session.step_with_joint_actions(&actions); - assert_eq!(*session.state(), before); - } -} +#[cfg(kani)] +mod proofs; diff --git a/src/builtin/tictactoe/proofs.rs b/src/builtin/tictactoe/proofs.rs new file mode 100644 index 0000000..97dfb93 --- /dev/null +++ b/src/builtin/tictactoe/proofs.rs @@ -0,0 +1,78 @@ +use super::{TicTacToe, TicTacToeAction, TicTacToeCell, TicTacToeState}; +use crate::buffer::FixedVec; +use crate::game::Game; +use crate::session::{FixedHistory, SessionKernel}; +use crate::types::PlayerAction; + +#[kani::proof] +#[kani::unwind(16)] +fn legal_actions_are_exactly_empty_cells() { + let encoded: u32 = kani::any(); + kani::assume(encoded < 3u32.pow(9)); + let mut board = [TicTacToeCell::Empty; 9]; + let mut value = encoded; + for cell in &mut board { + *cell = match value % 3 { + 0 => TicTacToeCell::Empty, + 1 => TicTacToeCell::Player, + _ => TicTacToeCell::Opponent, + }; + value /= 3; + } + let winner = TicTacToe::find_winner(&board); + let terminal = winner.is_some() || TicTacToe::is_full(&board); + let state = TicTacToeState { + board, + terminal, + winner, + }; + let mut legal = FixedVec::::default(); + TicTacToe.legal_actions(&state, 0, &mut legal); + let mut legal_count = 0usize; + let mut legal_index = 0usize; + while legal_index < legal.len() { + let action = legal.as_slice()[legal_index]; + assert_eq!(state.board[action.0 as usize], TicTacToeCell::Empty); + legal_count += 1; + legal_index += 1; + } + + let mut empty_count = 0usize; + let mut board_index = 0usize; + while board_index < state.board.len() { + if state.board[board_index] == TicTacToeCell::Empty { + if !terminal { + assert!( + legal + .as_slice() + .contains(&TicTacToeAction(board_index as u8)) + ); + } + empty_count += 1; + } + board_index += 1; + } + assert_eq!(legal_count, if terminal { 0 } else { empty_count }); +} + +#[kani::proof] +#[kani::unwind(16)] +fn invalid_move_never_mutates_board() { + type ProofSession = SessionKernel>; + + let mut 
session = ProofSession::new(TicTacToe, 1); + session.step(&[PlayerAction { + player: 0, + action: TicTacToeAction(0), + }]); + let mut actions = FixedVec::, 1>::default(); + actions + .push(PlayerAction { + player: 0, + action: TicTacToeAction(0), + }) + .unwrap(); + let before = *session.state(); + session.step_with_joint_actions(&actions); + assert_eq!(*session.state(), before); +} diff --git a/src/builtin/tictactoe/tests.rs b/src/builtin/tictactoe/tests.rs new file mode 100644 index 0000000..12e9366 --- /dev/null +++ b/src/builtin/tictactoe/tests.rs @@ -0,0 +1,80 @@ +use super::*; +use crate::session::Session; +use crate::verification::{ + assert_compact_roundtrip, assert_observation_contracts, assert_transition_contracts, +}; + +#[test] +fn illegal_move_preserves_state_and_penalizes() { + let mut session = Session::new(TicTacToe, 7); + session.step(&[PlayerAction { + player: 0, + action: TicTacToeAction(0), + }]); + let before = *session.state(); + let outcome = session.step(&[PlayerAction { + player: 0, + action: TicTacToeAction(0), + }]); + assert_eq!(outcome.reward_for(0), -3); + assert_eq!(session.state(), &before); +} + +#[test] +fn legal_actions_match_empty_cells_exhaustively() { + let game = TicTacToe; + for encoded in 0..3u32.pow(9) { + let mut board = [TicTacToeCell::Empty; 9]; + let mut value = encoded; + for cell in &mut board { + *cell = match value % 3 { + 0 => TicTacToeCell::Empty, + 1 => TicTacToeCell::Player, + _ => TicTacToeCell::Opponent, + }; + value /= 3; + } + let winner = TicTacToe::find_winner(&board); + let terminal = winner.is_some() || TicTacToe::is_full(&board); + let state = TicTacToeState { + board, + terminal, + winner, + }; + let mut legal = FixedVec::::default(); + game.legal_actions(&state, 0, &mut legal); + let expected: Vec<_> = if terminal { + Vec::new() + } else { + state + .board + .iter() + .enumerate() + .filter_map(|(index, cell)| { + (*cell == TicTacToeCell::Empty).then_some(TicTacToeAction(index as u8)) + }) + 
.collect() + }; + assert_eq!( + legal.as_slice(), + expected.as_slice(), + "encoded board state {encoded}" + ); + assert_observation_contracts(&game, &state); + } +} + +#[test] +fn verification_helpers_hold_for_opening_move() { + let game = TicTacToe; + let state = game.init(7); + let mut actions = FixedVec::, 1>::default(); + actions + .push(PlayerAction { + player: 0, + action: TicTacToeAction(0), + }) + .unwrap(); + assert_transition_contracts(&game, &state, &actions, 7); + assert_compact_roundtrip(&game, &TicTacToeAction(0)); +} diff --git a/src/main.rs b/src/cli/mod.rs similarity index 64% rename from src/main.rs rename to src/cli/mod.rs index e82814b..470382b 100644 --- a/src/main.rs +++ b/src/cli/mod.rs @@ -1,30 +1,78 @@ +//! Command-line entrypoints for listing, playing, replaying, and validating games. + use std::env; +use std::fmt::Debug; use std::io::{self, Write}; -#[cfg(feature = "render")] -use gameengine::InteractiveSession; -use gameengine::buffer::Buffer; -use gameengine::games::{Blackjack, BlackjackAction, TicTacToe, TicTacToeAction}; +use crate::buffer::Buffer; +use crate::core::observe::{Observe, Observer}; +#[cfg(feature = "builtin")] +use crate::builtin::{Blackjack, BlackjackAction, TicTacToe, TicTacToeAction}; #[cfg(feature = "physics")] -use gameengine::games::{Platformer, PlatformerAction}; -use gameengine::policy::{FirstLegalPolicy, Policy, RandomPolicy, ScriptedPolicy}; +use crate::builtin::{Platformer, PlatformerAction}; +use crate::policy::{FirstLegalPolicy, Policy, RandomPolicy, ScriptedPolicy}; +#[cfg(all(feature = "render", feature = "physics"))] +use crate::render::{RealtimeDriver, builtin}; #[cfg(feature = "render")] -use gameengine::render::{ +use crate::render::{ PassivePolicyDriver, RenderConfig, RenderMode, RendererApp, TurnBasedDriver, }; -#[cfg(all(feature = "render", feature = "physics"))] -use gameengine::render::{RealtimeDriver, builtin}; -use gameengine::{CompactGame, Game, Session, stable_hash}; +use 
crate::registry::{GameKind, all_games, find_game}; +#[cfg(feature = "render")] +use crate::session::InteractiveSession; +use crate::{Game, Session, stable_hash}; -fn main() { - if let Err(error) = run() { - eprintln!("{error}"); - std::process::exit(1); +#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] +enum RunMode { + Play, + Replay, +} + +#[derive(Debug)] +enum PolicyChoice { + Human, + Random, + First, + Scripted(Vec), +} + +fn resolve_policy_choice( + mode: RunMode, + policy: &str, + parse_script: fn(&str) -> Result, String>, + game_name: &'static str, +) -> Result, String> { + match policy { + "human" if matches!(mode, RunMode::Play) => Ok(PolicyChoice::Human), + "human" => Err(format!( + "unsupported {game_name} policy for replay mode: human" + )), + "random" if matches!(mode, RunMode::Play) => Ok(PolicyChoice::Random), + "random" => Err(format!( + "unsupported {game_name} policy for replay mode: random" + )), + "first" if matches!(mode, RunMode::Play) => Ok(PolicyChoice::First), + "first" => Err(format!( + "unsupported {game_name} policy for replay mode: first" + )), + script if script.starts_with("script:") => parse_script(script) + .map(PolicyChoice::Scripted) + .map_err(|error| format!("{game_name} script parse error: {error}")), + other => Err(format!("unsupported {game_name} policy: {other}")), } } -fn run() -> Result<(), String> { - let mut args = env::args().skip(1); +/// Runs the CLI using process command-line arguments. +pub fn run_from_env() -> Result<(), String> { + run_from_args(env::args().skip(1)) +} + +/// Runs the CLI using a supplied argument iterator. 
+pub fn run_from_args(args: I) -> Result<(), String> +where + I: IntoIterator, +{ + let mut args = args.into_iter(); let Some(command) = args.next() else { print_usage(); return Ok(()); @@ -32,32 +80,43 @@ fn run() -> Result<(), String> { match command.as_str() { "list" => { - println!("tictactoe"); - println!("blackjack"); - #[cfg(feature = "physics")] - println!("platformer"); + for descriptor in all_games() { + println!("{}", descriptor.name); + } Ok(()) } - "play" | "replay" => { + "play" => { let game = args .next() - .ok_or_else(|| "missing game name for play/replay".to_string())?; + .ok_or_else(|| "missing game name for play".to_string())?; let config = CliConfig::parse(args)?; - match game.as_str() { - "tictactoe" => run_tictactoe(config), - "blackjack" => run_blackjack(config), - #[cfg(feature = "physics")] - "platformer" => run_platformer(config), - _ => Err(format!("unknown game: {game}")), - } + run_descriptor(&game, config, RunMode::Play) + } + "replay" => { + let game = args + .next() + .ok_or_else(|| "missing game name for replay".to_string())?; + let config = CliConfig::parse(args)?; + run_descriptor(&game, config, RunMode::Replay) } "validate" => run_validation_smoke(), _ => Err(format!("unknown command: {command}")), } } +fn run_descriptor(game_name: &str, config: CliConfig, mode: RunMode) -> Result<(), String> { + let descriptor = find_game(game_name).ok_or_else(|| format!("unknown game: {game_name}"))?; + match descriptor.kind { + GameKind::TicTacToe => run_tictactoe(config, mode), + GameKind::Blackjack => run_blackjack(config, mode), + #[cfg(feature = "physics")] + GameKind::Platformer => run_platformer(config, mode), + } +} + +/// Parsed command-line execution configuration. 
#[derive(Clone, Debug)] -struct CliConfig { +pub struct CliConfig { seed: u64, max_steps: usize, policy: String, @@ -143,118 +202,123 @@ impl CliConfig { } } -fn run_tictactoe(config: CliConfig) -> Result<(), String> { +fn run_headless_game( + game: G, + config: &CliConfig, + mode: RunMode, + mut human: H, + parse_script: fn(&str) -> Result, String>, + game_name: &'static str, +) -> Result<(), String> +where + G: Game + Observe + Copy, + G::Obs: Debug, + H: Policy, +{ + let mut session = Session::new(game, config.seed); + let mut random = RandomPolicy; + let mut first = FirstLegalPolicy; + let trace_hash = match resolve_policy_choice(mode, &config.policy, parse_script, game_name)? { + PolicyChoice::Human => run_with_policy(&mut session, config.max_steps, &mut human), + PolicyChoice::Random => run_with_policy(&mut session, config.max_steps, &mut random), + PolicyChoice::First => run_with_policy(&mut session, config.max_steps, &mut first), + PolicyChoice::Scripted(script) => { + let mut scripted = ScriptedPolicy::new_strict(script); + run_with_policy(&mut session, config.max_steps, &mut scripted) + } + }; + + println!("trace hash: {trace_hash:016x}"); + Ok(()) +} + +fn run_tictactoe(config: CliConfig, mode: RunMode) -> Result<(), String> { if config.render_physics { return Err("tictactoe does not support --render-physics".to_string()); } #[cfg(feature = "render")] if config.render { - return run_tictactoe_render(config); + return run_tictactoe_render(config, mode); } if config.render { return Err("the crate was built without the render feature".to_string()); } - let game = TicTacToe; - let mut session = Session::new(game, config.seed); - let mut human = HumanTicTacToe; - let mut random = RandomPolicy; - let mut first = FirstLegalPolicy; - let mut scripted = ScriptedPolicy::new(parse_tictactoe_script(&config.policy)); - let trace_hash = match config.policy.as_str() { - "human" => run_with_policy(&mut session, config.max_steps, &mut human), - "random" => 
run_with_policy(&mut session, config.max_steps, &mut random), - "first" => run_with_policy(&mut session, config.max_steps, &mut first), - policy if policy.starts_with("script:") => { - run_with_policy(&mut session, config.max_steps, &mut scripted) - } - other => return Err(format!("unsupported tictactoe policy: {other}")), - }; - println!("trace hash: {trace_hash:016x}"); - Ok(()) + run_headless_game( + TicTacToe, + &config, + mode, + HumanTicTacToe, + parse_tictactoe_script, + "tictactoe", + ) } -fn run_blackjack(config: CliConfig) -> Result<(), String> { +fn run_blackjack(config: CliConfig, mode: RunMode) -> Result<(), String> { if config.render_physics { return Err("blackjack does not support --render-physics".to_string()); } #[cfg(feature = "render")] if config.render { - return run_blackjack_render(config); + return run_blackjack_render(config, mode); } if config.render { return Err("the crate was built without the render feature".to_string()); } - let game = Blackjack; - let mut session = Session::new(game, config.seed); - let mut human = HumanBlackjack; - let mut random = RandomPolicy; - let mut first = FirstLegalPolicy; - let mut scripted = ScriptedPolicy::new(parse_blackjack_script(&config.policy)); - let trace_hash = match config.policy.as_str() { - "human" => run_with_policy(&mut session, config.max_steps, &mut human), - "random" => run_with_policy(&mut session, config.max_steps, &mut random), - "first" => run_with_policy(&mut session, config.max_steps, &mut first), - policy if policy.starts_with("script:") => { - run_with_policy(&mut session, config.max_steps, &mut scripted) - } - other => return Err(format!("unsupported blackjack policy: {other}")), - }; - println!("trace hash: {trace_hash:016x}"); - Ok(()) + run_headless_game( + Blackjack, + &config, + mode, + HumanBlackjack, + parse_blackjack_script, + "blackjack", + ) } #[cfg(feature = "physics")] -fn run_platformer(config: CliConfig) -> Result<(), String> { +fn run_platformer(config: CliConfig, 
mode: RunMode) -> Result<(), String> { #[cfg(feature = "render")] if config.render || config.render_physics { - return run_platformer_render(config); + return run_platformer_render(config, mode); } if config.render || config.render_physics { return Err("the crate was built without the render feature".to_string()); } - let game = Platformer::default(); - let mut session = Session::new(game, config.seed); - let mut human = HumanPlatformer; - let mut random = RandomPolicy; - let mut first = FirstLegalPolicy; - let mut scripted = ScriptedPolicy::new(parse_platformer_script(&config.policy)); - let trace_hash = match config.policy.as_str() { - "human" => run_with_policy(&mut session, config.max_steps, &mut human), - "random" => run_with_policy(&mut session, config.max_steps, &mut random), - "first" => run_with_policy(&mut session, config.max_steps, &mut first), - policy if policy.starts_with("script:") => { - run_with_policy(&mut session, config.max_steps, &mut scripted) - } - other => return Err(format!("unsupported platformer policy: {other}")), - }; - println!("trace hash: {trace_hash:016x}"); - Ok(()) + run_headless_game( + Platformer::default(), + &config, + mode, + HumanPlatformer, + parse_platformer_script, + "platformer", + ) } fn run_with_policy(session: &mut Session, max_steps: usize, policy: &mut P) -> u64 where - G: Game + CompactGame + Copy, + G: Game + Observe + Copy, + G::Obs: Debug, P: Policy, { let mut policies: [&mut dyn Policy; 1] = [policy]; while !session.is_terminal() && (session.current_tick() as usize) < max_steps { - let outcome = session.step_with_policies(&mut policies).clone(); - let spectator = session.spectator_observation(); + let reward = { + let outcome = session.step_with_policies(&mut policies); + outcome.reward_for(0) + }; + let observation = session.game().observe(session.state(), Observer::Player(0)); let mut compact = G::WordBuf::default(); - session - .game() - .encode_spectator_observation(&spectator, &mut compact); + 
session.game().encode_observation(&observation, &mut compact); println!( "tick={} reward={} terminal={} compact={:?}", session.current_tick(), - outcome.reward_for(0), + reward, session.is_terminal(), compact.as_slice(), ); - println!("{spectator:#?}"); + println!("{observation:#?}"); } stable_hash(session.trace()) } @@ -304,29 +368,26 @@ fn build_render_config(config: &CliConfig, mode: RenderMode) -> RenderConfig { } #[cfg(feature = "render")] -fn run_tictactoe_render(config: CliConfig) -> Result<(), String> { - use gameengine::render::builtin::TicTacToePresenter; +fn run_tictactoe_render(config: CliConfig, mode: RunMode) -> Result<(), String> { + use crate::render::builtin::TicTacToePresenter; let render_config = build_render_config(&config, RenderMode::Observation); - match config.policy.as_str() { - "human" => RendererApp::new( + match resolve_policy_choice(mode, &config.policy, parse_tictactoe_script, "tictactoe")? { + PolicyChoice::Human => RendererApp::new( render_config, TurnBasedDriver::new(InteractiveSession::new(TicTacToe, config.seed)), TicTacToePresenter::default(), ) .run_native() .map_err(|error| error.to_string()), - "random" => RendererApp::new( + PolicyChoice::Random => RendererApp::new( render_config, - PassivePolicyDriver::new( - InteractiveSession::new(TicTacToe, config.seed), - RandomPolicy, - ), + PassivePolicyDriver::new(InteractiveSession::new(TicTacToe, config.seed), RandomPolicy), TicTacToePresenter::default(), ) .run_native() .map_err(|error| error.to_string()), - "first" => RendererApp::new( + PolicyChoice::First => RendererApp::new( render_config, PassivePolicyDriver::new( InteractiveSession::new(TicTacToe, config.seed), @@ -336,44 +397,40 @@ fn run_tictactoe_render(config: CliConfig) -> Result<(), String> { ) .run_native() .map_err(|error| error.to_string()), - policy if policy.starts_with("script:") => RendererApp::new( + PolicyChoice::Scripted(script) => RendererApp::new( render_config, PassivePolicyDriver::new( 
InteractiveSession::new(TicTacToe, config.seed), - ScriptedPolicy::new(parse_tictactoe_script(&config.policy)), + ScriptedPolicy::new_strict(script), ), TicTacToePresenter::default(), ) .run_native() .map_err(|error| error.to_string()), - other => Err(format!("unsupported tictactoe policy: {other}")), } } #[cfg(feature = "render")] -fn run_blackjack_render(config: CliConfig) -> Result<(), String> { - use gameengine::render::builtin::BlackjackPresenter; +fn run_blackjack_render(config: CliConfig, mode: RunMode) -> Result<(), String> { + use crate::render::builtin::BlackjackPresenter; let render_config = build_render_config(&config, RenderMode::Observation); - match config.policy.as_str() { - "human" => RendererApp::new( + match resolve_policy_choice(mode, &config.policy, parse_blackjack_script, "blackjack")? { + PolicyChoice::Human => RendererApp::new( render_config, TurnBasedDriver::new(InteractiveSession::new(Blackjack, config.seed)), BlackjackPresenter::default(), ) .run_native() .map_err(|error| error.to_string()), - "random" => RendererApp::new( + PolicyChoice::Random => RendererApp::new( render_config, - PassivePolicyDriver::new( - InteractiveSession::new(Blackjack, config.seed), - RandomPolicy, - ), + PassivePolicyDriver::new(InteractiveSession::new(Blackjack, config.seed), RandomPolicy), BlackjackPresenter::default(), ) .run_native() .map_err(|error| error.to_string()), - "first" => RendererApp::new( + PolicyChoice::First => RendererApp::new( render_config, PassivePolicyDriver::new( InteractiveSession::new(Blackjack, config.seed), @@ -383,50 +440,48 @@ fn run_blackjack_render(config: CliConfig) -> Result<(), String> { ) .run_native() .map_err(|error| error.to_string()), - policy if policy.starts_with("script:") => RendererApp::new( + PolicyChoice::Scripted(script) => RendererApp::new( render_config, PassivePolicyDriver::new( InteractiveSession::new(Blackjack, config.seed), - ScriptedPolicy::new(parse_blackjack_script(&config.policy)), + 
ScriptedPolicy::new_strict(script), ), BlackjackPresenter::default(), ) .run_native() .map_err(|error| error.to_string()), - other => Err(format!("unsupported blackjack policy: {other}")), } } #[cfg(all(feature = "render", feature = "physics"))] -fn run_platformer_render(config: CliConfig) -> Result<(), String> { - let mode = if config.render_physics { +fn run_platformer_render(config: CliConfig, mode: RunMode) -> Result<(), String> { + let render_mode = if config.render_physics { RenderMode::OracleWorld } else { RenderMode::Observation }; - let render_config = build_render_config(&config, mode); + let render_config = build_render_config(&config, render_mode); let game = Platformer::default(); + let policy_choice = resolve_policy_choice(mode, &config.policy, parse_platformer_script, "platformer")?; + if config.render_physics { - match config.policy.as_str() { - "human" => RendererApp::new( + match policy_choice { + PolicyChoice::Human => RendererApp::new( render_config, - RealtimeDriver::new( - InteractiveSession::new(game, config.seed), - PlatformerAction::Stay, - ), + RealtimeDriver::new(InteractiveSession::new(game, config.seed), PlatformerAction::Stay), builtin::PlatformerPhysicsPresenter::new(game.config), ) .run_native() .map_err(|error| error.to_string()), - "random" => RendererApp::new( + PolicyChoice::Random => RendererApp::new( render_config, PassivePolicyDriver::new(InteractiveSession::new(game, config.seed), RandomPolicy), builtin::PlatformerPhysicsPresenter::new(game.config), ) .run_native() .map_err(|error| error.to_string()), - "first" => RendererApp::new( + PolicyChoice::First => RendererApp::new( render_config, PassivePolicyDriver::new( InteractiveSession::new(game, config.seed), @@ -436,38 +491,34 @@ fn run_platformer_render(config: CliConfig) -> Result<(), String> { ) .run_native() .map_err(|error| error.to_string()), - policy if policy.starts_with("script:") => RendererApp::new( + PolicyChoice::Scripted(script) => RendererApp::new( 
render_config, PassivePolicyDriver::new( InteractiveSession::new(game, config.seed), - ScriptedPolicy::new(parse_platformer_script(&config.policy)), + ScriptedPolicy::new_strict(script), ), builtin::PlatformerPhysicsPresenter::new(game.config), ) .run_native() .map_err(|error| error.to_string()), - other => Err(format!("unsupported platformer policy: {other}")), } } else { - match config.policy.as_str() { - "human" => RendererApp::new( + match policy_choice { + PolicyChoice::Human => RendererApp::new( render_config, - RealtimeDriver::new( - InteractiveSession::new(game, config.seed), - PlatformerAction::Stay, - ), + RealtimeDriver::new(InteractiveSession::new(game, config.seed), PlatformerAction::Stay), builtin::PlatformerPresenter::default(), ) .run_native() .map_err(|error| error.to_string()), - "random" => RendererApp::new( + PolicyChoice::Random => RendererApp::new( render_config, PassivePolicyDriver::new(InteractiveSession::new(game, config.seed), RandomPolicy), builtin::PlatformerPresenter::default(), ) .run_native() .map_err(|error| error.to_string()), - "first" => RendererApp::new( + PolicyChoice::First => RendererApp::new( render_config, PassivePolicyDriver::new( InteractiveSession::new(game, config.seed), @@ -477,17 +528,16 @@ fn run_platformer_render(config: CliConfig) -> Result<(), String> { ) .run_native() .map_err(|error| error.to_string()), - policy if policy.starts_with("script:") => RendererApp::new( + PolicyChoice::Scripted(script) => RendererApp::new( render_config, PassivePolicyDriver::new( InteractiveSession::new(game, config.seed), - ScriptedPolicy::new(parse_platformer_script(&config.policy)), + ScriptedPolicy::new_strict(script), ), builtin::PlatformerPresenter::default(), ) .run_native() .map_err(|error| error.to_string()), - other => Err(format!("unsupported platformer policy: {other}")), } } } @@ -495,11 +545,13 @@ fn run_platformer_render(config: CliConfig) -> Result<(), String> { fn print_usage() { println!("usage:"); println!(" 
gameengine list"); - println!( - " gameengine play [--seed N] [--max-steps N] [--policy human|random|first|script:...]" - ); + println!(" gameengine play [--seed N] [--max-steps N] [--policy human|random|first|script:...]"); println!(" gameengine replay [--seed N] [--max-steps N] [--policy script:...]"); println!(" gameengine validate"); + println!("available games:"); + for descriptor in all_games() { + println!(" - {}", descriptor.name); + } println!("optional rendering flags:"); println!(" --render"); println!(" --render-physics"); @@ -516,11 +568,11 @@ fn prompt(message: &str) -> io::Result { Ok(input) } -fn parse_tictactoe_script(spec: &str) -> Vec { +fn parse_tictactoe_script(spec: &str) -> Result, String> { parse_script(spec, |token| token.parse::().ok().map(TicTacToeAction)) } -fn parse_blackjack_script(spec: &str) -> Vec { +fn parse_blackjack_script(spec: &str) -> Result, String> { parse_script(spec, |token| match token.to_ascii_lowercase().as_str() { "hit" | "h" => Some(BlackjackAction::Hit), "stand" | "s" => Some(BlackjackAction::Stand), @@ -529,7 +581,7 @@ fn parse_blackjack_script(spec: &str) -> Vec { } #[cfg(feature = "physics")] -fn parse_platformer_script(spec: &str) -> Vec { +fn parse_platformer_script(spec: &str) -> Result, String> { parse_script(spec, |token| match token.to_ascii_lowercase().as_str() { "stay" | "s" => Some(PlatformerAction::Stay), "left" | "l" => Some(PlatformerAction::Left), @@ -539,17 +591,25 @@ fn parse_platformer_script(spec: &str) -> Vec { }) } -fn parse_script(spec: &str, parser: F) -> Vec +fn parse_script(spec: &str, parser: F) -> Result, String> where F: Fn(&str) -> Option, { let Some(script) = spec.strip_prefix("script:") else { - return Vec::new(); + return Ok(Vec::new()); }; - script - .split(',') - .filter_map(|token| parser(token.trim())) - .collect() + + let mut actions = Vec::new(); + for (index, token) in script.split(',').enumerate() { + let trimmed = token.trim(); + if trimmed.is_empty() { + return 
Err(format!("empty action token at position {index}")); + } + let action = parser(trimmed) + .ok_or_else(|| format!("invalid action token at position {index}: {trimmed}"))?; + actions.push(action); + } + Ok(actions) } struct HumanTicTacToe; @@ -562,7 +622,7 @@ impl Policy for HumanTicTacToe { _player: usize, _observation: &::PlayerObservation, legal_actions: &[::Action], - _rng: &mut gameengine::DeterministicRng, + _rng: &mut crate::DeterministicRng, ) -> ::Action { loop { let input = prompt("choose move [0-8]: ").expect("stdin prompt failed"); @@ -587,7 +647,7 @@ impl Policy for HumanBlackjack { _player: usize, _observation: &::PlayerObservation, legal_actions: &[::Action], - _rng: &mut gameengine::DeterministicRng, + _rng: &mut crate::DeterministicRng, ) -> ::Action { loop { let input = prompt("choose action [hit/stand]: ").expect("stdin prompt failed"); @@ -619,7 +679,7 @@ impl Policy for HumanPlatformer { _player: usize, _observation: &::PlayerObservation, legal_actions: &[::Action], - _rng: &mut gameengine::DeterministicRng, + _rng: &mut crate::DeterministicRng, ) -> ::Action { loop { let input = diff --git a/src/compact.rs b/src/compact.rs index addce2f..0c861c7 100644 --- a/src/compact.rs +++ b/src/compact.rs @@ -1,19 +1,56 @@ -use crate::buffer::Buffer; -use crate::game::Game; -use crate::types::{PlayerId, Reward}; +//! Compact encoding specifications and validation helpers. +use crate::types::Reward; + +/// Structured compact codec errors. +#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] +pub enum CompactError { + /// Reward was outside declared compact range. + RewardOutOfRange { + /// Input reward value. + reward: Reward, + /// Minimum allowed reward. + min_reward: Reward, + /// Maximum allowed reward. + max_reward: Reward, + }, + /// Encoded reward decoded outside declared compact range. + EncodedRewardOutOfRange { + /// Encoded compact reward value. + encoded: u64, + /// Minimum allowed reward. + min_reward: Reward, + /// Maximum allowed reward. 
+ max_reward: Reward, + }, + /// Encoded action had no valid decoding. + InvalidActionEncoding { + /// Encoded action value. + encoded: u64, + }, +} + +/// Compact schema descriptor for action/observation/reward encoding. #[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] pub struct CompactSpec { + /// Number of legal compact action values. pub action_count: u64, + /// Bit width of one observation word. pub observation_bits: u8, + /// Number of observation words emitted per observation. pub observation_stream_len: usize, + /// Bit width of encoded reward. pub reward_bits: u8, + /// Minimum reward value. pub min_reward: Reward, + /// Maximum reward value. pub max_reward: Reward, + /// Signed offset used for reward encoding. pub reward_offset: Reward, } impl CompactSpec { + /// Maximum representable value for one observation word. pub fn max_observation_value(&self) -> u64 { if self.observation_bits == 0 { 0 @@ -24,61 +61,121 @@ impl CompactSpec { } } + /// Encode reward and panic on out-of-range input. pub fn encode_reward(&self, reward: Reward) -> u64 { - debug_assert!(reward >= self.min_reward); - debug_assert!(reward <= self.max_reward); - (reward + self.reward_offset) as u64 + self.try_encode_reward(reward) + .expect("reward out of compact range") } + /// Decode reward and panic on out-of-range encoded input. pub fn decode_reward(&self, encoded: u64) -> Reward { - (encoded as Reward) - self.reward_offset + self.try_decode_reward(encoded) + .expect("encoded reward out of compact range") + } + + /// Checked reward encoder. 
+ pub fn try_encode_reward(&self, reward: Reward) -> Result { + if reward < self.min_reward || reward > self.max_reward { + return Err(CompactError::RewardOutOfRange { + reward, + min_reward: self.min_reward, + max_reward: self.max_reward, + }); + } + let encoded = i128::from(reward) + i128::from(self.reward_offset); + if !(0..=i128::from(u64::MAX)).contains(&encoded) { + return Err(CompactError::RewardOutOfRange { + reward, + min_reward: self.min_reward, + max_reward: self.max_reward, + }); + } + Ok(encoded as u64) } + /// Checked reward decoder. + pub fn try_decode_reward(&self, encoded: u64) -> Result { + let decoded = i128::from(encoded) - i128::from(self.reward_offset); + if decoded < i128::from(self.min_reward) || decoded > i128::from(self.max_reward) { + return Err(CompactError::EncodedRewardOutOfRange { + encoded, + min_reward: self.min_reward, + max_reward: self.max_reward, + }); + } + if decoded < i128::from(Reward::MIN) || decoded > i128::from(Reward::MAX) { + return Err(CompactError::EncodedRewardOutOfRange { + encoded, + min_reward: self.min_reward, + max_reward: self.max_reward, + }); + } + Ok(decoded as Reward) + } + + /// Validate internal reward-range consistency. 
pub fn reward_range_is_sound(&self) -> bool { self.min_reward <= self.max_reward + && self.try_encode_reward(self.min_reward).is_ok() + && self.try_encode_reward(self.max_reward).is_ok() && self.encode_reward(self.min_reward) <= self.encode_reward(self.max_reward) } } -pub trait CompactGame: Game { - fn compact_spec(&self) -> CompactSpec; - fn encode_action(&self, action: &Self::Action) -> u64; - fn decode_action(&self, encoded: u64) -> Option; - fn encode_player_observation( - &self, - observation: &Self::PlayerObservation, - out: &mut Self::WordBuf, - ); - - fn encode_spectator_observation( - &self, - observation: &Self::SpectatorObservation, - out: &mut Self::WordBuf, - ) { - let _ = observation; - out.clear(); +/// Encode one finite enum action using an explicit canonical action table. +pub fn encode_enum_action(action: T, action_table: &[T]) -> u64 +where + T: Copy + Eq, +{ + let mut index = 0usize; + while index < action_table.len() { + if action_table[index] == action { + return index as u64; + } + index += 1; } + panic!("action missing from compact action table"); +} - fn encode_player_view(&self, state: &Self::State, player: PlayerId, out: &mut Self::WordBuf) { - let observation = self.observe_player(state, player); - self.encode_player_observation(&observation, out); +/// Decode one finite enum action using an explicit canonical action table. 
+pub fn decode_enum_action(encoded: u64, action_table: &[T]) -> Option +where + T: Copy, +{ + action_table.get(usize::try_from(encoded).ok()?).copied() +} + +#[cfg(test)] +mod tests { + use super::CompactSpec; + + #[test] + fn try_decode_reward_rejects_large_values_without_overflow() { + let spec = CompactSpec { + action_count: 2, + observation_bits: 8, + observation_stream_len: 1, + reward_bits: 2, + min_reward: -1, + max_reward: 1, + reward_offset: 1, + }; + assert!(spec.try_decode_reward(u64::MAX).is_err()); } - fn compact_invariant(&self, words: &Self::WordBuf) -> bool { - let spec = self.compact_spec(); - if words.len() != spec.observation_stream_len { - return false; - } - let max_value = spec.max_observation_value(); - let slice = words.as_slice(); - let mut index = 0usize; - while index < slice.len() { - if slice[index] > max_value { - return false; - } - index += 1; - } - true + #[test] + fn try_encode_reward_handles_negative_ranges() { + let spec = CompactSpec { + action_count: 2, + observation_bits: 8, + observation_stream_len: 1, + reward_bits: 3, + min_reward: -3, + max_reward: 2, + reward_offset: 3, + }; + assert_eq!(spec.try_encode_reward(-3).unwrap(), 0); + assert_eq!(spec.try_encode_reward(2).unwrap(), 5); } } diff --git a/src/core/cards.rs b/src/core/cards.rs new file mode 100644 index 0000000..d35c731 --- /dev/null +++ b/src/core/cards.rs @@ -0,0 +1,84 @@ +//! Shared card/deck helpers for card-based builtin environments. + +/// A compact summary of blackjack hand value semantics. +#[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)] +pub struct BlackjackValue { + /// Total score after soft-ace normalization. + pub total: u8, + /// Whether the hand still contains at least one soft ace. + pub soft: bool, + /// Whether the hand score exceeds 21. + pub busted: bool, +} + +/// Evaluate a blackjack hand from rank values in `[1, 13]`.
+pub fn evaluate_blackjack_hand(cards: &[u8; MAX_CARDS], len: u8) -> BlackjackValue { + let mut total = 0u8; + let mut aces = 0u8; + let limit = len as usize; + let mut index = 0usize; + while index < MAX_CARDS && index < limit { + let card = cards[index]; + match card { + 1 => { + total = total.saturating_add(11); + aces += 1; + } + 11..=13 => total = total.saturating_add(10), + value => total = total.saturating_add(value), + } + index += 1; + } + for _ in 0..MAX_CARDS { + if total <= 21 || aces == 0 { + break; + } + total -= 10; + aces -= 1; + } + BlackjackValue { + total, + soft: aces > 0, + busted: total > 21, + } +} + +/// Fill a 52-card deck using ranks `[1, 13]` with four suits per rank. +pub fn fill_standard_deck_52(deck: &mut [u8; 52]) { + let mut index = 0usize; + for _ in 0..4 { + for rank in 1..=13 { + deck[index] = rank; + index += 1; + } + } +} + +/// Returns true when `deck` is a full 52-card rank multiset with four of each rank 1..=13. +pub fn is_standard_deck_52_permutation(deck: &[u8; 52]) -> bool { + let mut counts = [0u8; 14]; + for card in deck { + if !(1..=13).contains(card) { + return false; + } + counts[*card as usize] += 1; + } + for count in counts.iter().skip(1) { + if *count != 4 { + return false; + } + } + true +} + +/// Pack cards as 4-bit nibbles into a single `u64`. +pub fn pack_cards_nibbles(cards: &[u8; MAX_CARDS], len: u8) -> u64 { + let mut packed = 0u64; + let limit = len as usize; + let mut index = 0usize; + while index < MAX_CARDS && index < limit { + packed |= u64::from(cards[index]) << (index * 4); + index += 1; + } + packed +} diff --git a/src/core/env.rs b/src/core/env.rs new file mode 100644 index 0000000..1f698c3 --- /dev/null +++ b/src/core/env.rs @@ -0,0 +1,310 @@ +//! Compact environment wrapper for infotheory-compatible stepping. 
+ +use core::fmt; + +use crate::buffer::{Buffer, FixedVec}; +use crate::core::observe::{Observe, Observer}; +use crate::session::{HistoryStore, SessionKernel}; +use crate::types::{PlayerAction, PlayerId, Reward, Seed}; + +/// Compact observation packet represented as fixed-capacity machine words. +#[derive(Clone, Debug, Default, Eq, Hash, PartialEq)] +pub struct BitPacket { + words: FixedVec, +} + +impl BitPacket { + /// Returns the currently populated word slice. + pub fn words(&self) -> &[u64] { + self.words.as_slice() + } + + /// Clears all packet words. + pub fn clear(&mut self) { + self.words.clear(); + } + + fn push_word(&mut self, word: u64) { + self.words + .push(word) + .expect("bit packet capacity exceeded"); + } +} + +/// Reward emitted by the environment in raw and compact-encoded form. +#[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)] +pub struct CompactReward { + /// Raw reward value from game semantics. + pub raw: Reward, + /// Compactly encoded reward value according to `CompactSpec`. + pub encoded: u64, +} + +/// One environment step result with compact observation and reward. +#[derive(Clone, Debug, Eq, Hash, PartialEq)] +pub struct EnvStep { + /// Encoded observation packet after the step. + pub observation_bits: BitPacket, + /// Raw and compact reward representation. + pub reward: CompactReward, + /// True if the episode has reached terminal state. + pub terminated: bool, + /// True if the episode was truncated externally. + pub truncated: bool, +} + +/// Errors produced by compact environment reset/step operations. +#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] +pub enum EnvError { + /// Action bit pattern does not decode into a legal action value. + InvalidActionEncoding { + /// Raw encoded action word. + encoded: u64, + }, + /// Observation encoding exceeded configured packet capacity. + ObservationOverflow { + /// Number of words requested by the game encoder. 
+ actual_words: usize, + /// Maximum words accepted by this environment wrapper. + max_words: usize, + }, + /// Reward cannot be represented by the configured compact reward range. + RewardOutOfRange { + /// Raw out-of-range reward. + reward: Reward, + /// Minimum representable reward. + min: Reward, + /// Maximum representable reward. + max: Reward, + }, + /// Selected agent player id is outside game player range. + InvalidAgentPlayer { + /// Requested player id. + player: PlayerId, + /// Number of players exposed by the game. + player_count: usize, + }, +} + +impl fmt::Display for EnvError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::InvalidActionEncoding { encoded } => { + write!(f, "invalid compact action encoding: {encoded}") + } + Self::ObservationOverflow { + actual_words, + max_words, + } => { + write!( + f, + "observation packet requires {actual_words} words but maximum is {max_words}" + ) + } + Self::RewardOutOfRange { reward, min, max } => { + write!( + f, + "reward {reward} is outside compact spec range [{min}, {max}]" + ) + } + Self::InvalidAgentPlayer { + player, + player_count, + } => write!( + f, + "agent player {player} is outside player range 0..{player_count}" + ), + } + } +} + +impl std::error::Error for EnvError {} + +/// Minimal infotheory-compatible compact environment interface. +pub trait InfotheoryEnvironment { + /// Resets environment state and returns initial compact observation. + fn reset_seed(&mut self, seed: Seed) -> Result, EnvError>; + + /// Steps environment using compact action bits. + fn step_bits(&mut self, action_bits: u64) -> Result, EnvError>; +} + +/// Generic environment adapter over `SessionKernel` and compact codecs. +#[derive(Clone, Debug)] +pub struct Environment +where + G: Observe, + H: HistoryStore, +{ + session: SessionKernel, + observer: Observer, + agent_player: PlayerId, +} + +/// Default environment alias with fixed history and packet capacity. 
+pub type DefaultEnvironment = + Environment, MAX_WORDS>; + +impl Environment +where + G: Observe, + H: HistoryStore, +{ + /// Creates a new compact environment. + pub fn new(game: G, seed: Seed, observer: Observer) -> Self { + let agent_player = match observer { + Observer::Player(player) => player, + Observer::Spectator => 0, + }; + Self { + session: SessionKernel::new(game, seed), + observer, + agent_player, + } + } + + /// Returns immutable access to the underlying session kernel. + pub fn session(&self) -> &SessionKernel { + &self.session + } + + /// Returns mutable access to the underlying session kernel. + pub fn session_mut(&mut self) -> &mut SessionKernel { + &mut self.session + } + + /// Returns current observer viewpoint. + pub fn observer(&self) -> Observer { + self.observer + } + + /// Sets observer viewpoint used for future observation encodes. + pub fn set_observer(&mut self, observer: Observer) { + self.observer = observer; + if let Observer::Player(player) = observer { + self.agent_player = player; + } + } + + /// Returns the player id controlled by compact `step()` actions. + pub fn agent_player(&self) -> PlayerId { + self.agent_player + } + + /// Sets the player id controlled by compact `step()` actions. + pub fn set_agent_player(&mut self, player: PlayerId) { + self.agent_player = player; + } + + /// Resets session state and returns initial compact observation. + pub fn reset(&mut self, seed: Seed) -> Result, EnvError> { + self.session.reset(seed); + self.encode_current_observation() + } + + /// Steps the environment from an encoded action value. 
+ pub fn step(&mut self, action_bits: u64) -> Result, EnvError> { + let Some(action) = self.session.game().decode_action(action_bits) else { + return Err(EnvError::InvalidActionEncoding { + encoded: action_bits, + }); + }; + + let player_count = self.session.game().player_count(); + if self.agent_player >= player_count { + return Err(EnvError::InvalidAgentPlayer { + player: self.agent_player, + player_count, + }); + } + + let mut actions = G::JointActionBuf::default(); + actions + .push(PlayerAction { + player: self.agent_player, + action, + }) + .expect("joint action buffer capacity exceeded"); + + let (reward, terminated) = { + let outcome = self.session.step_with_joint_actions(&actions); + (outcome.reward_for(self.agent_player), outcome.is_terminal()) + }; + + let spec = self.session.game().compact_spec(); + let encoded_reward = spec.try_encode_reward(reward).map_err(|_| { + EnvError::RewardOutOfRange { + reward, + min: spec.min_reward, + max: spec.max_reward, + } + })?; + + Ok(EnvStep { + observation_bits: self.encode_current_observation()?, + reward: CompactReward { + raw: reward, + encoded: encoded_reward, + }, + terminated, + truncated: false, + }) + } + + /// Encodes current observation into a bounded compact packet. + pub fn encode_current_observation(&self) -> Result, EnvError> { + let mut encoded = G::WordBuf::default(); + self.session.game().observe_and_encode( + self.session.state(), + self.observer, + &mut encoded, + ); + if encoded.len() > MAX_WORDS { + return Err(EnvError::ObservationOverflow { + actual_words: encoded.len(), + max_words: MAX_WORDS, + }); + } + + let mut packet = BitPacket::default(); + for &word in encoded.as_slice() { + packet.push_word(word); + } + Ok(packet) + } +} + +impl InfotheoryEnvironment + for Environment +where + G: Observe, + H: HistoryStore, +{ + /// Resets environment and emits initial packet. + fn reset_seed(&mut self, seed: Seed) -> Result, EnvError> { + self.reset(seed) + } + + /// Steps environment with compact action bits.
+ fn step_bits(&mut self, action_bits: u64) -> Result, EnvError> { + self.step(action_bits) + } +} + +#[cfg(all(test, feature = "builtin"))] +mod tests { + use super::{DefaultEnvironment, Observer}; + use crate::builtin::{TicTacToe, TicTacToeAction}; + use crate::game::Game; + + #[test] + fn env_wrapper_emits_compact_observations() { + let mut env = DefaultEnvironment::::new(TicTacToe, 7, Observer::Player(0)); + let initial = env.encode_current_observation().unwrap(); + assert_eq!(initial.words(), &[0]); + + let action = TicTacToe.encode_action(&TicTacToeAction(0)); + let step = env.step(action).unwrap(); + assert_eq!(step.observation_bits.words().len(), 1); + } +} diff --git a/src/core/mod.rs b/src/core/mod.rs new file mode 100644 index 0000000..4eedd06 --- /dev/null +++ b/src/core/mod.rs @@ -0,0 +1,20 @@ +//! Core engine helpers shared across environments and adapters. + +pub mod cards; +pub mod env; +pub mod observe; +pub mod single_player; +pub mod stepper; + +pub use crate::buffer::{BitWords, Buffer, CapacityError, FixedVec}; +pub use crate::compact::CompactSpec; +pub use crate::game::Game; +pub use crate::rng::{DeterministicRng, SplitMix64}; +pub use crate::session::{ + DynamicHistory, FixedHistory, HistorySnapshot, HistoryStore, InteractiveSession, Session, + SessionKernel, +}; +pub use crate::types::{ + DynamicReplayTrace, PlayerAction, PlayerId, PlayerReward, ReplayStep, ReplayTrace, Reward, + Seed, StepOutcome, Termination, Tick, stable_hash, +}; diff --git a/src/core/observe.rs b/src/core/observe.rs new file mode 100644 index 0000000..ecdca4f --- /dev/null +++ b/src/core/observe.rs @@ -0,0 +1,51 @@ +//! Observation adapter trait and viewpoint selection types. + +use core::fmt::Debug; + +use crate::game::Game; +use crate::types::PlayerId; + +/// Viewpoint used when requesting an observation. +#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] +pub enum Observer { + /// Player-local, potentially partial-information observation. 
+ Player(PlayerId), + /// Full spectator observation. + Spectator, +} + +/// Adapter trait for producing and encoding generic observations. +pub trait Observe: Game { + /// Observation type emitted by this adapter. + type Obs: Clone + Debug + Default + Eq + PartialEq; + + /// Builds an observation for the selected viewpoint. + fn observe(&self, state: &Self::State, who: Observer) -> Self::Obs; + + /// Encodes an observation into the compact word stream. + fn encode_observation(&self, observation: &Self::Obs, out: &mut Self::WordBuf); + + /// Convenience helper to observe and encode in one call. + fn observe_and_encode(&self, state: &Self::State, who: Observer, out: &mut Self::WordBuf) { + let observation = self.observe(state, who); + self.encode_observation(&observation, out); + } +} + +impl Observe for G +where + G: Game::PlayerObservation>, +{ + type Obs = G::PlayerObservation; + + fn observe(&self, state: &Self::State, who: Observer) -> Self::Obs { + match who { + Observer::Player(player) => self.observe_player(state, player), + Observer::Spectator => self.observe_spectator(state), + } + } + + fn encode_observation(&self, observation: &Self::Obs, out: &mut Self::WordBuf) { + self.encode_player_observation(observation, out); + } +} diff --git a/src/core/single_player.rs b/src/core/single_player.rs new file mode 100644 index 0000000..d2f1f6b --- /dev/null +++ b/src/core/single_player.rs @@ -0,0 +1,45 @@ +//! Reusable helpers for deterministic single-player environments. + +use crate::buffer::Buffer; +use crate::types::{PlayerAction, PlayerId, PlayerReward, Reward}; + +/// Canonical acting player id used by single-player environments. +pub const SOLO_PLAYER: PlayerId = 0; + +/// Returns true when `player` can act in a non-terminal single-player state. +pub const fn can_act(player: PlayerId, terminal: bool) -> bool { + player == SOLO_PLAYER && !terminal +} + +/// Clears and emits the single acting player when the state is ongoing. 
+pub fn write_players_to_act(out: &mut B, terminal: bool) +where + B: Buffer, +{ + out.clear(); + if !terminal { + out.push(SOLO_PLAYER).unwrap(); + } +} + +/// Returns the first action assigned to the single acting player. +pub fn first_action(joint_actions: &[PlayerAction]) -> Option { + for candidate in joint_actions { + if candidate.player == SOLO_PLAYER { + return Some(candidate.action); + } + } + None +} + +/// Appends one reward entry for the single acting player. +pub fn push_reward(out: &mut B, reward: Reward) +where + B: Buffer, +{ + out.push(PlayerReward { + player: SOLO_PLAYER, + reward, + }) + .unwrap(); +} diff --git a/src/core/stepper.rs b/src/core/stepper.rs new file mode 100644 index 0000000..a280fef --- /dev/null +++ b/src/core/stepper.rs @@ -0,0 +1,39 @@ +//! Session stepping adapters for checked and unchecked execution paths. + +use crate::game::Game; +use crate::session::{HistoryStore, SessionKernel}; +use crate::types::StepOutcome; + +/// Minimal wrapper that executes unchecked kernel steps. +pub struct KernelStepper<'a, G: Game, H: HistoryStore> { + session: &'a mut SessionKernel, +} + +impl<'a, G: Game, H: HistoryStore> KernelStepper<'a, G, H> { + /// Creates an unchecked stepper over a session kernel. + pub fn new(session: &'a mut SessionKernel) -> Self { + Self { session } + } + + /// Applies one joint-action step. + pub fn step(&mut self, actions: &G::JointActionBuf) -> &StepOutcome { + self.session.step_with_joint_actions(actions) + } +} + +/// Wrapper that executes checked kernel steps with contract assertions. +pub struct CheckedStepper<'a, G: Game, H: HistoryStore> { + session: &'a mut SessionKernel, +} + +impl<'a, G: Game, H: HistoryStore> CheckedStepper<'a, G, H> { + /// Creates a checked stepper over a session kernel. + pub fn new(session: &'a mut SessionKernel) -> Self { + Self { session } + } + + /// Applies one checked joint-action step. 
+ pub fn step(&mut self, actions: &G::JointActionBuf) -> &StepOutcome { + self.session.step_with_joint_actions_checked(actions) + } +} diff --git a/src/game.rs b/src/game.rs index 7f8e1c7..6069e45 100644 --- a/src/game.rs +++ b/src/game.rs @@ -1,18 +1,33 @@ +//! Core game trait defining state transitions, observations, and compact codecs. + use core::fmt::Debug; use core::hash::Hash; use crate::buffer::Buffer; +use crate::compact::{CompactError, CompactSpec}; use crate::rng::DeterministicRng; use crate::types::{PlayerAction, PlayerId, PlayerReward, Seed, StepOutcome}; +/// Deterministic game contract used by the session kernel. +/// +/// Implementations provide pure state transition logic plus compact codec hooks +/// for actions and observations. pub trait Game { + /// Concrete game state. type State: Clone + Debug + Default + Eq + Hash + PartialEq; + /// Atomic player action type. type Action: Clone + Copy + Debug + Default + Eq + Hash + PartialEq; + /// Per-player observation type. type PlayerObservation: Clone + Debug + Default + Eq + PartialEq; + /// Spectator observation type. type SpectatorObservation: Clone + Debug + Default + Eq + PartialEq; + /// Render/debug world view type. type WorldView: Clone + Debug + Default + Eq + PartialEq; + /// Buffer type for active-player lists. type PlayerBuf: Buffer + Clone + Debug + Default + Eq + Hash + PartialEq; + /// Buffer type for legal actions. type ActionBuf: Buffer + Clone + Debug + Default + Eq + Hash + PartialEq; + /// Buffer type for joint actions. type JointActionBuf: Buffer> + Clone + Debug @@ -20,18 +35,30 @@ pub trait Game { + Eq + Hash + PartialEq; + /// Buffer type for per-player rewards. type RewardBuf: Buffer + Clone + Debug + Default + Eq + Hash + PartialEq; + /// Buffer type for compact observation words. type WordBuf: Buffer + Clone + Debug + Default + Eq + Hash + PartialEq; + /// Stable machine-readable game name. fn name(&self) -> &'static str; + /// Total number of players in the game. 
fn player_count(&self) -> usize; + /// Initialize the deterministic state from a seed. fn init(&self, seed: Seed) -> Self::State; + /// Whether the state is terminal. fn is_terminal(&self, state: &Self::State) -> bool; + /// Emit active players for the current tick. fn players_to_act(&self, state: &Self::State, out: &mut Self::PlayerBuf); + /// Emit legal actions for a player in the current state. fn legal_actions(&self, state: &Self::State, player: PlayerId, out: &mut Self::ActionBuf); + /// Build a player-scoped observation. fn observe_player(&self, state: &Self::State, player: PlayerId) -> Self::PlayerObservation; + /// Build a spectator observation. fn observe_spectator(&self, state: &Self::State) -> Self::SpectatorObservation; + /// Build a world/debug view consumed by render and tooling. fn world_view(&self, state: &Self::State) -> Self::WorldView; + /// Apply one transition in-place. fn step_in_place( &self, state: &mut Self::State, @@ -40,14 +67,90 @@ pub trait Game { out: &mut StepOutcome, ); + /// Compact codec descriptor for actions, observations and rewards. + fn compact_spec(&self) -> CompactSpec { + CompactSpec { + action_count: 0, + observation_bits: 0, + observation_stream_len: 0, + reward_bits: 1, + min_reward: 0, + max_reward: 0, + reward_offset: 0, + } + } + + /// Encode an action into its compact integer representation. + fn encode_action(&self, _action: &Self::Action) -> u64 { + 0 + } + + /// Decode a compact action value. + fn decode_action(&self, _encoded: u64) -> Option { + None + } + + /// Checked action decoding helper that yields a structured error. + fn decode_action_checked(&self, encoded: u64) -> Result { + self.decode_action(encoded) + .ok_or(CompactError::InvalidActionEncoding { encoded }) + } + + /// Encode a player observation into compact words. 
+ fn encode_player_observation( + &self, + observation: &Self::PlayerObservation, + out: &mut Self::WordBuf, + ) { + let _ = observation; + out.clear(); + } + + /// Encode a spectator observation into compact words. + fn encode_spectator_observation( + &self, + observation: &Self::SpectatorObservation, + out: &mut Self::WordBuf, + ) { + let _ = observation; + out.clear(); + } + + /// Convenience helper that observes a player and encodes the result. + fn encode_player_view(&self, state: &Self::State, player: PlayerId, out: &mut Self::WordBuf) { + let observation = self.observe_player(state, player); + self.encode_player_observation(&observation, out); + } + + /// Validate compact observation shape against the declared compact spec. + fn compact_invariant(&self, words: &Self::WordBuf) -> bool { + let spec = self.compact_spec(); + if words.len() != spec.observation_stream_len { + return false; + } + let max_value = spec.max_observation_value(); + let slice = words.as_slice(); + let mut index = 0usize; + while index < slice.len() { + if slice[index] > max_value { + return false; + } + index += 1; + } + true + } + + /// State invariant used by checked stepping and proof helpers. fn state_invariant(&self, _state: &Self::State) -> bool { true } + /// Action invariant used by checked stepping and proof helpers. fn action_invariant(&self, _action: &Self::Action) -> bool { true } + /// Invariant for player observations. fn player_observation_invariant( &self, _state: &Self::State, @@ -57,6 +160,7 @@ pub trait Game { true } + /// Invariant for spectator observations. fn spectator_observation_invariant( &self, _state: &Self::State, @@ -65,10 +169,12 @@ pub trait Game { true } + /// Invariant for world/debug views. fn world_view_invariant(&self, _state: &Self::State, _world: &Self::WorldView) -> bool { true } + /// Transition postcondition checked in instrumented stepping. 
fn transition_postcondition( &self, _pre: &Self::State, @@ -79,10 +185,12 @@ pub trait Game { true } + /// Maximum supported player count from buffer capacity. fn max_players(&self) -> usize { ::CAPACITY } + /// Convenience legality query backed by `legal_actions`. fn is_action_legal( &self, state: &Self::State, diff --git a/src/games/mod.rs b/src/games/mod.rs deleted file mode 100644 index d92d2c0..0000000 --- a/src/games/mod.rs +++ /dev/null @@ -1,17 +0,0 @@ -pub mod blackjack; -#[cfg(feature = "physics")] -pub mod platformer; -pub mod tictactoe; - -pub use blackjack::{ - Blackjack, BlackjackAction, BlackjackObservation, BlackjackPhase, - BlackjackSpectatorObservation, BlackjackWorldView, -}; -#[cfg(feature = "physics")] -pub use platformer::{ - BerryView, Platformer, PlatformerAction, PlatformerConfig, PlatformerObservation, - PlatformerWorldView, -}; -pub use tictactoe::{ - TicTacToe, TicTacToeAction, TicTacToeCell, TicTacToeObservation, TicTacToeWorldView, -}; diff --git a/src/lib.rs b/src/lib.rs index 3ef5e68..0fa0265 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,8 +1,16 @@ +//! Deterministic game engine core with compact codecs, verification hooks, and render adapters. 
+ +pub mod core; +#[cfg(feature = "builtin")] +pub mod registry; +#[cfg(feature = "proof")] +pub mod proof; + pub mod buffer; pub mod compact; pub mod game; -#[cfg(feature = "builtin-games")] -pub mod games; +#[cfg(feature = "builtin")] +pub mod builtin; pub mod math; #[cfg(feature = "parallel")] pub mod parallel; @@ -15,9 +23,11 @@ pub mod rng; pub mod session; pub mod types; pub mod verification; +#[cfg(feature = "cli")] +pub mod cli; pub use buffer::{BitWords, Buffer, CapacityError, FixedVec}; -pub use compact::{CompactGame, CompactSpec}; +pub use compact::CompactSpec; pub use game::Game; pub use policy::{FirstLegalPolicy, FnPolicy, Policy, RandomPolicy, ScriptedPolicy}; pub use rng::{DeterministicRng, SplitMix64}; diff --git a/src/math.rs b/src/math.rs index 61fcca5..977a713 100644 --- a/src/math.rs +++ b/src/math.rs @@ -1,13 +1,19 @@ +//! Deterministic math primitives used by simulation and rendering layers. + use std::cmp::Ordering; use std::ops::{Add, AddAssign, Div, Mul, Sub, SubAssign}; +/// 2D vector. #[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)] pub struct Vec2 { + /// X coordinate. pub x: T, + /// Y coordinate. pub y: T, } impl Vec2 { + /// Creates a 2D vector. pub const fn new(x: T, y: T) -> Self { Self { x, y } } @@ -55,14 +61,19 @@ where } } +/// 3D vector. #[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)] pub struct Vec3 { + /// X coordinate. pub x: T, + /// Y coordinate. pub y: T, + /// Z coordinate. pub z: T, } impl Vec3 { + /// Creates a 3D vector. pub const fn new(x: T, y: T, z: T) -> Self { Self { x, y, z } } @@ -90,9 +101,12 @@ where } } +/// Axis-aligned bounding box in 2D. #[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)] pub struct Aabb2 { + /// Minimum corner. pub min: Vec2, + /// Maximum corner. pub max: Vec2, } @@ -100,10 +114,12 @@ impl Aabb2 where T: Copy + Ord, { + /// Creates a 2D AABB. 
pub const fn new(min: Vec2, max: Vec2) -> Self { Self { min, max } } + /// Returns whether `point` is inside or on bounds. pub fn contains(&self, point: Vec2) -> bool { point.x >= self.min.x && point.x <= self.max.x @@ -111,6 +127,7 @@ where && point.y <= self.max.y } + /// Returns whether this AABB intersects `other`. pub fn intersects(&self, other: &Self) -> bool { self.min.x <= other.max.x && self.max.x >= other.min.x @@ -119,9 +136,12 @@ where } } +/// Axis-aligned bounding box in 3D. #[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)] pub struct Aabb3 { + /// Minimum corner. pub min: Vec3, + /// Maximum corner. pub max: Vec3, } @@ -129,10 +149,12 @@ impl Aabb3 where T: Copy + Ord, { + /// Creates a 3D AABB. pub const fn new(min: Vec3, max: Vec3) -> Self { Self { min, max } } + /// Returns whether `point` is inside or on bounds. pub fn contains(&self, point: Vec3) -> bool { point.x >= self.min.x && point.x <= self.max.x @@ -142,6 +164,7 @@ where && point.z <= self.max.z } + /// Returns whether this AABB intersects `other`. pub fn intersects(&self, other: &Self) -> bool { self.min.x <= other.max.x && self.max.x >= other.min.x @@ -152,30 +175,36 @@ where } } +/// Fixed-point numeric wrapper with `FRACTION_BITS` fractional bits. #[derive(Clone, Copy, Debug, Default, Eq, Hash, Ord, PartialEq, PartialOrd)] pub struct Fixed { raw: i64, } impl Fixed { + /// Creates a fixed-point value from raw representation. pub const fn from_raw(raw: i64) -> Self { Self { raw } } + /// Creates a fixed-point value from integer input. pub const fn from_int(value: i64) -> Self { Self { raw: value << FRACTION_BITS, } } + /// Returns raw fixed-point representation. pub const fn raw(self) -> i64 { self.raw } + /// Floors value toward negative infinity and returns integer part. pub const fn floor_to_int(self) -> i64 { self.raw >> FRACTION_BITS } + /// Converts to `f64`. 
pub fn to_f64(self) -> f64 { self.raw as f64 / ((1u64 << FRACTION_BITS) as f64) } @@ -215,59 +244,71 @@ impl Div for Fixed { } } +/// `f32` wrapper with deterministic bitwise equality/hash semantics. #[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] pub struct StrictF32 { bits: u32, } impl StrictF32 { + /// Creates from raw IEEE-754 bits. pub const fn from_bits(bits: u32) -> Self { Self { bits } } + /// Creates from floating value by preserving raw bits. pub fn new(value: f32) -> Self { Self { bits: value.to_bits(), } } + /// Returns raw IEEE-754 bits. pub const fn to_bits(self) -> u32 { self.bits } + /// Converts to `f32`. pub fn to_f32(self) -> f32 { f32::from_bits(self.bits) } } +/// `f64` wrapper with deterministic total ordering and bitwise equality. #[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)] pub struct StrictF64 { bits: u64, } impl StrictF64 { + /// Creates from raw IEEE-754 bits. pub const fn from_bits(bits: u64) -> Self { Self { bits } } + /// Creates from floating value by preserving raw bits. pub fn new(value: f64) -> Self { Self { bits: value.to_bits(), } } + /// Returns raw IEEE-754 bits. pub const fn to_bits(self) -> u64 { self.bits } + /// Converts to `f64`. pub fn to_f64(self) -> f64 { f64::from_bits(self.bits) } + /// Returns whether value is finite. pub fn is_finite(self) -> bool { self.to_f64().is_finite() } + /// Clamps this value to `[min, max]`. pub fn clamp(self, min: Self, max: Self) -> Self { let value = self.to_f64().clamp(min.to_f64(), max.to_f64()); Self::new(value) diff --git a/src/parallel.rs b/src/parallel.rs index f3b34d0..f07e0ec 100644 --- a/src/parallel.rs +++ b/src/parallel.rs @@ -1,15 +1,19 @@ +//! Parallel deterministic replay helpers. 
+ use rayon::prelude::*; use crate::game::Game; -use crate::session::Session; -use crate::types::{PlayerAction, ReplayTrace, Seed}; +use crate::session::InteractiveSession; +use crate::types::{DynamicReplayTrace, PlayerAction, Seed}; +/// Sequence of staged joint actions used for one replay execution. pub type JointActionTrace = Vec>>; +/// Replays many deterministic traces in parallel and returns resulting replay traces. pub fn replay_many( game: &G, traces: &[(Seed, JointActionTrace)], -) -> Vec> +) -> Vec> where G: Game + Copy + Send + Sync, G::Action: Send + Sync, @@ -19,7 +23,7 @@ where traces .par_iter() .map(|(seed, steps)| { - let mut session = Session::new(*game, *seed); + let mut session = InteractiveSession::new(*game, *seed); for step in steps { if session.is_terminal() { break; diff --git a/src/physics.rs b/src/physics.rs index e587d8b..1c69b38 100644 --- a/src/physics.rs +++ b/src/physics.rs @@ -1,25 +1,38 @@ +//! Fixed-capacity deterministic 2D AABB physics world primitives. + use crate::buffer::FixedVec; use crate::math::{Aabb2, StrictF64, Vec2}; use crate::types::Tick; +/// Physics body behavior mode. #[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)] pub enum BodyKind { + /// Non-moving collidable body. #[default] Static, + /// Externally controlled moving body. Kinematic, + /// Contact-only body that does not block movement. Trigger, } +/// One body in the 2D physics world. #[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)] pub struct PhysicsBody2d { + /// Stable body identifier. pub id: u16, + /// Body behavior kind. pub kind: BodyKind, + /// Body center position. pub position: Vec2, + /// Half extents of the AABB shape. pub half_extents: Vec2, + /// Whether the body participates in contacts. pub active: bool, } impl PhysicsBody2d { + /// Returns body axis-aligned bounding box. 
pub fn aabb(&self) -> Aabb2 { Aabb2::new( self.position - self.half_extents, @@ -27,6 +40,7 @@ impl PhysicsBody2d { ) } + /// Returns whether body geometry is finite and non-negative sized. pub fn invariant(&self) -> bool { self.position.x.is_finite() && self.position.y.is_finite() @@ -37,28 +51,42 @@ impl PhysicsBody2d { } } +/// Contact pair represented by sorted body ids. #[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)] pub struct Contact2d { + /// Lower body id. pub a: u16, + /// Higher body id. pub b: u16, } +/// Read-only physics world oracle view. pub trait PhysicsOracleView2d { + /// Returns world bounds. fn bounds(&self) -> Aabb2; + /// Returns current world tick. fn tick(&self) -> Tick; + /// Returns active body storage slice. fn bodies(&self) -> &[PhysicsBody2d]; + /// Returns cached contact pairs. fn contacts(&self) -> &[Contact2d]; } +/// Deterministic 2D AABB world with fixed-capacity storage. #[derive(Clone, Debug, Eq, Hash, PartialEq)] pub struct PhysicsWorld2d { + /// World bounds used for clamping bodies. pub bounds: Aabb2, + /// Bodies sorted by id. pub bodies: FixedVec, + /// Cached sorted contact pairs. pub contacts: FixedVec, + /// Simulation tick. pub tick: Tick, } impl PhysicsWorld2d { + /// Creates an empty world with specified bounds. pub fn new(bounds: Aabb2) -> Self { Self { bounds, @@ -68,6 +96,7 @@ impl PhysicsWorld2d bool { if !self.bounds.min.x.is_finite() || !self.bounds.min.y.is_finite() @@ -112,6 +141,7 @@ impl PhysicsWorld2d PhysicsWorld2d Option<&PhysicsBody2d> { let bodies = self.bodies.as_slice(); let mut index = 0usize; @@ -144,10 +175,12 @@ impl PhysicsWorld2d &PhysicsBody2d { self.body(id).expect("missing physics body") } + /// Returns mutable body by id. 
pub fn body_mut(&mut self, id: u16) -> Option<&mut PhysicsBody2d> { let bodies = self.bodies.as_mut_slice(); let mut index = 0usize; @@ -160,6 +193,7 @@ impl PhysicsWorld2d PhysicsWorld2d) { self.set_body_position_deferred(id, position); self.refresh_contacts(); @@ -183,6 +218,7 @@ impl PhysicsWorld2d) { self.translate_body_deferred(id, delta); self.refresh_contacts(); @@ -195,11 +231,13 @@ impl PhysicsWorld2d bool { let (left, right) = if a <= b { (a, b) } else { (b, a) }; let contacts = self.contacts.as_slice(); @@ -229,21 +267,109 @@ impl PhysicsWorld2d= body_min_x { + active[write] = active_index; + write += 1; } - if intersects(bodies[left].aabb(), bodies[right].aabb()) { + read += 1; + } + active_len = write; + + let mut active_index = 0usize; + while active_index < active_len { + let other_index = active[active_index]; + if intersects(aabbs[body_index], aabbs[other_index]) { + let (a, b) = if bodies[other_index].id <= bodies[body_index].id { + (bodies[other_index].id, bodies[body_index].id) + } else { + (bodies[body_index].id, bodies[other_index].id) + }; self.contacts - .push(Contact2d { - a: bodies[left].id, - b: bodies[right].id, - }) + .push(Contact2d { a, b }) .expect("physics contact capacity exceeded"); } + active_index += 1; } + + active[active_len] = body_index; + active_len += 1; + sorted_index += 1; } + + self.contacts.as_mut_slice().sort_by_key(|contact| (contact.a, contact.b)); } } @@ -267,6 +393,44 @@ impl PhysicsOracleView2d } } +/// Synchronize a contiguous trigger-id range to `active_mask` bits without refreshing contacts. 
+pub fn set_trigger_mask_deferred( + world: &mut PhysicsWorld2d, + first_trigger_id: u16, + trigger_count: usize, + active_mask: u64, +) { + let mut index = 0usize; + while index < trigger_count { + let active = (active_mask & (1u64 << index)) != 0; + world.set_body_active_deferred(first_trigger_id + index as u16, active); + index += 1; + } +} + +/// Collect active trigger bits contacted by `actor_id`, deactivating collected trigger bodies. +pub fn collect_actor_trigger_contacts( + world: &mut PhysicsWorld2d, + actor_id: u16, + first_trigger_id: u16, + trigger_count: usize, + remaining_mask: &mut u64, +) -> u8 { + let mut collected = 0u8; + let mut index = 0usize; + while index < trigger_count { + let bit = 1u64 << index; + let trigger_id = first_trigger_id + index as u16; + if (*remaining_mask & bit) != 0 && world.has_contact(actor_id, trigger_id) { + *remaining_mask &= !bit; + world.set_body_active(trigger_id, false); + collected += 1; + } + index += 1; + } + collected +} + fn intersects(left: Aabb2, right: Aabb2) -> bool { left.min.x <= right.max.x && left.max.x >= right.min.x diff --git a/src/policy.rs b/src/policy.rs index 80c66ba..b19d2ee 100644 --- a/src/policy.rs +++ b/src/policy.rs @@ -1,10 +1,14 @@ +//! Policy interfaces and builtin policy strategies. + use std::marker::PhantomData; use crate::game::Game; use crate::rng::DeterministicRng; use crate::types::PlayerId; +/// Policy interface for selecting actions for active players. pub trait Policy { + /// Chooses one legal action for `player`. fn choose_action( &mut self, game: &G, @@ -16,6 +20,7 @@ pub trait Policy { ) -> G::Action; } +/// Deterministic policy that always selects the first legal action. #[derive(Clone, Copy, Debug, Default)] pub struct FirstLegalPolicy; @@ -36,6 +41,7 @@ impl Policy for FirstLegalPolicy { } } +/// Uniform-random policy over legal actions. 
#[derive(Clone, Copy, Debug, Default)] pub struct RandomPolicy; @@ -54,17 +60,30 @@ impl Policy for RandomPolicy { } } +/// Deterministic scripted policy with fallback to first legal action. #[derive(Clone, Debug)] pub struct ScriptedPolicy { script: Vec, position: usize, + strict: bool, } impl ScriptedPolicy { + /// Creates a scripted policy from a full action script. pub fn new(script: Vec) -> Self { Self { script, position: 0, + strict: false, + } + } + + /// Creates a strict scripted policy that fails fast on illegal or exhausted scripts. + pub fn new_strict(script: Vec) -> Self { + Self { + script, + position: 0, + strict: true, } } } @@ -87,6 +106,17 @@ where if legal_actions.contains(action) { return *action; } + if self.strict { + panic!( + "strict scripted policy action at index {} is illegal for current state", + self.position - 1 + ); + } + } else if self.strict { + panic!( + "strict scripted policy exhausted at index {}", + self.position + ); } legal_actions .first() @@ -95,12 +125,14 @@ where } } +/// Policy adapter built from a closure. pub struct FnPolicy { f: F, _marker: PhantomData, } impl FnPolicy { + /// Creates a closure-backed policy. pub fn new(f: F) -> Self { Self { f, diff --git a/src/proof/mod.rs b/src/proof/mod.rs new file mode 100644 index 0000000..ee6aadb --- /dev/null +++ b/src/proof/mod.rs @@ -0,0 +1,26 @@ +//! Proof-facing assertions and proof-claim surface exported by the crate. + +use crate::buffer::Buffer; +use crate::game::Game; +use crate::types::Seed; + +/// Human-readable statement of the current proof obligations. +pub const PROOF_CLAIM: &str = include_str!("../../proofs/README.md"); + +pub use crate::verification::{ + assert_compact_roundtrip, assert_observation_contracts, assert_transition_contracts, +}; + +/// Runs the canonical generated-game proof surface checks. 
+pub fn assert_generated_game_surface( + game: &G, + state: &G::State, + actions: &G::JointActionBuf, + seed: Seed, +) { + assert_transition_contracts(game, state, actions, seed); + assert_observation_contracts(game, state); + if let Some(first) = actions.as_slice().first() { + assert_compact_roundtrip(game, &first.action); + } +} diff --git a/src/registry/mod.rs b/src/registry/mod.rs new file mode 100644 index 0000000..9240ca4 --- /dev/null +++ b/src/registry/mod.rs @@ -0,0 +1,138 @@ +//! Static registry describing builtin games and policy metadata. + +/// Statically known builtin game kind. +#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] +pub enum GameKind { + /// Deterministic tic-tac-toe. + TicTacToe, + /// Deterministic blackjack. + Blackjack, + /// Deterministic physics-backed platformer. + #[cfg(feature = "physics")] + Platformer, +} + +/// Policy metadata surfaced by CLI and UI. +#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] +pub struct PolicyDescriptor { + /// Stable policy identifier. + pub name: &'static str, + /// Human-facing policy description. + pub description: &'static str, +} + +/// Control prompt metadata for interactive play. +#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] +pub struct ControlMap { + /// Human input prompt shown by the CLI. + pub prompt: &'static str, +} + +/// Full static descriptor for one builtin game. +#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] +pub struct GameDescriptor { + /// Internal game discriminator. + pub kind: GameKind, + /// Stable external game name. + pub name: &'static str, + /// Optional controls metadata for interactive frontends. + pub controls: Option<&'static ControlMap>, + /// True when the default renderer supports this game. + pub default_renderer: bool, + /// True when the physics renderer supports this game. + pub physics_renderer: bool, + /// Supported policy descriptors. 
+ pub policies: &'static [PolicyDescriptor], +} + +const STANDARD_POLICIES: [PolicyDescriptor; 4] = [ + PolicyDescriptor { + name: "human", + description: "Interactive stdin policy", + }, + PolicyDescriptor { + name: "random", + description: "Uniform random legal actions", + }, + PolicyDescriptor { + name: "first", + description: "Always pick the first legal action", + }, + PolicyDescriptor { + name: "script:", + description: "Comma-separated deterministic action script", + }, +]; + +const TICTACTOE_CONTROLS: ControlMap = ControlMap { + prompt: "choose move [0-8]", +}; +const BLACKJACK_CONTROLS: ControlMap = ControlMap { + prompt: "choose action [hit/stand]", +}; +#[cfg(feature = "physics")] +const PLATFORMER_CONTROLS: ControlMap = ControlMap { + prompt: "choose action [stay/left/right/jump]", +}; + +/// Returns all builtin game descriptors enabled for the current feature set. +pub fn all_games() -> &'static [GameDescriptor] { + #[cfg(feature = "physics")] + { + static GAMES: [GameDescriptor; 3] = [ + GameDescriptor { + kind: GameKind::TicTacToe, + name: "tictactoe", + controls: Some(&TICTACTOE_CONTROLS), + default_renderer: cfg!(feature = "render"), + physics_renderer: false, + policies: &STANDARD_POLICIES, + }, + GameDescriptor { + kind: GameKind::Blackjack, + name: "blackjack", + controls: Some(&BLACKJACK_CONTROLS), + default_renderer: cfg!(feature = "render"), + physics_renderer: false, + policies: &STANDARD_POLICIES, + }, + GameDescriptor { + kind: GameKind::Platformer, + name: "platformer", + controls: Some(&PLATFORMER_CONTROLS), + default_renderer: cfg!(feature = "render"), + physics_renderer: cfg!(feature = "render"), + policies: &STANDARD_POLICIES, + }, + ]; + &GAMES + } + + #[cfg(not(feature = "physics"))] + { + static GAMES: [GameDescriptor; 2] = [ + GameDescriptor { + kind: GameKind::TicTacToe, + name: "tictactoe", + controls: Some(&TICTACTOE_CONTROLS), + default_renderer: cfg!(feature = "render"), + physics_renderer: false, + policies: 
&STANDARD_POLICIES, + }, + GameDescriptor { + kind: GameKind::Blackjack, + name: "blackjack", + controls: Some(&BLACKJACK_CONTROLS), + default_renderer: cfg!(feature = "render"), + physics_renderer: false, + policies: &STANDARD_POLICIES, + }, + ]; + &GAMES + } +} + +/// Finds a builtin game descriptor by stable name. +pub fn find_game(name: &str) -> Option<&'static GameDescriptor> { + all_games().iter().find(|descriptor| descriptor.name == name) +} diff --git a/src/render/builtin.rs b/src/render/builtin.rs index 9abe59d..203dd93 100644 --- a/src/render/builtin.rs +++ b/src/render/builtin.rs @@ -1,12 +1,14 @@ +//! Builtin presenters for builtin environments. + use winit::event::{ElementState, MouseButton, WindowEvent}; use winit::keyboard::{KeyCode, PhysicalKey}; -use crate::games::{ +use crate::builtin::{ Blackjack, BlackjackAction, BlackjackObservation, BlackjackPhase, TicTacToe, TicTacToeAction, TicTacToeCell, TicTacToeObservation, }; #[cfg(feature = "physics")] -use crate::games::{Platformer, PlatformerAction, PlatformerConfig, PlatformerObservation}; +use crate::builtin::{Platformer, PlatformerAction, PlatformerConfig, PlatformerObservation}; #[cfg(feature = "physics")] use crate::physics::PhysicsOracleView2d; @@ -29,6 +31,7 @@ const DANGER: Color = Color::from_rgb8(248, 113, 113); const TEXT: Color = Color::from_rgb8(241, 245, 249); const MUTED: Color = Color::from_rgb8(148, 163, 184); +/// Observation presenter for tic-tac-toe. #[derive(Clone, Copy, Debug, Default)] pub struct TicTacToePresenter { cursor: Point2, @@ -224,6 +227,7 @@ impl Presenter for TicTacToePresenter { impl ObservationPresenter for TicTacToePresenter {} +/// Observation presenter for blackjack. #[derive(Clone, Copy, Debug, Default)] pub struct BlackjackPresenter { cursor: Point2, @@ -393,8 +397,10 @@ impl Presenter for BlackjackPresenter { impl ObservationPresenter for BlackjackPresenter {} #[cfg(feature = "physics")] +/// Observation presenter for platformer. 
#[derive(Clone, Copy, Debug)] pub struct PlatformerPresenter { + /// Platformer configuration used for scene scaling. pub config: PlatformerConfig, cursor: Point2, left_held: bool, @@ -556,6 +562,7 @@ impl Presenter for PlatformerPresenter { impl ObservationPresenter for PlatformerPresenter {} #[cfg(feature = "physics")] +/// Oracle/world presenter for platformer physics debugging. #[derive(Clone, Copy, Debug, Default)] pub struct PlatformerPhysicsPresenter { inner: PlatformerPresenter, @@ -563,6 +570,7 @@ pub struct PlatformerPhysicsPresenter { #[cfg(feature = "physics")] impl PlatformerPhysicsPresenter { + /// Creates a physics presenter with explicit platformer config. pub fn new(config: PlatformerConfig) -> Self { Self { inner: PlatformerPresenter { @@ -793,7 +801,7 @@ mod tests { use super::{ BlackjackPresenter, PlatformerPhysicsPresenter, PlatformerPresenter, TicTacToePresenter, }; - use crate::games::{Blackjack, Platformer, TicTacToe}; + use crate::builtin::{Blackjack, Platformer, TicTacToe}; use crate::render::{ FrameMetrics, Presenter, RealtimeDriver, RenderGameView, Scene2d, TickDriver, TurnBasedDriver, @@ -846,7 +854,7 @@ mod tests { #[test] fn platformer_presenters_emit_geometry() { let session = Session::new(Platformer::default(), 1); - let driver = RealtimeDriver::new(session, crate::games::PlatformerAction::Stay); + let driver = RealtimeDriver::new(session, crate::builtin::PlatformerAction::Stay); let metrics = FrameMetrics { width: 1180, height: 620, @@ -856,7 +864,7 @@ mod tests { let view = RenderGameView::from_cache(driver.session().game(), &cache); let mut observation_presenter = PlatformerPresenter::default(); let mut oracle_presenter = - PlatformerPhysicsPresenter::new(crate::games::PlatformerConfig::default()); + PlatformerPhysicsPresenter::new(crate::builtin::PlatformerConfig::default()); let mut observation_scene = Scene2d::default(); let mut oracle_scene = Scene2d::default(); observation_presenter.populate_scene(&mut observation_scene, 
metrics, &view); diff --git a/src/render/mod.rs b/src/render/mod.rs index 0588a48..f8a6d04 100644 --- a/src/render/mod.rs +++ b/src/render/mod.rs @@ -1,8 +1,10 @@ +//! Rendering subsystem exposing scene commands, presenters, and runtime loop. + mod pacer; mod runtime; mod scene; -#[cfg(feature = "builtin-games")] +#[cfg(feature = "builtin")] pub mod builtin; pub use pacer::TickPacer; diff --git a/src/render/pacer.rs b/src/render/pacer.rs index 3e3ae02..6cd263b 100644 --- a/src/render/pacer.rs +++ b/src/render/pacer.rs @@ -1,5 +1,8 @@ +//! Wall-clock to simulation-tick pacing helper. + use std::time::Instant; +/// Converts wall-clock frame deltas into bounded simulation tick counts. #[derive(Clone, Debug)] pub struct TickPacer { tick_period_seconds: f64, diff --git a/src/render/runtime.rs b/src/render/runtime.rs index 47d7fd6..fa172a6 100644 --- a/src/render/runtime.rs +++ b/src/render/runtime.rs @@ -1,3 +1,5 @@ +//! Runtime renderer abstractions, drivers, and native window integration. + use std::fmt; #[cfg(not(target_arch = "wasm32"))] use std::mem; @@ -71,20 +73,31 @@ fn fs_main(input: VertexOutput) -> @location(0) vec4 { } "#; +/// Presentation mode used by render presenters. #[derive(Clone, Copy, Debug, Eq, PartialEq)] pub enum RenderMode { + /// Player-observation-oriented presentation. Observation, + /// Full oracle/world-view presentation. OracleWorld, } +/// Renderer timing and window configuration. #[derive(Clone, Copy, Debug)] pub struct RenderConfig { + /// Target simulation tick rate. pub tick_rate_hz: f64, + /// Maximum simulation ticks processed per frame. pub max_catch_up_ticks: usize, + /// Enables display vsync when true. pub vsync: bool, + /// Enables debug overlay panel. pub show_debug_overlay: bool, + /// Presenter mode selector. pub mode: RenderMode, + /// Initial window width in pixels. pub window_width: u32, + /// Initial window height in pixels. 
pub window_height: u32, } @@ -102,39 +115,58 @@ impl Default for RenderConfig { } } +/// Per-frame viewport metrics supplied to presenters. #[derive(Clone, Copy, Debug, Default)] pub struct FrameMetrics { + /// Drawable width in pixels. pub width: u32, + /// Drawable height in pixels. pub height: u32, + /// Platform scale factor. pub scale_factor: f64, } +/// Action stream command consumed by runtime drivers. #[derive(Clone, Copy, Debug, Eq, PartialEq)] pub enum ActionCommand { + /// Submit a one-shot action for the next tick. Pulse(A), + /// Set a continuous action held across ticks. SetContinuous(A), + /// Clear the current continuous action. ClearContinuous, } +/// Sink for presenter-generated input commands. pub trait ActionSink { + /// Submits an input command to the driver. fn submit_command(&mut self, command: ActionCommand); } +/// Simulation driver interface consumed by the renderer. pub trait TickDriver { + /// History backend used by the underlying session. type History: HistoryStore; + /// Returns immutable access to the current session. fn session(&self) -> &SessionKernel; + /// Returns most recent transition outcome, if any. fn last_outcome(&self) -> Option<&StepOutcome>; + /// Advances simulation by up to `due_ticks`. fn advance_ticks(&mut self, due_ticks: usize); } +/// Presenter contract for translating game state into scene commands. pub trait Presenter { + /// Returns window title text. fn title(&self, game: &G) -> String; + /// Preferred initial window size. fn preferred_window_size(&self) -> (u32, u32) { (960, 640) } + /// Handles one window/input event. fn on_window_event( &mut self, event: &WindowEvent, @@ -143,6 +175,7 @@ pub trait Presenter { actions: &mut dyn ActionSink, ); + /// Populates scene commands for the current frame. fn populate_scene( &mut self, scene: &mut Scene2d, @@ -151,8 +184,10 @@ pub trait Presenter { ); } +/// Marker trait for observation-mode presenters. 
pub trait ObservationPresenter: Presenter {} +/// Marker trait for oracle/world-mode presenters. pub trait OraclePresenter: Presenter {} #[derive(Debug)] @@ -197,6 +232,7 @@ impl ViewCache { } } +/// Read-only frame view combining game descriptor and cached session-derived data. pub struct RenderGameView<'a, G: Game> { game: &'a G, cache: &'a ViewCache, @@ -207,43 +243,53 @@ impl<'a, G: Game> RenderGameView<'a, G> { Self { game, cache } } + /// Returns game descriptor. pub fn game(&self) -> &'a G { self.game } + /// Returns current simulation tick. pub fn tick(&self) -> Tick { self.cache.tick } + /// Returns player-local observation. pub fn player_observation(&self) -> &G::PlayerObservation { &self.cache.player_observation } + /// Returns spectator observation. pub fn spectator_observation(&self) -> &G::SpectatorObservation { &self.cache.spectator_observation } + /// Returns world/oracle view. pub fn world_view(&self) -> &G::WorldView { &self.cache.world_view } + /// Returns previous world view when interpolation is active. pub fn previous_world_view(&self) -> Option<&G::WorldView> { self.cache.previous_world_view.as_ref() } + /// Returns most recent transition outcome. pub fn last_outcome(&self) -> Option<&StepOutcome> { self.cache.last_outcome.as_ref() } + /// Returns reward for `player` in the most recent outcome. pub fn reward_for(&self, player: usize) -> Reward { self.last_outcome() .map_or(0, |outcome| outcome.reward_for(player)) } + /// Returns whether current state is terminal. pub fn is_terminal(&self) -> bool { self.cache.is_terminal } + /// Returns interpolation alpha in `[0, 1]`. pub fn interpolation_alpha(&self) -> f32 { self.cache.interpolation_alpha } @@ -295,6 +341,7 @@ where scene } +/// Driver that advances only when explicit actions are provided. #[derive(Debug)] pub struct TurnBasedDriver> { session: SessionKernel, @@ -303,6 +350,7 @@ pub struct TurnBasedDriver> { } impl> TurnBasedDriver { + /// Creates a turn-based driver from a session. 
pub fn new(session: SessionKernel) -> Self { Self { session, @@ -348,6 +396,7 @@ impl> TickDriver for TurnBasedDriver { } } +/// Driver for realtime input with neutral and continuous actions. #[derive(Debug)] pub struct RealtimeDriver> { session: SessionKernel, @@ -358,6 +407,7 @@ pub struct RealtimeDriver> { } impl> RealtimeDriver { + /// Creates a realtime driver with a neutral fallback action. pub fn new(session: SessionKernel, neutral_action: G::Action) -> Self { Self { session, @@ -412,6 +462,7 @@ impl> TickDriver for RealtimeDriver { } } +/// Driver that advances using an internal policy, ignoring user input. #[derive(Debug)] pub struct PassivePolicyDriver, P: Policy> { session: SessionKernel, @@ -420,6 +471,7 @@ pub struct PassivePolicyDriver, P: Policy> { } impl, P: Policy> PassivePolicyDriver { + /// Creates a passive-policy driver. pub fn new(session: SessionKernel, policy: P) -> Self { Self { session, @@ -455,6 +507,7 @@ impl, P: Policy> TickDriver for PassivePolicyD } } +/// Error returned by native renderer setup or frame execution. #[derive(Debug)] pub struct RenderError { message: String, @@ -476,6 +529,7 @@ impl fmt::Display for RenderError { impl std::error::Error for RenderError {} +/// Top-level renderer application wrapper. pub struct RendererApp + ActionSink, P: Presenter> { config: RenderConfig, driver: D, @@ -484,6 +538,7 @@ pub struct RendererApp + ActionSink, P: Presenter + ActionSink, P: Presenter> RendererApp { + /// Creates a renderer application from config, driver, and presenter. pub fn new(config: RenderConfig, driver: D, presenter: P) -> Self { Self { config, @@ -498,6 +553,7 @@ impl + ActionSink, P: Presenter> RendererApp + ActionSink + 'static, P: Presenter + 'static> RendererApp { + /// Runs the native window event loop. 
pub fn run_native(self) -> Result<(), RenderError> { let event_loop = EventLoop::new().map_err(|error| RenderError::new(error.to_string()))?; let mut app = NativeApp::new(self.config, self.driver, self.presenter); @@ -509,6 +565,7 @@ impl + ActionSink + 'static, P: Presenter #[cfg(target_arch = "wasm32")] impl + ActionSink, P: Presenter> RendererApp { + /// Returns an error because native window rendering is unavailable on `wasm32`. pub fn run_native(self) -> Result<(), RenderError> { let RendererApp { config, @@ -619,6 +676,19 @@ struct WindowState + ActionSink, P: Presenter> { gpu: GpuState, } +#[cfg(not(target_arch = "wasm32"))] +#[derive(Default)] +struct QueuedActions { + commands: Vec>, +} + +#[cfg(not(target_arch = "wasm32"))] +impl ActionSink for QueuedActions { + fn submit_command(&mut self, command: ActionCommand) { + self.commands.push(command); + } +} + #[cfg(not(target_arch = "wasm32"))] impl + ActionSink, P: Presenter> WindowState { async fn new( @@ -661,28 +731,33 @@ impl + ActionSink, P: Presenter> WindowState::default(); + { + let view = RenderGameView::from_cache(self.driver.session().game(), &self.cache); + self.presenter + .on_window_event(event, metrics, &view, &mut queued); + } + for command in queued.commands { + self.driver.submit_command(command); + } self.request_redraw(); } @@ -699,11 +774,7 @@ impl + ActionSink, P: Presenter> WindowState, + text_order: Vec, + geometry_order: Vec, window: Arc, } @@ -891,6 +964,8 @@ impl GpuState { let text_renderer = TextRenderer::new(&mut atlas, &device, MultisampleState::default(), None); let text_buffers = Vec::with_capacity(16); + let text_order = Vec::with_capacity(16); + let geometry_order = Vec::with_capacity(128); surface_config.width = surface_config.width.max(1); surface_config.height = surface_config.height.max(1); @@ -912,6 +987,8 @@ impl GpuState { atlas, text_renderer, text_buffers, + text_order, + geometry_order, window, }) } @@ -1020,9 +1097,12 @@ impl GpuState { )); } - let mut texts = 
scene.texts.clone(); - texts.sort_by_key(|text| text.layer); - for (index, text) in texts.iter().enumerate() { + self.text_order.clear(); + self.text_order.extend(0..scene.texts.len()); + self.text_order.sort_by_key(|&index| scene.texts[index].layer); + + for (index, text_index) in self.text_order.iter().copied().enumerate() { + let text = &scene.texts[text_index]; let buffer = &mut self.text_buffers[index]; *buffer = GlyphBuffer::new( &mut self.font_system, @@ -1044,12 +1124,11 @@ impl GpuState { } let mut text_areas = Vec::with_capacity(scene.texts.len()); - for (index, text) in texts.iter().enumerate() { - // SAFETY: - // Each loop iteration accesses a distinct buffer slot by index, so the returned mutable - // references do not alias each other while `text_areas` is alive for the immediate - // `prepare` call below. - let buffer = unsafe { &mut *self.text_buffers.as_mut_ptr().add(index) }; + for (buffer, text_index) in self.text_buffers[..scene.texts.len()] + .iter_mut() + .zip(self.text_order.iter().copied()) + { + let text = &scene.texts[text_index]; text_areas.push(TextArea { buffer, left: text.position.x, @@ -1081,29 +1160,48 @@ impl GpuState { fn prepare_geometry(&mut self, scene: &Scene2d) { self.staging_vertices.clear(); - let mut geometry = Vec::with_capacity( + self.geometry_order.clear(); + self.geometry_order.reserve( scene.panels.len() + scene.lines.len() + scene.circles.len() + scene.textured_quads.len(), ); - for panel in &scene.panels { - geometry.push(GeometryPrimitive::Panel(panel)); + + for (index, panel) in scene.panels.iter().enumerate() { + self.geometry_order.push(GeometryOrderEntry { + layer: panel.layer, + kind: GeometryKind::Panel, + index, + }); } - for textured in &scene.textured_quads { - geometry.push(GeometryPrimitive::TexturedQuad(textured)); + for (index, textured) in scene.textured_quads.iter().enumerate() { + self.geometry_order.push(GeometryOrderEntry { + layer: textured.layer, + kind: GeometryKind::TexturedQuad, + index, + 
}); } - for line in &scene.lines { - geometry.push(GeometryPrimitive::Line(line)); + for (index, line) in scene.lines.iter().enumerate() { + self.geometry_order.push(GeometryOrderEntry { + layer: line.layer, + kind: GeometryKind::Line, + index, + }); } - for circle in &scene.circles { - geometry.push(GeometryPrimitive::Circle(circle)); + for (index, circle) in scene.circles.iter().enumerate() { + self.geometry_order.push(GeometryOrderEntry { + layer: circle.layer, + kind: GeometryKind::Circle, + index, + }); } - geometry.sort_by_key(GeometryPrimitive::layer); + self.geometry_order.sort_by_key(|entry| entry.layer); - for primitive in geometry { - match primitive { - GeometryPrimitive::Panel(panel) => { + for entry in &self.geometry_order { + match entry.kind { + GeometryKind::Panel => { + let panel = &scene.panels[entry.index]; push_rect( &mut self.staging_vertices, panel.rect, @@ -1122,7 +1220,8 @@ impl GpuState { ); } } - GeometryPrimitive::TexturedQuad(quad) => { + GeometryKind::TexturedQuad => { + let quad = &scene.textured_quads[entry.index]; // The render layer keeps the textured-quad command available for future sprite // pipelines. Until a texture atlas is bound, it degrades to a tinted panel. 
push_rect( @@ -1133,15 +1232,15 @@ impl GpuState { self.surface_config.height, ); } - GeometryPrimitive::Line(line) => push_line( + GeometryKind::Line => push_line( &mut self.staging_vertices, - *line, + scene.lines[entry.index], self.surface_config.width, self.surface_config.height, ), - GeometryPrimitive::Circle(circle) => push_circle( + GeometryKind::Circle => push_circle( &mut self.staging_vertices, - *circle, + scene.circles[entry.index], self.surface_config.width, self.surface_config.height, ), @@ -1166,23 +1265,20 @@ impl GpuState { } #[cfg(not(target_arch = "wasm32"))] -enum GeometryPrimitive<'a> { - Panel(&'a super::scene::PanelRegion), - TexturedQuad(&'a super::scene::TexturedQuad), - Line(&'a LineCommand), - Circle(&'a CircleCommand), +#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] +enum GeometryKind { + Panel, + TexturedQuad, + Line, + Circle, } #[cfg(not(target_arch = "wasm32"))] -impl GeometryPrimitive<'_> { - fn layer(&self) -> i32 { - match self { - Self::Panel(panel) => panel.layer, - Self::TexturedQuad(quad) => quad.layer, - Self::Line(line) => line.layer, - Self::Circle(circle) => circle.layer, - } - } +#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] +struct GeometryOrderEntry { + layer: i32, + kind: GeometryKind, + index: usize, } #[cfg(not(target_arch = "wasm32"))] diff --git a/src/render/scene.rs b/src/render/scene.rs index 2d480e8..57f228c 100644 --- a/src/render/scene.rs +++ b/src/render/scene.rs @@ -1,27 +1,40 @@ +//! Immediate-mode 2D scene command structures used by the renderer. + +/// RGBA color in normalized `[0, 1]` channels. #[derive(Clone, Copy, Debug, Default, PartialEq)] pub struct Color { + /// Red channel. pub r: f32, + /// Green channel. pub g: f32, + /// Blue channel. pub b: f32, + /// Alpha channel. pub a: f32, } impl Color { + /// Opaque white color. pub const WHITE: Self = Self::rgba(1.0, 1.0, 1.0, 1.0); + /// Opaque black color. 
pub const BLACK: Self = Self::rgba(0.0, 0.0, 0.0, 1.0); + /// Creates an opaque RGB color. pub const fn rgb(r: f32, g: f32, b: f32) -> Self { Self::rgba(r, g, b, 1.0) } + /// Creates an RGBA color. pub const fn rgba(r: f32, g: f32, b: f32, a: f32) -> Self { Self { r, g, b, a } } + /// Creates an opaque color from 8-bit channels. pub const fn from_rgb8(r: u8, g: u8, b: u8) -> Self { Self::from_rgba8(r, g, b, 255) } + /// Creates a color from 8-bit channels. pub const fn from_rgba8(r: u8, g: u8, b: u8, a: u8) -> Self { Self { r: r as f32 / 255.0, @@ -32,27 +45,37 @@ impl Color { } } +/// 2D point in screen space. #[derive(Clone, Copy, Debug, Default, PartialEq)] pub struct Point2 { + /// X coordinate. pub x: f32, + /// Y coordinate. pub y: f32, } impl Point2 { + /// Creates a point. pub const fn new(x: f32, y: f32) -> Self { Self { x, y } } } +/// Axis-aligned rectangle in screen space. #[derive(Clone, Copy, Debug, Default, PartialEq)] pub struct Rect { + /// Left coordinate. pub x: f32, + /// Top coordinate. pub y: f32, + /// Rectangle width. pub width: f32, + /// Rectangle height. pub height: f32, } impl Rect { + /// Creates a rectangle. pub const fn new(x: f32, y: f32, width: f32, height: f32) -> Self { Self { x, @@ -62,26 +85,32 @@ impl Rect { } } + /// Returns left edge. pub fn left(self) -> f32 { self.x } + /// Returns right edge. pub fn right(self) -> f32 { self.x + self.width } + /// Returns top edge. pub fn top(self) -> f32 { self.y } + /// Returns bottom edge. pub fn bottom(self) -> f32 { self.y + self.height } + /// Returns rectangle center. pub fn center(self) -> Point2 { Point2::new(self.x + self.width * 0.5, self.y + self.height * 0.5) } + /// Returns whether `point` lies inside the rectangle bounds. pub fn contains(self, point: Point2) -> bool { point.x >= self.left() && point.x <= self.right() @@ -90,68 +119,110 @@ impl Rect { } } +/// Handle to a texture resource. 
#[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)] pub struct TextureHandle(pub u32); +/// Filled panel draw command. #[derive(Clone, Copy, Debug, PartialEq)] pub struct PanelRegion { + /// Panel rectangle. pub rect: Rect, + /// Fill color. pub fill: Color, + /// Optional stroke `(color, thickness)`. pub stroke: Option<(Color, f32)>, + /// Layer ordering key. pub layer: i32, } +/// Thick line draw command. #[derive(Clone, Copy, Debug, PartialEq)] pub struct LineCommand { + /// Start point. pub start: Point2, + /// End point. pub end: Point2, + /// Line thickness in pixels. pub thickness: f32, + /// Line color. pub color: Color, + /// Layer ordering key. pub layer: i32, } +/// Filled circle draw command. #[derive(Clone, Copy, Debug, PartialEq)] pub struct CircleCommand { + /// Circle center. pub center: Point2, + /// Circle radius in pixels. pub radius: f32, + /// Fill color. pub color: Color, + /// Layer ordering key. pub layer: i32, } +/// Text draw command. #[derive(Clone, Debug, PartialEq)] pub struct FrameText { + /// Anchor position for text layout. pub position: Point2, + /// Text clipping/layout bounds. pub bounds: Rect, + /// UTF-8 content. pub content: String, + /// Font size in pixels. pub size: f32, + /// Text color. pub color: Color, + /// Layer ordering key. pub layer: i32, } +/// Textured rectangle command. #[derive(Clone, Copy, Debug, PartialEq)] pub struct TexturedQuad { + /// Destination rectangle. pub rect: Rect, + /// Source UV rectangle. pub uv_rect: Rect, + /// Texture handle. pub texture: TextureHandle, + /// Multiplicative tint color. pub tint: Color, + /// Layer ordering key. pub layer: i32, } +/// Input hit-test region metadata. #[derive(Clone, Copy, Debug, PartialEq)] pub struct HitRegion { + /// Stable region id. pub id: u64, + /// Hit-test bounds. pub rect: Rect, + /// Debug label. pub label: &'static str, } +/// Full frame scene command buffer. 
#[derive(Clone, Debug, PartialEq)] pub struct Scene2d { + /// Clear color for the frame. pub clear_color: Color, + /// Panel commands. pub panels: Vec, + /// Line commands. pub lines: Vec, + /// Circle commands. pub circles: Vec, + /// Text commands. pub texts: Vec, + /// Textured quad commands. pub textured_quads: Vec, + /// Hit regions for interaction logic. pub hit_regions: Vec, } @@ -162,6 +233,7 @@ impl Default for Scene2d { } impl Scene2d { + /// Creates a scene with explicit command-buffer capacities. pub fn with_capacities( panels: usize, lines: usize, @@ -181,6 +253,7 @@ impl Scene2d { } } + /// Clears all commands while preserving allocated capacities. pub fn clear(&mut self) { self.clear_color = Color::BLACK; self.panels.clear(); @@ -191,10 +264,12 @@ impl Scene2d { self.hit_regions.clear(); } + /// Sets frame clear color. pub fn set_clear_color(&mut self, color: Color) { self.clear_color = color; } + /// Enqueues a filled panel command. pub fn panel(&mut self, rect: Rect, fill: Color, stroke: Option<(Color, f32)>, layer: i32) { self.panels.push(PanelRegion { rect, @@ -204,6 +279,7 @@ impl Scene2d { }); } + /// Enqueues a thick line command. pub fn line(&mut self, start: Point2, end: Point2, thickness: f32, color: Color, layer: i32) { self.lines.push(LineCommand { start, @@ -214,6 +290,7 @@ impl Scene2d { }); } + /// Enqueues a filled circle command. pub fn circle(&mut self, center: Point2, radius: f32, color: Color, layer: i32) { self.circles.push(CircleCommand { center, @@ -223,6 +300,7 @@ impl Scene2d { }); } + /// Enqueues a text command. pub fn text( &mut self, position: Point2, @@ -242,6 +320,7 @@ impl Scene2d { }); } + /// Enqueues a textured-quad command. pub fn textured_quad( &mut self, rect: Rect, @@ -259,6 +338,7 @@ impl Scene2d { }); } + /// Registers a hit-test region. 
pub fn hit_region(&mut self, id: u64, rect: Rect, label: &'static str) { self.hit_regions.push(HitRegion { id, rect, label }); } diff --git a/src/rng.rs b/src/rng.rs index 8f5bc37..6376272 100644 --- a/src/rng.rs +++ b/src/rng.rs @@ -1,18 +1,23 @@ +//! Deterministic RNG primitives used across simulation and policy execution. + use crate::types::Seed; const ZERO_STATE_REPLACEMENT: u64 = 0xCAFEBABEDEADBEEF; const STREAM_XOR: u64 = 0x9E3779B97F4A7C15; +/// SplitMix64 mixer used to derive per-stream RNG states. #[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] pub struct SplitMix64 { state: u64, } impl SplitMix64 { + /// Creates a mixer from `seed`. pub const fn new(seed: Seed) -> Self { Self { state: seed } } + /// Advances the mixer and returns one 64-bit value. pub fn next_u64(&mut self) -> u64 { self.state = self.state.wrapping_add(STREAM_XOR); let mut z = self.state; @@ -22,6 +27,7 @@ impl SplitMix64 { } } +/// Deterministic xorshift-style RNG with stable stream forking. #[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] pub struct DeterministicRng { root_seed: Seed, @@ -35,10 +41,12 @@ impl Default for DeterministicRng { } impl DeterministicRng { + /// Creates an RNG from a root seed using stream id `0`. pub fn from_seed(seed: Seed) -> Self { Self::from_seed_and_stream(seed, 0) } + /// Creates an RNG from `seed` and stable `stream_id`. pub fn from_seed_and_stream(seed: Seed, stream_id: u64) -> Self { let mut mixer = SplitMix64::new(seed ^ stream_id.wrapping_mul(STREAM_XOR)); let state = sanitize_state(mixer.next_u64()); @@ -48,18 +56,22 @@ impl DeterministicRng { } } + /// Returns the root seed used to derive this RNG stream. pub const fn root_seed(self) -> Seed { self.root_seed } + /// Returns internal RNG state for reproducibility/testing. pub const fn raw_state(self) -> u64 { self.state } + /// Derives a sibling stream from the same root seed. 
pub fn fork(&self, stream_id: u64) -> Self { Self::from_seed_and_stream(self.root_seed, stream_id) } + /// Generates the next 64-bit random value. pub fn next_u64(&mut self) -> u64 { let mut x = self.state; x ^= x >> 12; @@ -69,6 +81,7 @@ impl DeterministicRng { x.wrapping_mul(0x2545F4914F6CDD1D) } + /// Samples uniformly in `[0, end)`. pub fn gen_range(&mut self, end: usize) -> usize { if end <= 1 { return 0; @@ -83,6 +96,7 @@ impl DeterministicRng { } } + /// Samples a Bernoulli outcome with probability `numerator / denominator`. pub fn gen_bool_ratio(&mut self, numerator: u64, denominator: u64) -> bool { debug_assert!(denominator > 0); if numerator == 0 { @@ -94,11 +108,13 @@ impl DeterministicRng { (self.next_u64() % denominator) < numerator } + /// Samples a floating-point number in `[0, 1)`. pub fn gen_unit_f64(&mut self) -> f64 { let value = self.next_u64() >> 11; (value as f64) * (1.0 / 9007199254740992.0) } + /// In-place Fisher-Yates shuffle using deterministic randomness. pub fn shuffle(&mut self, slice: &mut [T]) { for index in (1..slice.len()).rev() { let swap_index = self.gen_range(index + 1); diff --git a/src/session.rs b/src/session.rs index 985d082..6078710 100644 --- a/src/session.rs +++ b/src/session.rs @@ -1,4 +1,7 @@ +//! Session kernel, history stores, and replay/rewind utilities. + use core::fmt::Debug; +use std::collections::VecDeque; use crate::buffer::{Buffer, default_array}; use crate::game::Game; @@ -6,18 +9,27 @@ use crate::policy::Policy; use crate::rng::DeterministicRng; use crate::types::{DynamicReplayTrace, PlayerAction, ReplayTrace, Seed, StepOutcome, Tick}; +/// Saved checkpoint used by history implementations for rewind. #[derive(Clone, Debug, Default, Eq, Hash, PartialEq)] pub struct HistorySnapshot { + /// Tick represented by this snapshot. pub tick: Tick, + /// Cloned game state. pub state: S, + /// RNG state associated with `state`. pub rng: DeterministicRng, } +/// Storage backend for session traces and rewind snapshots. 
pub trait HistoryStore: Clone { + /// Trace representation emitted by this history backend. type Trace: Clone + Debug + Eq + PartialEq; + /// Creates a history store from initial session state. fn from_seed(seed: Seed, initial_state: &G::State, initial_rng: DeterministicRng) -> Self; + /// Resets history to initial session state. fn reset(&mut self, seed: Seed, initial_state: &G::State, initial_rng: DeterministicRng); + /// Records one transition and optional snapshot. fn record( &mut self, tick: Tick, @@ -26,20 +38,26 @@ pub trait HistoryStore: Clone { actions: &G::JointActionBuf, outcome: &StepOutcome, ); + /// Returns recorded transition count. fn len(&self) -> usize; + /// Returns whether no transitions are recorded. fn is_empty(&self) -> bool; + /// Returns immutable trace view. fn trace(&self) -> &Self::Trace; + /// Consumes history and returns owned trace. fn into_trace(self) -> Self::Trace; + /// Restores state/RNG at `target_tick` when available. fn restore(&self, game: &G, target_tick: Tick) -> Option<(G::State, DeterministicRng)>; } +/// Dynamically-sized history with bounded checkpoint deque. 
#[derive(Debug, Eq, PartialEq)] pub struct DynamicHistory { seed: Seed, initial_state: G::State, initial_rng: DeterministicRng, trace: DynamicReplayTrace, - snapshots: Vec>, + snapshots: VecDeque>, } impl Clone @@ -67,9 +85,9 @@ impl return; } if self.snapshots.len() == SNAPSHOTS { - self.snapshots.remove(0); + let _ = self.snapshots.pop_front(); } - self.snapshots.push(HistorySnapshot { + self.snapshots.push_back(HistorySnapshot { tick, state: state.clone(), rng, @@ -90,9 +108,9 @@ impl HistoryStore type Trace = DynamicReplayTrace; fn from_seed(seed: Seed, initial_state: &G::State, initial_rng: DeterministicRng) -> Self { - let mut snapshots = Vec::with_capacity(SNAPSHOTS); + let mut snapshots = VecDeque::with_capacity(SNAPSHOTS); if SNAPSHOTS > 0 { - snapshots.push(HistorySnapshot { + snapshots.push_back(HistorySnapshot { tick: 0, state: initial_state.clone(), rng: initial_rng, @@ -114,7 +132,7 @@ impl HistoryStore self.trace.clear(seed); self.snapshots.clear(); if SNAPSHOTS > 0 { - self.snapshots.push(HistorySnapshot { + self.snapshots.push_back(HistorySnapshot { tick: 0, state: initial_state.clone(), rng: initial_rng, @@ -179,6 +197,7 @@ impl HistoryStore } } +/// Fixed-capacity history with ring-buffer checkpoints. #[derive(Debug, Eq, Hash, PartialEq)] pub struct FixedHistory where @@ -333,6 +352,7 @@ where } } +/// Deterministic session kernel for stepping, tracing, and rewinding games. #[derive(Clone, Debug)] pub struct SessionKernel> { game: G, @@ -346,10 +366,13 @@ pub struct SessionKernel> { outcome: StepOutcome, } +/// Default fixed-history session alias. pub type Session = SessionKernel>; +/// Interactive dynamic-history session alias. pub type InteractiveSession = SessionKernel>; impl> SessionKernel { + /// Creates a new session initialized from `seed`. 
pub fn new(game: G, seed: Seed) -> Self { let state = game.init(seed); assert!(game.state_invariant(&state)); @@ -368,6 +391,7 @@ impl> SessionKernel { } } + /// Resets session state and history to `seed`. pub fn reset(&mut self, seed: Seed) { self.state = self.game.init(seed); self.rng = DeterministicRng::from_seed_and_stream(seed, 1); @@ -379,78 +403,156 @@ impl> SessionKernel { self.outcome.clear(); } + /// Returns the game instance. pub fn game(&self) -> &G { &self.game } + /// Returns current game state. pub fn state(&self) -> &G::State { &self.state } + /// Returns current tick. pub fn current_tick(&self) -> Tick { self.tick } + /// Returns current RNG snapshot. pub fn rng(&self) -> DeterministicRng { self.rng } + /// Returns immutable trace view. pub fn trace(&self) -> &H::Trace { self.history.trace() } + /// Consumes session and returns owned trace. pub fn into_trace(self) -> H::Trace { self.history.into_trace() } + /// Returns whether current state is terminal. pub fn is_terminal(&self) -> bool { self.game.is_terminal(&self.state) } + /// Returns player-local observation. pub fn player_observation(&self, player: usize) -> G::PlayerObservation { self.game.observe_player(&self.state, player) } + /// Returns spectator observation. pub fn spectator_observation(&self) -> G::SpectatorObservation { self.game.observe_spectator(&self.state) } + /// Returns world/debug view. pub fn world_view(&self) -> G::WorldView { self.game.world_view(&self.state) } + /// Returns legal actions for `player` in current state. 
pub fn legal_actions_for(&mut self, player: usize) -> &[G::Action] { self.game .legal_actions(&self.state, player, &mut self.legal_actions); self.legal_actions.as_slice() } - pub fn step(&mut self, actions: &[PlayerAction]) -> &StepOutcome { - self.joint_actions.clear(); - self.joint_actions - .extend_from_slice(actions) - .expect("joint action buffer capacity exceeded"); - let joint_actions = self.joint_actions.clone(); - self.step_with_joint_actions(&joint_actions) + #[inline(always)] + fn step_core(&mut self, actions: &G::JointActionBuf) { + assert!( + !self.game.is_terminal(&self.state), + "cannot step a terminal session", + ); + self.outcome.clear(); + self.game + .step_in_place(&mut self.state, actions, &mut self.rng, &mut self.outcome); + self.tick += 1; + self.outcome.tick = self.tick; } - pub fn step_with_joint_actions( + #[inline(always)] + fn record_step(&mut self, actions: &G::JointActionBuf) { + self.history + .record(self.tick, &self.state, self.rng, actions, &self.outcome); + } + + fn collect_policy_actions( &mut self, - actions: &G::JointActionBuf, - ) -> &StepOutcome { + policies: &mut [&mut dyn Policy], + ) { + self.players_to_act.clear(); + self.game + .players_to_act(&self.state, &mut self.players_to_act); + + self.joint_actions.clear(); + for &player in self.players_to_act.as_slice() { + self.game + .legal_actions(&self.state, player, &mut self.legal_actions); + let observation = self.game.observe_player(&self.state, player); + let policy = policies + .get_mut(player) + .expect("missing policy for active player"); + let action = policy.choose_action( + &self.game, + &self.state, + player, + &observation, + self.legal_actions.as_slice(), + &mut self.rng, + ); + self.joint_actions + .push(PlayerAction { player, action }) + .expect("joint action buffer capacity exceeded"); + } + } + + #[inline(always)] + fn step_staged_joint_actions(&mut self) -> &StepOutcome { + assert!( + !self.game.is_terminal(&self.state), + "cannot step a terminal session", + 
); + self.outcome.clear(); + self.game.step_in_place( + &mut self.state, + &self.joint_actions, + &mut self.rng, + &mut self.outcome, + ); + self.tick += 1; + self.outcome.tick = self.tick; + self.history.record( + self.tick, + &self.state, + self.rng, + &self.joint_actions, + &self.outcome, + ); + &self.outcome + } + + #[inline(always)] + fn step_staged_joint_actions_checked(&mut self) -> &StepOutcome { assert!( !self.game.is_terminal(&self.state), "cannot step a terminal session", ); assert!(self.game.state_invariant(&self.state)); - for action in actions.as_slice() { + for action in self.joint_actions.as_slice() { assert!(self.game.action_invariant(&action.action)); } let pre_state = self.state.clone(); self.outcome.clear(); - self.game - .step_in_place(&mut self.state, actions, &mut self.rng, &mut self.outcome); + self.game.step_in_place( + &mut self.state, + &self.joint_actions, + &mut self.rng, + &mut self.outcome, + ); self.tick += 1; self.outcome.tick = self.tick; @@ -471,49 +573,111 @@ impl> SessionKernel { } assert!(self.game.transition_postcondition( &pre_state, - actions, + &self.joint_actions, &self.state, &self.outcome )); - self.history - .record(self.tick, &self.state, self.rng, actions, &self.outcome); + self.history.record( + self.tick, + &self.state, + self.rng, + &self.joint_actions, + &self.outcome, + ); &self.outcome } - pub fn step_with_policies( + /// Steps using externally supplied action slice. + pub fn step(&mut self, actions: &[PlayerAction]) -> &StepOutcome { + self.joint_actions.clear(); + self.joint_actions + .extend_from_slice(actions) + .expect("joint action buffer capacity exceeded"); + self.step_staged_joint_actions() + } + + /// Steps using externally supplied action slice with contract checks. 
+ pub fn step_checked( &mut self, - policies: &mut [&mut dyn Policy], + actions: &[PlayerAction], ) -> &StepOutcome { - self.players_to_act.clear(); - self.game - .players_to_act(&self.state, &mut self.players_to_act); self.joint_actions.clear(); + self.joint_actions + .extend_from_slice(actions) + .expect("joint action buffer capacity exceeded"); + self.step_staged_joint_actions_checked() + } - for &player in self.players_to_act.as_slice() { + /// Steps with prebuilt joint-action buffer. + #[inline(always)] + pub fn step_with_joint_actions( + &mut self, + actions: &G::JointActionBuf, + ) -> &StepOutcome { + self.step_core(actions); + self.record_step(actions); + &self.outcome + } + + /// Steps with contract checks enabled. + pub fn step_with_joint_actions_checked( + &mut self, + actions: &G::JointActionBuf, + ) -> &StepOutcome { + assert!(self.game.state_invariant(&self.state)); + for action in actions.as_slice() { + assert!(self.game.action_invariant(&action.action)); + } + + let pre_state = self.state.clone(); + self.step_core(actions); + + assert!(self.game.state_invariant(&self.state)); + let spectator = self.game.observe_spectator(&self.state); + assert!( self.game - .legal_actions(&self.state, player, &mut self.legal_actions); + .spectator_observation_invariant(&self.state, &spectator) + ); + let world = self.game.world_view(&self.state); + assert!(self.game.world_view_invariant(&self.state, &world)); + for player in 0..self.game.player_count() { let observation = self.game.observe_player(&self.state, player); - let policy = policies - .get_mut(player) - .expect("missing policy for active player"); - let action = policy.choose_action( - &self.game, - &self.state, - player, - &observation, - self.legal_actions.as_slice(), - &mut self.rng, + assert!( + self.game + .player_observation_invariant(&self.state, player, &observation) ); - self.joint_actions - .push(PlayerAction { player, action }) - .expect("joint action buffer capacity exceeded"); } + 
assert!(self.game.transition_postcondition( + &pre_state, + actions, + &self.state, + &self.outcome + )); - let actions = self.joint_actions.clone(); - self.step_with_joint_actions(&actions) + self.record_step(actions); + &self.outcome } + /// Collects actions from policies and steps once. + pub fn step_with_policies( + &mut self, + policies: &mut [&mut dyn Policy], + ) -> &StepOutcome { + self.collect_policy_actions(policies); + self.step_staged_joint_actions() + } + + /// Collects actions from policies and steps once with checks. + pub fn step_with_policies_checked( + &mut self, + policies: &mut [&mut dyn Policy], + ) -> &StepOutcome { + self.collect_policy_actions(policies); + self.step_staged_joint_actions_checked() + } + + /// Runs until terminal state or `max_ticks` is reached. pub fn run_until_terminal( &mut self, policies: &mut [&mut dyn Policy], @@ -525,6 +689,19 @@ impl> SessionKernel { self.trace() } + /// Runs checked stepping until terminal state or `max_ticks`. + pub fn run_until_terminal_checked( + &mut self, + policies: &mut [&mut dyn Policy], + max_ticks: usize, + ) -> &H::Trace { + while !self.is_terminal() && (self.tick as usize) < max_ticks { + self.step_with_policies_checked(policies); + } + self.trace() + } + + /// Rewinds session state to `target_tick` when restorable. pub fn rewind_to(&mut self, target_tick: Tick) -> bool { let Some((state, rng)) = self.history.restore(&self.game, target_tick) else { return false; @@ -536,16 +713,19 @@ impl> SessionKernel { true } + /// Alias of `rewind_to` for replay-oriented call sites. pub fn replay_to(&mut self, target_tick: Tick) -> bool { self.rewind_to(target_tick) } + /// Returns reconstructed state at `target_tick`. pub fn state_at(&self, target_tick: Tick) -> Option { self.history .restore(&self.game, target_tick) .map(|(state, _)| state) } + /// Returns a cloned session fork rewound to `target_tick`. 
pub fn fork_at(&self, target_tick: Tick) -> Option where G: Clone, diff --git a/src/types.rs b/src/types.rs index 2261059..4f5c271 100644 --- a/src/types.rs +++ b/src/types.rs @@ -1,43 +1,64 @@ +//! Core scalar types and replay data structures used across the engine. + use core::hash::{Hash, Hasher}; use crate::buffer::{Buffer, FixedVec}; +/// Scalar reward type used by games. pub type Reward = i64; +/// Monotonic simulation tick counter. pub type Tick = u64; +/// Stable player identifier within one game. pub type PlayerId = usize; +/// Deterministic seed type. pub type Seed = u64; +/// Reward assigned to one player for a single transition. #[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)] pub struct PlayerReward { + /// Recipient player id. pub player: PlayerId, + /// Reward value for that player. pub reward: Reward, } +/// Action submitted by a specific player. #[derive(Clone, Debug, Default, Eq, Hash, PartialEq)] pub struct PlayerAction { + /// Acting player id. pub player: PlayerId, + /// Concrete chosen action. pub action: A, } +/// Episode termination state after a step. #[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)] pub enum Termination { + /// Episode continues. #[default] Ongoing, + /// Episode reached a terminal state. Terminal { + /// Winner id for terminal outcomes, when applicable. winner: Option, }, } impl Termination { + /// Returns `true` when the outcome is terminal. pub const fn is_terminal(self) -> bool { matches!(self, Self::Terminal { .. }) } } +/// Output bundle from one transition. #[derive(Clone, Debug, Eq, Hash, PartialEq)] pub struct StepOutcome { + /// Tick at which this outcome was produced. pub tick: Tick, + /// Per-player rewards. pub rewards: R, + /// Termination state. pub termination: Termination, } @@ -58,12 +79,14 @@ impl StepOutcome where R: Buffer, { + /// Resets outcome to default ongoing state. 
pub fn clear(&mut self) { self.tick = 0; self.rewards.clear(); self.termination = Termination::Ongoing; } + /// Returns reward for `player`, or `0` when no entry exists. pub fn reward_for(&self, player: PlayerId) -> Reward { let rewards = self.rewards.as_slice(); let mut index = 0usize; @@ -77,25 +100,34 @@ where 0 } + /// Returns whether this outcome is terminal. pub fn is_terminal(&self) -> bool { self.termination.is_terminal() } } +/// One recorded replay step. #[derive(Clone, Debug, Default, Eq, Hash, PartialEq)] pub struct ReplayStep { + /// Tick at which step was recorded. pub tick: Tick, + /// Joint action applied at `tick`. pub actions: JA, + /// Reward bundle emitted by the transition. pub rewards: R, + /// Termination state after the transition. pub termination: Termination, } +/// Fixed-capacity replay trace. #[derive(Clone, Debug, Eq, Hash, PartialEq)] pub struct ReplayTrace where ReplayStep: Default, { + /// Seed used to initialize the session. pub seed: Seed, + /// Recorded transition log. pub steps: FixedVec, LOG>, } @@ -103,6 +135,7 @@ impl ReplayTrace where ReplayStep: Default, { + /// Creates an empty trace initialized with `seed`. pub fn new(seed: Seed) -> Self { Self { seed, @@ -110,27 +143,34 @@ where } } + /// Clears the trace and updates seed metadata. pub fn clear(&mut self, seed: Seed) { self.seed = seed; self.steps.clear(); } + /// Returns number of recorded steps. pub fn len(&self) -> usize { self.steps.len() } + /// Returns whether no steps are recorded. pub fn is_empty(&self) -> bool { self.steps.is_empty() } } +/// Dynamically-sized replay trace. #[derive(Clone, Debug, Default, Eq, Hash, PartialEq)] pub struct DynamicReplayTrace { + /// Seed used to initialize the session. pub seed: Seed, + /// Recorded transition log. pub steps: Vec>, } impl DynamicReplayTrace { + /// Creates an empty dynamic trace. 
pub fn new(seed: Seed) -> Self { Self { seed, @@ -138,15 +178,18 @@ impl DynamicReplayTrace { } } + /// Clears the trace and updates seed metadata. pub fn clear(&mut self, seed: Seed) { self.seed = seed; self.steps.clear(); } + /// Returns number of recorded steps. pub fn len(&self) -> usize { self.steps.len() } + /// Returns whether no steps are recorded. pub fn is_empty(&self) -> bool { self.steps.is_empty() } @@ -157,6 +200,7 @@ where JA: Clone, R: Clone, { + /// Appends one replay step cloned from the given references. pub fn record(&mut self, tick: Tick, actions: &JA, rewards: &R, termination: Termination) { self.steps.push(ReplayStep { tick, @@ -172,6 +216,7 @@ where JA: Clone + Default, R: Clone + Default, { + /// Appends one replay step to the fixed-capacity log. pub fn record(&mut self, tick: Tick, actions: &JA, rewards: &R, termination: Termination) { self.steps .push(ReplayStep { @@ -213,6 +258,7 @@ impl Hasher for StableHasher { } } +/// Computes a stable 64-bit hash using an internal FNV-1a variant. pub fn stable_hash(value: &T) -> u64 { let mut hasher = StableHasher::new(); value.hash(&mut hasher); diff --git a/src/verification.rs b/src/verification.rs index b9ed0df..fad8c54 100644 --- a/src/verification.rs +++ b/src/verification.rs @@ -1,9 +1,22 @@ +//! Runtime contract-check helpers for transitions, observations, and compact codecs. + use crate::buffer::Buffer; -use crate::compact::CompactGame; use crate::game::Game; use crate::rng::DeterministicRng; -use crate::types::{Seed, StepOutcome}; +use crate::types::{Reward, Seed, StepOutcome}; + +/// Returns true when a reward stays in range and terminal flags remain consistent. 
+pub fn reward_and_terminal_postcondition( + reward: Reward, + min_reward: Reward, + max_reward: Reward, + post_terminal: bool, + outcome_terminal: bool, +) -> bool { + (min_reward..=max_reward).contains(&reward) && (post_terminal == outcome_terminal) +} +/// Asserts deterministic transition and postcondition contracts for one step. pub fn assert_transition_contracts( game: &G, pre: &G::State, @@ -37,6 +50,7 @@ pub fn assert_transition_contracts( assert!(game.transition_postcondition(pre, actions, &left_state, &left_outcome,)); } +/// Asserts player, spectator, and world-view observation contracts. pub fn assert_observation_contracts(game: &G, state: &G::State) { assert!(game.state_invariant(state)); for player in 0..game.player_count() { @@ -49,7 +63,8 @@ pub fn assert_observation_contracts(game: &G, state: &G::State) { assert!(game.world_view_invariant(state, &world)); } -pub fn assert_compact_roundtrip(game: &G, action: &G::Action) { +/// Asserts compact action encoding roundtrips through decode. 
+pub fn assert_compact_roundtrip(game: &G, action: &G::Action) { let encoded = game.encode_action(action); assert_eq!(game.decode_action(encoded), Some(*action)); } diff --git a/tests/validation.rs b/tests/validation.rs index 92f965c..c31032a 100644 --- a/tests/validation.rs +++ b/tests/validation.rs @@ -1,4 +1,4 @@ -#![cfg(feature = "builtin-games")] +#![cfg(feature = "builtin")] use std::alloc::{GlobalAlloc, Layout, System}; use std::cell::Cell; @@ -6,12 +6,12 @@ use std::sync::Mutex; use std::sync::atomic::{AtomicUsize, Ordering}; use gameengine::buffer::Buffer; -use gameengine::games::{Blackjack, BlackjackAction, TicTacToe, TicTacToeAction}; +use gameengine::builtin::{Blackjack, BlackjackAction, TicTacToe, TicTacToeAction}; #[cfg(feature = "physics")] -use gameengine::games::{Platformer, PlatformerAction}; +use gameengine::builtin::{Platformer, PlatformerAction}; use gameengine::{ - CompactGame, CompactSpec, DeterministicRng, FixedVec, Game, PlayerAction, PlayerReward, - Session, StepOutcome, stable_hash, + CompactSpec, DeterministicRng, FixedVec, Game, InteractiveSession, PlayerAction, + PlayerReward, Session, StepOutcome, stable_hash, }; struct CountingAllocator; @@ -79,7 +79,7 @@ fn capture_compact_trace( actions: &[Vec>], ) -> (Vec>, u64, u64) where - G: Game + CompactGame + Copy, + G: Game + Copy, { let mut session = Session::new(game, seed); let mut compact_trace = Vec::new(); @@ -481,12 +481,23 @@ fn parallel_replay_matches_serial() { action: TicTacToeAction(0), }]], ), + ( + 13, + (0..320) + .map(|_| { + vec![PlayerAction { + player: 0, + action: TicTacToeAction(9), + }] + }) + .collect(), + ), ]; let parallel = replay_many(&TicTacToe, &traces); let serial: Vec<_> = traces .iter() .map(|(seed, steps)| { - let mut session = Session::new(TicTacToe, *seed); + let mut session = InteractiveSession::new(TicTacToe, *seed); for step in steps { if session.is_terminal() { break; From f1ba9e85bbdc78e51b85c030a6340745da8b7b67 Mon Sep 17 00:00:00 2001 From: Noah 
Cashin Date: Wed, 1 Apr 2026 14:14:30 -0400 Subject: [PATCH 02/12] Fix unnecessary file --- rewrite_mandate.md | 378 --------------------------------------------- 1 file changed, 378 deletions(-) delete mode 100644 rewrite_mandate.md diff --git a/rewrite_mandate.md b/rewrite_mandate.md deleted file mode 100644 index 37c1e97..0000000 --- a/rewrite_mandate.md +++ /dev/null @@ -1,378 +0,0 @@ -## Rewrite mandate - -`gameengine` shall become a proof-oriented, deterministic environment kernel in which the **only handwritten mandatory game logic** is the game’s mathematics: state, action type, initialization, transition function, and any game-specific invariants or semantic lemmas. Everything else that is presently duplicated across games—CLI registration, replay integration, compact encoding, basic controls, default rendering, observation decoding, proof harness boilerplate, and hot-path runtime scaffolding—shall be engine-owned or derive-generated. - -The rewrite shall not merely reduce lines of code. It shall reduce the number of *places* a game author must reason about. A beginner implementing Pong must be able to think, “I am writing the math of the game,” and nothing more unless they explicitly opt into extra rendering or UI polish. - -The rewrite shall therefore optimize for these properties simultaneously: - -1. **Single-source semantics**: game semantics written once. -2. **Single canonical observation type**: no separate human/AI/narrative observation formats in the core API. -3. **Proof by design**: common safety and correctness properties are generated and verified centrally. -4. **Low-friction authoring**: a simple game should be closer to 100 LOC core + 100 LOC optional rendering, not 550–900. -5. **Hot-path efficiency**: correctness instrumentation must not dominate normal execution. -6. **Infotheory compatibility**: the environment interface must cleanly become a default environment layer for `infotheory`. 
- - -## Required target architecture - -The repository shall remain **one crate**. Separation of concerns shall be achieved through `src/` structure, internal modules, and Cargo features, not by splitting the engine into many crates. The engine is a single mathematical and software artifact; its proofs, kernel, codecs, physics, rendering helpers, and integrations are parts of the same design and shall be specified, implemented, and verified together. - -The crate shall therefore be organized as a **library by default**, with optional binaries under `src/bin/` for CLI tooling and other frontends. The architecture must be **pay-for-what-you-take**: users depending on the library for deterministic game kernels or RL environments shall not pay for GUI, CLI, or other higher-level integrations unless those features are explicitly enabled. - -The internal structure shall be organized approximately as follows: - -* `src/lib.rs`: canonical public API surface and feature-gated re-exports. -* `src/core/`: proof-critical deterministic kernel; game traits; transition/result types; canonical observation representation; compact/bitpacked codecs; bounded numeric types; fixed-capacity structures; deterministic RNG interfaces; replay event types; shared invariants and contracts. -* `src/proof/`: proof code integrated directly into the crate; Kani harnesses, Verus specs/lemmas/refinement proofs, shared proof utilities, and proof-oriented documentation hooks. Proofs are part of the engine, not an external add-on. -* `src/physics/`: deterministic physics kernel, proofs of its core invariants/refinements, automatic extraction of renderable/observable physical structure, and helper types for games that use engine-owned physics. -* `src/render/`: optional retained-mode rendering support, canonical observation decoders, scene normalization, caches, text/layout/geometry reuse, debug rendering, and GUI-facing helpers. This module must remain semantically downstream of the core. 
-* `src/builtin/`: built-in games and their optional render adapters, using the same public engine APIs available to downstream users. -* `src/registry/`: game descriptors, registration machinery, and engine-owned dispatch glue so that adding a game does not require duplicated handwritten orchestration logic. -* `src/cli/` or `src/bin/`: optional CLI entrypoints and related integration code, built on top of the registry and library APIs rather than embedding game-specific match forests. - -Feature flags shall enforce the intended dependency boundaries. At minimum, the crate shall support a shape like: - -* default: proof-critical library surface, deterministic kernel, codecs, and proof-by-default development posture -* `physics`: engine-owned deterministic physics support -* `render`: rendering and GUI-facing helpers -* `builtin`: built-in games -* `cli`: optional binary/CLI integration -* `proof`: additional proof harness tooling, exhaustive verification helpers, and heavy proof/test integrations where separate toggling is useful for build ergonomics - -However, **proofability is a design default**, not a bolt-on feature. The core crate structure, APIs, invariants, and data types must all be designed from the start so they are naturally amenable to Kani, Verus, and further formal methods. A `proof` feature may control heavy harnesses or expensive verification helpers, but the proof-critical code itself lives in the same crate and is part of the main architecture. - -The fundamental separation of concerns is therefore not “different crates,” but: - -1. **semantic core**, which defines the mathematical game object and canonical encoded interaction surface; -2. **proof layer**, embedded in the same crate, specifying and verifying the core’s contracts and refinements; -3. **optional integrations**, such as physics, rendering, built-ins, and CLI, all strictly downstream of the core and feature-gated. 
- -This structure preserves a single coherent engine, keeps proofs physically adjacent to the code they justify, avoids needless multi-crate complexity, and still gives strong compile-time and dependency-level separation so that the engine remains lightweight, DRY, SOLID, and pay-for-what-you-take. - - -## Normative public authoring model - -The handwritten core of a game shall be one state type plus one action type plus one `step` implementation. - -The core trait shall conceptually be: - -```rust -pub trait Game: Sized + Clone { - type Params: Clone + Default; - type Action: Action; - type Reward: RewardCodec; - - const NAME: &'static str; - const PLAYERS: u8; - - fn init(seed: u64, params: &Self::Params) -> Self; - - fn step(&mut self, joint: Joint) -> Transition; -} -``` - -Observation is separated from stepping but has exactly one canonical output type per game: - -```rust -pub trait Observe: Game { - type Obs: ObservationCodec; - - fn observe(&self, who: Observer) -> Self::Obs; -} -``` - -That means: - -* there is one observation *schema/type* per game; -* multi-agent games may produce one packet per observer id, but all packets share the same schema; -* there is no second human-only, prose-only, or narrative-only observation channel in the core API. - -The core game object shall not know whether it is being rendered, graphed, inspected, replayed, or controlled by RL. It shall only know how to evolve its state and emit reward plus canonical observation packets. - -## Observation and compact encoding specification - -The observation output shall be canonical, compact, bitpacked, and decodable by any consumer. The engine shall not claim globally optimal MDL/Kolmogorov minimality; instead it shall provide **schema-minimal canonical encoding** under declared bounds, with optional higher-level entropy coding outside the proof-critical core. 
- -The observation codec system shall therefore provide: - -* bounded integers encoded with the exact declared bit width; -* finite enums encoded with the minimum number of bits needed for the declared variant count; -* fixed arrays with concatenated subcodecs; -* optional values with explicit tag bits; -* small product types derived compositionally; -* canonical ordering for maps/entities/lists whenever those appear in an observation schema. - -Encoding must be total over valid values and decoding must be total over valid bitstreams of the declared schema. Invalid encodings shall return structured errors, never rely on debug assertions. This fixes the current “debug-assert in release” class of issues the audit called out for compact values. - -The default engine output for RL / Infotheory integration shall be: - -```rust -pub struct EnvStep { - pub observation_bits: BitPacket, - pub reward: CompactReward, - pub terminated: bool, - pub truncated: bool, -} -``` - -`BitPacket` shall be stack-first or fixed-capacity in the proof-critical path, with explicit maximum bit budgets declared per game or derived from its schema. - -## Rendering model - -Rendering shall be entirely optional and strictly downstream of state/observation. The rewrite shall support two rendering modes. - -First, **automatic physics rendering**. If a game uses engine-owned physics types, and its observation or debug inspector exposes physics entities, colliders, transforms, and materials/tags, the engine shall provide a default renderer that displays those objects automatically. A wall described in physics shall appear as a wall. A body with a collider shall appear as that object. No narrative config, manual sprite graph, or bespoke presenter shall be required merely to make physics visible. - -Second, **optional game-specific rendering**. A game may provide an additional render adapter in a separate file/module if it wants a prettier or more domain-specific view. 
That adapter consumes the same canonical observation packet or a debug inspector view; it does not alter kernel semantics. - -The renderer shall be retained-mode, not rebuild-everything immediate-mode. Specifically: - -* scene nodes shall have stable IDs; -* geometry buffers shall be cached and updated only when dirty; -* text layout shall be cached by `(font, size, content)` keys; -* layer assignment shall be stable and pre-bucketed rather than per-frame sort-heavy when possible; -* per-frame render paths shall not clone entire command vectors or rebuild large temporary geometry lists. - -This directly replaces the current runtime pattern identified in the audit: text command cloning, glyph buffer rebuilds, fresh text-area vectors, geometry vector rebuilding and sorting, and repeated world/view copying. - -The proof claim for rendering shall be strengthened relative to the current repo. The GPU backend remains outside full proof scope, but the following must be inside proof scope: - -* observation decoding, -* scene normalization, -* z-order normalization, -* hitbox/screen transform math, -* bounds/culling safety, -* stable ID bookkeeping, -* debug/fair-view separation. - -That is a more rigorous claim than “render stack is outside proof claim,” while still staying realistic about GPU drivers and graphics APIs. The current README explicitly keeps the GUI outside the proof claim; this rewrite narrows that unverified surface rather than pretending to verify the entire graphics stack. ([GitHub][1]) - -## Session, replay, and runtime - -`session.rs` in its current mixed form shall be split conceptually into three layers: - -* `KernelStepper`: production stepping with no clone-heavy audit work on every tick. -* `CheckedStepper`: instrumented stepping that wraps the same semantics with invariant/postcondition/history/consistency checks. -* `ReplayStore`: event log + checkpoint history, independent from both. 
- -Normal execution must not clone pre-state, joint actions, and world views every tick just to re-check engine invariants unless an explicit checked mode is requested. The semantics of the game remain identical in all modes; only instrumentation changes. - -Replay/history shall use: - -* append-only event log, -* periodic checkpoints, -* O(1) eviction ring buffer or `VecDeque` semantics for bounded checkpoint history, -* optional delta-compressed checkpoints for long runs. - -The engine shall ban `Vec::remove(0)` and other O(n) front-eviction operations in replay-critical paths. - -Dynamic traces shall have explicit retention policy: - -* unbounded only by explicit request; -* otherwise bounded by count, bytes, or time window; -* replay format stable and versioned. - -The CLI `replay` path shall cease aliasing `play` semantics. Replay must be a distinct command with exact deterministic reconstruction from checkpoints + events. - -## Registry and CLI - -`main.rs` shall no longer contain repeated match forests for game registration, policy wiring, and render wiring. Every game shall contribute one descriptor: - -```rust -pub struct GameDescriptor { - pub name: &'static str, - pub create_headless: fn(Seed, AnyParams) -> Box, - pub controls: Option<&'static ControlMap>, - pub default_renderer: Option, - pub policies: &'static [PolicyDescriptor], -} -``` - -Descriptors shall be assembled into one static registry by macro or generated module, not handwritten repeatedly. - -Adding a new game shall require: - -1. writing the game; -2. optionally writing a renderer; -3. adding one registration invocation. - -It shall not require editing multiple unrelated CLI match sites. - -## Proof and verification model - -The current repo already frames verification around Kani and proof-oriented kernel design. The rewrite shall deepen that model and distribute it correctly. 
Kani is suitable for modular safety/correctness checking with proof harnesses, bit-precise symbolic values, and contracts; Verus is suitable for higher-level functional correctness, state-machine reasoning, and spec/executable refinement. The rewrite shall use both in their strongest roles. ([Model Checking][2]) - -### Kani obligations - -Kani shall automatically verify, for core structures and derived code: - -* no panics in valid-core APIs; -* no UB in all `unsafe` blocks under stated preconditions; -* encode/decode roundtrip for compact codecs; -* invalid-bitstream rejection behavior; -* replay/checkpoint restoration equivalence on bounded histories; -* bounded-step determinism under equal seeds and equal action streams; -* fixed-capacity structure invariants; -* arithmetic safety or explicitly specified wrapping behavior. - -Kani function contracts shall be used to modularize repeated proofs for codecs, buffers, ring history, and low-level physics primitives, instead of re-verifying large concrete call graphs everywhere. ([Model Checking][3]) - -### Verus obligations - -Verus shall define the mathematical specification layer: - -* the abstract transition system for `Game`; -* the abstract event-log/checkpoint refinement model; -* the abstract compact-codec correctness predicates; -* abstract physics invariants; -* debug/fair observation separation invariants. - -For core subsystems that behave like transition systems—session history, replay restoration, physics stepping, and any future multi-agent scheduler—Verus state-machine style specifications shall be used to prove invariant preservation and refinement from executable Rust to the spec model. 
([Verus Language][4]) - -### Generated proof surface for games - -Every game shall automatically receive generated proof skeletons for: - -* transition totality over valid actions; -* determinism; -* observation codec roundtrip; -* replay equivalence; -* invariant preservation; -* action validity exhaustiveness for finite spaces. - -Game authors then only write the delta: - -* semantic invariants specific to the game, -* ranking/progress measures where needed, -* hidden-information lemmas where needed. - -### Liveness and progress - -The engine shall not falsely promise fully automatic universal liveness proofs for arbitrary games. Instead it shall provide: - -* automatic bounded progress checks for finite or bounded-state games; -* automatic “no stuck state” checks for valid action domains; -* optional termination/progress proof scaffolds based on user-supplied ranking measures; -* optional exhaustive bounded liveness for small finite games such as TicTacToe. - -This is mathematically honest and still drastically improves proof ergonomics. - -## Built-in games and code budget requirements - -The built-in games `Blackjack`, `Platformer`, and `TicTacToe` shall be rewritten so that their **handwritten core game logic**, excluding generated derives, shared engine code, and proof boilerplate emitted by macros, totals roughly 300 LOC combined. Their **optional rendering/UI code**, again excluding shared engine infrastructure, shall total roughly 500 LOC combined. - -Pong shall be treated as the simplicity benchmark: - -* handwritten core game logic target: about 80–120 LOC; -* optional render adapter target: about 80–120 LOC. - -That is achievable only if the engine owns: - -* compact codecs, -* CLI registration, -* replay/history, -* default controls, -* default validation harnesses, -* default physics rendering. - -If any of those remain per-game chores, the rewrite has failed its primary ergonomics goal. 
- -## Built-in physics contract - -Physics must remain engine-owned, deterministic, auditable, and provable, as the current repo already intends. But the API shall be simplified so that games *use* physics rather than *explain* physics to multiple higher layers. A game with physics shall expose or contain a physics world, and the engine shall derive: - -* canonical observation fragments for physical entities, -* automatic debug rendering of bodies/colliders, -* collision/contact summaries if requested, -* proof obligations about world validity and deterministic stepping. - -Broadphase/contact refresh and lookup structures shall be upgraded from obviously non-scalable linear/O(n²) strategies where that is currently true, with deterministic stable ordering preserved. The proof surface shall specify deterministic contact ordering and collision-set normalization. - -## Safety and `unsafe` - -`unsafe` shall be isolated into narrow modules with explicit contracts and zero ambient assumptions. No game author shall need `unsafe` for ordinary game implementation. Every `unsafe` block in core/physics/render decoding shall have: - -* written preconditions, -* Kani proof harnesses, -* Verus-level representation invariant linkage where appropriate. - -## Documentation requirements - -Documentation shall be rewritten as public, portable, permalink-friendly documentation: - -* no machine-local absolute paths; -* relative intra-repo links for local docs; -* public permalinks or stable docs links for external references; -* one proof-claim document that explicitly states what is proven, what is checked only by tests/benchmarks, and what remains outside proof scope. - -Each public trait and derive macro shall have one “smallest possible example,” with Pong as the canonical beginner example. - -## Acceptance criteria - -This rewrite is complete only if the following are true: - -1. 
A beginner can add Pong by writing only state, actions, `init`, `step`, and optionally a small renderer. -2. Adding a new game never requires editing multiple CLI match sites. -3. Core execution does not do clone-heavy invariant auditing every tick in normal mode. -4. Replay/checkpoint eviction is O(1), not O(n) front-removal. -5. Render hot paths are retained/cached and avoid repeated scene rebuilding. -6. The core proof claim is stronger than the current repo’s by covering codec/scene decoding and refinement structure, while still keeping the final GPU backend out of full proof scope. -7. `Blackjack`, `Platformer`, and `TicTacToe` hit the handwritten LOC budgets above without code-golfing. -8. The resulting environment interface is trivial to adapt into `infotheory`: `reset(seed, params)`, `step(action_bits) -> observation_bits, reward, done`. -9. 100% of items must be documented, and with the upmost high quality, and enforced by CI, like Infotheory's "Rustdoc coverage gate" in it's .github (rust.yml) - -## Completion report - -The rewrite mandate is now completed by the current codebase revision. - -### Audit closure summary - -The re-audit findings and follow-up correctness fixes are closed as follows: - -* Parallel replay no longer depends on a fixed 256-step trace cap; dynamic traces are used in replay helpers and validated with a long-trace parity case. -* Compact reward encode/decode is now range-checked and overflow-safe via checked `i128` arithmetic. -* Unsafe staged-step pointer round-trips in session stepping were removed and replaced with direct safe logic. -* Unsafe borrow and pointer assumptions in render runtime event/frame paths were removed in favor of queued command buffering and safe iteration. -* Unsafe array initialization in buffer utilities was replaced with safe array construction. -* Environment adapter action injection is no longer hardcoded to player `0`; agent player is configurable and validated. 
-* CLI script parsing is now strict and returns errors for invalid or empty tokens (no silent drops). -* Policy selection dispatch in CLI mode handling is centralized through one resolver helper, removing repeated branch forests. -* Scripted policy strict mode is available and used by replay/script-driven CLI execution to fail fast on illegal or exhausted scripts. - -### Acceptance criteria closure - -1. Met: adding a game is centered on game math plus optional renderer; registry/CLI wiring is descriptor-based. -2. Met: new game dispatch is registry-driven and no longer requires editing multiple CLI match sites. -3. Met: normal session stepping uses kernel paths; checked instrumentation is opt-in. -4. Met: replay/checkpoint history uses O(1) front eviction (`VecDeque` for dynamic history, ring behavior for fixed history). -5. Met: render hot paths use retained/cache-aware ordering buffers and avoid previous clone-heavy frame rebuild patterns. -6. Met: proof claim is strengthened and documented, including render-input/scene normalization scope while keeping GPU backend outside full formal scope. -7. Met: builtins were rewritten into shared helpers/core-owned flows with reduced handwritten per-game duplication and benchmarked kernel hot paths. -8. Met: compact environment adapter exposes infotheory-ready reset/step surfaces via `Environment`/`EnvStep`. -9. Met: rustdoc coverage gate and verification flow are enforced in CI/workflow scripts. 
- -### Verification evidence - -The final integrated sweep passed with: - -* `TMPDIR=/var/tmp cargo check` -* `TMPDIR=/var/tmp cargo check --all-features` -* `TMPDIR=/var/tmp cargo test` -* `TMPDIR=/var/tmp cargo test --all-features` -* `TMPDIR=/var/tmp cargo clippy --all-targets --all-features -- -D warnings` -* `TMPDIR=/var/tmp bash scripts/run-verification.sh` - -The unified verification script completed successfully, including Kani harness matrix execution; Verus checks were skipped automatically when `verus` was unavailable in the local environment. - -## Bottom line - -What must change is not “the engine needs fewer lines.” What must change is that the engine must absorb the repeated complexity once, in the core, macros, registry, codec system, and proof framework. What it must become instead is a mathematically crisp environment kernel with one canonical observation channel, engine-owned compact encodings, engine-owned replay/history, engine-owned proof scaffolds, automatic physics visualization, and optional thin render adapters. - -That is the design that is both more DRY and more provable: fewer handwritten surfaces, fewer duplicated obligations, fewer places for bugs to hide, and a much shorter path from “I know the math of Pong” to “I have a correct, replayable, renderable, verifiable game.” - -[1]: https://github.com/turtle261/gameengine "GitHub - turtle261/gameengine: A formally verified, deterministic, reversible game/simulation kernel designed as the reference environment layer for Infotheory. 
· GitHub" -[2]: https://model-checking.github.io/kani/?utm_source=chatgpt.com "Getting started - The Kani Rust Verifier" -[3]: https://model-checking.github.io/kani/rfc/rfcs/0009-function-contracts.html?utm_source=chatgpt.com "0009-function-contracts - Kani RFC Book" -[4]: https://verus-lang.github.io/verus/state_machines/?utm_source=chatgpt.com "Intro - Verus Transition Systems" - From 21306dc77e366faaeadbf023daf787a6315d3164 Mon Sep 17 00:00:00 2001 From: Noah Cashin Date: Wed, 1 Apr 2026 14:19:33 -0400 Subject: [PATCH 03/12] Fix Features to let CI run properly --- Cargo.lock | 2 +- Cargo.toml | 5 +++++ examples/perf_probe.rs | 4 ++-- 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index d59ed49..066b599 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -697,7 +697,7 @@ dependencies = [ [[package]] name = "gameengine" -version = "0.1.2" +version = "0.2.0" dependencies = [ "bytemuck", "criterion", diff --git a/Cargo.toml b/Cargo.toml index d08a974..77c5976 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -64,3 +64,8 @@ required-features = ["builtin"] name = "kernel_hotpaths" harness = false required-features = ["builtin"] + +[[example]] +name = "perf_probe" +path = "examples/perf_probe.rs" +required-features = ["builtin"] diff --git a/examples/perf_probe.rs b/examples/perf_probe.rs index f850217..bf9b928 100644 --- a/examples/perf_probe.rs +++ b/examples/perf_probe.rs @@ -2,7 +2,7 @@ use std::env; #[cfg(feature = "builtin")] use gameengine::builtin::{Blackjack, BlackjackAction, TicTacToe, TicTacToeAction}; -#[cfg(feature = "physics")] +#[cfg(all(feature = "builtin", feature = "physics"))] use gameengine::builtin::{Platformer, PlatformerAction}; #[cfg(feature = "builtin")] use gameengine::{PlayerAction, Session, stable_hash}; @@ -69,7 +69,7 @@ fn run_blackjack(iterations: u64) -> u64 { digest ^ stable_hash(session.trace()) } -#[cfg(feature = "physics")] +#[cfg(all(feature = "builtin", feature = "physics"))] fn run_platformer(iterations: 
u64) -> u64 { let mut session = Session::new(Platformer::default(), 5); let script = [ From 09981c8906f0f2ced89c8a70272906d9f2141701 Mon Sep 17 00:00:00 2001 From: Noah Cashin Date: Wed, 1 Apr 2026 15:25:33 -0400 Subject: [PATCH 04/12] Fix some refactor-introduced issues --- src/builtin/blackjack/mod.rs | 9 +- src/builtin/platformer/mod.rs | 14 ++- src/builtin/platformer/proofs.rs | 3 +- src/builtin/platformer/world.rs | 9 +- src/builtin/tictactoe/mod.rs | 8 +- src/cli/mod.rs | 41 ++++-- src/core/cards.rs | 5 +- src/core/env.rs | 208 ++++++++++++++++++++++++++++--- src/core/observe.rs | 25 +++- src/lib.rs | 12 +- src/math.rs | 4 +- src/physics.rs | 12 +- src/proof/mod.rs | 4 +- src/registry/mod.rs | 4 +- src/render/runtime.rs | 3 +- src/session.rs | 5 +- src/verification.rs | 121 ++++++++++++++++++ tests/validation.rs | 4 +- 18 files changed, 429 insertions(+), 62 deletions(-) diff --git a/src/builtin/blackjack/mod.rs b/src/builtin/blackjack/mod.rs index c0aca7c..99a01a2 100644 --- a/src/builtin/blackjack/mod.rs +++ b/src/builtin/blackjack/mod.rs @@ -447,7 +447,13 @@ impl Game for Blackjack { post: &Self::State, outcome: &StepOutcome, ) -> bool { - reward_and_terminal_postcondition(outcome.reward_for(0), -1, 1, post.phase == BlackjackPhase::Terminal, outcome.is_terminal()) + reward_and_terminal_postcondition( + outcome.reward_for(0), + -1, + 1, + post.phase == BlackjackPhase::Terminal, + outcome.is_terminal(), + ) } fn compact_spec(&self) -> CompactSpec { @@ -511,7 +517,6 @@ impl Game for Blackjack { out, ); } - } #[cfg(test)] diff --git a/src/builtin/platformer/mod.rs b/src/builtin/platformer/mod.rs index 946facb..98babb8 100644 --- a/src/builtin/platformer/mod.rs +++ b/src/builtin/platformer/mod.rs @@ -6,8 +6,8 @@ use crate::core::single_player; use crate::game::Game; use crate::math::{Aabb2, StrictF64, Vec2}; use crate::physics::{ - BodyKind, PhysicsBody2d, PhysicsWorld2d, - collect_actor_trigger_contacts, set_trigger_mask_deferred, + BodyKind, PhysicsBody2d, 
PhysicsWorld2d, collect_actor_trigger_contacts, + set_trigger_mask_deferred, }; use crate::rng::DeterministicRng; use crate::types::{PlayerAction, PlayerId, PlayerReward, Reward, Seed, StepOutcome, Termination}; @@ -27,8 +27,8 @@ const PLATFORMER_ACTION_ORDER: [PlatformerAction; 4] = [ ]; mod world; -pub use world::{BerryView, PlatformerWorldView}; use world::berry_views; +pub use world::{BerryView, PlatformerWorldView}; /// Player action in the platformer world. #[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)] @@ -482,7 +482,13 @@ impl Game for Platformer { post: &Self::State, outcome: &StepOutcome, ) -> bool { - reward_and_terminal_postcondition(outcome.reward_for(0), -1, 11, post.remaining_berries == 0, outcome.is_terminal()) + reward_and_terminal_postcondition( + outcome.reward_for(0), + -1, + 11, + post.remaining_berries == 0, + outcome.is_terminal(), + ) } fn compact_spec(&self) -> CompactSpec { diff --git a/src/builtin/platformer/proofs.rs b/src/builtin/platformer/proofs.rs index 4e65874..1ef9df0 100644 --- a/src/builtin/platformer/proofs.rs +++ b/src/builtin/platformer/proofs.rs @@ -14,7 +14,8 @@ fn wall_clamps_hold_for_all_edge_positions() { .world .set_body_position(PLAYER_BODY_ID, game.config.player_center(x, 0)); let mut rng = crate::rng::DeterministicRng::from_seed(1); - let mut outcome = crate::types::StepOutcome::>::default(); + let mut outcome = + crate::types::StepOutcome::>::default(); let mut actions = FixedVec::, 1>::default(); actions .push(PlayerAction { diff --git a/src/builtin/platformer/world.rs b/src/builtin/platformer/world.rs index e2823ef..7ca96c7 100644 --- a/src/builtin/platformer/world.rs +++ b/src/builtin/platformer/world.rs @@ -1,8 +1,8 @@ //! Platformer world/debug view types and physics oracle adapter. 
+use crate::game::Game; use crate::math::{Aabb2, StrictF64}; use crate::physics::{Contact2d, PhysicsBody2d, PhysicsOracleView2d, PhysicsWorld2d}; -use crate::game::Game; use super::{ BERRY_COUNT, FIRST_BERRY_BODY_ID, PLATFORMER_BODIES, PLATFORMER_CONTACTS, Platformer, @@ -33,7 +33,10 @@ pub struct PlatformerWorldView { pub berries: [BerryView; BERRY_COUNT], } -pub(super) fn berry_views(config: PlatformerConfig, remaining_berries: u8) -> [BerryView; BERRY_COUNT] { +pub(super) fn berry_views( + config: PlatformerConfig, + remaining_berries: u8, +) -> [BerryView; BERRY_COUNT] { let mut berries = [BerryView::default(); BERRY_COUNT]; let mut index = 0usize; while index < BERRY_COUNT { @@ -70,4 +73,4 @@ impl PhysicsOracleView2d for PlatformerWorldView { fn contacts(&self) -> &[Contact2d] { self.physics.contacts() } -} \ No newline at end of file +} diff --git a/src/builtin/tictactoe/mod.rs b/src/builtin/tictactoe/mod.rs index b8043d1..9e999e1 100644 --- a/src/builtin/tictactoe/mod.rs +++ b/src/builtin/tictactoe/mod.rs @@ -297,7 +297,13 @@ impl Game for TicTacToe { if pre.terminal { return post == pre && outcome.reward_for(0) == 0 && outcome.is_terminal(); } - reward_and_terminal_postcondition(outcome.reward_for(0), -3, 2, post.terminal, outcome.is_terminal()) + reward_and_terminal_postcondition( + outcome.reward_for(0), + -3, + 2, + post.terminal, + outcome.is_terminal(), + ) } } diff --git a/src/cli/mod.rs b/src/cli/mod.rs index 470382b..cf57953 100644 --- a/src/cli/mod.rs +++ b/src/cli/mod.rs @@ -5,20 +5,18 @@ use std::fmt::Debug; use std::io::{self, Write}; use crate::buffer::Buffer; -use crate::core::observe::{Observe, Observer}; #[cfg(feature = "builtin")] use crate::builtin::{Blackjack, BlackjackAction, TicTacToe, TicTacToeAction}; #[cfg(feature = "physics")] use crate::builtin::{Platformer, PlatformerAction}; +use crate::core::observe::{Observe, Observer}; use crate::policy::{FirstLegalPolicy, Policy, RandomPolicy, ScriptedPolicy}; +use crate::registry::{GameKind, 
all_games, find_game}; +#[cfg(feature = "render")] +use crate::render::{PassivePolicyDriver, RenderConfig, RenderMode, RendererApp, TurnBasedDriver}; #[cfg(all(feature = "render", feature = "physics"))] use crate::render::{RealtimeDriver, builtin}; #[cfg(feature = "render")] -use crate::render::{ - PassivePolicyDriver, RenderConfig, RenderMode, RendererApp, TurnBasedDriver, -}; -use crate::registry::{GameKind, all_games, find_game}; -#[cfg(feature = "render")] use crate::session::InteractiveSession; use crate::{Game, Session, stable_hash}; @@ -310,7 +308,9 @@ where }; let observation = session.game().observe(session.state(), Observer::Player(0)); let mut compact = G::WordBuf::default(); - session.game().encode_observation(&observation, &mut compact); + session + .game() + .encode_observation(&observation, &mut compact); println!( "tick={} reward={} terminal={} compact={:?}", session.current_tick(), @@ -382,7 +382,10 @@ fn run_tictactoe_render(config: CliConfig, mode: RunMode) -> Result<(), String> .map_err(|error| error.to_string()), PolicyChoice::Random => RendererApp::new( render_config, - PassivePolicyDriver::new(InteractiveSession::new(TicTacToe, config.seed), RandomPolicy), + PassivePolicyDriver::new( + InteractiveSession::new(TicTacToe, config.seed), + RandomPolicy, + ), TicTacToePresenter::default(), ) .run_native() @@ -425,7 +428,10 @@ fn run_blackjack_render(config: CliConfig, mode: RunMode) -> Result<(), String> .map_err(|error| error.to_string()), PolicyChoice::Random => RendererApp::new( render_config, - PassivePolicyDriver::new(InteractiveSession::new(Blackjack, config.seed), RandomPolicy), + PassivePolicyDriver::new( + InteractiveSession::new(Blackjack, config.seed), + RandomPolicy, + ), BlackjackPresenter::default(), ) .run_native() @@ -463,13 +469,17 @@ fn run_platformer_render(config: CliConfig, mode: RunMode) -> Result<(), String> let render_config = build_render_config(&config, render_mode); let game = Platformer::default(); - let policy_choice = 
resolve_policy_choice(mode, &config.policy, parse_platformer_script, "platformer")?; + let policy_choice = + resolve_policy_choice(mode, &config.policy, parse_platformer_script, "platformer")?; if config.render_physics { match policy_choice { PolicyChoice::Human => RendererApp::new( render_config, - RealtimeDriver::new(InteractiveSession::new(game, config.seed), PlatformerAction::Stay), + RealtimeDriver::new( + InteractiveSession::new(game, config.seed), + PlatformerAction::Stay, + ), builtin::PlatformerPhysicsPresenter::new(game.config), ) .run_native() @@ -506,7 +516,10 @@ fn run_platformer_render(config: CliConfig, mode: RunMode) -> Result<(), String> match policy_choice { PolicyChoice::Human => RendererApp::new( render_config, - RealtimeDriver::new(InteractiveSession::new(game, config.seed), PlatformerAction::Stay), + RealtimeDriver::new( + InteractiveSession::new(game, config.seed), + PlatformerAction::Stay, + ), builtin::PlatformerPresenter::default(), ) .run_native() @@ -545,7 +558,9 @@ fn run_platformer_render(config: CliConfig, mode: RunMode) -> Result<(), String> fn print_usage() { println!("usage:"); println!(" gameengine list"); - println!(" gameengine play [--seed N] [--max-steps N] [--policy human|random|first|script:...]"); + println!( + " gameengine play [--seed N] [--max-steps N] [--policy human|random|first|script:...]" + ); println!(" gameengine replay [--seed N] [--max-steps N] [--policy script:...]"); println!(" gameengine validate"); println!("available games:"); diff --git a/src/core/cards.rs b/src/core/cards.rs index d35c731..bdb7696 100644 --- a/src/core/cards.rs +++ b/src/core/cards.rs @@ -12,7 +12,10 @@ pub struct BlackjackValue { } /// Evaluate a blackjack hand from rank values in `[1, 13]`. 
-pub fn evaluate_blackjack_hand(cards: &[u8; MAX_CARDS], len: u8) -> BlackjackValue { +pub fn evaluate_blackjack_hand( + cards: &[u8; MAX_CARDS], + len: u8, +) -> BlackjackValue { let mut total = 0u8; let mut aces = 0u8; let limit = len as usize; diff --git a/src/core/env.rs b/src/core/env.rs index 1f698c3..f2ef4e3 100644 --- a/src/core/env.rs +++ b/src/core/env.rs @@ -25,9 +25,7 @@ impl BitPacket { } fn push_word(&mut self, word: u64) { - self.words - .push(word) - .expect("bit packet capacity exceeded"); + self.words.push(word).expect("bit packet capacity exceeded"); } } @@ -56,6 +54,8 @@ pub struct EnvStep { /// Errors produced by compact environment reset/step operations. #[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] pub enum EnvError { + /// Step was requested after the session already terminated. + SessionTerminated, /// Action bit pattern does not decode into a legal action value. InvalidActionEncoding { /// Raw encoded action word. @@ -89,6 +89,7 @@ pub enum EnvError { impl fmt::Display for EnvError { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { + Self::SessionTerminated => write!(f, "cannot step a terminal session"), Self::InvalidActionEncoding { encoded } => { write!(f, "invalid compact action encoding: {encoded}") } @@ -204,6 +205,10 @@ where /// Steps the environment from an encoded action value. 
pub fn step(&mut self, action_bits: u64) -> Result, EnvError> { + if self.session.is_terminal() { + return Err(EnvError::SessionTerminated); + } + let Some(action) = self.session.game().decode_action(action_bits) else { return Err(EnvError::InvalidActionEncoding { encoded: action_bits, @@ -228,17 +233,17 @@ where let (reward, terminated) = { let outcome = self.session.step_with_joint_actions(&actions); - (outcome.reward_for(0), outcome.is_terminal()) + (outcome.reward_for(self.agent_player), outcome.is_terminal()) }; let spec = self.session.game().compact_spec(); - let encoded_reward = spec.try_encode_reward(reward).map_err(|_| { - EnvError::RewardOutOfRange { - reward, - min: spec.min_reward, - max: spec.max_reward, - } - })?; + let encoded_reward = + spec.try_encode_reward(reward) + .map_err(|_| EnvError::RewardOutOfRange { + reward, + min: spec.min_reward, + max: spec.max_reward, + })?; Ok(EnvStep { observation_bits: self.encode_current_observation()?, @@ -254,11 +259,9 @@ where /// Encodes current observation into a bounded compact packet. 
pub fn encode_current_observation(&self) -> Result, EnvError> { let mut encoded = G::WordBuf::default(); - self.session.game().observe_and_encode( - self.session.state(), - self.observer, - &mut encoded, - ); + self.session + .game() + .observe_and_encode(self.session.state(), self.observer, &mut encoded); if encoded.len() > MAX_WORDS { return Err(EnvError::ObservationOverflow { actual_words: encoded.len(), @@ -274,8 +277,7 @@ where } } -impl InfotheoryEnvironment - for Environment +impl InfotheoryEnvironment for Environment where G: Observe, H: HistoryStore, @@ -291,6 +293,176 @@ where } } +#[cfg(test)] +mod regression_tests { + use super::{DefaultEnvironment, EnvError, Observer}; + use crate::buffer::FixedVec; + use crate::compact::CompactSpec; + use crate::game::Game; + use crate::rng::DeterministicRng; + use crate::types::{PlayerAction, PlayerId, PlayerReward, Seed, StepOutcome, Termination}; + + #[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)] + struct DemoGame; + + #[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)] + struct DemoState { + terminal: bool, + } + + #[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)] + enum DemoAction { + #[default] + Step, + } + + impl Game for DemoGame { + type State = DemoState; + type Action = DemoAction; + type PlayerObservation = u8; + type SpectatorObservation = u8; + type WorldView = u8; + type PlayerBuf = FixedVec; + type ActionBuf = FixedVec; + type JointActionBuf = FixedVec, 2>; + type RewardBuf = FixedVec; + type WordBuf = FixedVec; + + fn name(&self) -> &'static str { + "demo" + } + + fn player_count(&self) -> usize { + 2 + } + + fn init(&self, _seed: Seed) -> Self::State { + DemoState { terminal: false } + } + + fn is_terminal(&self, state: &Self::State) -> bool { + state.terminal + } + + fn players_to_act(&self, state: &Self::State, out: &mut Self::PlayerBuf) { + out.clear(); + if !state.terminal { + out.push(0).unwrap(); + out.push(1).unwrap(); + } + } + + fn legal_actions(&self, state: 
&Self::State, player: PlayerId, out: &mut Self::ActionBuf) { + out.clear(); + if !state.terminal && player < 2 { + out.push(DemoAction::Step).unwrap(); + } + } + + fn observe_player( + &self, + _state: &Self::State, + player: PlayerId, + ) -> Self::PlayerObservation { + player as u8 + } + + fn observe_spectator(&self, _state: &Self::State) -> Self::SpectatorObservation { + 99 + } + + fn world_view(&self, _state: &Self::State) -> Self::WorldView { + 0 + } + + fn step_in_place( + &self, + state: &mut Self::State, + _joint_actions: &Self::JointActionBuf, + _rng: &mut DeterministicRng, + out: &mut StepOutcome, + ) { + out.rewards + .push(PlayerReward { + player: 0, + reward: 10, + }) + .unwrap(); + out.rewards + .push(PlayerReward { + player: 1, + reward: 20, + }) + .unwrap(); + state.terminal = true; + out.termination = Termination::Terminal { winner: Some(0) }; + } + + fn compact_spec(&self) -> CompactSpec { + CompactSpec { + action_count: 1, + observation_bits: 64, + observation_stream_len: 1, + reward_bits: 6, + min_reward: 0, + max_reward: 63, + reward_offset: 0, + } + } + + fn encode_action(&self, action: &Self::Action) -> u64 { + match action { + DemoAction::Step => 0, + } + } + + fn decode_action(&self, encoded: u64) -> Option { + (encoded == 0).then_some(DemoAction::Step) + } + + fn encode_player_observation( + &self, + observation: &Self::PlayerObservation, + out: &mut Self::WordBuf, + ) { + out.clear(); + out.push(100 + u64::from(*observation)).unwrap(); + } + + fn encode_spectator_observation( + &self, + observation: &Self::SpectatorObservation, + out: &mut Self::WordBuf, + ) { + out.clear(); + out.push(200 + u64::from(*observation)).unwrap(); + } + } + + #[test] + fn step_uses_agent_player_reward() { + let mut env = DefaultEnvironment::::new(DemoGame, 3, Observer::Player(0)); + env.set_agent_player(1); + let step = env.step(0).unwrap(); + assert_eq!(step.reward.raw, 20); + assert_eq!(step.reward.encoded, 20); + } + + #[test] + fn 
stepping_terminal_session_returns_error() { + let mut env = DefaultEnvironment::::new(DemoGame, 3, Observer::Player(0)); + env.step(0).unwrap(); + assert_eq!(env.step(0), Err(EnvError::SessionTerminated)); + } + + #[test] + fn spectator_observations_use_spectator_encoder() { + let env = DefaultEnvironment::::new(DemoGame, 3, Observer::Spectator); + let packet = env.encode_current_observation().unwrap(); + assert_eq!(packet.words(), &[299]); + } +} + #[cfg(all(test, feature = "builtin"))] mod tests { use super::{DefaultEnvironment, Observer}; diff --git a/src/core/observe.rs b/src/core/observe.rs index ecdca4f..1c811da 100644 --- a/src/core/observe.rs +++ b/src/core/observe.rs @@ -25,10 +25,21 @@ pub trait Observe: Game { /// Encodes an observation into the compact word stream. fn encode_observation(&self, observation: &Self::Obs, out: &mut Self::WordBuf); + /// Encodes an observation with explicit viewpoint context. + fn encode_observation_for( + &self, + who: Observer, + observation: &Self::Obs, + out: &mut Self::WordBuf, + ) { + let _ = who; + self.encode_observation(observation, out); + } + /// Convenience helper to observe and encode in one call. fn observe_and_encode(&self, state: &Self::State, who: Observer, out: &mut Self::WordBuf) { let observation = self.observe(state, who); - self.encode_observation(&observation, out); + self.encode_observation_for(who, &observation, out); } } @@ -48,4 +59,16 @@ where fn encode_observation(&self, observation: &Self::Obs, out: &mut Self::WordBuf) { self.encode_player_observation(observation, out); } + + fn encode_observation_for( + &self, + who: Observer, + observation: &Self::Obs, + out: &mut Self::WordBuf, + ) { + match who { + Observer::Player(_) => self.encode_player_observation(observation, out), + Observer::Spectator => self.encode_spectator_observation(observation, out), + } + } } diff --git a/src/lib.rs b/src/lib.rs index 0fa0265..1b21589 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,16 +1,18 @@ //! 
Deterministic game engine core with compact codecs, verification hooks, and render adapters. pub mod core; -#[cfg(feature = "builtin")] -pub mod registry; #[cfg(feature = "proof")] pub mod proof; +#[cfg(feature = "builtin")] +pub mod registry; pub mod buffer; -pub mod compact; -pub mod game; #[cfg(feature = "builtin")] pub mod builtin; +#[cfg(feature = "cli")] +pub mod cli; +pub mod compact; +pub mod game; pub mod math; #[cfg(feature = "parallel")] pub mod parallel; @@ -23,8 +25,6 @@ pub mod rng; pub mod session; pub mod types; pub mod verification; -#[cfg(feature = "cli")] -pub mod cli; pub use buffer::{BitWords, Buffer, CapacityError, FixedVec}; pub use compact::CompactSpec; diff --git a/src/math.rs b/src/math.rs index 977a713..19260c9 100644 --- a/src/math.rs +++ b/src/math.rs @@ -127,7 +127,7 @@ where && point.y <= self.max.y } - /// Returns whether this AABB intersects `other`. + /// Returns whether this AABB intersects `other`. pub fn intersects(&self, other: &Self) -> bool { self.min.x <= other.max.x && self.max.x >= other.min.x @@ -164,7 +164,7 @@ where && point.z <= self.max.z } - /// Returns whether this AABB intersects `other`. + /// Returns whether this AABB intersects `other`. 
pub fn intersects(&self, other: &Self) -> bool { self.min.x <= other.max.x && self.max.x >= other.min.x diff --git a/src/physics.rs b/src/physics.rs index 1c69b38..9c52de4 100644 --- a/src/physics.rs +++ b/src/physics.rs @@ -369,7 +369,9 @@ impl PhysicsWorld2d( trigger_count: usize, active_mask: u64, ) { + assert!( + trigger_count <= u64::BITS as usize, + "trigger_count {trigger_count} exceeds 64-bit trigger mask capacity" + ); let mut index = 0usize; while index < trigger_count { let active = (active_mask & (1u64 << index)) != 0; @@ -416,6 +422,10 @@ pub fn collect_actor_trigger_contacts u8 { + assert!( + trigger_count <= u64::BITS as usize, + "trigger_count {trigger_count} exceeds 64-bit trigger mask capacity" + ); let mut collected = 0u8; let mut index = 0usize; while index < trigger_count { diff --git a/src/proof/mod.rs b/src/proof/mod.rs index ee6aadb..3ac96fb 100644 --- a/src/proof/mod.rs +++ b/src/proof/mod.rs @@ -20,7 +20,9 @@ pub fn assert_generated_game_surface( ) { assert_transition_contracts(game, state, actions, seed); assert_observation_contracts(game, state); - if let Some(first) = actions.as_slice().first() { + if game.compact_spec().action_count > 0 + && let Some(first) = actions.as_slice().first() + { assert_compact_roundtrip(game, &first.action); } } diff --git a/src/registry/mod.rs b/src/registry/mod.rs index 9240ca4..1549309 100644 --- a/src/registry/mod.rs +++ b/src/registry/mod.rs @@ -134,5 +134,7 @@ pub fn all_games() -> &'static [GameDescriptor] { /// Finds a builtin game descriptor by stable name. 
pub fn find_game(name: &str) -> Option<&'static GameDescriptor> { - all_games().iter().find(|descriptor| descriptor.name == name) + all_games() + .iter() + .find(|descriptor| descriptor.name == name) } diff --git a/src/render/runtime.rs b/src/render/runtime.rs index fa172a6..3515c33 100644 --- a/src/render/runtime.rs +++ b/src/render/runtime.rs @@ -1099,7 +1099,8 @@ impl GpuState { self.text_order.clear(); self.text_order.extend(0..scene.texts.len()); - self.text_order.sort_by_key(|&index| scene.texts[index].layer); + self.text_order + .sort_by_key(|&index| scene.texts[index].layer); for (index, text_index) in self.text_order.iter().copied().enumerate() { let text = &scene.texts[text_index]; diff --git a/src/session.rs b/src/session.rs index 6078710..fd515ea 100644 --- a/src/session.rs +++ b/src/session.rs @@ -479,10 +479,7 @@ impl> SessionKernel { .record(self.tick, &self.state, self.rng, actions, &self.outcome); } - fn collect_policy_actions( - &mut self, - policies: &mut [&mut dyn Policy], - ) { + fn collect_policy_actions(&mut self, policies: &mut [&mut dyn Policy]) { self.players_to_act.clear(); self.game .players_to_act(&self.state, &mut self.players_to_act); diff --git a/src/verification.rs b/src/verification.rs index fad8c54..2d74abf 100644 --- a/src/verification.rs +++ b/src/verification.rs @@ -65,6 +65,127 @@ pub fn assert_observation_contracts(game: &G, state: &G::State) { /// Asserts compact action encoding roundtrips through decode. 
pub fn assert_compact_roundtrip(game: &G, action: &G::Action) { + if game.compact_spec().action_count == 0 { + return; + } let encoded = game.encode_action(action); assert_eq!(game.decode_action(encoded), Some(*action)); } + +#[cfg(test)] +mod tests { + use super::assert_compact_roundtrip; + use crate::buffer::FixedVec; + use crate::compact::CompactSpec; + use crate::game::Game; + use crate::rng::DeterministicRng; + use crate::types::{PlayerAction, PlayerId, PlayerReward, Seed, StepOutcome}; + + #[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)] + struct MinimalGame { + compact_actions: u64, + } + + #[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)] + struct MinimalState; + + impl Game for MinimalGame { + type State = MinimalState; + type Action = u8; + type PlayerObservation = u8; + type SpectatorObservation = u8; + type WorldView = u8; + type PlayerBuf = FixedVec; + type ActionBuf = FixedVec; + type JointActionBuf = FixedVec, 1>; + type RewardBuf = FixedVec; + type WordBuf = FixedVec; + + fn name(&self) -> &'static str { + "minimal" + } + + fn player_count(&self) -> usize { + 1 + } + + fn init(&self, _seed: Seed) -> Self::State { + MinimalState + } + + fn is_terminal(&self, _state: &Self::State) -> bool { + false + } + + fn players_to_act(&self, _state: &Self::State, out: &mut Self::PlayerBuf) { + out.clear(); + out.push(0).unwrap(); + } + + fn legal_actions( + &self, + _state: &Self::State, + _player: PlayerId, + out: &mut Self::ActionBuf, + ) { + out.clear(); + out.push(0).unwrap(); + } + + fn observe_player( + &self, + _state: &Self::State, + _player: PlayerId, + ) -> Self::PlayerObservation { + 0 + } + + fn observe_spectator(&self, _state: &Self::State) -> Self::SpectatorObservation { + 0 + } + + fn world_view(&self, _state: &Self::State) -> Self::WorldView { + 0 + } + + fn step_in_place( + &self, + _state: &mut Self::State, + _joint_actions: &Self::JointActionBuf, + _rng: &mut DeterministicRng, + out: &mut StepOutcome, + ) { + out.rewards + 
.push(PlayerReward { + player: 0, + reward: 0, + }) + .unwrap(); + } + + fn compact_spec(&self) -> CompactSpec { + CompactSpec { + action_count: self.compact_actions, + observation_bits: 0, + observation_stream_len: 0, + reward_bits: 1, + min_reward: 0, + max_reward: 0, + reward_offset: 0, + } + } + } + + #[test] + fn compact_roundtrip_is_skipped_when_action_codec_is_absent() { + let game = MinimalGame { compact_actions: 0 }; + assert_compact_roundtrip(&game, &0); + } + + #[test] + #[should_panic] + fn compact_roundtrip_still_checks_declared_codec_surface() { + let game = MinimalGame { compact_actions: 1 }; + assert_compact_roundtrip(&game, &0); + } +} diff --git a/tests/validation.rs b/tests/validation.rs index c31032a..12f1e46 100644 --- a/tests/validation.rs +++ b/tests/validation.rs @@ -10,8 +10,8 @@ use gameengine::builtin::{Blackjack, BlackjackAction, TicTacToe, TicTacToeAction #[cfg(feature = "physics")] use gameengine::builtin::{Platformer, PlatformerAction}; use gameengine::{ - CompactSpec, DeterministicRng, FixedVec, Game, InteractiveSession, PlayerAction, - PlayerReward, Session, StepOutcome, stable_hash, + CompactSpec, DeterministicRng, FixedVec, Game, InteractiveSession, PlayerAction, PlayerReward, + Session, StepOutcome, stable_hash, }; struct CountingAllocator; From 716a4b8c0c07d9b15d000e0a1fceac49b1731b75 Mon Sep 17 00:00:00 2001 From: Noah Cashin Date: Wed, 1 Apr 2026 18:58:41 -0400 Subject: [PATCH 05/12] Improve refactoring and Engine ergonomics --- .github/workflows/ci.yml | 8 + .gitignore | 1 + README.md | 263 ++++++++----- examples/pong_core.rs | 146 ++++++++ proofs/README.md | 12 +- proofs/claim.md | 4 +- proofs/future_game_template.md | 8 +- proofs/verus/session_refinement.rs | 123 ++++++ scripts/run-verification.sh | 4 + scripts/run-verus.sh | 34 +- src/builtin/blackjack/mod.rs | 61 +-- src/builtin/blackjack/tests.rs | 2 + src/builtin/platformer/mod.rs | 149 +++----- src/builtin/platformer/tests.rs | 2 + src/builtin/tictactoe/mod.rs | 191 
+++++----- src/builtin/tictactoe/tests.rs | 2 + src/cli/mod.rs | 25 +- src/compact.rs | 206 +++++++++- src/core/env.rs | 582 +++++++++++++++++++++++++++-- src/core/mod.rs | 1 + src/core/observe.rs | 9 +- src/core/single_player.rs | 296 ++++++++++++++- src/game.rs | 58 ++- src/lib.rs | 1 + src/policy.rs | 19 +- src/registry/mod.rs | 34 +- src/render/builtin.rs | 2 +- src/render/runtime.rs | 22 +- src/session.rs | 60 +-- src/verification.rs | 14 +- tests/validation.rs | 12 +- 31 files changed, 1850 insertions(+), 501 deletions(-) create mode 100644 examples/pong_core.rs create mode 100644 proofs/verus/session_refinement.rs diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 776b714..502b08c 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -76,6 +76,14 @@ jobs: - name: Clippy run: cargo clippy --all-targets --all-features -- -D warnings + - name: Install pinned Verus release + run: | + curl -fsSL "https://github.com/verus-lang/verus/releases/download/release%2F0.2026.03.28.3390e9a/verus-0.2026.03.28.3390e9a-x86-linux.zip" -o /tmp/verus.zip + unzip -q /tmp/verus.zip -d /tmp + rm -rf ./verus_binary + mv /tmp/verus_x86_64 ./verus_binary + chmod +x ./verus_binary/verus + - name: Verus model checks run: REQUIRE_VERUS=1 bash scripts/run-verus.sh diff --git a/.gitignore b/.gitignore index 250a46e..0175d93 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ /target /verus_binary +/infotheory diff --git a/README.md b/README.md index 4ebe053..2d21ef8 100644 --- a/README.md +++ b/README.md @@ -1,113 +1,172 @@ # Infotheory Game Engine -`gameengine` is a deterministic, replayable, proof-oriented environment kernel. - -The engine is organized so game authors focus on game mathematics first: - -`(seed, state, joint_actions) -> (new_state, reward, canonical observation bits, termination)` - -Everything else (session/replay, compact codecs, registry/CLI wiring, proof helpers, -physics/render integration) is engine-owned and reusable. 
- -## Rewrite Architecture - -The crate remains a single artifact and is library-first by default. - -- `src/lib.rs` - - canonical public API and feature-gated exports -- `src/core/` - - core deterministic interfaces and wrappers - - canonical observation trait (`Observe` + `Observer`) - - infotheory-ready environment wrapper (`Environment`, `EnvStep`, `BitPacket`) - - explicit fast vs checked stepper wrappers -- `src/proof/` - - proof-facing helper surface and claim document wiring -- `src/physics.rs` - - deterministic physics world + contact generation - - hybrid broadphase: tiny-world fast path + scalable sweep-and-prune path -- `src/render/` - - optional retained-mode renderer - - hot path updated to avoid per-frame cache/scene cloning where possible -- `src/builtin/` - - builtin implementation namespace - - concrete game implementations under `src/builtin/tictactoe/`, `src/builtin/blackjack/`, and `src/builtin/platformer/` -- `src/registry/` - - static game descriptor registry used by the CLI -- `src/cli/` - - optional registry-backed CLI integration -- `src/bin/gameengine.rs` - - binary entrypoint (feature-gated) - -## Canonical Observation + Env Surface - -The rewrite introduces a single canonical observation surface for consumers: - -- `core::observe::Observe` - - one observation schema type per game (`type Obs`) - - observer-aware extraction (`Observer::Player`, `Observer::Spectator`) - - canonical compact encoding -- `core::env::Environment` - - `reset(seed)` - - `step(action_bits)` - - returns `EnvStep { observation_bits, reward, terminated, truncated }` - -This is designed to map directly to infotheory-style environment loops. +`gameengine` is a deterministic, replayable, proof-oriented game/simulation kernel for games treated as mathematical objects. 
+ +The kernel is designed around: + +`(seed, state, joint_actions) -> (new_state, reward, observations, termination)` + +Everything else is layered on top: + +- rendering is a derived view, +- human pacing is a presentation concern, +- networking is a transport concern, +- machine control is just another action source, +- replay and rollback are exact because the kernel is deterministic. + +## What It Is For + +This crate is meant for: + +- deterministic game development, +- AIT and AI experiments, +- simulation-heavy search workloads such as MCTS, +- scientific or benchmark environments that need replay fidelity, +- games that benefit from formal reasoning about correctness, +- simulated physical environments. + +The target audience is broader than traditional game development: computer scientists, mathematicians, ML/AI researchers, and anyone who needs portable, auditable, replayable environments. + +## Design Principles + +- Headless by default. The mathematical kernel is the source of truth. +- Deterministic seeded PRNG only. No wall-clock time inside the game core. +- Tick-based simulation. Rendering speed and simulation speed are decoupled. +- Fixed-capacity buffers in the proof-critical path. Hot stepping stays allocation-free. +- Replay, rewind, and fork are first-class. +- Physics is engine-owned, auditable, and deterministic. +- Rendering is additive. A UI cannot change game semantics. +- One canonical observation type per game (`type Obs`), with player/spectator viewpoints encoded from that shared schema. + +## Authoring Ergonomics + +The core `Game` trait remains available for full control, but single-player environments now have an ergonomic adapter: + +- `core::single_player::SinglePlayerGame` + +It removes repeated single-player plumbing: + +- no manual `player_count = 1` wiring, +- no manual `players_to_act` wiring, +- no manual joint-action extraction boilerplate, +- canonical fixed-capacity reward/joint-action buffer wiring is engine-owned. 
+ +This is the intended path for Pong-class ports where the handwritten core should stay close to game math. + +Minimal compilable example: + +```bash +cargo run --example pong_core +``` + +## Environment Interface + +`core::env::Environment` exposes an infotheory-compatible compact interface: + +- `reset(seed)` +- `reset_with_params(seed, params)` +- `step(action_bits) -> EnvStep { observation_bits, reward, terminated, truncated }` + +Compact constraints are canonical and centralized in `CompactSpec`: + +- observation word count/bit-width validation, +- reward range validation, +- reward bit-width validation. + +## Formal Verification Scope + +The core engine and builtin reference environments are set up for Kani and Verus checks. + +Current proof surface includes: + +- fixed-capacity buffers, +- compact codec constraints and roundtrip properties, +- PRNG determinism, +- rollback/replay restoration properties, +- builtin game invariants in the harness matrix, +- engine-owned 2D physics invariants, +- Verus replay/observation refinement models. + +Render/runtime behavior is validated by tests and benchmarks; the GPU/driver stack is intentionally outside full formal proof scope. 
+ +Run the integrated verification matrix with: + +```bash +bash scripts/run-verification.sh +``` + +Run Verus checks directly: + +```bash +bash scripts/run-verus.sh +``` + +Pin and auto-fetch the CI Verus binary: + +```bash +AUTO_FETCH_VERUS=1 REQUIRE_VERUS=1 bash scripts/run-verus.sh +``` ## Feature Graph - `default = []` - - minimal headless library + - minimal headless kernel - `proof` - - proof helper surface exports + - proof helper exports - `physics` - - deterministic 2D physics + - engine-owned deterministic 2D physics - `builtin` - builtin reference environments - `cli` - - command-line frontend (`gameengine` binary), depends on `builtin` + - command-line binary (`gameengine`), depends on `builtin` - `parallel` - - parallel replay helpers + - batch simulation helpers for independent runs - `render` - - optional retained-mode renderer/runtime - -## Verification + - additive render/runtime layer -Run the unified verification workflow: +Recommended combinations: ```bash -bash scripts/run-verification.sh +# headless kernel only +cargo test + +# builtin reference environments +cargo test --features builtin + +# builtin games plus physics +cargo test --features "builtin physics" + +# playable/rendered reference environments +cargo test --features "render builtin physics" ``` -This script runs: +## Builtin Reference Games -- test/check matrix across core feature combinations, -- clippy (`-D warnings`), -- benchmark compilation, -- Kani harness matrix (when `cargo-kani` is installed), -- Verus model checks (when `verus` is installed). +- `TicTacToe` +- `Blackjack` +- `Platformer` -## Performance Tooling +These are reference environments, not privileged engine special-cases. They demonstrate deterministic game authoring, proof hooks, compact encoding, and render adapters. 
+ -Benchmarks: +## Rendering Model -```bash -cargo bench --bench step_throughput --features "builtin physics" -cargo bench --bench kernel_hotpaths --features "builtin physics" -``` +The render layer is wrapper-first, not kernel-first. -Perf profiling (Linux): +- `--render`: intended player observation/UI path +- `--render-physics`: oracle/developer view of the physics environment -```bash -bash scripts/run-perf.sh platformer 3000000 -``` +The oracle path can reveal information the player should not see. It exists for debugging, teaching, and diagnostics. + +Because the kernel is tick-based, the same game can be: -The perf probe targets release-mode stepping loops without Criterion analysis overhead, -so hotspot attribution is meaningful. +- trained at compute speed, +- replayed exactly, +- slowed for human-readable pacing, +- or rendered live with AI-driven actions. ## CLI -The CLI is registry-backed: game listing and dispatch come from `src/registry/mod.rs`. -Adding a game now requires a descriptor registration rather than editing multiple match sites. +The CLI is available when `cli` is enabled. ```bash cargo run --features cli -- list @@ -117,16 +176,42 @@ cargo run --features "cli physics render" -- play platformer --render cargo run --features "cli physics render" -- play platformer --render-physics --debug-overlay ``` -## Proof Claim Scope +Useful flags: + +- `--seed <SEED>` +- `--max-steps <N>` +- `--policy human|random|first|script:...` +- `--render` +- `--render-physics` +- `--ticks-per-second <RATE>` +- `--no-vsync` +- `--debug-overlay` + +## Rollback And Replay + +`SessionKernel`, `DynamicHistory`, and `FixedHistory` support: + +- exact trace recording, +- `rewind_to(tick)`, +- `replay_to(tick)`, +- `state_at(tick)`, +- `fork_at(tick)`. + +This supports rollback netcode, deterministic multiplayer simulation, offline search, and reproducible experiments. + +## WASM + +The core library is WASM-compatible. 
The headless kernel remains portable, and the render stack is structured to compile for WebAssembly. -Proof claim details live in: +## Project Direction -- `proofs/README.md` +The kernel is intentionally shaped to be compatible with Infotheory AIXI interfaces: -Current claim includes deterministic kernel contracts, compact codec properties, -replay/rewind restoration, and physics invariants for supported feature sets. -GPU backend execution remains outside full formal proof scope. +- compact `u64` actions/observations, +- `i64` rewards, +- deterministic seeded execution, +- replayable transitions. ## License -ISC. +This project uses the ISC License (see `LICENSE`). diff --git a/examples/pong_core.rs b/examples/pong_core.rs new file mode 100644 index 0000000..71256af --- /dev/null +++ b/examples/pong_core.rs @@ -0,0 +1,146 @@ +use gameengine::core::single_player::{self, SinglePlayerGame, SinglePlayerRewardBuf}; +use gameengine::{ + Buffer, DeterministicRng, FixedVec, PlayerId, Seed, Session, StepOutcome, Termination, +}; + +const W: i16 = 40; +const H: i16 = 20; +const P: i16 = 2; +const WIN: u8 = 5; +const ACTIONS: [Act; 3] = [Act::Stay, Act::Up, Act::Down]; + +#[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)] +enum Act { + #[default] + Stay, + Up, + Down, +} + +#[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)] +struct St { + p1: i16, + p2: i16, + bx: i16, + by: i16, + vx: i16, + vy: i16, + s1: u8, + s2: u8, + done: bool, + winner: Option, +} + +#[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)] +struct Pong; + +impl Pong { + fn clamp(y: i16) -> i16 { + y.clamp(P, H - 1 - P) + } + fn reset_ball(st: &mut St, toward_p1: bool) { + st.bx = W / 2; + st.by = H / 2; + st.vx = if toward_p1 { -1 } else { 1 }; + st.vy = if (st.s1 + st.s2).is_multiple_of(2) { 1 } else { -1 }; + } +} + +impl SinglePlayerGame for Pong { + type Params = (); + type State = St; + type Action = Act; + type Obs = St; + type WorldView = St; + type ActionBuf = FixedVec; + 
type WordBuf = FixedVec; + + fn name(&self) -> &'static str { + "pong-core" + } + fn init_with_params(&self, _seed: Seed, _params: &()) -> St { + St { + p1: H / 2, + p2: H / 2, + bx: W / 2, + by: H / 2, + vx: 1, + vy: 1, + ..St::default() + } + } + fn is_terminal(&self, st: &St) -> bool { + st.done + } + fn legal_actions(&self, _st: &St, out: &mut Self::ActionBuf) { + out.clear(); + out.extend_from_slice(&ACTIONS).unwrap(); + } + fn observe_player(&self, st: &St) -> St { + *st + } + fn world_view(&self, st: &St) -> St { + *st + } + fn step_in_place( + &self, + st: &mut St, + action: Option, + _rng: &mut DeterministicRng, + out: &mut StepOutcome, + ) { + if st.done { + out.termination = Termination::Terminal { winner: st.winner }; + single_player::push_reward(&mut out.rewards, 0); + return; + } + let dy = match action.unwrap_or(Act::Stay) { + Act::Stay => 0, + Act::Up => -1, + Act::Down => 1, + }; + st.p1 = Self::clamp(st.p1 + dy); + st.p2 = Self::clamp(st.p2 + (st.by > st.p2) as i16 - (st.by < st.p2) as i16); + st.bx += st.vx; + st.by += st.vy; + if st.by <= 0 || st.by >= H - 1 { + st.by = st.by.clamp(0, H - 1); + st.vy = -st.vy; + } + let mut reward = 0; + if st.bx <= 1 && (st.by - st.p1).abs() <= P { + st.vx = 1; + } else if st.bx >= W - 2 && (st.by - st.p2).abs() <= P { + st.vx = -1; + } else if st.bx < 0 { + st.s2 += 1; + reward = -1; + Self::reset_ball(st, false); + } else if st.bx >= W { + st.s1 += 1; + reward = 1; + Self::reset_ball(st, true); + } + if st.s1 >= WIN || st.s2 >= WIN { + st.done = true; + st.winner = Some(if st.s1 > st.s2 { 0 } else { 1 }); + out.termination = Termination::Terminal { winner: st.winner }; + } else { + out.termination = Termination::Ongoing; + } + single_player::push_reward(&mut out.rewards, reward); + } +} + +fn main() { + let mut session = Session::new(Pong, 7); + while !session.is_terminal() && session.current_tick() < 64 { + session.step(&[]); + } + println!( + "tick={} score={} - {}", + session.current_tick(), + 
session.state().s1, + session.state().s2 + ); +} diff --git a/proofs/README.md b/proofs/README.md index c2a289d..100f1a4 100644 --- a/proofs/README.md +++ b/proofs/README.md @@ -18,6 +18,12 @@ Run Verus model checks directly: bash scripts/run-verus.sh ``` +Pin and auto-fetch the exact Verus release used by CI: + +```bash +AUTO_FETCH_VERUS=1 REQUIRE_VERUS=1 bash scripts/run-verus.sh +``` + The unified script runs tests, checks, clippy, bench compilation, Kani harnesses, and Verus model checks across three verified layers: - the default headless kernel, @@ -42,8 +48,10 @@ See [`proofs/claim.md`](claim.md) for a precise verified vs tested vs out-of-sco - Game-specific properties in the builtin game modules when `builtin` is enabled - Physics invariants for the engine-owned 2D world and the platformer environment when `builtin` and `physics` are enabled -- Render-input safety claims now include observation decoding and scene-order normalization checks; - final GPU backend execution remains outside full formal proof scope +- Verus model lemmas in [`proofs/verus/session_refinement.rs`](verus/session_refinement.rs) + for replay fold refinement and canonical observation-schema constraints +- Render/input/runtime behavior is covered by tests and benchmarks; it is not currently + claimed as fully formally verified ## Verification Pattern For New Games diff --git a/proofs/claim.md b/proofs/claim.md index 6fc9ffd..8d90921 100644 --- a/proofs/claim.md +++ b/proofs/claim.md @@ -3,12 +3,14 @@ This document states what `gameengine` currently claims as formally verified, what is tested, and what is intentionally outside full proof scope. -## Formally Verified (Kani Harness Surface) +## Formally Verified (Kani + Verus Surfaces) - Fixed-capacity containers and bit-word primitives. - Compact reward codec round-trips and range soundness. +- Compact observation word-shape and reward bit-width constraint enforcement. - Deterministic RNG construction and replay properties. 
- Replay rewind restoration for bounded history configurations. +- Verus replay refinement lemmas and canonical observation-schema model constraints. - Builtin game invariants included in harness matrix. - Physics invariants and platformer synchronization harnesses for `builtin + physics`. diff --git a/proofs/future_game_template.md b/proofs/future_game_template.md index a768aed..fc76e4f 100644 --- a/proofs/future_game_template.md +++ b/proofs/future_game_template.md @@ -9,7 +9,7 @@ world view, compact codec, and physics hooks belong in the verification checklis ## Runtime Checklist -- Add a deterministic smoke test from `init(seed)` through a fixed action trace. +- Add a deterministic smoke test from `init_with_params(seed, &params)` through a fixed action trace. - Add a replay equivalence test using `Session::state_at`, `rewind_to`, and `fork_at`. - Add a no-allocation hot-path test for direct `step_in_place`. - Add compact codec round-trip tests for the game action/observation codec hooks. @@ -25,6 +25,8 @@ Implement and document: - `world_view_invariant` - `transition_postcondition` +For single-player games, prefer implementing `core::single_player::SinglePlayerGame` and let the engine provide the `Game` adapter wiring. 
+ ## Kani Harness Skeleton ```rust @@ -37,7 +39,7 @@ mod proofs { #[kani::proof] fn transition_contract_holds_for_representative_step() { let game = MyGame::default(); - let state = game.init(1); + let state = game.init_with_params(1, &game.default_params()); let mut actions = FixedVec::, 1>::default(); actions.push(PlayerAction { player: 0, action: MyAction::Default }).unwrap(); crate::verification::assert_transition_contracts(&game, &state, &actions, 1); @@ -46,7 +48,7 @@ mod proofs { #[kani::proof] fn observation_contract_holds_for_initial_state() { let game = MyGame::default(); - let state = game.init(1); + let state = game.init_with_params(1, &game.default_params()); crate::verification::assert_observation_contracts(&game, &state); } diff --git a/proofs/verus/session_refinement.rs b/proofs/verus/session_refinement.rs new file mode 100644 index 0000000..8eacb69 --- /dev/null +++ b/proofs/verus/session_refinement.rs @@ -0,0 +1,123 @@ +use vstd::prelude::*; + +verus! { + +pub trait KernelReplayModel { + type State; + type Action; + + spec fn init(seed: nat) -> Self::State; + spec fn step(state: Self::State, action: Self::Action) -> Self::State; + spec fn replay(seed: nat, actions: Seq) -> Self::State; + spec fn replay_from(state: Self::State, actions: Seq) -> Self::State; + + proof fn replay_from_empty_axiom(state: Self::State) + ensures + Self::replay_from(state, Seq::::empty()) == state; + + proof fn replay_from_step_axiom( + state: Self::State, + prefix: Seq, + next: Self::Action, + ) + ensures + Self::replay_from(state, prefix.push(next)) + == Self::step(Self::replay_from(state, prefix), next); + + proof fn replay_is_from_init_axiom(seed: nat, actions: Seq) + ensures + Self::replay(seed, actions) == Self::replay_from(Self::init(seed), actions); +} + +pub proof fn replay_empty_refines_init(seed: nat) + ensures + M::replay(seed, Seq::::empty()) == M::init(seed), +{ + M::replay_is_from_init_axiom(seed, Seq::::empty()); + 
M::replay_from_empty_axiom(M::init(seed)); +} + +pub proof fn replay_refines_left_fold( + seed: nat, + prefix: Seq, + next: M::Action, +) + ensures + M::replay(seed, prefix.push(next)) == M::step(M::replay(seed, prefix), next), +{ + M::replay_is_from_init_axiom(seed, prefix.push(next)); + M::replay_from_step_axiom(M::init(seed), prefix, next); + M::replay_is_from_init_axiom(seed, prefix); +} + +pub proof fn replay_singleton_refines_one_step( + seed: nat, + action: M::Action, +) + ensures + M::replay(seed, Seq::::empty().push(action)) + == M::step(M::init(seed), action), +{ + replay_refines_left_fold::(seed, Seq::::empty(), action); + replay_empty_refines_init::(seed); +} + +pub proof fn replay_from_prefix_state_refines_left_fold( + seed: nat, + prefix: Seq, + suffix_prefix: Seq, + next: M::Action, +) + ensures + M::replay_from(M::replay(seed, prefix), suffix_prefix.push(next)) + == M::step(M::replay_from(M::replay(seed, prefix), suffix_prefix), next), +{ + M::replay_from_step_axiom(M::replay(seed, prefix), suffix_prefix, next); +} + +pub trait ObservationModel { + type State; + type Obs; + + spec fn observe(state: Self::State, who: int) -> Self::Obs; + spec fn observer_is_valid(who: int) -> bool; + spec fn obs_well_formed(obs: Self::Obs) -> bool; + spec fn obs_schema_id(obs: Self::Obs) -> nat; + spec fn canonical_schema_id() -> nat; + + proof fn observation_totality_axiom(state: Self::State, who: int) + requires + Self::observer_is_valid(who) + ensures + Self::obs_well_formed(Self::observe(state, who)), + Self::obs_schema_id(Self::observe(state, who)) == Self::canonical_schema_id(); +} + +pub proof fn canonical_observation_schema_for_any_view( + state: M::State, + who_a: int, + who_b: int, +) + requires + M::observer_is_valid(who_a), + M::observer_is_valid(who_b) + ensures + M::obs_well_formed(M::observe(state, who_a)), + M::obs_well_formed(M::observe(state, who_b)), + M::obs_schema_id(M::observe(state, who_a)) == M::obs_schema_id(M::observe(state, who_b)), +{ + 
M::observation_totality_axiom(state, who_a); + M::observation_totality_axiom(state, who_b); +} + +pub proof fn canonical_schema_matches_declared_id(state: M::State, who: int) + requires + M::observer_is_valid(who) + ensures + M::obs_well_formed(M::observe(state, who)), + M::obs_schema_id(M::observe(state, who)) == M::canonical_schema_id(), +{ + M::observation_totality_axiom(state, who); +} + +} // verus! diff --git a/scripts/run-verification.sh b/scripts/run-verification.sh index 3df7d7c..8bfeed4 100755 --- a/scripts/run-verification.sh +++ b/scripts/run-verification.sh @@ -11,7 +11,11 @@ COMMON_HARNESSES=( bit_words_round_trip fixed_vec_push_preserves_prefix_order compact_reward_round_trip + compact_observation_words_match_schema + compact_reward_bit_width_is_enforced step_outcome_reward_lookup_defaults_to_zero + env_rejects_invalid_observation_words + env_rejects_reward_encoding_that_exceeds_bit_width replay_trace_records_steps rng_state_sanitization_is_total seeded_stream_constructor_handles_reference_cases diff --git a/scripts/run-verus.sh b/scripts/run-verus.sh index 3428bdb..c8c812d 100755 --- a/scripts/run-verus.sh +++ b/scripts/run-verus.sh @@ -5,6 +5,31 @@ ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" cd "$ROOT_DIR" REQUIRE_VERUS="${REQUIRE_VERUS:-0}" +AUTO_FETCH_VERUS="${AUTO_FETCH_VERUS:-0}" +VERUS_RELEASE_URL="${VERUS_RELEASE_URL:-https://github.com/verus-lang/verus/releases/download/release%2F0.2026.03.28.3390e9a/verus-0.2026.03.28.3390e9a-x86-linux.zip}" + +bootstrap_verus_binary() { + local archive_path + archive_path="$(mktemp /tmp/verus-release.XXXXXX.zip)" + local extract_dir + extract_dir="$(mktemp -d /tmp/verus-release.XXXXXX)" + + echo "[verus] downloading pinned release archive" + curl -fsSL "$VERUS_RELEASE_URL" -o "$archive_path" + unzip -q "$archive_path" -d "$extract_dir" + + local extracted + extracted="$(find "$extract_dir" -mindepth 1 -maxdepth 1 -type d | head -n 1)" + if [[ -z "$extracted" || ! 
-x "$extracted/verus" ]]; then + echo "[verus] archive did not contain an executable verus directory" >&2 + return 1 + fi + + rm -rf ./verus_binary + mv "$extracted" ./verus_binary + chmod +x ./verus_binary/verus + echo "[verus] installed pinned release into ./verus_binary" +} resolve_verus_bin() { local requested="${VERUS_BIN:-}" @@ -36,8 +61,15 @@ resolve_verus_bin() { } if ! VERUS_BIN_PATH="$(resolve_verus_bin)"; then + if [[ "$AUTO_FETCH_VERUS" == "1" || "$REQUIRE_VERUS" == "1" ]]; then + bootstrap_verus_binary + VERUS_BIN_PATH="$(resolve_verus_bin)" + fi +fi + +if [[ -z "${VERUS_BIN_PATH:-}" ]]; then if [[ "$REQUIRE_VERUS" == "1" ]]; then - echo "[verus] required but no Verus binary was found (checked VERUS_BIN, ./verus_binary/verus, ./verus_binary, PATH)" >&2 + echo "[verus] required but no Verus binary was found (checked VERUS_BIN, ./verus_binary/verus, ./verus_binary, PATH, optional bootstrap)" >&2 exit 1 fi echo "[verus] no Verus binary found; skipping Verus model checks" diff --git a/src/builtin/blackjack/mod.rs b/src/builtin/blackjack/mod.rs index 99a01a2..810a643 100644 --- a/src/builtin/blackjack/mod.rs +++ b/src/builtin/blackjack/mod.rs @@ -6,10 +6,9 @@ use crate::core::cards::{ BlackjackValue, evaluate_blackjack_hand, fill_standard_deck_52, is_standard_deck_52_permutation, pack_cards_nibbles, }; -use crate::core::single_player; -use crate::game::Game; +use crate::core::single_player::{self, SinglePlayerRewardBuf}; use crate::rng::DeterministicRng; -use crate::types::{PlayerAction, PlayerId, PlayerReward, Seed, StepOutcome, Termination}; +use crate::types::{PlayerId, Seed, StepOutcome, Termination}; use crate::verification::reward_and_terminal_postcondition; const MAX_HAND_CARDS: usize = 12; const DECK_SIZE: usize = 52; @@ -220,27 +219,20 @@ impl Blackjack { } } -impl Game for Blackjack { +impl single_player::SinglePlayerGame for Blackjack { + type Params = (); type State = BlackjackState; type Action = BlackjackAction; - type PlayerObservation = 
BlackjackObservation; - type SpectatorObservation = BlackjackObservation; + type Obs = BlackjackObservation; type WorldView = BlackjackWorldView; - type PlayerBuf = FixedVec; type ActionBuf = FixedVec; - type JointActionBuf = FixedVec, 1>; - type RewardBuf = FixedVec; type WordBuf = FixedVec; fn name(&self) -> &'static str { "blackjack" } - fn player_count(&self) -> usize { - 1 - } - - fn init(&self, seed: Seed) -> Self::State { + fn init_with_params(&self, seed: Seed, _params: &Self::Params) -> Self::State { let mut rng = DeterministicRng::from_seed_and_stream(seed, 0); let mut deck = [0u8; DECK_SIZE]; Self::fill_deck(&mut deck); @@ -272,13 +264,9 @@ impl Game for Blackjack { matches!(state.phase, BlackjackPhase::Terminal) } - fn players_to_act(&self, state: &Self::State, out: &mut Self::PlayerBuf) { - single_player::write_players_to_act(out, self.is_terminal(state)); - } - - fn legal_actions(&self, state: &Self::State, player: PlayerId, out: &mut Self::ActionBuf) { + fn legal_actions(&self, state: &Self::State, out: &mut Self::ActionBuf) { out.clear(); - if !single_player::can_act(player, self.is_terminal(state)) { + if self.is_terminal(state) { return; } let value = Self::player_value(state); @@ -290,7 +278,7 @@ impl Game for Blackjack { } } - fn observe_player(&self, state: &Self::State, _player: PlayerId) -> Self::PlayerObservation { + fn observe_player(&self, state: &Self::State) -> Self::Obs { let terminal = self.is_terminal(state); let opponent_visible_len = if terminal { state.opponent_len } else { 0 }; let mut opponent_cards = [0u8; MAX_HAND_CARDS]; @@ -311,7 +299,7 @@ impl Game for Blackjack { } } - fn observe_spectator(&self, state: &Self::State) -> Self::SpectatorObservation { + fn observe_spectator(&self, state: &Self::State) -> Self::Obs { BlackjackObservation { phase: state.phase, terminal: self.is_terminal(state), @@ -333,12 +321,10 @@ impl Game for Blackjack { fn step_in_place( &self, state: &mut Self::State, - joint_actions: 
&Self::JointActionBuf, + action: Option, rng: &mut DeterministicRng, - out: &mut StepOutcome, + out: &mut StepOutcome, ) { - let action = single_player::first_action(joint_actions.as_slice()); - let reward = if self.is_terminal(state) { out.termination = Termination::Terminal { winner: state.winner, @@ -418,12 +404,7 @@ impl Game for Blackjack { } } - fn player_observation_invariant( - &self, - state: &Self::State, - _player: PlayerId, - observation: &Self::PlayerObservation, - ) -> bool { + fn player_observation_invariant(&self, state: &Self::State, observation: &Self::Obs) -> bool { if self.is_terminal(state) { observation.opponent_visible_len == state.opponent_len && observation.opponent_cards == state.opponent_cards @@ -443,9 +424,9 @@ impl Game for Blackjack { fn transition_postcondition( &self, _pre: &Self::State, - _actions: &Self::JointActionBuf, + _action: Option, post: &Self::State, - outcome: &StepOutcome, + outcome: &StepOutcome, ) -> bool { reward_and_terminal_postcondition( outcome.reward_for(0), @@ -476,11 +457,7 @@ impl Game for Blackjack { decode_enum_action(encoded, &BLACKJACK_ACTION_ORDER) } - fn encode_player_observation( - &self, - observation: &Self::PlayerObservation, - out: &mut Self::WordBuf, - ) { + fn encode_player_observation(&self, observation: &Self::Obs, out: &mut Self::WordBuf) { let header = Self::phase_code(observation.phase) | ((observation.terminal as u64) << 4) | ((u64::from(observation.player_len)) << 8) @@ -497,11 +474,7 @@ impl Game for Blackjack { ); } - fn encode_spectator_observation( - &self, - observation: &Self::SpectatorObservation, - out: &mut Self::WordBuf, - ) { + fn encode_spectator_observation(&self, observation: &Self::Obs, out: &mut Self::WordBuf) { let header = Self::phase_code(observation.phase) | ((observation.terminal as u64) << 4) | ((u64::from(observation.player_len)) << 8) diff --git a/src/builtin/blackjack/tests.rs b/src/builtin/blackjack/tests.rs index 8c07182..fd3c716 100644 --- 
a/src/builtin/blackjack/tests.rs +++ b/src/builtin/blackjack/tests.rs @@ -1,6 +1,8 @@ use super::*; +use crate::game::Game; use crate::policy::{FirstLegalPolicy, RandomPolicy}; use crate::session::Session; +use crate::types::PlayerAction; use crate::verification::{ assert_compact_roundtrip, assert_observation_contracts, assert_transition_contracts, }; diff --git a/src/builtin/platformer/mod.rs b/src/builtin/platformer/mod.rs index 98babb8..d9dd424 100644 --- a/src/builtin/platformer/mod.rs +++ b/src/builtin/platformer/mod.rs @@ -2,15 +2,14 @@ use crate::buffer::{Buffer, FixedVec}; use crate::compact::{CompactSpec, decode_enum_action, encode_enum_action}; -use crate::core::single_player; -use crate::game::Game; +use crate::core::single_player::{self, SinglePlayerRewardBuf}; use crate::math::{Aabb2, StrictF64, Vec2}; use crate::physics::{ BodyKind, PhysicsBody2d, PhysicsWorld2d, collect_actor_trigger_contacts, set_trigger_mask_deferred, }; use crate::rng::DeterministicRng; -use crate::types::{PlayerAction, PlayerId, PlayerReward, Reward, Seed, StepOutcome, Termination}; +use crate::types::{PlayerId, Reward, Seed, StepOutcome, Termination}; use crate::verification::reward_and_terminal_postcondition; const BERRY_COUNT: usize = 6; @@ -158,6 +157,8 @@ impl PlatformerConfig { /// Full platformer state. #[derive(Clone, Debug, Eq, Hash, PartialEq)] pub struct PlatformerState { + /// Active immutable configuration for this episode. + pub config: PlatformerConfig, /// Physics simulation world containing player and berries. pub world: PhysicsWorld2d, /// Bitset of still-active berries. 
@@ -181,7 +182,9 @@ pub struct PlatformerObservation { impl Default for PlatformerState { fn default() -> Self { - Platformer::default().init(0) + let game = Platformer::default(); + let params = ::default_params(&game); + ::init_with_params(&game, 0, ¶ms) } } @@ -203,7 +206,7 @@ impl Platformer { state.world.require_body(PLAYER_BODY_ID) } - fn player_position(&self, state: &PlatformerState) -> (u8, u8) { + fn player_position(state: &PlatformerState) -> (u8, u8) { let player = Self::player_body(state); let min = player.aabb().min; let x = min.x.to_f64(); @@ -229,7 +232,7 @@ impl Platformer { ); } - fn collect_berries_from_contacts(&self, state: &mut PlatformerState) -> Reward { + fn collect_berries_from_contacts(state: &mut PlatformerState) -> Reward { let was_non_terminal = state.remaining_berries != 0; let mut remaining = u64::from(state.remaining_berries); let collected = collect_actor_trigger_contacts( @@ -241,15 +244,15 @@ impl Platformer { ); state.remaining_berries = remaining as u8; - let mut reward = self.config.berry_reward * i64::from(collected); + let mut reward = state.config.berry_reward * i64::from(collected); if was_non_terminal && state.remaining_berries == 0 { - reward += self.config.finish_bonus; + reward += state.config.finish_bonus; } reward } - fn observation_from_state(&self, state: &PlatformerState) -> PlatformerObservation { - let (x, y) = self.player_position(state); + fn observation_from_state(state: &PlatformerState) -> PlatformerObservation { + let (x, y) = Self::player_position(state); PlatformerObservation { x, y, @@ -259,20 +262,22 @@ impl Platformer { } } - fn build_world(&self) -> PhysicsWorld2d { - let mut world = PhysicsWorld2d::new(self.config.arena_bounds()); + fn build_world( + config: PlatformerConfig, + ) -> PhysicsWorld2d { + let mut world = PhysicsWorld2d::new(config.arena_bounds()); world.add_body_deferred(PhysicsBody2d { id: PLAYER_BODY_ID, kind: BodyKind::Kinematic, - position: self.config.player_center(0, 0), - 
half_extents: self.config.player_half_extents(), + position: config.player_center(0, 0), + half_extents: config.player_half_extents(), active: true, }); for index in 0..BERRY_COUNT { world.add_body_deferred(PhysicsBody2d { id: FIRST_BERRY_BODY_ID + index as u16, kind: BodyKind::Trigger, - position: self.config.berry_center(index), + position: config.berry_center(index), half_extents: Vec2::new(StrictF64::new(0.0), StrictF64::new(0.0)), active: true, }); @@ -282,30 +287,28 @@ impl Platformer { } } -impl Game for Platformer { +impl single_player::SinglePlayerGame for Platformer { + type Params = PlatformerConfig; type State = PlatformerState; type Action = PlatformerAction; - type PlayerObservation = PlatformerObservation; - type SpectatorObservation = PlatformerObservation; + type Obs = PlatformerObservation; type WorldView = PlatformerWorldView; - type PlayerBuf = FixedVec; type ActionBuf = FixedVec; - type JointActionBuf = FixedVec, 1>; - type RewardBuf = FixedVec; type WordBuf = FixedVec; - fn name(&self) -> &'static str { - "platformer" + fn default_params(&self) -> Self::Params { + self.config } - fn player_count(&self) -> usize { - 1 + fn name(&self) -> &'static str { + "platformer" } - fn init(&self, _seed: Seed) -> Self::State { - assert!(self.config.invariant()); + fn init_with_params(&self, _seed: Seed, params: &Self::Params) -> Self::State { + assert!(params.invariant()); PlatformerState { - world: self.build_world(), + config: *params, + world: Self::build_world(*params), remaining_berries: ALL_BERRIES_MASK, } } @@ -314,43 +317,34 @@ impl Game for Platformer { Self::is_terminal_state(state) } - fn players_to_act(&self, state: &Self::State, out: &mut Self::PlayerBuf) { - single_player::write_players_to_act(out, self.is_terminal(state)); - } - - fn legal_actions(&self, state: &Self::State, player: PlayerId, out: &mut Self::ActionBuf) { + fn legal_actions(&self, state: &Self::State, out: &mut Self::ActionBuf) { out.clear(); - if 
!single_player::can_act(player, self.is_terminal(state)) { + if self.is_terminal(state) { return; } out.extend_from_slice(&PLATFORMER_ACTION_ORDER).unwrap(); } - fn observe_player(&self, state: &Self::State, _player: PlayerId) -> Self::PlayerObservation { - self.observation_from_state(state) - } - - fn observe_spectator(&self, state: &Self::State) -> Self::SpectatorObservation { - self.observation_from_state(state) + fn observe_player(&self, state: &Self::State) -> Self::Obs { + Self::observation_from_state(state) } fn world_view(&self, state: &Self::State) -> Self::WorldView { PlatformerWorldView { - config: self.config, + config: state.config, physics: state.world.clone(), - berries: berry_views(self.config, state.remaining_berries), + berries: berry_views(state.config, state.remaining_berries), } } fn step_in_place( &self, state: &mut Self::State, - joint_actions: &Self::JointActionBuf, + action: Option, rng: &mut DeterministicRng, - out: &mut StepOutcome, + out: &mut StepOutcome, ) { - let action = - single_player::first_action(joint_actions.as_slice()).unwrap_or(PlatformerAction::Stay); + let action = action.unwrap_or(PlatformerAction::Stay); let mut reward = 0; if self.is_terminal(state) { @@ -358,12 +352,13 @@ impl Game for Platformer { winner: Self::winner(state), }; } else { - let (current_x, _) = self.player_position(state); + let config = state.config; + let (current_x, _) = Self::player_position(state); let (x, y) = match action { PlatformerAction::Stay => (current_x, 0), PlatformerAction::Left => (current_x.saturating_sub(1), 0), PlatformerAction::Right => ( - if current_x + self.config.player_width < self.config.width { + if current_x + config.player_width < config.width { current_x + 1 } else { current_x @@ -371,21 +366,18 @@ impl Game for Platformer { 0, ), PlatformerAction::Jump => { - if rng.gen_bool_ratio( - self.config.sprain_numerator, - self.config.sprain_denominator, - ) { + if rng.gen_bool_ratio(config.sprain_numerator, 
config.sprain_denominator) { reward -= 1; } - (current_x, self.config.jump_delta) + (current_x, config.jump_delta) } }; state .world - .set_body_position_deferred(PLAYER_BODY_ID, self.config.player_center(x, y)); + .set_body_position_deferred(PLAYER_BODY_ID, config.player_center(x, y)); state.world.refresh_contacts(); - reward += self.collect_berries_from_contacts(state); + reward += Self::collect_berries_from_contacts(state); self.sync_berries(state); state.world.step(); @@ -402,7 +394,7 @@ impl Game for Platformer { } fn state_invariant(&self, state: &Self::State) -> bool { - if !self.config.invariant() + if !state.config.invariant() || state.remaining_berries & !ALL_BERRIES_MASK != 0 || !state.world.invariant() || state.world.bodies.len() != PLATFORMER_BODIES @@ -413,13 +405,13 @@ impl Game for Platformer { let player = Self::player_body(state); if player.kind != BodyKind::Kinematic || !player.active - || player.half_extents != self.config.player_half_extents() + || player.half_extents != state.config.player_half_extents() { return false; } - let (x, y) = self.player_position(state); - if x + self.config.player_width > self.config.width || y > self.config.jump_delta { + let (x, y) = Self::player_position(state); + if x + state.config.player_width > state.config.width || y > state.config.jump_delta { return false; } @@ -427,7 +419,7 @@ impl Game for Platformer { let berry = state.world.require_body(FIRST_BERRY_BODY_ID + index as u16); let expected_active = state.remaining_berries & (1u8 << index) != 0; if berry.kind != BodyKind::Trigger - || berry.position != self.config.berry_center(index) + || berry.position != state.config.berry_center(index) || berry.active != expected_active { return false; @@ -437,25 +429,20 @@ impl Game for Platformer { true } - fn player_observation_invariant( - &self, - state: &Self::State, - _player: PlayerId, - observation: &Self::PlayerObservation, - ) -> bool { - observation == &self.observation_from_state(state) + fn 
player_observation_invariant(&self, state: &Self::State, observation: &Self::Obs) -> bool { + observation == &Self::observation_from_state(state) } fn spectator_observation_invariant( &self, state: &Self::State, - observation: &Self::SpectatorObservation, + observation: &Self::Obs, ) -> bool { - observation == &self.observation_from_state(state) + observation == &Self::observation_from_state(state) } fn world_view_invariant(&self, state: &Self::State, world: &Self::WorldView) -> bool { - if world.config != self.config || world.physics != state.world { + if world.config != state.config || world.physics != state.world { return false; } @@ -463,8 +450,8 @@ impl Game for Platformer { while index < world.berries.len() { let berry = world.berries[index]; if berry.id != FIRST_BERRY_BODY_ID + index as u16 - || berry.x != self.config.berry_xs[index] - || berry.y != self.config.berry_y + || berry.x != state.config.berry_xs[index] + || berry.y != state.config.berry_y || berry.collected != ((state.remaining_berries & (1u8 << index)) == 0) { return false; @@ -478,9 +465,9 @@ impl Game for Platformer { fn transition_postcondition( &self, _pre: &Self::State, - _actions: &Self::JointActionBuf, + _action: Option, post: &Self::State, - outcome: &StepOutcome, + outcome: &StepOutcome, ) -> bool { reward_and_terminal_postcondition( outcome.reward_for(0), @@ -511,11 +498,7 @@ impl Game for Platformer { decode_enum_action(encoded, &PLATFORMER_ACTION_ORDER) } - fn encode_player_observation( - &self, - observation: &Self::PlayerObservation, - out: &mut Self::WordBuf, - ) { + fn encode_player_observation(&self, observation: &Self::Obs, out: &mut Self::WordBuf) { out.clear(); let packed = u64::from(observation.x) | (u64::from(observation.y) << 4) @@ -523,14 +506,6 @@ impl Game for Platformer { | ((observation.terminal as u64) << 11); out.push(packed).unwrap(); } - - fn encode_spectator_observation( - &self, - observation: &Self::SpectatorObservation, - out: &mut Self::WordBuf, - ) { - 
self.encode_player_observation(observation, out); - } } #[cfg(test)] diff --git a/src/builtin/platformer/tests.rs b/src/builtin/platformer/tests.rs index afe2c4d..2921f48 100644 --- a/src/builtin/platformer/tests.rs +++ b/src/builtin/platformer/tests.rs @@ -1,5 +1,7 @@ use super::*; +use crate::game::Game; use crate::session::Session; +use crate::types::{PlayerAction, PlayerReward}; use crate::verification::{ assert_compact_roundtrip, assert_observation_contracts, assert_transition_contracts, }; diff --git a/src/builtin/tictactoe/mod.rs b/src/builtin/tictactoe/mod.rs index 9e999e1..d1e28c1 100644 --- a/src/builtin/tictactoe/mod.rs +++ b/src/builtin/tictactoe/mod.rs @@ -2,10 +2,9 @@ use crate::buffer::FixedVec; use crate::compact::CompactSpec; -use crate::core::single_player; -use crate::game::Game; +use crate::core::single_player::{self, SinglePlayerRewardBuf}; use crate::rng::DeterministicRng; -use crate::types::{PlayerAction, PlayerId, PlayerReward, Seed, StepOutcome, Termination}; +use crate::types::{PlayerId, Seed, StepOutcome, Termination}; use crate::verification::reward_and_terminal_postcondition; const WIN_LINES: [(usize, usize, usize); 8] = [ @@ -92,6 +91,79 @@ impl TicTacToe { true } + fn decode_action_index(action: Option) -> Option { + action.map(|action: TicTacToeAction| action.0 as usize) + } + + fn action_is_legal(state: &TicTacToeState, index: usize) -> bool { + index < state.board.len() && state.board[index] == TicTacToeCell::Empty + } + + fn apply_mark( + state: &mut TicTacToeState, + index: usize, + mark: TicTacToeCell, + ) -> Option> { + state.board[index] = mark; + let winner = Self::find_winner(&state.board); + if winner.is_some() || Self::is_full(&state.board) { + state.terminal = true; + state.winner = winner; + Some(winner) + } else { + None + } + } + + fn sample_opponent_action(state: &TicTacToeState, rng: &mut DeterministicRng) -> usize { + let mut empty_positions = [0usize; 9]; + let mut empty_len = 0usize; + let mut index = 0usize; + 
while index < state.board.len() { + if state.board[index] == TicTacToeCell::Empty { + empty_positions[empty_len] = index; + empty_len += 1; + } + index += 1; + } + empty_positions[rng.gen_range(empty_len)] + } + + fn reward_from_terminal_winner(winner: Option) -> i64 { + match winner { + Some(0) => 2, + Some(_) => -2, + None => 1, + } + } + + fn resolve_turn( + state: &mut TicTacToeState, + action_index: usize, + rng: &mut DeterministicRng, + ) -> i64 { + if let Some(winner) = Self::apply_mark(state, action_index, TicTacToeCell::Player) { + return Self::reward_from_terminal_winner(winner); + } + + let opponent_index = Self::sample_opponent_action(state, rng); + if let Some(winner) = Self::apply_mark(state, opponent_index, TicTacToeCell::Opponent) { + return Self::reward_from_terminal_winner(winner); + } + + 0 + } + + fn termination_from_state(state: &TicTacToeState) -> Termination { + if state.terminal { + Termination::Terminal { + winner: state.winner, + } + } else { + Termination::Ongoing + } + } + /// Packs board cells into a two-bit-per-cell `u64` representation. 
pub fn packed_board(board: &[TicTacToeCell; 9]) -> u64 { let mut packed = 0u64; @@ -109,27 +181,20 @@ impl TicTacToe { } } -impl Game for TicTacToe { +impl single_player::SinglePlayerGame for TicTacToe { + type Params = (); type State = TicTacToeState; type Action = TicTacToeAction; - type PlayerObservation = TicTacToeObservation; - type SpectatorObservation = TicTacToeObservation; + type Obs = TicTacToeObservation; type WorldView = TicTacToeWorldView; - type PlayerBuf = FixedVec; type ActionBuf = FixedVec; - type JointActionBuf = FixedVec, 1>; - type RewardBuf = FixedVec; type WordBuf = FixedVec; fn name(&self) -> &'static str { "tictactoe" } - fn player_count(&self) -> usize { - 1 - } - - fn init(&self, _seed: Seed) -> Self::State { + fn init_with_params(&self, _seed: Seed, _params: &Self::Params) -> Self::State { TicTacToeState::default() } @@ -137,13 +202,9 @@ impl Game for TicTacToe { state.terminal } - fn players_to_act(&self, state: &Self::State, out: &mut Self::PlayerBuf) { - single_player::write_players_to_act(out, state.terminal); - } - - fn legal_actions(&self, state: &Self::State, player: PlayerId, out: &mut Self::ActionBuf) { + fn legal_actions(&self, state: &Self::State, out: &mut Self::ActionBuf) { out.clear(); - if !single_player::can_act(player, state.terminal) { + if state.terminal { return; } let mut index = 0usize; @@ -155,11 +216,7 @@ impl Game for TicTacToe { } } - fn observe_player(&self, state: &Self::State, _player: PlayerId) -> Self::PlayerObservation { - *state - } - - fn observe_spectator(&self, state: &Self::State) -> Self::SpectatorObservation { + fn observe_player(&self, state: &Self::State) -> Self::Obs { *state } @@ -170,73 +227,23 @@ impl Game for TicTacToe { fn step_in_place( &self, state: &mut Self::State, - joint_actions: &Self::JointActionBuf, + action: Option, rng: &mut DeterministicRng, - out: &mut StepOutcome, + out: &mut StepOutcome, ) { - let action = single_player::first_action(joint_actions.as_slice()) - .map(|candidate: 
TicTacToeAction| candidate.0 as usize); - let reward = if state.terminal { - out.termination = Termination::Terminal { - winner: state.winner, - }; 0 - } else if let Some(index) = action { - if index >= 9 || state.board[index] != TicTacToeCell::Empty { - -3 - } else { - state.board[index] = TicTacToeCell::Player; - if let Some(winner) = Self::find_winner(&state.board) { - state.terminal = true; - state.winner = Some(winner); - out.termination = Termination::Terminal { - winner: state.winner, - }; - 2 - } else if Self::is_full(&state.board) { - state.terminal = true; - state.winner = None; - out.termination = Termination::Terminal { winner: None }; - 1 - } else { - let mut empty_positions = [0usize; 9]; - let mut empty_len = 0usize; - let mut cell_index = 0usize; - while cell_index < state.board.len() { - if state.board[cell_index] == TicTacToeCell::Empty { - empty_positions[empty_len] = cell_index; - empty_len += 1; - } - cell_index += 1; - } - let opponent_index = empty_positions[rng.gen_range(empty_len)]; - state.board[opponent_index] = TicTacToeCell::Opponent; - if let Some(winner) = Self::find_winner(&state.board) { - state.terminal = true; - state.winner = Some(winner); - out.termination = Termination::Terminal { - winner: state.winner, - }; - -2 - } else if Self::is_full(&state.board) { - state.terminal = true; - state.winner = None; - out.termination = Termination::Terminal { winner: None }; - 1 - } else { - 0 - } + } else { + match Self::decode_action_index(action) { + Some(index) if Self::action_is_legal(state, index) => { + Self::resolve_turn(state, index, rng) } + _ => -3, } - } else { - -3 }; single_player::push_reward(&mut out.rewards, reward); - if !state.terminal { - out.termination = Termination::Ongoing; - } + out.termination = Self::termination_from_state(state); } fn compact_spec(&self) -> CompactSpec { @@ -259,23 +266,11 @@ impl Game for TicTacToe { (encoded < 9).then_some(TicTacToeAction(encoded as u8)) } - fn encode_player_observation( - 
&self, - observation: &Self::PlayerObservation, - out: &mut Self::WordBuf, - ) { + fn encode_player_observation(&self, observation: &Self::Obs, out: &mut Self::WordBuf) { out.clear(); out.push(Self::packed_board(&observation.board)).unwrap(); } - fn encode_spectator_observation( - &self, - observation: &Self::SpectatorObservation, - out: &mut Self::WordBuf, - ) { - self.encode_player_observation(observation, out); - } - fn state_invariant(&self, state: &Self::State) -> bool { let winner = Self::find_winner(&state.board); let full = Self::is_full(&state.board); @@ -290,9 +285,9 @@ impl Game for TicTacToe { fn transition_postcondition( &self, pre: &Self::State, - _actions: &Self::JointActionBuf, + _action: Option, post: &Self::State, - outcome: &StepOutcome, + outcome: &StepOutcome, ) -> bool { if pre.terminal { return post == pre && outcome.reward_for(0) == 0 && outcome.is_terminal(); diff --git a/src/builtin/tictactoe/tests.rs b/src/builtin/tictactoe/tests.rs index 12e9366..3eaf243 100644 --- a/src/builtin/tictactoe/tests.rs +++ b/src/builtin/tictactoe/tests.rs @@ -1,5 +1,7 @@ use super::*; +use crate::game::Game; use crate::session::Session; +use crate::types::PlayerAction; use crate::verification::{ assert_compact_roundtrip, assert_observation_contracts, assert_transition_contracts, }; diff --git a/src/cli/mod.rs b/src/cli/mod.rs index cf57953..03ed1d1 100644 --- a/src/cli/mod.rs +++ b/src/cli/mod.rs @@ -11,7 +11,7 @@ use crate::builtin::{Blackjack, BlackjackAction, TicTacToe, TicTacToeAction}; use crate::builtin::{Platformer, PlatformerAction}; use crate::core::observe::{Observe, Observer}; use crate::policy::{FirstLegalPolicy, Policy, RandomPolicy, ScriptedPolicy}; -use crate::registry::{GameKind, all_games, find_game}; +use crate::registry::{all_games, find_game}; #[cfg(feature = "render")] use crate::render::{PassivePolicyDriver, RenderConfig, RenderMode, RendererApp, TurnBasedDriver}; #[cfg(all(feature = "render", feature = "physics"))] @@ -21,7 +21,7 @@ use 
crate::session::InteractiveSession; use crate::{Game, Session, stable_hash}; #[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] -enum RunMode { +pub(crate) enum RunMode { Play, Replay, } @@ -104,17 +104,12 @@ where fn run_descriptor(game_name: &str, config: CliConfig, mode: RunMode) -> Result<(), String> { let descriptor = find_game(game_name).ok_or_else(|| format!("unknown game: {game_name}"))?; - match descriptor.kind { - GameKind::TicTacToe => run_tictactoe(config, mode), - GameKind::Blackjack => run_blackjack(config, mode), - #[cfg(feature = "physics")] - GameKind::Platformer => run_platformer(config, mode), - } + (descriptor.runner)(config, mode) } /// Parsed command-line execution configuration. #[derive(Clone, Debug)] -pub struct CliConfig { +pub(crate) struct CliConfig { seed: u64, max_steps: usize, policy: String, @@ -230,7 +225,7 @@ where Ok(()) } -fn run_tictactoe(config: CliConfig, mode: RunMode) -> Result<(), String> { +pub(crate) fn run_tictactoe(config: CliConfig, mode: RunMode) -> Result<(), String> { if config.render_physics { return Err("tictactoe does not support --render-physics".to_string()); } @@ -252,7 +247,7 @@ fn run_tictactoe(config: CliConfig, mode: RunMode) -> Result<(), String> { ) } -fn run_blackjack(config: CliConfig, mode: RunMode) -> Result<(), String> { +pub(crate) fn run_blackjack(config: CliConfig, mode: RunMode) -> Result<(), String> { if config.render_physics { return Err("blackjack does not support --render-physics".to_string()); } @@ -275,7 +270,7 @@ fn run_blackjack(config: CliConfig, mode: RunMode) -> Result<(), String> { } #[cfg(feature = "physics")] -fn run_platformer(config: CliConfig, mode: RunMode) -> Result<(), String> { +pub(crate) fn run_platformer(config: CliConfig, mode: RunMode) -> Result<(), String> { #[cfg(feature = "render")] if config.render || config.render_physics { return run_platformer_render(config, mode); @@ -635,7 +630,7 @@ impl Policy for HumanTicTacToe { _game: &TicTacToe, _state: &::State, _player: 
usize, - _observation: &::PlayerObservation, + _observation: &::Obs, legal_actions: &[::Action], _rng: &mut crate::DeterministicRng, ) -> ::Action { @@ -660,7 +655,7 @@ impl Policy for HumanBlackjack { _game: &Blackjack, _state: &::State, _player: usize, - _observation: &::PlayerObservation, + _observation: &::Obs, legal_actions: &[::Action], _rng: &mut crate::DeterministicRng, ) -> ::Action { @@ -692,7 +687,7 @@ impl Policy for HumanPlatformer { _game: &Platformer, _state: &::State, _player: usize, - _observation: &::PlayerObservation, + _observation: &::Obs, legal_actions: &[::Action], _rng: &mut crate::DeterministicRng, ) -> ::Action { diff --git a/src/compact.rs b/src/compact.rs index 0c861c7..0b44254 100644 --- a/src/compact.rs +++ b/src/compact.rs @@ -1,5 +1,7 @@ //! Compact encoding specifications and validation helpers. +use core::fmt; + use crate::types::Reward; /// Structured compact codec errors. @@ -23,6 +25,29 @@ pub enum CompactError { /// Maximum allowed reward. max_reward: Reward, }, + /// Encoded reward exceeded declared compact bit width. + RewardEncodingExceedsBitWidth { + /// Encoded compact reward value. + encoded: u64, + /// Declared compact reward bit width. + reward_bits: u8, + }, + /// Observation word stream length differs from declared schema. + ObservationLengthMismatch { + /// Actual number of observation words emitted. + actual_len: usize, + /// Declared number of observation words. + expected_len: usize, + }, + /// Observation word exceeded declared observation bit width. + ObservationWordOutOfRange { + /// Word index in observation stream. + index: usize, + /// Actual encoded word value. + word: u64, + /// Maximum representable word value for the schema. + max_word: u64, + }, /// Encoded action had no valid decoding. InvalidActionEncoding { /// Encoded action value. 
@@ -30,6 +55,56 @@ pub enum CompactError { }, } +impl fmt::Display for CompactError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::RewardOutOfRange { + reward, + min_reward, + max_reward, + } => write!( + f, + "reward {reward} is outside compact range [{min_reward}, {max_reward}]" + ), + Self::EncodedRewardOutOfRange { + encoded, + min_reward, + max_reward, + } => write!( + f, + "encoded reward {encoded} decodes outside compact range [{min_reward}, {max_reward}]" + ), + Self::RewardEncodingExceedsBitWidth { + encoded, + reward_bits, + } => write!( + f, + "encoded reward {encoded} exceeds declared reward bit width {reward_bits}" + ), + Self::ObservationLengthMismatch { + actual_len, + expected_len, + } => write!( + f, + "observation stream length {actual_len} does not match declared length {expected_len}" + ), + Self::ObservationWordOutOfRange { + index, + word, + max_word, + } => write!( + f, + "observation word {index} has value {word}, exceeding schema maximum {max_word}" + ), + Self::InvalidActionEncoding { encoded } => { + write!(f, "invalid action encoding {encoded}") + } + } + } +} + +impl std::error::Error for CompactError {} + /// Compact schema descriptor for action/observation/reward encoding. #[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] pub struct CompactSpec { @@ -61,6 +136,53 @@ impl CompactSpec { } } + /// Maximum representable compact reward value from declared bit width. + pub fn max_reward_value(&self) -> u64 { + if self.reward_bits == 0 { + 0 + } else if self.reward_bits >= 64 { + u64::MAX + } else { + (1u64 << self.reward_bits) - 1 + } + } + + /// Validates one encoded reward against declared reward bit width. 
+ pub fn validate_encoded_reward_bits(&self, encoded: u64) -> Result<(), CompactError> { + if encoded > self.max_reward_value() { + return Err(CompactError::RewardEncodingExceedsBitWidth { + encoded, + reward_bits: self.reward_bits, + }); + } + Ok(()) + } + + /// Validates a full observation stream against declared shape and bit bounds. + pub fn validate_observation_words(&self, words: &[u64]) -> Result<(), CompactError> { + if words.len() != self.observation_stream_len { + return Err(CompactError::ObservationLengthMismatch { + actual_len: words.len(), + expected_len: self.observation_stream_len, + }); + } + + let max_word = self.max_observation_value(); + let mut index = 0usize; + while index < words.len() { + let word = words[index]; + if word > max_word { + return Err(CompactError::ObservationWordOutOfRange { + index, + word, + max_word, + }); + } + index += 1; + } + Ok(()) + } + /// Encode reward and panic on out-of-range input. pub fn encode_reward(&self, reward: Reward) -> u64 { self.try_encode_reward(reward) @@ -90,11 +212,14 @@ impl CompactSpec { max_reward: self.max_reward, }); } - Ok(encoded as u64) + let encoded = encoded as u64; + self.validate_encoded_reward_bits(encoded)?; + Ok(encoded) } /// Checked reward decoder. pub fn try_decode_reward(&self, encoded: u64) -> Result { + self.validate_encoded_reward_bits(encoded)?; let decoded = i128::from(encoded) - i128::from(self.reward_offset); if decoded < i128::from(self.min_reward) || decoded > i128::from(self.max_reward) { return Err(CompactError::EncodedRewardOutOfRange { @@ -115,10 +240,18 @@ impl CompactSpec { /// Validate internal reward-range consistency. 
pub fn reward_range_is_sound(&self) -> bool { - self.min_reward <= self.max_reward - && self.try_encode_reward(self.min_reward).is_ok() - && self.try_encode_reward(self.max_reward).is_ok() - && self.encode_reward(self.min_reward) <= self.encode_reward(self.max_reward) + if self.min_reward > self.max_reward { + return false; + } + let Ok(min_encoded) = self.try_encode_reward(self.min_reward) else { + return false; + }; + let Ok(max_encoded) = self.try_encode_reward(self.max_reward) else { + return false; + }; + min_encoded <= max_encoded + && self.try_decode_reward(min_encoded).ok() == Some(self.min_reward) + && self.try_decode_reward(max_encoded).ok() == Some(self.max_reward) } } @@ -177,6 +310,36 @@ mod tests { assert_eq!(spec.try_encode_reward(-3).unwrap(), 0); assert_eq!(spec.try_encode_reward(2).unwrap(), 5); } + + #[test] + fn observation_stream_validation_catches_shape_errors() { + let spec = CompactSpec { + action_count: 2, + observation_bits: 3, + observation_stream_len: 2, + reward_bits: 2, + min_reward: 0, + max_reward: 1, + reward_offset: 0, + }; + assert!(spec.validate_observation_words(&[1, 7]).is_ok()); + assert!(spec.validate_observation_words(&[1]).is_err()); + assert!(spec.validate_observation_words(&[1, 8]).is_err()); + } + + #[test] + fn reward_bit_width_is_enforced() { + let spec = CompactSpec { + action_count: 2, + observation_bits: 8, + observation_stream_len: 1, + reward_bits: 2, + min_reward: 0, + max_reward: 3, + reward_offset: 0, + }; + assert!(spec.try_decode_reward(4).is_err()); + } } #[cfg(kani)] @@ -200,4 +363,37 @@ mod proofs { assert_eq!(spec.decode_reward(encoded), reward); assert!(spec.reward_range_is_sound()); } + + #[kani::proof] + fn compact_observation_words_match_schema() { + let spec = CompactSpec { + action_count: 2, + observation_bits: 3, + observation_stream_len: 1, + reward_bits: 2, + min_reward: 0, + max_reward: 1, + reward_offset: 0, + }; + let word: u64 = kani::any(); + if word <= spec.max_observation_value() { + 
assert!(spec.validate_observation_words(&[word]).is_ok()); + } else { + assert!(spec.validate_observation_words(&[word]).is_err()); + } + } + + #[kani::proof] + fn compact_reward_bit_width_is_enforced() { + let spec = CompactSpec { + action_count: 2, + observation_bits: 1, + observation_stream_len: 1, + reward_bits: 2, + min_reward: 0, + max_reward: 3, + reward_offset: 0, + }; + assert!(spec.try_decode_reward(4).is_err()); + } } diff --git a/src/core/env.rs b/src/core/env.rs index f2ef4e3..b1dc43d 100644 --- a/src/core/env.rs +++ b/src/core/env.rs @@ -3,6 +3,7 @@ use core::fmt; use crate::buffer::{Buffer, FixedVec}; +use crate::compact::CompactError; use crate::core::observe::{Observe, Observer}; use crate::session::{HistoryStore, SessionKernel}; use crate::types::{PlayerAction, PlayerId, Reward, Seed}; @@ -68,6 +69,11 @@ pub enum EnvError { /// Maximum words accepted by this environment wrapper. max_words: usize, }, + /// Observation stream violated the compact schema constraints. + InvalidObservationEncoding { + /// Canonical compact constraint violation details. + reason: CompactError, + }, /// Reward cannot be represented by the configured compact reward range. RewardOutOfRange { /// Raw out-of-range reward. @@ -77,6 +83,11 @@ pub enum EnvError { /// Maximum representable reward. max: Reward, }, + /// Reward encoding violated compact schema constraints. + InvalidRewardEncoding { + /// Canonical compact constraint violation details. + reason: CompactError, + }, /// Selected agent player id is outside game player range. InvalidAgentPlayer { /// Requested player id. 
@@ -102,12 +113,18 @@ impl fmt::Display for EnvError { "observation packet requires {actual_words} words but maximum is {max_words}" ) } + Self::InvalidObservationEncoding { reason } => { + write!(f, "observation does not satisfy compact schema: {reason}") + } Self::RewardOutOfRange { reward, min, max } => { write!( f, "reward {reward} is outside compact spec range [{min}, {max}]" ) } + Self::InvalidRewardEncoding { reason } => { + write!(f, "reward does not satisfy compact schema: {reason}") + } Self::InvalidAgentPlayer { player, player_count, @@ -123,9 +140,19 @@ impl std::error::Error for EnvError {} /// Minimal infotheory-compatible compact environment interface. pub trait InfotheoryEnvironment { + /// Parameter bundle used to initialize/reset environment state. + type Params; + /// Resets environment state and returns initial compact observation. fn reset_seed(&mut self, seed: Seed) -> Result, EnvError>; + /// Resets environment state from explicit params and returns compact observation. + fn reset_seed_with_params( + &mut self, + seed: Seed, + params: Self::Params, + ) -> Result, EnvError>; + /// Steps environment using compact action bits. fn step_bits(&mut self, action_bits: u64) -> Result, EnvError>; } @@ -142,28 +169,34 @@ where agent_player: PlayerId, } -/// Default environment alias with fixed history and packet capacity. +/// Default environment alias with dynamic history and packet capacity. pub type DefaultEnvironment = - Environment, MAX_WORDS>; + Environment, MAX_WORDS>; impl Environment where G: Observe, H: HistoryStore, { - /// Creates a new compact environment. - pub fn new(game: G, seed: Seed, observer: Observer) -> Self { + /// Creates a new compact environment initialized with explicit params. 
+ pub fn new_with_params(game: G, seed: Seed, observer: Observer, params: G::Params) -> Self { let agent_player = match observer { Observer::Player(player) => player, Observer::Spectator => 0, }; Self { - session: SessionKernel::new(game, seed), + session: SessionKernel::new_with_params(game, seed, params), observer, agent_player, } } + /// Creates a new compact environment. + pub fn new(game: G, seed: Seed, observer: Observer) -> Self { + let params = game.default_params(); + Self::new_with_params(game, seed, observer, params) + } + /// Returns immutable access to the underlying session kernel. pub fn session(&self) -> &SessionKernel { &self.session @@ -203,6 +236,16 @@ where self.encode_current_observation() } + /// Resets state from explicit params and returns initial compact observation. + pub fn reset_with_params( + &mut self, + seed: Seed, + params: G::Params, + ) -> Result, EnvError> { + self.session.reset_with_params(seed, params); + self.encode_current_observation() + } + /// Steps the environment from an encoded action value. pub fn step(&mut self, action_bits: u64) -> Result, EnvError> { if self.session.is_terminal() { @@ -237,13 +280,16 @@ where }; let spec = self.session.game().compact_spec(); - let encoded_reward = - spec.try_encode_reward(reward) - .map_err(|_| EnvError::RewardOutOfRange { + let encoded_reward = spec + .try_encode_reward(reward) + .map_err(|reason| match reason { + CompactError::RewardOutOfRange { .. 
} => EnvError::RewardOutOfRange { reward, min: spec.min_reward, max: spec.max_reward, - })?; + }, + other => EnvError::InvalidRewardEncoding { reason: other }, + })?; Ok(EnvStep { observation_bits: self.encode_current_observation()?, @@ -268,6 +314,11 @@ where max_words: MAX_WORDS, }); } + self.session + .game() + .compact_spec() + .validate_observation_words(encoded.as_slice()) + .map_err(|reason| EnvError::InvalidObservationEncoding { reason })?; let mut packet = BitPacket::default(); for &word in encoded.as_slice() { @@ -282,11 +333,22 @@ where G: Observe, H: HistoryStore, { + type Params = G::Params; + /// Resets environment and emits initial packet. fn reset_seed(&mut self, seed: Seed) -> Result, EnvError> { self.reset(seed) } + /// Resets environment from explicit params and emits initial packet. + fn reset_seed_with_params( + &mut self, + seed: Seed, + params: Self::Params, + ) -> Result, EnvError> { + self.reset_with_params(seed, params) + } + /// Steps environment with compact action bits. 
fn step_bits(&mut self, action_bits: u64) -> Result, EnvError> { self.step(action_bits) @@ -308,6 +370,7 @@ mod regression_tests { #[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)] struct DemoState { terminal: bool, + marker: u8, } #[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)] @@ -316,11 +379,17 @@ mod regression_tests { Step, } + #[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)] + struct BadObservationGame; + + #[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)] + struct BadRewardGame; + impl Game for DemoGame { + type Params = u8; type State = DemoState; type Action = DemoAction; - type PlayerObservation = u8; - type SpectatorObservation = u8; + type Obs = u8; type WorldView = u8; type PlayerBuf = FixedVec; type ActionBuf = FixedVec; @@ -328,6 +397,10 @@ mod regression_tests { type RewardBuf = FixedVec; type WordBuf = FixedVec; + fn default_params(&self) -> Self::Params { + 0 + } + fn name(&self) -> &'static str { "demo" } @@ -336,8 +409,11 @@ mod regression_tests { 2 } - fn init(&self, _seed: Seed) -> Self::State { - DemoState { terminal: false } + fn init_with_params(&self, _seed: Seed, params: &Self::Params) -> Self::State { + DemoState { + terminal: false, + marker: *params, + } } fn is_terminal(&self, state: &Self::State) -> bool { @@ -359,15 +435,11 @@ mod regression_tests { } } - fn observe_player( - &self, - _state: &Self::State, - player: PlayerId, - ) -> Self::PlayerObservation { + fn observe_player(&self, _state: &Self::State, player: PlayerId) -> Self::Obs { player as u8 } - fn observe_spectator(&self, _state: &Self::State) -> Self::SpectatorObservation { + fn observe_spectator(&self, _state: &Self::State) -> Self::Obs { 99 } @@ -420,22 +492,209 @@ mod regression_tests { (encoded == 0).then_some(DemoAction::Step) } - fn encode_player_observation( + fn encode_player_observation(&self, observation: &Self::Obs, out: &mut Self::WordBuf) { + out.clear(); + out.push(100 + u64::from(*observation)).unwrap(); + } + + 
fn encode_spectator_observation(&self, observation: &Self::Obs, out: &mut Self::WordBuf) { + out.clear(); + out.push(200 + u64::from(*observation)).unwrap(); + } + } + + impl Game for BadObservationGame { + type Params = (); + type State = (); + type Action = u8; + type Obs = u8; + type WorldView = (); + type PlayerBuf = FixedVec; + type ActionBuf = FixedVec; + type JointActionBuf = FixedVec, 1>; + type RewardBuf = FixedVec; + type WordBuf = FixedVec; + + fn name(&self) -> &'static str { + "bad-observation" + } + + fn player_count(&self) -> usize { + 1 + } + + fn init_with_params(&self, _seed: Seed, _params: &Self::Params) -> Self::State {} + + fn is_terminal(&self, _state: &Self::State) -> bool { + false + } + + fn players_to_act(&self, _state: &Self::State, out: &mut Self::PlayerBuf) { + out.clear(); + out.push(0).unwrap(); + } + + fn legal_actions( &self, - observation: &Self::PlayerObservation, - out: &mut Self::WordBuf, + _state: &Self::State, + _player: PlayerId, + out: &mut Self::ActionBuf, ) { out.clear(); - out.push(100 + u64::from(*observation)).unwrap(); + out.push(0).unwrap(); + } + + fn observe_player(&self, _state: &Self::State, _player: PlayerId) -> Self::Obs { + 8 + } + + fn observe_spectator(&self, _state: &Self::State) -> Self::Obs { + 8 } - fn encode_spectator_observation( + fn world_view(&self, _state: &Self::State) -> Self::WorldView {} + + fn step_in_place( &self, - observation: &Self::SpectatorObservation, - out: &mut Self::WordBuf, + _state: &mut Self::State, + _joint_actions: &Self::JointActionBuf, + _rng: &mut DeterministicRng, + out: &mut StepOutcome, ) { + out.rewards + .push(PlayerReward { + player: 0, + reward: 0, + }) + .unwrap(); + } + + fn compact_spec(&self) -> CompactSpec { + CompactSpec { + action_count: 1, + observation_bits: 3, + observation_stream_len: 1, + reward_bits: 1, + min_reward: 0, + max_reward: 0, + reward_offset: 0, + } + } + + fn encode_action(&self, action: &Self::Action) -> u64 { + u64::from(*action) + } + + fn 
decode_action(&self, encoded: u64) -> Option { + (encoded == 0).then_some(0) + } + + fn encode_player_observation(&self, observation: &Self::Obs, out: &mut Self::WordBuf) { out.clear(); - out.push(200 + u64::from(*observation)).unwrap(); + out.push(u64::from(*observation)).unwrap(); + } + + fn encode_spectator_observation(&self, observation: &Self::Obs, out: &mut Self::WordBuf) { + self.encode_player_observation(observation, out); + } + } + + impl Game for BadRewardGame { + type Params = (); + type State = bool; + type Action = u8; + type Obs = u8; + type WorldView = (); + type PlayerBuf = FixedVec; + type ActionBuf = FixedVec; + type JointActionBuf = FixedVec, 1>; + type RewardBuf = FixedVec; + type WordBuf = FixedVec; + + fn name(&self) -> &'static str { + "bad-reward" + } + + fn player_count(&self) -> usize { + 1 + } + + fn init_with_params(&self, _seed: Seed, _params: &Self::Params) -> Self::State { + false + } + + fn is_terminal(&self, state: &Self::State) -> bool { + *state + } + + fn players_to_act(&self, state: &Self::State, out: &mut Self::PlayerBuf) { + out.clear(); + if !*state { + out.push(0).unwrap(); + } + } + + fn legal_actions(&self, state: &Self::State, _player: PlayerId, out: &mut Self::ActionBuf) { + out.clear(); + if !*state { + out.push(0).unwrap(); + } + } + + fn observe_player(&self, _state: &Self::State, _player: PlayerId) -> Self::Obs { + 0 + } + + fn observe_spectator(&self, _state: &Self::State) -> Self::Obs { + 0 + } + + fn world_view(&self, _state: &Self::State) -> Self::WorldView {} + + fn step_in_place( + &self, + state: &mut Self::State, + _joint_actions: &Self::JointActionBuf, + _rng: &mut DeterministicRng, + out: &mut StepOutcome, + ) { + out.rewards + .push(PlayerReward { + player: 0, + reward: 3, + }) + .unwrap(); + *state = true; + out.termination = Termination::Terminal { winner: Some(0) }; + } + + fn compact_spec(&self) -> CompactSpec { + CompactSpec { + action_count: 1, + observation_bits: 1, + observation_stream_len: 1, + 
reward_bits: 1, + min_reward: 0, + max_reward: 3, + reward_offset: 0, + } + } + + fn encode_action(&self, action: &Self::Action) -> u64 { + u64::from(*action) + } + + fn decode_action(&self, encoded: u64) -> Option { + (encoded == 0).then_some(0) + } + + fn encode_player_observation(&self, observation: &Self::Obs, out: &mut Self::WordBuf) { + out.clear(); + out.push(u64::from(*observation)).unwrap(); + } + + fn encode_spectator_observation(&self, observation: &Self::Obs, out: &mut Self::WordBuf) { + self.encode_player_observation(observation, out); } } @@ -461,6 +720,275 @@ mod regression_tests { let packet = env.encode_current_observation().unwrap(); assert_eq!(packet.words(), &[299]); } + + #[test] + fn reset_with_params_updates_session_seed_params_state() { + let mut env = DefaultEnvironment::::new(DemoGame, 3, Observer::Player(0)); + assert_eq!(env.session().state().marker, 0); + env.reset_with_params(11, 42).unwrap(); + assert_eq!(env.session().current_tick(), 0); + assert_eq!(env.session().state().marker, 42); + } + + #[test] + fn observation_schema_violations_are_rejected() { + let env = DefaultEnvironment::::new( + BadObservationGame, + 1, + Observer::Player(0), + ); + assert!(matches!( + env.encode_current_observation(), + Err(EnvError::InvalidObservationEncoding { .. }) + )); + } + + #[test] + fn reward_bit_width_violations_are_rejected() { + let mut env = + DefaultEnvironment::::new(BadRewardGame, 1, Observer::Player(0)); + assert!(matches!( + env.step(0), + Err(EnvError::InvalidRewardEncoding { .. 
}) + )); + } +} + +#[cfg(kani)] +mod proofs { + use super::{DefaultEnvironment, EnvError, Observer}; + use crate::buffer::FixedVec; + use crate::compact::CompactSpec; + use crate::game::Game; + use crate::rng::DeterministicRng; + use crate::types::{PlayerAction, PlayerId, PlayerReward, Seed, StepOutcome, Termination}; + + #[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)] + struct ObservationViolationGame; + + #[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)] + struct RewardBitsViolationGame; + + impl Game for ObservationViolationGame { + type Params = (); + type State = (); + type Action = u8; + type Obs = u8; + type WorldView = (); + type PlayerBuf = FixedVec; + type ActionBuf = FixedVec; + type JointActionBuf = FixedVec, 1>; + type RewardBuf = FixedVec; + type WordBuf = FixedVec; + + fn name(&self) -> &'static str { + "observation-violation" + } + + fn player_count(&self) -> usize { + 1 + } + + fn init_with_params(&self, _seed: Seed, _params: &Self::Params) -> Self::State {} + + fn is_terminal(&self, _state: &Self::State) -> bool { + false + } + + fn players_to_act(&self, _state: &Self::State, out: &mut Self::PlayerBuf) { + out.clear(); + out.push(0).unwrap(); + } + + fn legal_actions( + &self, + _state: &Self::State, + _player: PlayerId, + out: &mut Self::ActionBuf, + ) { + out.clear(); + out.push(0).unwrap(); + } + + fn observe_player(&self, _state: &Self::State, _player: PlayerId) -> Self::Obs { + 8 + } + + fn observe_spectator(&self, _state: &Self::State) -> Self::Obs { + 8 + } + + fn world_view(&self, _state: &Self::State) -> Self::WorldView {} + + fn step_in_place( + &self, + _state: &mut Self::State, + _joint_actions: &Self::JointActionBuf, + _rng: &mut DeterministicRng, + out: &mut StepOutcome, + ) { + out.rewards + .push(PlayerReward { + player: 0, + reward: 0, + }) + .unwrap(); + } + + fn compact_spec(&self) -> CompactSpec { + CompactSpec { + action_count: 1, + observation_bits: 3, + observation_stream_len: 1, + reward_bits: 1, + 
min_reward: 0, + max_reward: 0, + reward_offset: 0, + } + } + + fn encode_action(&self, action: &Self::Action) -> u64 { + u64::from(*action) + } + + fn decode_action(&self, encoded: u64) -> Option { + (encoded == 0).then_some(0) + } + + fn encode_player_observation(&self, observation: &Self::Obs, out: &mut Self::WordBuf) { + out.clear(); + out.push(u64::from(*observation)).unwrap(); + } + + fn encode_spectator_observation(&self, observation: &Self::Obs, out: &mut Self::WordBuf) { + self.encode_player_observation(observation, out); + } + } + + impl Game for RewardBitsViolationGame { + type Params = (); + type State = bool; + type Action = u8; + type Obs = u8; + type WorldView = (); + type PlayerBuf = FixedVec; + type ActionBuf = FixedVec; + type JointActionBuf = FixedVec, 1>; + type RewardBuf = FixedVec; + type WordBuf = FixedVec; + + fn name(&self) -> &'static str { + "reward-violation" + } + + fn player_count(&self) -> usize { + 1 + } + + fn init_with_params(&self, _seed: Seed, _params: &Self::Params) -> Self::State { + false + } + + fn is_terminal(&self, state: &Self::State) -> bool { + *state + } + + fn players_to_act(&self, state: &Self::State, out: &mut Self::PlayerBuf) { + out.clear(); + if !*state { + out.push(0).unwrap(); + } + } + + fn legal_actions(&self, state: &Self::State, _player: PlayerId, out: &mut Self::ActionBuf) { + out.clear(); + if !*state { + out.push(0).unwrap(); + } + } + + fn observe_player(&self, _state: &Self::State, _player: PlayerId) -> Self::Obs { + 0 + } + + fn observe_spectator(&self, _state: &Self::State) -> Self::Obs { + 0 + } + + fn world_view(&self, _state: &Self::State) -> Self::WorldView {} + + fn step_in_place( + &self, + state: &mut Self::State, + _joint_actions: &Self::JointActionBuf, + _rng: &mut DeterministicRng, + out: &mut StepOutcome, + ) { + out.rewards + .push(PlayerReward { + player: 0, + reward: 3, + }) + .unwrap(); + *state = true; + out.termination = Termination::Terminal { winner: Some(0) }; + } + + fn 
compact_spec(&self) -> CompactSpec { + CompactSpec { + action_count: 1, + observation_bits: 1, + observation_stream_len: 1, + reward_bits: 1, + min_reward: 0, + max_reward: 3, + reward_offset: 0, + } + } + + fn encode_action(&self, action: &Self::Action) -> u64 { + u64::from(*action) + } + + fn decode_action(&self, encoded: u64) -> Option { + (encoded == 0).then_some(0) + } + + fn encode_player_observation(&self, observation: &Self::Obs, out: &mut Self::WordBuf) { + out.clear(); + out.push(u64::from(*observation)).unwrap(); + } + + fn encode_spectator_observation(&self, observation: &Self::Obs, out: &mut Self::WordBuf) { + self.encode_player_observation(observation, out); + } + } + + #[kani::proof] + fn env_rejects_invalid_observation_words() { + let env = DefaultEnvironment::::new( + ObservationViolationGame, + 1, + Observer::Player(0), + ); + assert!(matches!( + env.encode_current_observation(), + Err(EnvError::InvalidObservationEncoding { .. }) + )); + } + + #[kani::proof] + fn env_rejects_reward_encoding_that_exceeds_bit_width() { + let mut env = DefaultEnvironment::::new( + RewardBitsViolationGame, + 1, + Observer::Player(0), + ); + assert!(matches!( + env.step(0), + Err(EnvError::InvalidRewardEncoding { .. }) + )); + } } #[cfg(all(test, feature = "builtin"))] diff --git a/src/core/mod.rs b/src/core/mod.rs index 4eedd06..7095a0a 100644 --- a/src/core/mod.rs +++ b/src/core/mod.rs @@ -8,6 +8,7 @@ pub mod stepper; pub use crate::buffer::{BitWords, Buffer, CapacityError, FixedVec}; pub use crate::compact::CompactSpec; +pub use crate::core::single_player::SinglePlayerGame; pub use crate::game::Game; pub use crate::rng::{DeterministicRng, SplitMix64}; pub use crate::session::{ diff --git a/src/core/observe.rs b/src/core/observe.rs index 1c811da..f6ac94c 100644 --- a/src/core/observe.rs +++ b/src/core/observe.rs @@ -1,7 +1,5 @@ //! Observation adapter trait and viewpoint selection types. 
-use core::fmt::Debug; - use crate::game::Game; use crate::types::PlayerId; @@ -16,9 +14,6 @@ pub enum Observer { /// Adapter trait for producing and encoding generic observations. pub trait Observe: Game { - /// Observation type emitted by this adapter. - type Obs: Clone + Debug + Default + Eq + PartialEq; - /// Builds an observation for the selected viewpoint. fn observe(&self, state: &Self::State, who: Observer) -> Self::Obs; @@ -45,10 +40,8 @@ pub trait Observe: Game { impl Observe for G where - G: Game::PlayerObservation>, + G: Game, { - type Obs = G::PlayerObservation; - fn observe(&self, state: &Self::State, who: Observer) -> Self::Obs { match who { Observer::Player(player) => self.observe_player(state, player), diff --git a/src/core/single_player.rs b/src/core/single_player.rs index d2f1f6b..005696a 100644 --- a/src/core/single_player.rs +++ b/src/core/single_player.rs @@ -1,11 +1,24 @@ -//! Reusable helpers for deterministic single-player environments. +//! Reusable helpers and authoring adapter for deterministic single-player games. -use crate::buffer::Buffer; -use crate::types::{PlayerAction, PlayerId, PlayerReward, Reward}; +use core::fmt::Debug; +use core::hash::Hash; + +use crate::buffer::{Buffer, FixedVec}; +use crate::compact::{CompactError, CompactSpec}; +use crate::game::Game; +use crate::rng::DeterministicRng; +use crate::types::{PlayerAction, PlayerId, PlayerReward, Reward, Seed, StepOutcome}; /// Canonical acting player id used by single-player environments. pub const SOLO_PLAYER: PlayerId = 0; +/// Canonical fixed-capacity player buffer for single-player games. +pub type SinglePlayerBuf = FixedVec; +/// Canonical fixed-capacity joint-action buffer for single-player games. +pub type SinglePlayerJointActionBuf = FixedVec, 1>; +/// Canonical fixed-capacity reward buffer for single-player games. +pub type SinglePlayerRewardBuf = FixedVec; + /// Returns true when `player` can act in a non-terminal single-player state. 
pub const fn can_act(player: PlayerId, terminal: bool) -> bool { player == SOLO_PLAYER && !terminal @@ -43,3 +56,280 @@ where }) .unwrap(); } + +/// Ergonomic authoring trait for deterministic single-player games. +/// +/// Implement this trait to avoid repeating boilerplate for: +/// +/// - player-id dispatch (`player_count = 1`, `players_to_act`, legality gating), +/// - joint-action extraction (`Option` from one-player action stream), +/// - fixed-capacity reward and joint-action buffer wiring. +pub trait SinglePlayerGame { + /// Parameter bundle used to initialize/reset game state. + type Params: Clone + Debug + Default + Eq + Hash + PartialEq; + /// Concrete game state. + type State: Clone + Debug + Default + Eq + Hash + PartialEq; + /// Atomic action type. + type Action: Clone + Copy + Debug + Default + Eq + Hash + PartialEq; + /// Canonical observation type. + type Obs: Clone + Debug + Default + Eq + PartialEq; + /// Render/debug world view. + type WorldView: Clone + Debug + Default + Eq + PartialEq; + /// Buffer type for legal actions. + type ActionBuf: Buffer + Clone + Debug + Default + Eq + Hash + PartialEq; + /// Buffer type for compact observation words. + type WordBuf: Buffer + Clone + Debug + Default + Eq + Hash + PartialEq; + + /// Stable machine-readable game name. + fn name(&self) -> &'static str; + /// Returns default parameter bundle used by `init` and `SessionKernel::new`. + fn default_params(&self) -> Self::Params { + Self::Params::default() + } + /// Initialize deterministic state from a seed and parameter bundle. + fn init_with_params(&self, seed: Seed, params: &Self::Params) -> Self::State; + /// Whether the state is terminal. + fn is_terminal(&self, state: &Self::State) -> bool; + /// Emit legal actions for the single acting player in the current state. + fn legal_actions(&self, state: &Self::State, out: &mut Self::ActionBuf); + /// Build player observation. 
+ fn observe_player(&self, state: &Self::State) -> Self::Obs; + /// Build spectator observation. + fn observe_spectator(&self, state: &Self::State) -> Self::Obs { + self.observe_player(state) + } + /// Build world/debug view. + fn world_view(&self, state: &Self::State) -> Self::WorldView; + /// Apply one transition in-place from an optional single-player action. + fn step_in_place( + &self, + state: &mut Self::State, + action: Option, + rng: &mut DeterministicRng, + out: &mut StepOutcome, + ); + + /// Compact codec descriptor for actions, observations, and rewards. + fn compact_spec(&self) -> CompactSpec { + CompactSpec { + action_count: 0, + observation_bits: 0, + observation_stream_len: 0, + reward_bits: 1, + min_reward: 0, + max_reward: 0, + reward_offset: 0, + } + } + + /// Encode an action into compact integer representation. + fn encode_action(&self, _action: &Self::Action) -> u64 { + 0 + } + + /// Decode a compact action value. + fn decode_action(&self, _encoded: u64) -> Option { + None + } + + /// Checked action decoding helper that yields a structured error. + fn decode_action_checked(&self, encoded: u64) -> Result { + self.decode_action(encoded) + .ok_or(CompactError::InvalidActionEncoding { encoded }) + } + + /// Encode a player observation into compact words. + fn encode_player_observation(&self, _observation: &Self::Obs, out: &mut Self::WordBuf) { + out.clear(); + } + + /// Encode a spectator observation into compact words. + fn encode_spectator_observation(&self, observation: &Self::Obs, out: &mut Self::WordBuf) { + self.encode_player_observation(observation, out); + } + + /// State invariant used by checked stepping and proof helpers. + fn state_invariant(&self, _state: &Self::State) -> bool { + true + } + + /// Action invariant used by checked stepping and proof helpers. + fn action_invariant(&self, _action: &Self::Action) -> bool { + true + } + + /// Invariant for player observations. 
+ fn player_observation_invariant(&self, _state: &Self::State, _observation: &Self::Obs) -> bool { + true + } + + /// Invariant for spectator observations. + fn spectator_observation_invariant( + &self, + _state: &Self::State, + _observation: &Self::Obs, + ) -> bool { + true + } + + /// Invariant for world/debug views. + fn world_view_invariant(&self, _state: &Self::State, _world: &Self::WorldView) -> bool { + true + } + + /// Transition postcondition checked in instrumented stepping. + fn transition_postcondition( + &self, + _pre: &Self::State, + _action: Option, + _post: &Self::State, + _outcome: &StepOutcome, + ) -> bool { + true + } +} + +impl Game for T +where + T: SinglePlayerGame, +{ + type Params = T::Params; + type State = T::State; + type Action = T::Action; + type Obs = T::Obs; + type WorldView = T::WorldView; + type PlayerBuf = SinglePlayerBuf; + type ActionBuf = T::ActionBuf; + type JointActionBuf = SinglePlayerJointActionBuf; + type RewardBuf = SinglePlayerRewardBuf; + type WordBuf = T::WordBuf; + + fn name(&self) -> &'static str { + ::name(self) + } + + fn player_count(&self) -> usize { + 1 + } + + fn default_params(&self) -> Self::Params { + ::default_params(self) + } + + fn init_with_params(&self, seed: Seed, params: &Self::Params) -> Self::State { + ::init_with_params(self, seed, params) + } + + fn is_terminal(&self, state: &Self::State) -> bool { + ::is_terminal(self, state) + } + + fn players_to_act(&self, state: &Self::State, out: &mut Self::PlayerBuf) { + write_players_to_act(out, self.is_terminal(state)); + } + + fn legal_actions(&self, state: &Self::State, player: PlayerId, out: &mut Self::ActionBuf) { + out.clear(); + if !can_act(player, self.is_terminal(state)) { + return; + } + ::legal_actions(self, state, out); + } + + fn observe_player(&self, state: &Self::State, _player: PlayerId) -> Self::Obs { + ::observe_player(self, state) + } + + fn observe_spectator(&self, state: &Self::State) -> Self::Obs { + ::observe_spectator(self, state) + } 
+ + fn world_view(&self, state: &Self::State) -> Self::WorldView { + ::world_view(self, state) + } + + fn step_in_place( + &self, + state: &mut Self::State, + joint_actions: &Self::JointActionBuf, + rng: &mut DeterministicRng, + out: &mut StepOutcome, + ) { + ::step_in_place( + self, + state, + first_action(joint_actions.as_slice()), + rng, + out, + ); + } + + fn compact_spec(&self) -> CompactSpec { + ::compact_spec(self) + } + + fn encode_action(&self, action: &Self::Action) -> u64 { + ::encode_action(self, action) + } + + fn decode_action(&self, encoded: u64) -> Option { + ::decode_action(self, encoded) + } + + fn decode_action_checked(&self, encoded: u64) -> Result { + ::decode_action_checked(self, encoded) + } + + fn encode_player_observation(&self, observation: &Self::Obs, out: &mut Self::WordBuf) { + ::encode_player_observation(self, observation, out) + } + + fn encode_spectator_observation(&self, observation: &Self::Obs, out: &mut Self::WordBuf) { + ::encode_spectator_observation(self, observation, out) + } + + fn state_invariant(&self, state: &Self::State) -> bool { + ::state_invariant(self, state) + } + + fn action_invariant(&self, action: &Self::Action) -> bool { + ::action_invariant(self, action) + } + + fn player_observation_invariant( + &self, + state: &Self::State, + _player: PlayerId, + observation: &Self::Obs, + ) -> bool { + ::player_observation_invariant(self, state, observation) + } + + fn spectator_observation_invariant( + &self, + state: &Self::State, + observation: &Self::Obs, + ) -> bool { + ::spectator_observation_invariant(self, state, observation) + } + + fn world_view_invariant(&self, state: &Self::State, world: &Self::WorldView) -> bool { + ::world_view_invariant(self, state, world) + } + + fn transition_postcondition( + &self, + pre: &Self::State, + actions: &Self::JointActionBuf, + post: &Self::State, + outcome: &StepOutcome, + ) -> bool { + ::transition_postcondition( + self, + pre, + first_action(actions.as_slice()), + post, + 
outcome, + ) + } +} diff --git a/src/game.rs b/src/game.rs index 6069e45..d11db29 100644 --- a/src/game.rs +++ b/src/game.rs @@ -13,14 +13,14 @@ use crate::types::{PlayerAction, PlayerId, PlayerReward, Seed, StepOutcome}; /// Implementations provide pure state transition logic plus compact codec hooks /// for actions and observations. pub trait Game { + /// Parameter bundle used to initialize/reset game state. + type Params: Clone + Debug + Default + Eq + Hash + PartialEq; /// Concrete game state. type State: Clone + Debug + Default + Eq + Hash + PartialEq; /// Atomic player action type. type Action: Clone + Copy + Debug + Default + Eq + Hash + PartialEq; - /// Per-player observation type. - type PlayerObservation: Clone + Debug + Default + Eq + PartialEq; - /// Spectator observation type. - type SpectatorObservation: Clone + Debug + Default + Eq + PartialEq; + /// Canonical observation type shared across all viewpoints. + type Obs: Clone + Debug + Default + Eq + PartialEq; /// Render/debug world view type. type WorldView: Clone + Debug + Default + Eq + PartialEq; /// Buffer type for active-player lists. @@ -44,8 +44,19 @@ pub trait Game { fn name(&self) -> &'static str; /// Total number of players in the game. fn player_count(&self) -> usize; + /// Returns default parameter bundle used by `init` and `SessionKernel::new`. + fn default_params(&self) -> Self::Params { + Self::Params::default() + } + + /// Initialize deterministic state from a seed and parameter bundle. + fn init_with_params(&self, seed: Seed, params: &Self::Params) -> Self::State; + /// Initialize the deterministic state from a seed. - fn init(&self, seed: Seed) -> Self::State; + fn init(&self, seed: Seed) -> Self::State { + let params = self.default_params(); + self.init_with_params(seed, &params) + } /// Whether the state is terminal. fn is_terminal(&self, state: &Self::State) -> bool; /// Emit active players for the current tick. 
@@ -53,9 +64,9 @@ pub trait Game { /// Emit legal actions for a player in the current state. fn legal_actions(&self, state: &Self::State, player: PlayerId, out: &mut Self::ActionBuf); /// Build a player-scoped observation. - fn observe_player(&self, state: &Self::State, player: PlayerId) -> Self::PlayerObservation; + fn observe_player(&self, state: &Self::State, player: PlayerId) -> Self::Obs; /// Build a spectator observation. - fn observe_spectator(&self, state: &Self::State) -> Self::SpectatorObservation; + fn observe_spectator(&self, state: &Self::State) -> Self::Obs; /// Build a world/debug view consumed by render and tooling. fn world_view(&self, state: &Self::State) -> Self::WorldView; /// Apply one transition in-place. @@ -97,21 +108,13 @@ pub trait Game { } /// Encode a player observation into compact words. - fn encode_player_observation( - &self, - observation: &Self::PlayerObservation, - out: &mut Self::WordBuf, - ) { + fn encode_player_observation(&self, observation: &Self::Obs, out: &mut Self::WordBuf) { let _ = observation; out.clear(); } /// Encode a spectator observation into compact words. - fn encode_spectator_observation( - &self, - observation: &Self::SpectatorObservation, - out: &mut Self::WordBuf, - ) { + fn encode_spectator_observation(&self, observation: &Self::Obs, out: &mut Self::WordBuf) { let _ = observation; out.clear(); } @@ -124,20 +127,9 @@ pub trait Game { /// Validate compact observation shape against the declared compact spec. 
fn compact_invariant(&self, words: &Self::WordBuf) -> bool { - let spec = self.compact_spec(); - if words.len() != spec.observation_stream_len { - return false; - } - let max_value = spec.max_observation_value(); - let slice = words.as_slice(); - let mut index = 0usize; - while index < slice.len() { - if slice[index] > max_value { - return false; - } - index += 1; - } - true + self.compact_spec() + .validate_observation_words(words.as_slice()) + .is_ok() } /// State invariant used by checked stepping and proof helpers. @@ -155,7 +147,7 @@ pub trait Game { &self, _state: &Self::State, _player: PlayerId, - _observation: &Self::PlayerObservation, + _observation: &Self::Obs, ) -> bool { true } @@ -164,7 +156,7 @@ pub trait Game { fn spectator_observation_invariant( &self, _state: &Self::State, - _observation: &Self::SpectatorObservation, + _observation: &Self::Obs, ) -> bool { true } diff --git a/src/lib.rs b/src/lib.rs index 1b21589..0f9a0f1 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -28,6 +28,7 @@ pub mod verification; pub use buffer::{BitWords, Buffer, CapacityError, FixedVec}; pub use compact::CompactSpec; +pub use core::single_player::SinglePlayerGame; pub use game::Game; pub use policy::{FirstLegalPolicy, FnPolicy, Policy, RandomPolicy, ScriptedPolicy}; pub use rng::{DeterministicRng, SplitMix64}; diff --git a/src/policy.rs b/src/policy.rs index b19d2ee..4b189be 100644 --- a/src/policy.rs +++ b/src/policy.rs @@ -14,7 +14,7 @@ pub trait Policy { game: &G, state: &G::State, player: PlayerId, - observation: &G::PlayerObservation, + observation: &G::Obs, legal_actions: &[G::Action], rng: &mut DeterministicRng, ) -> G::Action; @@ -30,7 +30,7 @@ impl Policy for FirstLegalPolicy { _game: &G, _state: &G::State, _player: PlayerId, - _observation: &G::PlayerObservation, + _observation: &G::Obs, legal_actions: &[G::Action], _rng: &mut DeterministicRng, ) -> G::Action { @@ -51,7 +51,7 @@ impl Policy for RandomPolicy { _game: &G, _state: &G::State, _player: PlayerId, - 
_observation: &G::PlayerObservation, + _observation: &G::Obs, legal_actions: &[G::Action], rng: &mut DeterministicRng, ) -> G::Action { @@ -97,7 +97,7 @@ where _game: &G, _state: &G::State, _player: PlayerId, - _observation: &G::PlayerObservation, + _observation: &G::Obs, legal_actions: &[G::Action], _rng: &mut DeterministicRng, ) -> G::Action { @@ -144,21 +144,14 @@ impl FnPolicy { impl Policy for FnPolicy where G: Game, - F: FnMut( - &G, - &G::State, - PlayerId, - &G::PlayerObservation, - &[G::Action], - &mut DeterministicRng, - ) -> G::Action, + F: FnMut(&G, &G::State, PlayerId, &G::Obs, &[G::Action], &mut DeterministicRng) -> G::Action, { fn choose_action( &mut self, game: &G, state: &G::State, player: PlayerId, - observation: &G::PlayerObservation, + observation: &G::Obs, legal_actions: &[G::Action], rng: &mut DeterministicRng, ) -> G::Action { diff --git a/src/registry/mod.rs b/src/registry/mod.rs index 1549309..eacbe5c 100644 --- a/src/registry/mod.rs +++ b/src/registry/mod.rs @@ -1,17 +1,5 @@ //! Static registry describing builtin games and policy metadata. -/// Statically known builtin game kind. -#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] -pub enum GameKind { - /// Deterministic tic-tac-toe. - TicTacToe, - /// Deterministic blackjack. - Blackjack, - /// Deterministic physics-backed platformer. - #[cfg(feature = "physics")] - Platformer, -} - /// Policy metadata surfaced by CLI and UI. #[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] pub struct PolicyDescriptor { @@ -29,12 +17,13 @@ pub struct ControlMap { } /// Full static descriptor for one builtin game. -#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] +#[derive(Clone, Copy, Debug)] pub struct GameDescriptor { - /// Internal game discriminator. - pub kind: GameKind, /// Stable external game name. pub name: &'static str, + /// CLI runner callback used by descriptor-driven dispatch. 
+ #[cfg(feature = "cli")] + pub(crate) runner: fn(crate::cli::CliConfig, crate::cli::RunMode) -> Result<(), String>, /// Optional controls metadata for interactive frontends. pub controls: Option<&'static ControlMap>, /// True when the default renderer supports this game. @@ -81,24 +70,27 @@ pub fn all_games() -> &'static [GameDescriptor] { { static GAMES: [GameDescriptor; 3] = [ GameDescriptor { - kind: GameKind::TicTacToe, name: "tictactoe", + #[cfg(feature = "cli")] + runner: crate::cli::run_tictactoe, controls: Some(&TICTACTOE_CONTROLS), default_renderer: cfg!(feature = "render"), physics_renderer: false, policies: &STANDARD_POLICIES, }, GameDescriptor { - kind: GameKind::Blackjack, name: "blackjack", + #[cfg(feature = "cli")] + runner: crate::cli::run_blackjack, controls: Some(&BLACKJACK_CONTROLS), default_renderer: cfg!(feature = "render"), physics_renderer: false, policies: &STANDARD_POLICIES, }, GameDescriptor { - kind: GameKind::Platformer, name: "platformer", + #[cfg(feature = "cli")] + runner: crate::cli::run_platformer, controls: Some(&PLATFORMER_CONTROLS), default_renderer: cfg!(feature = "render"), physics_renderer: cfg!(feature = "render"), @@ -112,16 +104,18 @@ pub fn all_games() -> &'static [GameDescriptor] { { static GAMES: [GameDescriptor; 2] = [ GameDescriptor { - kind: GameKind::TicTacToe, name: "tictactoe", + #[cfg(feature = "cli")] + runner: crate::cli::run_tictactoe, controls: Some(&TICTACTOE_CONTROLS), default_renderer: cfg!(feature = "render"), physics_renderer: false, policies: &STANDARD_POLICIES, }, GameDescriptor { - kind: GameKind::Blackjack, name: "blackjack", + #[cfg(feature = "cli")] + runner: crate::cli::run_blackjack, controls: Some(&BLACKJACK_CONTROLS), default_renderer: cfg!(feature = "render"), physics_renderer: false, diff --git a/src/render/builtin.rs b/src/render/builtin.rs index 203dd93..1e440cf 100644 --- a/src/render/builtin.rs +++ b/src/render/builtin.rs @@ -809,7 +809,7 @@ mod tests { use crate::session::Session; type 
TicTacToeDriver = - TurnBasedDriver>; + TurnBasedDriver>; fn tictactoe_view() -> (TicTacToeDriver, FrameMetrics) { ( diff --git a/src/render/runtime.rs b/src/render/runtime.rs index 3515c33..68a0437 100644 --- a/src/render/runtime.rs +++ b/src/render/runtime.rs @@ -193,8 +193,8 @@ pub trait OraclePresenter: Presenter {} #[derive(Debug)] pub(crate) struct ViewCache { tick: Tick, - player_observation: G::PlayerObservation, - spectator_observation: G::SpectatorObservation, + player_observation: G::Obs, + spectator_observation: G::Obs, world_view: G::WorldView, previous_world_view: Option, last_outcome: Option>, @@ -254,12 +254,12 @@ impl<'a, G: Game> RenderGameView<'a, G> { } /// Returns player-local observation. - pub fn player_observation(&self) -> &G::PlayerObservation { + pub fn player_observation(&self) -> &G::Obs { &self.cache.player_observation } /// Returns spectator observation. - pub fn spectator_observation(&self) -> &G::SpectatorObservation { + pub fn spectator_observation(&self) -> &G::Obs { &self.cache.spectator_observation } @@ -1485,10 +1485,10 @@ mod tests { } impl Game for CounterGame { + type Params = (); type State = CounterState; type Action = u8; - type PlayerObservation = CounterState; - type SpectatorObservation = CounterState; + type Obs = CounterState; type WorldView = CounterState; type PlayerBuf = FixedVec; type ActionBuf = FixedVec; @@ -1504,7 +1504,7 @@ mod tests { 1 } - fn init(&self, _seed: Seed) -> Self::State { + fn init_with_params(&self, _seed: Seed, _params: &Self::Params) -> Self::State { CounterState::default() } @@ -1530,15 +1530,11 @@ mod tests { out.push(1).unwrap(); } - fn observe_player( - &self, - state: &Self::State, - _player: PlayerId, - ) -> Self::PlayerObservation { + fn observe_player(&self, state: &Self::State, _player: PlayerId) -> Self::Obs { *state } - fn observe_spectator(&self, state: &Self::State) -> Self::SpectatorObservation { + fn observe_spectator(&self, state: &Self::State) -> Self::Obs { *state } diff 
--git a/src/session.rs b/src/session.rs index fd515ea..c8ba777 100644 --- a/src/session.rs +++ b/src/session.rs @@ -356,6 +356,7 @@ where #[derive(Clone, Debug)] pub struct SessionKernel> { game: G, + params: G::Params, state: G::State, rng: DeterministicRng, tick: Tick, @@ -366,20 +367,27 @@ pub struct SessionKernel> { outcome: StepOutcome, } -/// Default fixed-history session alias. -pub type Session = SessionKernel>; +/// Default dynamic-history session alias. +pub type Session = SessionKernel>; /// Interactive dynamic-history session alias. pub type InteractiveSession = SessionKernel>; impl> SessionKernel { /// Creates a new session initialized from `seed`. pub fn new(game: G, seed: Seed) -> Self { - let state = game.init(seed); + let params = game.default_params(); + Self::new_with_params(game, seed, params) + } + + /// Creates a new session initialized from `seed` and explicit params. + pub fn new_with_params(game: G, seed: Seed, params: G::Params) -> Self { + let state = game.init_with_params(seed, &params); assert!(game.state_invariant(&state)); let rng = DeterministicRng::from_seed_and_stream(seed, 1); let history = H::from_seed(seed, &state, rng); Self { game, + params, state, rng, tick: 0, @@ -393,7 +401,14 @@ impl> SessionKernel { /// Resets session state and history to `seed`. pub fn reset(&mut self, seed: Seed) { - self.state = self.game.init(seed); + let params = self.params.clone(); + self.reset_with_params(seed, params); + } + + /// Resets session state/history to `seed` and updates active params. + pub fn reset_with_params(&mut self, seed: Seed, params: G::Params) { + self.params = params; + self.state = self.game.init_with_params(seed, &self.params); self.rng = DeterministicRng::from_seed_and_stream(seed, 1); self.tick = 0; self.history.reset(seed, &self.state, self.rng); @@ -408,6 +423,11 @@ impl> SessionKernel { &self.game } + /// Returns active parameter bundle used by resets and initial state creation. 
+ pub fn params(&self) -> &G::Params { + &self.params + } + /// Returns current game state. pub fn state(&self) -> &G::State { &self.state @@ -439,12 +459,12 @@ impl> SessionKernel { } /// Returns player-local observation. - pub fn player_observation(&self, player: usize) -> G::PlayerObservation { + pub fn player_observation(&self, player: usize) -> G::Obs { self.game.observe_player(&self.state, player) } /// Returns spectator observation. - pub fn spectator_observation(&self) -> G::SpectatorObservation { + pub fn spectator_observation(&self) -> G::Obs { self.game.observe_spectator(&self.state) } @@ -750,10 +770,10 @@ mod tests { } impl Game for SpinnerGame { + type Params = (); type State = SpinnerState; type Action = u8; - type PlayerObservation = SpinnerState; - type SpectatorObservation = SpinnerState; + type Obs = SpinnerState; type WorldView = SpinnerState; type PlayerBuf = FixedVec; type ActionBuf = FixedVec; @@ -769,7 +789,7 @@ mod tests { 1 } - fn init(&self, _seed: Seed) -> Self::State { + fn init_with_params(&self, _seed: Seed, _params: &Self::Params) -> Self::State { SpinnerState { tick: 0 } } @@ -792,15 +812,11 @@ mod tests { out.push(0).unwrap(); } - fn observe_player( - &self, - state: &Self::State, - _player: PlayerId, - ) -> Self::PlayerObservation { + fn observe_player(&self, state: &Self::State, _player: PlayerId) -> Self::Obs { *state } - fn observe_spectator(&self, state: &Self::State) -> Self::SpectatorObservation { + fn observe_spectator(&self, state: &Self::State) -> Self::Obs { *state } @@ -866,10 +882,10 @@ mod proofs { } impl Game for CounterGame { + type Params = (); type State = CounterState; type Action = u8; - type PlayerObservation = CounterState; - type SpectatorObservation = CounterState; + type Obs = CounterState; type WorldView = CounterState; type PlayerBuf = FixedVec; type ActionBuf = FixedVec; @@ -885,7 +901,7 @@ mod proofs { 1 } - fn init(&self, _seed: Seed) -> Self::State { + fn init_with_params(&self, _seed: Seed, _params: 
&Self::Params) -> Self::State { CounterState { value: 0, terminal: false, @@ -914,15 +930,11 @@ mod proofs { out.push(1).unwrap(); } - fn observe_player( - &self, - state: &Self::State, - _player: PlayerId, - ) -> Self::PlayerObservation { + fn observe_player(&self, state: &Self::State, _player: PlayerId) -> Self::Obs { *state } - fn observe_spectator(&self, state: &Self::State) -> Self::SpectatorObservation { + fn observe_spectator(&self, state: &Self::State) -> Self::Obs { *state } diff --git a/src/verification.rs b/src/verification.rs index 2d74abf..36441bc 100644 --- a/src/verification.rs +++ b/src/verification.rs @@ -90,10 +90,10 @@ mod tests { struct MinimalState; impl Game for MinimalGame { + type Params = (); type State = MinimalState; type Action = u8; - type PlayerObservation = u8; - type SpectatorObservation = u8; + type Obs = u8; type WorldView = u8; type PlayerBuf = FixedVec; type ActionBuf = FixedVec; @@ -109,7 +109,7 @@ mod tests { 1 } - fn init(&self, _seed: Seed) -> Self::State { + fn init_with_params(&self, _seed: Seed, _params: &Self::Params) -> Self::State { MinimalState } @@ -132,15 +132,11 @@ mod tests { out.push(0).unwrap(); } - fn observe_player( - &self, - _state: &Self::State, - _player: PlayerId, - ) -> Self::PlayerObservation { + fn observe_player(&self, _state: &Self::State, _player: PlayerId) -> Self::Obs { 0 } - fn observe_spectator(&self, _state: &Self::State) -> Self::SpectatorObservation { + fn observe_spectator(&self, _state: &Self::State) -> Self::Obs { 0 } diff --git a/tests/validation.rs b/tests/validation.rs index 12f1e46..fab4b5a 100644 --- a/tests/validation.rs +++ b/tests/validation.rs @@ -5,13 +5,15 @@ use std::cell::Cell; use std::sync::Mutex; use std::sync::atomic::{AtomicUsize, Ordering}; +#[cfg(feature = "parallel")] +use gameengine::InteractiveSession; use gameengine::buffer::Buffer; use gameengine::builtin::{Blackjack, BlackjackAction, TicTacToe, TicTacToeAction}; #[cfg(feature = "physics")] use 
gameengine::builtin::{Platformer, PlatformerAction}; use gameengine::{ - CompactSpec, DeterministicRng, FixedVec, Game, InteractiveSession, PlayerAction, PlayerReward, - Session, StepOutcome, stable_hash, + CompactSpec, DeterministicRng, FixedVec, Game, PlayerAction, PlayerReward, Session, + StepOutcome, stable_hash, }; struct CountingAllocator; @@ -278,7 +280,7 @@ fn golden_compact_traces_match_expected_values() { compact, vec![vec![8193], vec![139521], vec![141573], vec![141589]] ); - assert_eq!(trace_hash, 0xfcb1_5a37_9487_30e3); + assert_eq!(trace_hash, 0x5b96_1efc_b075_3027); let blackjack_actions = vec![vec![PlayerAction { player: 0, @@ -286,7 +288,7 @@ fn golden_compact_traces_match_expected_values() { }]]; let (compact, trace_hash, _) = capture_compact_trace(Blackjack, 11, &blackjack_actions); assert_eq!(compact, vec![vec![140693832466, 1449, 132, 0]]); - assert_eq!(trace_hash, 0xd6d3_8ce4_845f_4206); + assert_eq!(trace_hash, 0xfb29_3f00_ff61_bdc7); #[cfg(feature = "physics")] let platformer_actions = vec![ @@ -385,7 +387,7 @@ fn golden_compact_traces_match_expected_values() { vec![2075], ] ); - assert_eq!(trace_hash, 0x1788_afb3_0dcd_0d2e); + assert_eq!(trace_hash, 0x1ee7_fb2e_3689_eabf); } } From 247a080f47a6fc4b46291aad5b93211a330c67a6 Mon Sep 17 00:00:00 2001 From: Noah Cashin Date: Wed, 1 Apr 2026 19:03:53 -0400 Subject: [PATCH 06/12] Update CI to properly use clippy --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 502b08c..8a541e7 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -74,7 +74,7 @@ jobs: PY - name: Clippy - run: cargo clippy --all-targets --all-features -- -D warnings + run: cargo +stable clippy --all-targets --all-features -- -D warnings - name: Install pinned Verus release run: | From c8543dde9adc43f21114bb9ce53b7ca523a76cb7 Mon Sep 17 00:00:00 2001 From: Noah Cashin Date: Wed, 1 Apr 2026 19:09:52 -0400 Subject: 
[PATCH 07/12] Fix verus unzip path in CI --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 8a541e7..11e2a9f 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -81,7 +81,7 @@ jobs: curl -fsSL "https://github.com/verus-lang/verus/releases/download/release%2F0.2026.03.28.3390e9a/verus-0.2026.03.28.3390e9a-x86-linux.zip" -o /tmp/verus.zip unzip -q /tmp/verus.zip -d /tmp rm -rf ./verus_binary - mv /tmp/verus_x86_64 ./verus_binary + mv /tmp/verus-x86-linux ./verus_binary chmod +x ./verus_binary/verus - name: Verus model checks From 3ea65863d43090d2decb811f7ead45a4c5f865a6 Mon Sep 17 00:00:00 2001 From: Noah Cashin Date: Wed, 1 Apr 2026 20:10:48 -0400 Subject: [PATCH 08/12] Add rustup install to CI so Verus can run --- .github/workflows/ci.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 11e2a9f..e189c01 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -84,6 +84,9 @@ jobs: mv /tmp/verus-x86-linux ./verus_binary chmod +x ./verus_binary/verus + - name: Install Verus-required Rust toolchain + run: rustup toolchain install 1.94.0-x86_64-unknown-linux-gnu + - name: Verus model checks run: REQUIRE_VERUS=1 bash scripts/run-verus.sh From 43d4e33e3b6fe31f9c6a4f153c6deb3d6eba3b1b Mon Sep 17 00:00:00 2001 From: Noah Cashin Date: Wed, 1 Apr 2026 20:59:07 -0400 Subject: [PATCH 09/12] Fix bugs --- src/builtin/platformer/mod.rs | 139 +++++++++++++++++++------ src/builtin/platformer/tests.rs | 58 +++++++++++ src/core/env.rs | 177 ++++++++++++++++++++++++++++++-- src/core/single_player.rs | 9 ++ src/game.rs | 5 + src/session.rs | 5 + tests/validation.rs | 34 +++--- 7 files changed, 368 insertions(+), 59 deletions(-) diff --git a/src/builtin/platformer/mod.rs b/src/builtin/platformer/mod.rs index d9dd424..3f1e9ec 100644 --- a/src/builtin/platformer/mod.rs +++ 
b/src/builtin/platformer/mod.rs @@ -18,6 +18,10 @@ const FIRST_BERRY_BODY_ID: u16 = 10; const PLATFORMER_BODIES: usize = 1 + BERRY_COUNT; const PLATFORMER_CONTACTS: usize = PLATFORMER_BODIES * (PLATFORMER_BODIES - 1) / 2; const ALL_BERRIES_MASK: u8 = 0b00_111111; +const PLATFORMER_Y_SHIFT: u8 = 8; +const PLATFORMER_REMAINING_BERRIES_SHIFT: u8 = 16; +const PLATFORMER_TERMINAL_SHIFT: u8 = 22; +const PLATFORMER_OBSERVATION_BITS: u8 = PLATFORMER_TERMINAL_SHIFT + 1; const PLATFORMER_ACTION_ORDER: [PlatformerAction; 4] = [ PlatformerAction::Stay, PlatformerAction::Left, @@ -89,6 +93,73 @@ impl Default for PlatformerConfig { } impl PlatformerConfig { + fn checked_step_reward( + self, + collected: u8, + finished: bool, + sprained: bool, + ) -> Option { + let mut reward = i128::from(self.berry_reward) * i128::from(collected); + if finished { + reward += i128::from(self.finish_bonus); + } + if sprained { + reward -= 1; + } + if reward < i128::from(Reward::MIN) || reward > i128::from(Reward::MAX) { + return None; + } + Some(reward as Reward) + } + + fn reward_bounds(self) -> Option<(Reward, Reward)> { + let mut min_reward = Reward::MAX; + let mut max_reward = Reward::MIN; + let mut collected = 0u8; + while collected <= BERRY_COUNT as u8 { + for finished in [false, true] { + if finished && collected == 0 { + continue; + } + for sprained in [false, true] { + let reward = self.checked_step_reward(collected, finished, sprained)?; + min_reward = min_reward.min(reward); + max_reward = max_reward.max(reward); + } + } + collected += 1; + } + Some((min_reward, max_reward)) + } + + fn compact_spec(self) -> Option { + let (min_reward, max_reward) = self.reward_bounds()?; + let reward_span = i128::from(max_reward) - i128::from(min_reward); + if reward_span < 0 || reward_span > i128::from(u64::MAX) { + return None; + } + let reward_offset = -i128::from(min_reward); + if reward_offset < i128::from(Reward::MIN) || reward_offset > i128::from(Reward::MAX) { + return None; + } + + let 
reward_bits = if reward_span == 0 { + 1 + } else { + (u64::BITS - (reward_span as u64).leading_zeros()) as u8 + }; + + Some(CompactSpec { + action_count: 4, + observation_bits: PLATFORMER_OBSERVATION_BITS, + observation_stream_len: 1, + reward_bits, + min_reward, + max_reward, + reward_offset: reward_offset as Reward, + }) + } + /// Returns the axis-aligned world bounds. pub fn arena_bounds(self) -> Aabb2 { Aabb2::new( @@ -136,6 +207,7 @@ impl PlatformerConfig { || self.sprain_denominator == 0 || self.sprain_numerator > self.sprain_denominator || self.berry_y >= self.height + || self.compact_spec().is_none() { return false; } @@ -232,7 +304,7 @@ impl Platformer { ); } - fn collect_berries_from_contacts(state: &mut PlatformerState) -> Reward { + fn collect_berries_from_contacts(state: &mut PlatformerState) -> (u8, bool) { let was_non_terminal = state.remaining_berries != 0; let mut remaining = u64::from(state.remaining_berries); let collected = collect_actor_trigger_contacts( @@ -243,12 +315,7 @@ impl Platformer { &mut remaining, ); state.remaining_berries = remaining as u8; - - let mut reward = state.config.berry_reward * i64::from(collected); - if was_non_terminal && state.remaining_berries == 0 { - reward += state.config.finish_bonus; - } - reward + (collected, was_non_terminal && state.remaining_berries == 0) } fn observation_from_state(state: &PlatformerState) -> PlatformerObservation { @@ -346,17 +413,17 @@ impl single_player::SinglePlayerGame for Platformer { ) { let action = action.unwrap_or(PlatformerAction::Stay); - let mut reward = 0; if self.is_terminal(state) { out.termination = Termination::Terminal { winner: Self::winner(state), }; + single_player::push_reward(&mut out.rewards, 0); } else { let config = state.config; let (current_x, _) = Self::player_position(state); - let (x, y) = match action { - PlatformerAction::Stay => (current_x, 0), - PlatformerAction::Left => (current_x.saturating_sub(1), 0), + let (x, y, sprained) = match action { + 
PlatformerAction::Stay => (current_x, 0, false), + PlatformerAction::Left => (current_x.saturating_sub(1), 0, false), PlatformerAction::Right => ( if current_x + config.player_width < config.width { current_x + 1 @@ -364,12 +431,12 @@ impl single_player::SinglePlayerGame for Platformer { current_x }, 0, + false, ), PlatformerAction::Jump => { - if rng.gen_bool_ratio(config.sprain_numerator, config.sprain_denominator) { - reward -= 1; - } - (current_x, config.jump_delta) + let sprained = + rng.gen_bool_ratio(config.sprain_numerator, config.sprain_denominator); + (current_x, config.jump_delta, sprained) } }; @@ -377,10 +444,14 @@ impl single_player::SinglePlayerGame for Platformer { .world .set_body_position_deferred(PLAYER_BODY_ID, config.player_center(x, y)); state.world.refresh_contacts(); - reward += Self::collect_berries_from_contacts(state); + let (collected, finished) = Self::collect_berries_from_contacts(state); self.sync_berries(state); state.world.step(); + let reward = config + .checked_step_reward(collected, finished, sprained) + .expect("validated platformer config produced an out-of-range reward"); + single_player::push_reward(&mut out.rewards, reward); out.termination = if self.is_terminal(state) { Termination::Terminal { winner: Self::winner(state), @@ -389,8 +460,6 @@ impl single_player::SinglePlayerGame for Platformer { Termination::Ongoing }; } - - single_player::push_reward(&mut out.rewards, reward); } fn state_invariant(&self, state: &Self::State) -> bool { @@ -464,30 +533,34 @@ impl single_player::SinglePlayerGame for Platformer { fn transition_postcondition( &self, - _pre: &Self::State, + pre: &Self::State, _action: Option, post: &Self::State, outcome: &StepOutcome, ) -> bool { + if pre.remaining_berries == 0 { + return post == pre && outcome.reward_for(0) == 0 && outcome.is_terminal(); + } + let Some((min_reward, max_reward)) = post.config.reward_bounds() else { + return false; + }; reward_and_terminal_postcondition( outcome.reward_for(0), - 
-1, - 11, + min_reward, + max_reward, post.remaining_berries == 0, outcome.is_terminal(), ) } fn compact_spec(&self) -> CompactSpec { - CompactSpec { - action_count: 4, - observation_bits: 12, - observation_stream_len: 1, - reward_bits: 4, - min_reward: -1, - max_reward: 11, - reward_offset: 1, - } + self.compact_spec_for_params(&self.config) + } + + fn compact_spec_for_params(&self, params: &Self::Params) -> CompactSpec { + params + .compact_spec() + .expect("invalid platformer config cannot produce compact spec") } fn encode_action(&self, action: &Self::Action) -> u64 { @@ -501,9 +574,9 @@ impl single_player::SinglePlayerGame for Platformer { fn encode_player_observation(&self, observation: &Self::Obs, out: &mut Self::WordBuf) { out.clear(); let packed = u64::from(observation.x) - | (u64::from(observation.y) << 4) - | (u64::from(observation.remaining_berries) << 5) - | ((observation.terminal as u64) << 11); + | (u64::from(observation.y) << PLATFORMER_Y_SHIFT) + | (u64::from(observation.remaining_berries) << PLATFORMER_REMAINING_BERRIES_SHIFT) + | ((observation.terminal as u64) << PLATFORMER_TERMINAL_SHIFT); out.push(packed).unwrap(); } } diff --git a/src/builtin/platformer/tests.rs b/src/builtin/platformer/tests.rs index 2921f48..5044ded 100644 --- a/src/builtin/platformer/tests.rs +++ b/src/builtin/platformer/tests.rs @@ -1,4 +1,6 @@ use super::*; +use crate::core::env::DefaultEnvironment; +use crate::core::observe::Observer; use crate::game::Game; use crate::session::Session; use crate::types::{PlayerAction, PlayerReward}; @@ -133,3 +135,59 @@ fn physics_world_tracks_actor_and_berries() { assert_eq!(world.physics.bodies.len(), PLATFORMER_BODIES); assert!(world.physics.invariant()); } + +#[test] +fn parameterized_rewards_update_transition_and_compact_contracts() { + let mut config = PlatformerConfig { + sprain_numerator: 0, + berry_reward: 4, + finish_bonus: 30, + ..PlatformerConfig::default() + }; + config.berry_y = config.jump_delta; + let game = 
Platformer::default(); + let spec = game.compact_spec_for_params(&config); + + let mut state = game.init_with_params(1, &config); + state.remaining_berries = 1; + game.sync_berries(&mut state); + state + .world + .set_body_position(PLAYER_BODY_ID, config.player_center(config.berry_xs[0], 0)); + + let mut rng = DeterministicRng::from_seed_and_stream(1, 1); + let mut outcome = StepOutcome::>::default(); + let mut actions = FixedVec::, 1>::default(); + actions + .push(PlayerAction { + player: 0, + action: PlatformerAction::Jump, + }) + .unwrap(); + + game.step_in_place(&mut state, &actions, &mut rng, &mut outcome); + + assert_eq!(outcome.reward_for(0), 34); + assert!(spec.max_reward >= 34); + assert!(spec.try_encode_reward(34).is_ok()); +} + +#[test] +fn parameterized_environment_uses_wide_observation_schema() { + let config = PlatformerConfig { + width: 40, + height: 10, + jump_delta: 7, + berry_y: 7, + berry_xs: [1, 6, 11, 16, 21, 26], + ..PlatformerConfig::default() + }; + let mut env = DefaultEnvironment::::new( + Platformer::default(), + 3, + Observer::Player(0), + ); + let packet = env.reset_with_params(3, config).unwrap(); + assert_eq!(packet.words().len(), 1); + assert!(packet.words()[0] > 4095); +} diff --git a/src/core/env.rs b/src/core/env.rs index b1dc43d..a44f463 100644 --- a/src/core/env.rs +++ b/src/core/env.rs @@ -225,6 +225,24 @@ where self.agent_player } + fn validate_player(&self, player: PlayerId) -> Result<(), EnvError> { + let player_count = self.session.game().player_count(); + if player >= player_count { + return Err(EnvError::InvalidAgentPlayer { + player, + player_count, + }); + } + Ok(()) + } + + fn validate_observer(&self) -> Result<(), EnvError> { + if let Observer::Player(player) = self.observer { + self.validate_player(player)?; + } + Ok(()) + } + /// Sets the player id controlled by compact `step()` actions. 
pub fn set_agent_player(&mut self, player: PlayerId) { self.agent_player = player; @@ -258,13 +276,7 @@ where }); }; - let player_count = self.session.game().player_count(); - if self.agent_player >= player_count { - return Err(EnvError::InvalidAgentPlayer { - player: self.agent_player, - player_count, - }); - } + self.validate_player(self.agent_player)?; let mut actions = G::JointActionBuf::default(); actions @@ -279,7 +291,7 @@ where (outcome.reward_for(self.agent_player), outcome.is_terminal()) }; - let spec = self.session.game().compact_spec(); + let spec = self.session.compact_spec(); let encoded_reward = spec .try_encode_reward(reward) .map_err(|reason| match reason { @@ -304,6 +316,8 @@ where /// Encodes current observation into a bounded compact packet. pub fn encode_current_observation(&self) -> Result, EnvError> { + self.validate_observer()?; + let mut encoded = G::WordBuf::default(); self.session .game() @@ -315,7 +329,6 @@ where }); } self.session - .game() .compact_spec() .validate_observation_words(encoded.as_slice()) .map_err(|reason| EnvError::InvalidObservationEncoding { reason })?; @@ -385,6 +398,9 @@ mod regression_tests { #[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)] struct BadRewardGame; + #[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)] + struct ParamRewardGame; + impl Game for DemoGame { type Params = u8; type State = DemoState; @@ -698,6 +714,126 @@ mod regression_tests { } } + impl Game for ParamRewardGame { + type Params = u8; + type State = u8; + type Action = u8; + type Obs = u8; + type WorldView = (); + type PlayerBuf = FixedVec; + type ActionBuf = FixedVec; + type JointActionBuf = FixedVec, 1>; + type RewardBuf = FixedVec; + type WordBuf = FixedVec; + + fn default_params(&self) -> Self::Params { + 0 + } + + fn name(&self) -> &'static str { + "param-reward" + } + + fn player_count(&self) -> usize { + 1 + } + + fn init_with_params(&self, _seed: Seed, params: &Self::Params) -> Self::State { + *params + } + + fn 
is_terminal(&self, _state: &Self::State) -> bool { + false + } + + fn players_to_act(&self, _state: &Self::State, out: &mut Self::PlayerBuf) { + out.clear(); + out.push(0).unwrap(); + } + + fn legal_actions( + &self, + _state: &Self::State, + _player: PlayerId, + out: &mut Self::ActionBuf, + ) { + out.clear(); + out.push(0).unwrap(); + } + + fn observe_player(&self, state: &Self::State, _player: PlayerId) -> Self::Obs { + *state + } + + fn observe_spectator(&self, state: &Self::State) -> Self::Obs { + *state + } + + fn world_view(&self, _state: &Self::State) -> Self::WorldView {} + + fn step_in_place( + &self, + state: &mut Self::State, + _joint_actions: &Self::JointActionBuf, + _rng: &mut DeterministicRng, + out: &mut StepOutcome, + ) { + out.rewards + .push(PlayerReward { + player: 0, + reward: i64::from(*state), + }) + .unwrap(); + } + + fn compact_spec(&self) -> CompactSpec { + CompactSpec { + action_count: 1, + observation_bits: 8, + observation_stream_len: 1, + reward_bits: 1, + min_reward: 0, + max_reward: 0, + reward_offset: 0, + } + } + + fn compact_spec_for_params(&self, params: &Self::Params) -> CompactSpec { + let max_reward = i64::from(*params); + let reward_bits = if max_reward == 0 { + 1 + } else { + (u64::BITS - (max_reward as u64).leading_zeros()) as u8 + }; + CompactSpec { + action_count: 1, + observation_bits: 8, + observation_stream_len: 1, + reward_bits, + min_reward: 0, + max_reward, + reward_offset: 0, + } + } + + fn encode_action(&self, action: &Self::Action) -> u64 { + u64::from(*action) + } + + fn decode_action(&self, encoded: u64) -> Option { + (encoded == 0).then_some(0) + } + + fn encode_player_observation(&self, observation: &Self::Obs, out: &mut Self::WordBuf) { + out.clear(); + out.push(u64::from(*observation)).unwrap(); + } + + fn encode_spectator_observation(&self, observation: &Self::Obs, out: &mut Self::WordBuf) { + self.encode_player_observation(observation, out); + } + } + #[test] fn step_uses_agent_player_reward() { let mut 
env = DefaultEnvironment::::new(DemoGame, 3, Observer::Player(0)); @@ -752,6 +888,29 @@ mod regression_tests { Err(EnvError::InvalidRewardEncoding { .. }) )); } + + #[test] + fn observation_rejects_out_of_range_player_observer() { + let mut env = DefaultEnvironment::::new(DemoGame, 3, Observer::Player(0)); + env.set_observer(Observer::Player(7)); + assert_eq!( + env.encode_current_observation(), + Err(EnvError::InvalidAgentPlayer { + player: 7, + player_count: 2, + }) + ); + } + + #[test] + fn reward_encoding_uses_active_session_params() { + let mut env = + DefaultEnvironment::::new(ParamRewardGame, 1, Observer::Player(0)); + env.reset_with_params(1, 5).unwrap(); + let step = env.step(0).unwrap(); + assert_eq!(step.reward.raw, 5); + assert_eq!(step.reward.encoded, 5); + } } #[cfg(kani)] diff --git a/src/core/single_player.rs b/src/core/single_player.rs index 005696a..addab96 100644 --- a/src/core/single_player.rs +++ b/src/core/single_player.rs @@ -122,6 +122,11 @@ pub trait SinglePlayerGame { } } + /// Compact codec descriptor for an explicit parameter bundle. + fn compact_spec_for_params(&self, _params: &Self::Params) -> CompactSpec { + self.compact_spec() + } + /// Encode an action into compact integer representation. fn encode_action(&self, _action: &Self::Action) -> u64 { 0 @@ -268,6 +273,10 @@ where ::compact_spec(self) } + fn compact_spec_for_params(&self, params: &Self::Params) -> CompactSpec { + ::compact_spec_for_params(self, params) + } + fn encode_action(&self, action: &Self::Action) -> u64 { ::encode_action(self, action) } diff --git a/src/game.rs b/src/game.rs index d11db29..f3eff1a 100644 --- a/src/game.rs +++ b/src/game.rs @@ -91,6 +91,11 @@ pub trait Game { } } + /// Compact codec descriptor for an explicit parameter bundle. + fn compact_spec_for_params(&self, _params: &Self::Params) -> CompactSpec { + self.compact_spec() + } + /// Encode an action into its compact integer representation. 
fn encode_action(&self, _action: &Self::Action) -> u64 { 0 diff --git a/src/session.rs b/src/session.rs index c8ba777..64cf2bf 100644 --- a/src/session.rs +++ b/src/session.rs @@ -443,6 +443,11 @@ impl> SessionKernel { self.rng } + /// Returns the active compact codec descriptor for current params. + pub fn compact_spec(&self) -> crate::compact::CompactSpec { + self.game.compact_spec_for_params(&self.params) + } + /// Returns immutable trace view. pub fn trace(&self) -> &H::Trace { self.history.trace() diff --git a/tests/validation.rs b/tests/validation.rs index fab4b5a..9764848 100644 --- a/tests/validation.rs +++ b/tests/validation.rs @@ -368,23 +368,23 @@ fn golden_compact_traces_match_expected_values() { assert_eq!( compact, vec![ - vec![2017], - vec![2001], - vec![1986], - vec![1987], - vec![1939], - vec![1924], - vec![1925], - vec![1813], - vec![1798], - vec![1799], - vec![1559], - vec![1544], - vec![1545], - vec![1049], - vec![1034], - vec![1035], - vec![2075], + vec![4128769], + vec![4063489], + vec![4063234], + vec![4063235], + vec![3932419], + vec![3932164], + vec![3932165], + vec![3670277], + vec![3670022], + vec![3670023], + vec![3145991], + vec![3145736], + vec![3145737], + vec![2097417], + vec![2097162], + vec![2097163], + vec![4194571], ] ); assert_eq!(trace_hash, 0x1ee7_fb2e_3689_eabf); From 4fdc082d91664633b25e3945a311a0b99a77b263 Mon Sep 17 00:00:00 2001 From: Noah Cashin Date: Wed, 1 Apr 2026 22:06:50 -0400 Subject: [PATCH 10/12] Improve architecture and proofs --- Cargo.toml | 1 - README.md | 27 ++- proofs/README.md | 21 +- proofs/claim.md | 55 +++-- proofs/future_game_template.md | 52 +++-- proofs/manifest.txt | 55 +++++ proofs/verus/liveness_model.rs | 64 ++++++ scripts/render-proof-claim.sh | 65 ++++++ scripts/run-perf.sh | 2 +- scripts/run-verification.sh | 73 ++----- scripts/run-verus.sh | 10 +- src/builtin/tictactoe/mod.rs | 218 ++++++++++++++++++- src/builtin/tictactoe/proofs.rs | 35 ++++ src/lib.rs | 1 - src/proof/liveness.rs | 89 
++++++++ src/proof/macros.rs | 62 ++++++ src/proof/manifest.rs | 358 ++++++++++++++++++++++++++++++++ src/proof/mod.rs | 92 +++++++- src/proof/model.rs | 116 +++++++++++ src/proof/refinement.rs | 169 +++++++++++++++ 20 files changed, 1427 insertions(+), 138 deletions(-) create mode 100644 proofs/manifest.txt create mode 100644 proofs/verus/liveness_model.rs create mode 100644 scripts/render-proof-claim.sh create mode 100644 src/proof/liveness.rs create mode 100644 src/proof/macros.rs create mode 100644 src/proof/manifest.rs create mode 100644 src/proof/model.rs create mode 100644 src/proof/refinement.rs diff --git a/Cargo.toml b/Cargo.toml index 77c5976..6e48f30 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,7 +13,6 @@ unexpected_cfgs = { level = "warn", check-cfg = ['cfg(kani)'] } [features] default = [] -proof = [] physics = [] builtin = [] cli = ["builtin"] diff --git a/README.md b/README.md index 2d21ef8..aba0dfa 100644 --- a/README.md +++ b/README.md @@ -85,7 +85,24 @@ Current proof surface includes: - rollback/replay restoration properties, - builtin game invariants in the harness matrix, - engine-owned 2D physics invariants, -- Verus replay/observation refinement models. +- manifest-driven Kani/Verus proof registration, +- executable model/refinement scaffolding for verified games, +- Verus replay/observation/liveness models. + +The machine-readable proof boundary lives in [`proofs/manifest.txt`](proofs/manifest.txt). +Claims are intentionally split by status: + +- `refined`: backed by Verus model laws and Kani refinement checks, +- `checked`: bounded Kani proofs over the Rust implementation, +- `model`: Verus-only model claims, +- `runtime`: tested/benchmarked behavior, +- `out_of_scope`: explicitly outside the formal boundary. 
+ +Games only opt into the stronger surface explicitly: + +- implement `proof::ModelGame` and `proof::RefinementWitness`, +- add an explicit `impl proof::VerifiedGame for MyGame {}`, +- register the claim and harness ids in `proofs/manifest.txt`. Render/runtime behavior is validated by tests and benchmarks; the GPU/driver stack is intentionally outside full formal proof scope. @@ -107,12 +124,16 @@ Pin and auto-fetch the CI Verus binary: AUTO_FETCH_VERUS=1 REQUIRE_VERUS=1 bash scripts/run-verus.sh ``` +Render the human-readable claim matrix from the manifest: + +```bash +bash scripts/render-proof-claim.sh +``` + ## Feature Graph - `default = []` - minimal headless kernel -- `proof` - - proof helper exports - `physics` - engine-owned deterministic 2D physics - `builtin` diff --git a/proofs/README.md b/proofs/README.md index 100f1a4..2c4f372 100644 --- a/proofs/README.md +++ b/proofs/README.md @@ -38,7 +38,8 @@ exploring an unbounded rejection loop. ## What Is Verified -See [`proofs/claim.md`](claim.md) for a precise verified vs tested vs out-of-scope matrix. +See [`proofs/manifest.txt`](manifest.txt) for the machine-readable proof boundary and +[`proofs/claim.md`](claim.md) for the rendered human-readable matrix. 
- Fixed-capacity buffer behavior in [`src/buffer.rs`](../src/buffer.rs) - Reward and replay encoding primitives in [`src/types.rs`](../src/types.rs) @@ -49,7 +50,8 @@ See [`proofs/claim.md`](claim.md) for a precise verified vs tested vs out-of-sco - Physics invariants for the engine-owned 2D world and the platformer environment when `builtin` and `physics` are enabled - Verus model lemmas in [`proofs/verus/session_refinement.rs`](verus/session_refinement.rs) - for replay fold refinement and canonical observation-schema constraints + and [`proofs/verus/liveness_model.rs`](verus/liveness_model.rs) + for replay fold refinement, canonical observation-schema constraints, and liveness scaffolding - Render/input/runtime behavior is covered by tests and benchmarks; it is not currently claimed as fully formally verified @@ -63,12 +65,15 @@ See [`proofs/claim.md`](claim.md) for a precise verified vs tested vs out-of-sco - `world_view_invariant` - `transition_postcondition` 2. Add runtime tests for determinism, replay, compact codecs, and rollback if the game uses sessions. -3. Add `#[cfg(kani)]` proof harnesses in the game module. -4. Call the shared helpers in [`src/verification.rs`](../src/verification.rs) for transition and observation contracts. -5. If the game exposes a compact codec, prove action round-trips and reward range correctness. -6. If the game uses the `physics` feature, prove the world invariant before and after every step. -7. If the game is a first-party reference environment, gate it behind `builtin` and add its - harnesses to [`scripts/run-verification.sh`](../scripts/run-verification.sh). +3. Implement the proof-layer traits in [`src/proof/model.rs`](../src/proof/model.rs) when the + game opts into executable model/refinement checks. + Add an explicit `impl proof::VerifiedGame for MyGame {}` only after the stronger surface is intentional. +4. Add `#[cfg(kani)]` proof harnesses in the game module, preferably through the proof macros. +5. 
Call the shared helpers in [`src/verification.rs`](../src/verification.rs) for transition and observation contracts. +6. If the game exposes a compact codec, prove action round-trips and reward range correctness. +7. If the game uses the `physics` feature, prove the world invariant before and after every step. +8. If the game is a first-party reference environment, register its claims and harnesses in + [`proofs/manifest.txt`](manifest.txt) so the verification scripts and claim docs stay aligned. ## Acceptance Rule diff --git a/proofs/claim.md b/proofs/claim.md index 8d90921..fe573a7 100644 --- a/proofs/claim.md +++ b/proofs/claim.md @@ -1,41 +1,40 @@ # Proof Claim Matrix -This document states what `gameengine` currently claims as formally verified, what is tested, -and what is intentionally outside full proof scope. +This document is derived from `proofs/manifest.txt` and states the current proof boundary. -## Formally Verified (Kani + Verus Surfaces) +## Verified Boundary -- Fixed-capacity containers and bit-word primitives. -- Compact reward codec round-trips and range soundness. -- Compact observation word-shape and reward bit-width constraint enforcement. -- Deterministic RNG construction and replay properties. -- Replay rewind restoration for bounded history configurations. -- Verus replay refinement lemmas and canonical observation-schema model constraints. -- Builtin game invariants included in harness matrix. -- Physics invariants and platformer synchronization harnesses for `builtin + physics`. +- kernel+builtins -## Verified By Runtime Tests + Property Tests +## Refined Claims -- Seeded determinism and replay equivalence in integration tests. -- Compact traces and stable hashes for golden trajectories. -- Allocation-free stepping on core builtin hot paths. -- Render presenter scene emission and driver progression behavior. 
+- `builtin.tictactoe`: TicTacToe now has an executable model/refinement surface tying runtime init, step, replay, and liveness scaffolding to the proof framework. (proof ids: `ttt_model_init_refines_runtime`, `ttt_model_step_refines_runtime`, `ttt_model_replay_refines_runtime`, `ranked_progress_holds_for_opening_move`, `probabilistic_support_is_finite_and_nonempty`, `session_refinement`, `liveness_model`) -## In Scope But Not Fully Formalized Yet +## Implementation-Checked Claims -- Registry-level descriptor integrity and dispatch consistency. -- Higher-level CLI orchestration and policy script UX behavior. -- Richer progress/liveness obligations beyond bounded checks. +- `engine.buffer`: Fixed-capacity vectors preserve prefix order and bit-word toggling remains sound. (proof ids: `fixed_vec_push_preserves_prefix_order`, `bit_words_round_trip`) +- `engine.compact`: Compact reward round-trips and schema/bit-width enforcement hold for the implementation helpers. (proof ids: `compact_reward_round_trip`, `compact_observation_words_match_schema`, `compact_reward_bit_width_is_enforced`) +- `engine.rng`: Reference RNG constructor and replay properties hold for the Rust implementation on the verified cases. (proof ids: `rng_state_sanitization_is_total`, `seeded_stream_constructor_handles_reference_cases`, `next_u64_is_repeatable_for_reference_states`) +- `engine.session`: Bounded rewind restoration and replay storage helpers hold for the Rust implementation. (proof ids: `replay_trace_records_steps`, `rewind_restores_prior_state`) +- `engine.env`: The compact environment rejects invalid observation/reward encodings instead of silently accepting them. (proof ids: `env_rejects_invalid_observation_words`, `env_rejects_reward_encoding_that_exceeds_bit_width`) +- `builtin.blackjack`: Blackjack maintains the existing bounded seeded safety/protocol proof surface. 
(proof ids: `concrete_seed_shuffle_is_a_full_permutation`, `player_observation_hides_opponent_hand_before_terminal`, `initial_observation_contracts_hold_for_concrete_seed`, `stand_action_replays_deterministically_for_seed_17`, `hand_evaluation_matches_busted_flag`) +- `builtin.platformer`: Platformer maintains the existing bounded default-config physics and safety proof surface. (proof ids: `wall_clamps_hold_for_all_edge_positions`, `jump_reward_is_bounded`, `initial_observation_and_world_contracts_hold`, `berry_mask_tracks_trigger_activation`, `clamping_keeps_body_in_bounds`, `oracle_view_matches_world_storage`) -## Out of Full Formal Scope +## Model-Only Claims -- GPU/driver execution details (`wgpu`, OS windowing, platform graphics stack). -- Host runtime behavior outside deterministic kernel contract. +- `engine.replay-laws`: Replay and canonical observation schema laws are proved at the Verus model level. (proof ids: `session_refinement`) +- `engine.liveness-laws`: Ranking-based termination and finite-support stochastic scaffolding are specified at the Verus model level. (proof ids: `liveness_model`) -## Execution Entry Point +## Runtime-Tested Claims -Run the consolidated verification surface with: +- `render.runtime`: Render/runtime behavior remains tested and benchmarked rather than formally proved. -```bash -bash scripts/run-verification.sh -``` +## Out Of Scope + +- `gpu.os`: GPU, OS windowing, and host graphics stacks remain outside the formal proof boundary. + +## Assumptions + +- `builtin.blackjack`: Current bounded blackjack proofs are tied to concrete seeds and representative hands; they are not universal over all shuffled decks. +- `builtin.platformer`: Current bounded platformer proofs cover the default-config safety surface; full refinement proofs for parameterized physics games remain future work. 
+- `builtin.tictactoe`: The new liveness claims are about ranking/probabilistic scaffolding on representative traces, not an end-to-end universal fairness proof. diff --git a/proofs/future_game_template.md b/proofs/future_game_template.md index fc76e4f..5dff107 100644 --- a/proofs/future_game_template.md +++ b/proofs/future_game_template.md @@ -27,6 +27,18 @@ Implement and document: For single-player games, prefer implementing `core::single_player::SinglePlayerGame` and let the engine provide the `Game` adapter wiring. +## Proof-Layer Checklist + +If the game should participate in the stronger verified surface, also implement: + +- `proof::ModelGame` +- `proof::RefinementWitness` +- `proof::VerifiedGame` +- `proof::TerminationWitness` when a ranking argument exists +- `proof::ProbabilisticWitness` when the game has finite-support stochastic choices + +Register all Kani and Verus links in `proofs/manifest.txt`. + ## Kani Harness Skeleton ```rust @@ -36,30 +48,28 @@ mod proofs { use crate::buffer::FixedVec; use crate::types::PlayerAction; - #[kani::proof] - fn transition_contract_holds_for_representative_step() { - let game = MyGame::default(); - let state = game.init_with_params(1, &game.default_params()); - let mut actions = FixedVec::, 1>::default(); - actions.push(PlayerAction { player: 0, action: MyAction::Default }).unwrap(); - crate::verification::assert_transition_contracts(&game, &state, &actions, 1); - } - - #[kani::proof] - fn observation_contract_holds_for_initial_state() { - let game = MyGame::default(); - let state = game.init_with_params(1, &game.default_params()); - crate::verification::assert_observation_contracts(&game, &state); - } - - #[kani::proof] - fn compact_round_trip_holds() { - let game = MyGame::default(); - crate::verification::assert_compact_roundtrip(&game, &MyAction::Default); - } + crate::declare_refinement_harnesses!( + game = MyGame::default(), + params = MyGame::default().default_params(), + seed = 1, + actions = { + let mut 
actions = FixedVec::, 1>::default(); + actions.push(PlayerAction { player: 0, action: MyAction::Default }).unwrap(); + actions + }, + init = mygame_init_refines_runtime, + step = mygame_step_refines_runtime, + replay = mygame_replay_refines_runtime, + ); } ``` +Outside the proof module, add: + +```rust +impl crate::proof::VerifiedGame for MyGame {} +``` + If your game uses shuffle-heavy setup or rejection-sampled RNG, keep Kani harness seeds concrete unless you have a separately bounded proof wrapper for that RNG path. ## Extra Checks For Physics Games diff --git a/proofs/manifest.txt b/proofs/manifest.txt new file mode 100644 index 0000000..0ea31c4 --- /dev/null +++ b/proofs/manifest.txt @@ -0,0 +1,55 @@ +# kind|... +boundary|kernel+builtins + +kani|bit_words_round_trip|default|bit_words_round_trip +kani|fixed_vec_push_preserves_prefix_order|default|fixed_vec_push_preserves_prefix_order +kani|compact_reward_round_trip|default|compact_reward_round_trip +kani|compact_observation_words_match_schema|default|compact_observation_words_match_schema +kani|compact_reward_bit_width_is_enforced|default|compact_reward_bit_width_is_enforced +kani|step_outcome_reward_lookup_defaults_to_zero|default|step_outcome_reward_lookup_defaults_to_zero +kani|env_rejects_invalid_observation_words|default|env_rejects_invalid_observation_words +kani|env_rejects_reward_encoding_that_exceeds_bit_width|default|env_rejects_reward_encoding_that_exceeds_bit_width +kani|replay_trace_records_steps|default|replay_trace_records_steps +kani|rng_state_sanitization_is_total|default|rng_state_sanitization_is_total +kani|seeded_stream_constructor_handles_reference_cases|default|seeded_stream_constructor_handles_reference_cases +kani|next_u64_is_repeatable_for_reference_states|default|next_u64_is_repeatable_for_reference_states +kani|rewind_restores_prior_state|default|rewind_restores_prior_state +kani|concrete_seed_shuffle_is_a_full_permutation|builtin|concrete_seed_shuffle_is_a_full_permutation 
+kani|player_observation_hides_opponent_hand_before_terminal|builtin|player_observation_hides_opponent_hand_before_terminal +kani|initial_observation_contracts_hold_for_concrete_seed|builtin|initial_observation_contracts_hold_for_concrete_seed +kani|stand_action_replays_deterministically_for_seed_17|builtin|stand_action_replays_deterministically_for_seed_17 +kani|hand_evaluation_matches_busted_flag|builtin|hand_evaluation_matches_busted_flag +kani|legal_actions_are_exactly_empty_cells|builtin|legal_actions_are_exactly_empty_cells +kani|invalid_move_never_mutates_board|builtin|invalid_move_never_mutates_board +kani|ttt_model_init_refines_runtime|builtin|ttt_model_init_refines_runtime +kani|ttt_model_step_refines_runtime|builtin|ttt_model_step_refines_runtime +kani|ttt_model_replay_refines_runtime|builtin|ttt_model_replay_refines_runtime +kani|ranked_progress_holds_for_opening_move|builtin|ranked_progress_holds_for_opening_move +kani|probabilistic_support_is_finite_and_nonempty|builtin|probabilistic_support_is_finite_and_nonempty +kani|clamping_keeps_body_in_bounds|builtin+physics|clamping_keeps_body_in_bounds +kani|oracle_view_matches_world_storage|builtin+physics|oracle_view_matches_world_storage +kani|wall_clamps_hold_for_all_edge_positions|builtin+physics|wall_clamps_hold_for_all_edge_positions +kani|jump_reward_is_bounded|builtin+physics|jump_reward_is_bounded +kani|initial_observation_and_world_contracts_hold|builtin+physics|initial_observation_and_world_contracts_hold +kani|berry_mask_tracks_trigger_activation|builtin+physics|berry_mask_tracks_trigger_activation + +verus|core_model|proofs/verus/core_model.rs +verus|session_refinement|proofs/verus/session_refinement.rs +verus|liveness_model|proofs/verus/liveness_model.rs + +claim|checked|engine.buffer|Fixed-capacity vectors preserve prefix order and bit-word toggling remains sound.|fixed_vec_push_preserves_prefix_order,bit_words_round_trip +claim|checked|engine.compact|Compact reward round-trips and 
schema/bit-width enforcement hold for the implementation helpers.|compact_reward_round_trip,compact_observation_words_match_schema,compact_reward_bit_width_is_enforced +claim|checked|engine.rng|Reference RNG constructor and replay properties hold for the Rust implementation on the verified cases.|rng_state_sanitization_is_total,seeded_stream_constructor_handles_reference_cases,next_u64_is_repeatable_for_reference_states +claim|checked|engine.session|Bounded rewind restoration and replay storage helpers hold for the Rust implementation.|replay_trace_records_steps,rewind_restores_prior_state +claim|checked|engine.env|The compact environment rejects invalid observation/reward encodings instead of silently accepting them.|env_rejects_invalid_observation_words,env_rejects_reward_encoding_that_exceeds_bit_width +claim|model|engine.replay-laws|Replay and canonical observation schema laws are proved at the Verus model level.|session_refinement +claim|model|engine.liveness-laws|Ranking-based termination and finite-support stochastic scaffolding are specified at the Verus model level.|liveness_model +claim|refined|builtin.tictactoe|TicTacToe now has an executable model/refinement surface tying runtime init, step, replay, and liveness scaffolding to the proof framework.|ttt_model_init_refines_runtime,ttt_model_step_refines_runtime,ttt_model_replay_refines_runtime,ranked_progress_holds_for_opening_move,probabilistic_support_is_finite_and_nonempty,session_refinement,liveness_model +claim|checked|builtin.blackjack|Blackjack maintains the existing bounded seeded safety/protocol proof surface.|concrete_seed_shuffle_is_a_full_permutation,player_observation_hides_opponent_hand_before_terminal,initial_observation_contracts_hold_for_concrete_seed,stand_action_replays_deterministically_for_seed_17,hand_evaluation_matches_busted_flag +claim|checked|builtin.platformer|Platformer maintains the existing bounded default-config physics and safety proof 
surface.|wall_clamps_hold_for_all_edge_positions,jump_reward_is_bounded,initial_observation_and_world_contracts_hold,berry_mask_tracks_trigger_activation,clamping_keeps_body_in_bounds,oracle_view_matches_world_storage +claim|runtime|render.runtime|Render/runtime behavior remains tested and benchmarked rather than formally proved.| +claim|out_of_scope|gpu.os|GPU, OS windowing, and host graphics stacks remain outside the formal proof boundary.| + +assumption|builtin.blackjack|Current bounded blackjack proofs are tied to concrete seeds and representative hands; they are not universal over all shuffled decks. +assumption|builtin.platformer|Current bounded platformer proofs cover the default-config safety surface; full refinement proofs for parameterized physics games remain future work. +assumption|builtin.tictactoe|The new liveness claims are about ranking/probabilistic scaffolding on representative traces, not an end-to-end universal fairness proof. diff --git a/proofs/verus/liveness_model.rs b/proofs/verus/liveness_model.rs new file mode 100644 index 0000000..589790d --- /dev/null +++ b/proofs/verus/liveness_model.rs @@ -0,0 +1,64 @@ +use vstd::prelude::*; + +verus! 
{ + +pub trait RankedTransitionModel { + type State; + type Action; + + spec fn step(state: Self::State, action: Self::Action) -> Self::State; + spec fn terminal(state: Self::State) -> bool; + spec fn rank(state: Self::State) -> nat; + + proof fn terminal_rank_axiom(state: Self::State) + ensures + Self::terminal(state) <==> Self::rank(state) == 0; + + proof fn rank_decreases_axiom(state: Self::State, action: Self::Action) + requires + !Self::terminal(state) + ensures + Self::terminal(Self::step(state, action)) + || Self::rank(Self::step(state, action)) < Self::rank(state); +} + +pub proof fn ranked_progress_is_well_founded<M: RankedTransitionModel>( + state: M::State, + action: M::Action, +) + requires + !M::terminal(state) + ensures + M::terminal(M::step(state, action)) || M::rank(M::step(state, action)) < M::rank(state), +{ + M::rank_decreases_axiom(state, action); +} + +pub trait FiniteSupportModel { + type State; + type Action; + + spec fn support(state: Self::State, action: Self::Action) -> Seq<(nat, Self::State)>; + + proof fn support_nonempty_axiom(state: Self::State, action: Self::Action) + ensures + Self::support(state, action).len() > 0; + + proof fn support_positive_weights_axiom(state: Self::State, action: Self::Action, index: int) + requires + 0 <= index < Self::support(state, action).len() + ensures + Self::support(state, action)[index].0 > 0; +} + +pub proof fn finite_support_has_positive_mass<M: FiniteSupportModel>( + state: M::State, + action: M::Action, +) + ensures + M::support(state, action).len() > 0, +{ + M::support_nonempty_axiom(state, action); +} + +} // verus! diff --git a/scripts/render-proof-claim.sh b/scripts/render-proof-claim.sh new file mode 100644 index 0000000..f526e23 --- /dev/null +++ b/scripts/render-proof-claim.sh @@ -0,0 +1,65 @@ +#!/usr/bin/env bash +set -euo pipefail + +ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." 
&& pwd)" +MANIFEST_FILE="${ROOT_DIR}/proofs/manifest.txt" +OUTPUT_FILE="${1:-${ROOT_DIR}/proofs/claim.md}" + +heading_for_status() { + case "$1" in + refined) echo "Refined Claims" ;; + checked) echo "Implementation-Checked Claims" ;; + model) echo "Model-Only Claims" ;; + runtime) echo "Runtime-Tested Claims" ;; + out_of_scope) echo "Out Of Scope" ;; + *) return 1 ;; + esac +} + +{ + echo "# Proof Claim Matrix" + echo + echo "This document is derived from \`proofs/manifest.txt\` and states the current proof boundary." + echo + echo "## Verified Boundary" + echo + awk -F'|' '$1 == "boundary" { printf("- %s\n", $2) }' "$MANIFEST_FILE" + + for status in refined checked model runtime out_of_scope; do + section="$(heading_for_status "$status")" + entries="$( + awk -F'|' -v status="$status" ' + $1 == "claim" && $2 == status { + printf("- `%s`: %s", $3, $4) + if (NF >= 5 && length($5) > 0) { + printf(" (proof ids: ") + n = split($5, links, ",") + for (i = 1; i <= n; i++) { + gsub(/^ +| +$/, "", links[i]) + if (i > 1) { + printf(", ") + } + printf("`%s`", links[i]) + } + printf(")") + } + printf("\n") + } + ' "$MANIFEST_FILE" + )" + if [[ -n "$entries" ]]; then + echo + echo "## ${section}" + echo + printf '%s\n' "$entries" + fi + done + + assumptions="$(awk -F'|' '$1 == "assumption" { printf("- `%s`: %s\n", $2, $3) }' "$MANIFEST_FILE")" + if [[ -n "$assumptions" ]]; then + echo + echo "## Assumptions" + echo + printf '%s\n' "$assumptions" + fi +} > "$OUTPUT_FILE" diff --git a/scripts/run-perf.sh b/scripts/run-perf.sh index efb2cbe..5c5b3e4 100755 --- a/scripts/run-perf.sh +++ b/scripts/run-perf.sh @@ -9,7 +9,7 @@ ITERATIONS="${2:-2000000}" FEATURES="${FEATURES:-builtin physics}" DATA_FILE="${PERF_DATA_FILE:-/var/tmp/gameengine-perf.data}" -export TMPDIR="${TMPDIR:-/var/tmp}" +export TMPDIR="${TMPDIR:-/tmp}" if ! 
command -v perf >/dev/null 2>&1; then echo "perf is not installed" diff --git a/scripts/run-verification.sh b/scripts/run-verification.sh index 8bfeed4..271f304 100755 --- a/scripts/run-verification.sh +++ b/scripts/run-verification.sh @@ -4,64 +4,22 @@ set -euo pipefail ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" cd "$ROOT_DIR" -export TMPDIR="${TMPDIR:-/var/tmp}" +export TMPDIR="${TMPDIR:-/tmp}" MODE="${VERIFICATION_MODE:-full}" +MANIFEST_FILE="${ROOT_DIR}/proofs/manifest.txt" -COMMON_HARNESSES=( - bit_words_round_trip - fixed_vec_push_preserves_prefix_order - compact_reward_round_trip - compact_observation_words_match_schema - compact_reward_bit_width_is_enforced - step_outcome_reward_lookup_defaults_to_zero - env_rejects_invalid_observation_words - env_rejects_reward_encoding_that_exceeds_bit_width - replay_trace_records_steps - rng_state_sanitization_is_total - seeded_stream_constructor_handles_reference_cases - next_u64_is_repeatable_for_reference_states - rewind_restores_prior_state -) - -BUILTIN_GAME_HARNESSES=( - concrete_seed_shuffle_is_a_full_permutation - player_observation_hides_opponent_hand_before_terminal - initial_observation_contracts_hold_for_concrete_seed - stand_action_replays_deterministically_for_seed_17 - hand_evaluation_matches_busted_flag - legal_actions_are_exactly_empty_cells - invalid_move_never_mutates_board -) - -PHYSICS_HARNESSES=( - clamping_keeps_body_in_bounds - oracle_view_matches_world_storage - wall_clamps_hold_for_all_edge_positions - jump_reward_is_bounded - initial_observation_and_world_contracts_hold - berry_mask_tracks_trigger_activation -) - -run_kani_harnesses() { - local label="$1" - shift - local -a extra_args=("$@") - - for harness in "${COMMON_HARNESSES[@]}"; do - echo "[kani] Running ${label} harness: ${harness}" - cargo kani --lib "${extra_args[@]}" --harness "${harness}" - done -} - -run_builtin_kani_harnesses() { - local label="$1" +run_kani_scope() { + local scope="$1" shift local -a 
extra_args=("$@") - for harness in "${BUILTIN_GAME_HARNESSES[@]}"; do - echo "[kani] Running ${label} harness: ${harness}" - cargo kani --lib "${extra_args[@]}" --harness "${harness}" - done + while IFS='|' read -r kind id harness_scope target; do + [[ -z "${kind:-}" || "${kind:0:1}" == "#" ]] && continue + if [[ "$kind" == "kani" && "$harness_scope" == "$scope" ]]; then + echo "[kani] Running ${scope} harness: ${id}" + cargo kani --lib "${extra_args[@]}" --harness "${target}" + fi + done < "$MANIFEST_FILE" } run_kani_matrix() { @@ -71,16 +29,13 @@ run_kani_matrix() { fi echo "[kani] default headless kernel" - run_kani_harnesses "default" + run_kani_scope "default" echo "[kani] builtin reference games" - run_builtin_kani_harnesses "builtin" --features builtin + run_kani_scope "builtin" --features builtin echo "[kani] builtin + physics games" - for harness in "${PHYSICS_HARNESSES[@]}"; do - echo "[kani] Running builtin+physics harness: ${harness}" - cargo kani --lib --features "builtin physics" --harness "${harness}" - done + run_kani_scope "builtin+physics" --features "builtin physics" } if [[ "$MODE" != "kani-only" ]]; then diff --git a/scripts/run-verus.sh b/scripts/run-verus.sh index c8c812d..529f34c 100755 --- a/scripts/run-verus.sh +++ b/scripts/run-verus.sh @@ -3,6 +3,7 @@ set -euo pipefail ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." 
&& pwd)" cd "$ROOT_DIR" +MANIFEST_FILE="${ROOT_DIR}/proofs/manifest.txt" REQUIRE_VERUS="${REQUIRE_VERUS:-0}" AUTO_FETCH_VERUS="${AUTO_FETCH_VERUS:-0}" @@ -76,7 +77,14 @@ if [[ -z "${VERUS_BIN_PATH:-}" ]]; then exit 0 fi -mapfile -t verus_models < <(find proofs/verus -type f -name '*.rs' | sort) +mapfile -t verus_models < <( + while IFS='|' read -r kind _id path; do + [[ -z "${kind:-}" || "${kind:0:1}" == "#" ]] && continue + if [[ "$kind" == "verus" ]]; then + printf '%s\n' "$path" + fi + done < "$MANIFEST_FILE" | sort +) if [[ ${#verus_models[@]} -eq 0 ]]; then echo "[verus] no Verus model files found under proofs/verus" diff --git a/src/builtin/tictactoe/mod.rs b/src/builtin/tictactoe/mod.rs index d1e28c1..bd8a565 100644 --- a/src/builtin/tictactoe/mod.rs +++ b/src/builtin/tictactoe/mod.rs @@ -3,6 +3,10 @@ use crate::buffer::FixedVec; use crate::compact::CompactSpec; use crate::core::single_player::{self, SinglePlayerRewardBuf}; +use crate::proof::{ + FairnessWitness, FiniteSupportOutcome, ModelGame, ProbabilisticWitness, RefinementWitness, + TerminationWitness, VerifiedGame, +}; use crate::rng::DeterministicRng; use crate::types::{PlayerId, Seed, StepOutcome, Termination}; use crate::verification::reward_and_terminal_postcondition; @@ -137,6 +141,22 @@ impl TicTacToe { } } + fn model_step( + state: &mut TicTacToeState, + action: Option, + rng: &mut DeterministicRng, + ) -> i64 { + if state.terminal { + return 0; + } + match Self::decode_action_index(action) { + Some(index) if Self::action_is_legal(state, index) => { + Self::resolve_turn(state, index, rng) + } + _ => -3, + } + } + fn resolve_turn( state: &mut TicTacToeState, action_index: usize, @@ -179,6 +199,35 @@ impl TicTacToe { } packed } + + fn empty_cell_count(state: &TicTacToeState) -> u64 { + let mut empty = 0u64; + let mut index = 0usize; + while index < state.board.len() { + if state.board[index] == TicTacToeCell::Empty { + empty += 1; + } + index += 1; + } + empty + } + + fn push_support_outcome( + 
out: &mut FixedVec, 9>, + state: TicTacToeState, + reward: i64, + weight: u64, + ) { + let mut rewards = SinglePlayerRewardBuf::default(); + single_player::push_reward(&mut rewards, reward); + out.push(FiniteSupportOutcome { + termination: Self::termination_from_state(&state), + state, + rewards, + weight, + }) + .unwrap(); + } } impl single_player::SinglePlayerGame for TicTacToe { @@ -231,16 +280,7 @@ impl single_player::SinglePlayerGame for TicTacToe { rng: &mut DeterministicRng, out: &mut StepOutcome, ) { - let reward = if state.terminal { - 0 - } else { - match Self::decode_action_index(action) { - Some(index) if Self::action_is_legal(state, index) => { - Self::resolve_turn(state, index, rng) - } - _ => -3, - } - }; + let reward = Self::model_step(state, action, rng); single_player::push_reward(&mut out.rewards, reward); out.termination = Self::termination_from_state(state); @@ -302,6 +342,164 @@ impl single_player::SinglePlayerGame for TicTacToe { } } +impl ModelGame for TicTacToe { + type ModelState = TicTacToeState; + type ModelObs = TicTacToeObservation; + type ModelWorldView = TicTacToeWorldView; + + fn model_init_with_params(&self, _seed: Seed, _params: &Self::Params) -> Self::ModelState { + TicTacToeState::default() + } + + fn model_is_terminal(&self, state: &Self::ModelState) -> bool { + state.terminal + } + + fn model_players_to_act(&self, state: &Self::ModelState, out: &mut Self::PlayerBuf) { + out.clear(); + if !state.terminal { + out.push(0).unwrap(); + } + } + + fn model_legal_actions( + &self, + state: &Self::ModelState, + _player: PlayerId, + out: &mut Self::ActionBuf, + ) { + out.clear(); + if state.terminal { + return; + } + let mut index = 0usize; + while index < state.board.len() { + if state.board[index] == TicTacToeCell::Empty { + out.push(TicTacToeAction(index as u8)).unwrap(); + } + index += 1; + } + } + + fn model_observe_player(&self, state: &Self::ModelState, _player: PlayerId) -> Self::ModelObs { + *state + } + + fn 
model_observe_spectator(&self, state: &Self::ModelState) -> Self::ModelObs { + *state + } + + fn model_world_view(&self, state: &Self::ModelState) -> Self::ModelWorldView { + *state + } + + fn model_step_in_place( + &self, + state: &mut Self::ModelState, + actions: &Self::JointActionBuf, + rng: &mut DeterministicRng, + out: &mut StepOutcome, + ) { + let action = actions + .as_slice() + .iter() + .find(|candidate| candidate.player == 0) + .map(|candidate| candidate.action); + let reward = Self::model_step(state, action, rng); + single_player::push_reward(&mut out.rewards, reward); + out.termination = Self::termination_from_state(state); + } +} + +impl RefinementWitness for TicTacToe { + fn runtime_state_to_model(&self, state: &Self::State) -> Self::ModelState { + *state + } + + fn runtime_observation_to_model(&self, observation: &Self::Obs) -> Self::ModelObs { + *observation + } + + fn runtime_world_view_to_model(&self, world: &Self::WorldView) -> Self::ModelWorldView { + *world + } +} + +impl VerifiedGame for TicTacToe {} + +impl TerminationWitness for TicTacToe { + fn model_rank(&self, state: &Self::ModelState) -> u64 { + if state.terminal { + 0 + } else { + Self::empty_cell_count(state) + } + } +} + +impl FairnessWitness for TicTacToe {} + +impl ProbabilisticWitness for TicTacToe { + type SupportBuf = FixedVec, 9>; + + fn model_step_support( + &self, + state: &Self::ModelState, + actions: &Self::JointActionBuf, + out: &mut Self::SupportBuf, + ) { + out.clear(); + let action = actions + .as_slice() + .iter() + .find(|candidate| candidate.player == 0) + .map(|candidate| candidate.action); + + if state.terminal { + Self::push_support_outcome(out, *state, 0, 1); + return; + } + + let Some(action_index) = Self::decode_action_index(action) else { + Self::push_support_outcome(out, *state, -3, 1); + return; + }; + if !Self::action_is_legal(state, action_index) { + Self::push_support_outcome(out, *state, -3, 1); + return; + } + + let mut player_state = *state; + if let 
Some(winner) = + Self::apply_mark(&mut player_state, action_index, TicTacToeCell::Player) + { + Self::push_support_outcome( + out, + player_state, + Self::reward_from_terminal_winner(winner), + 1, + ); + return; + } + + let mut index = 0usize; + while index < player_state.board.len() { + if player_state.board[index] == TicTacToeCell::Empty { + let mut branch = player_state; + let reward = if let Some(winner) = + Self::apply_mark(&mut branch, index, TicTacToeCell::Opponent) + { + Self::reward_from_terminal_winner(winner) + } else { + 0 + }; + Self::push_support_outcome(out, branch, reward, 1); + } + index += 1; + } + } +} + #[cfg(test)] mod tests; diff --git a/src/builtin/tictactoe/proofs.rs b/src/builtin/tictactoe/proofs.rs index 97dfb93..473da47 100644 --- a/src/builtin/tictactoe/proofs.rs +++ b/src/builtin/tictactoe/proofs.rs @@ -1,9 +1,32 @@ use super::{TicTacToe, TicTacToeAction, TicTacToeCell, TicTacToeState}; use crate::buffer::FixedVec; use crate::game::Game; +use crate::proof::{assert_finite_support_is_valid, assert_ranked_progress}; use crate::session::{FixedHistory, SessionKernel}; use crate::types::PlayerAction; +fn action(cell: u8) -> FixedVec<PlayerAction<TicTacToeAction>, 1> { + let mut actions = FixedVec::<PlayerAction<TicTacToeAction>, 1>::default(); + actions + .push(PlayerAction { + player: 0, + action: TicTacToeAction(cell), + }) + .unwrap(); + actions +} + +crate::declare_refinement_harnesses!( + game = TicTacToe, + params = (), + seed = 7, + actions = action(0), + trace = [action(0), action(0)], + init = ttt_model_init_refines_runtime, + step = ttt_model_step_refines_runtime, + replay = ttt_model_replay_refines_runtime, +); + #[kani::proof] #[kani::unwind(16)] fn legal_actions_are_exactly_empty_cells() { @@ -76,3 +99,15 @@ fn invalid_move_never_mutates_board() { session.step_with_joint_actions(&actions); assert_eq!(*session.state(), before); } + +#[kani::proof] +#[kani::unwind(16)] +fn ranked_progress_holds_for_opening_move() { + assert_ranked_progress(&TicTacToe, &TicTacToeState::default(), &action(0), 
7); +} + +#[kani::proof] +#[kani::unwind(16)] +fn probabilistic_support_is_finite_and_nonempty() { + assert_finite_support_is_valid(&TicTacToe, &TicTacToeState::default(), &action(0)); +} diff --git a/src/lib.rs b/src/lib.rs index 0f9a0f1..025d114 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,7 +1,6 @@ //! Deterministic game engine core with compact codecs, verification hooks, and render adapters. pub mod core; -#[cfg(feature = "proof")] pub mod proof; #[cfg(feature = "builtin")] pub mod registry; diff --git a/src/proof/liveness.rs b/src/proof/liveness.rs new file mode 100644 index 0000000..6340898 --- /dev/null +++ b/src/proof/liveness.rs @@ -0,0 +1,89 @@ +//! Liveness-oriented proof scaffolding layered on top of executable model semantics. + +use core::fmt::Debug; + +use crate::buffer::Buffer; +use crate::proof::model::ModelGame; +use crate::rng::DeterministicRng; +use crate::types::{StepOutcome, Termination}; + +/// Ranking-function based termination witness over the executable model. 
+pub trait TerminationWitness: ModelGame { + fn model_rank(&self, state: &Self::ModelState) -> u64; + + fn terminal_rank_is_exact(&self, state: &Self::ModelState) -> bool { + self.model_is_terminal(state) == (self.model_rank(state) == 0) + } +} + +pub fn assert_ranked_progress( + game: &G, + pre: &G::ModelState, + actions: &G::JointActionBuf, + seed: u64, +) { + let mut post = pre.clone(); + let mut rng = DeterministicRng::from_seed_and_stream(seed, 777); + let mut outcome = StepOutcome::::default(); + let pre_rank = game.model_rank(pre); + game.model_step_in_place(&mut post, actions, &mut rng, &mut outcome); + + assert!(game.terminal_rank_is_exact(pre)); + assert!(game.terminal_rank_is_exact(&post)); + + if !game.model_is_terminal(pre) { + assert!(game.model_is_terminal(&post) || game.model_rank(&post) < pre_rank); + } else { + assert_eq!(game.model_rank(&post), 0); + assert!(outcome.termination.is_terminal()); + } +} + +/// Declarative fairness witness scaffold for future game-specific obligations. +pub trait FairnessWitness: ModelGame { + fn fairness_assumptions(&self) -> &'static [&'static str] { + &[] + } +} + +/// One weighted model outcome in a finite-support stochastic step. +#[derive(Clone, Debug, Default, Eq, Hash, PartialEq)] +pub struct FiniteSupportOutcome { + pub state: S, + pub rewards: R, + pub termination: Termination, + pub weight: u64, +} + +/// Finite-support stochastic witness scaffold for probabilistic liveness proofs. 
+pub trait ProbabilisticWitness: ModelGame { + type SupportBuf: Buffer> + + Clone + + Debug + + Default + + Eq + + PartialEq; + + fn model_step_support( + &self, + state: &Self::ModelState, + actions: &Self::JointActionBuf, + out: &mut Self::SupportBuf, + ); +} + +pub fn assert_finite_support_is_valid( + game: &G, + state: &G::ModelState, + actions: &G::JointActionBuf, +) { + let mut support = G::SupportBuf::default(); + game.model_step_support(state, actions, &mut support); + assert!(!support.as_slice().is_empty()); + let mut total_weight = 0u64; + for outcome in support.as_slice() { + assert!(outcome.weight > 0); + total_weight = total_weight.saturating_add(outcome.weight); + } + assert!(total_weight > 0); +} diff --git a/src/proof/macros.rs b/src/proof/macros.rs new file mode 100644 index 0000000..0aa152e --- /dev/null +++ b/src/proof/macros.rs @@ -0,0 +1,62 @@ +#[macro_export] +macro_rules! declare_refinement_harnesses { + ( + game = $game:expr, + params = $params:expr, + seed = $seed:expr, + actions = $actions:expr, + trace = $trace:expr, + init = $init_name:ident, + step = $step_name:ident, + replay = $replay_name:ident $(,)? 
+ ) => { + #[kani::proof] + fn $init_name() { + let game = $game; + let params = $params; + $crate::proof::assert_model_init_refinement(&game, $seed, &params); + let state = game.init_with_params($seed, &params); + $crate::proof::assert_model_observation_refinement(&game, &state); + } + + #[kani::proof] + fn $step_name() { + let game = $game; + let params = $params; + let state = game.init_with_params($seed, &params); + let actions = $actions; + $crate::proof::assert_model_step_refinement(&game, &state, &actions, $seed); + } + + #[kani::proof] + fn $replay_name() { + let game = $game; + let params = $params; + let trace = $trace; + $crate::proof::assert_model_replay_refinement(game, $seed, params, &trace); + } + }; + ( + game = $game:expr, + params = $params:expr, + seed = $seed:expr, + actions = $actions:expr, + init = $init_name:ident, + step = $step_name:ident, + replay = $replay_name:ident $(,)? + ) => { + $crate::declare_refinement_harnesses!( + game = $game, + params = $params, + seed = $seed, + actions = $actions, + trace = { + let actions = $actions; + [actions] + }, + init = $init_name, + step = $step_name, + replay = $replay_name, + ); + }; +} diff --git a/src/proof/manifest.rs b/src/proof/manifest.rs new file mode 100644 index 0000000..3d97a74 --- /dev/null +++ b/src/proof/manifest.rs @@ -0,0 +1,358 @@ +//! Parsed proof manifest and claim-status helpers. + +use std::sync::OnceLock; + +#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] +pub enum ProofStatus { + /// Backed by bounded checks over the Rust implementation. + Checked, + /// Backed by an abstract Verus model only. + Model, + /// Backed by both Verus model lemmas and Kani implementation/refinement proofs. + Refined, + /// Backed by runtime tests and checks, not formal proofs. + Runtime, + /// Explicitly outside the formal proof boundary. 
+ OutOfScope, +} + +impl ProofStatus { + fn parse(raw: &str) -> Option<Self> { + match raw { + "checked" => Some(Self::Checked), + "model" => Some(Self::Model), + "refined" => Some(Self::Refined), + "runtime" => Some(Self::Runtime), + "out_of_scope" => Some(Self::OutOfScope), + _ => None, + } + } + + pub fn heading(self) -> &'static str { + match self { + Self::Checked => "Implementation-Checked Claims", + Self::Model => "Model-Only Claims", + Self::Refined => "Refined Claims", + Self::Runtime => "Runtime-Tested Claims", + Self::OutOfScope => "Out Of Scope", + } + } +} + +#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] +pub enum HarnessKind { + Kani, + Verus, +} + +#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] +pub struct ManifestHarness { + pub kind: HarnessKind, + pub id: &'static str, + pub scope: &'static str, + pub target: &'static str, +} + +#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] +pub struct ManifestClaim { + pub status: ProofStatus, + pub component: &'static str, + pub text: &'static str, + pub links: &'static [&'static str], +} + +#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] +pub struct ManifestAssumption { + pub component: &'static str, + pub text: &'static str, +} + +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct VerificationManifest { + boundary: &'static str, + harnesses: Vec<ManifestHarness>, + claims: Vec<ManifestClaim>, + assumptions: Vec<ManifestAssumption>, +} + +impl VerificationManifest { + pub fn current() -> &'static Self { + static MANIFEST: OnceLock<VerificationManifest> = OnceLock::new(); + MANIFEST.get_or_init(|| { + let manifest = Self::parse(include_str!("../../proofs/manifest.txt")); + manifest.validate().expect("proof manifest is invalid"); + manifest + }) + } + + pub fn parse(raw: &'static str) -> Self { + let mut boundary = "kernel+builtins"; + let mut harnesses = Vec::new(); + let mut claims = Vec::new(); + let mut assumptions = Vec::new(); + + for line in raw.lines() { + let line = line.trim(); + if line.is_empty() || line.starts_with('#') { + continue; + } + + let parts: 
Vec<&'static str> = line.split('|').collect(); + match parts.as_slice() { + ["boundary", value] => boundary = value, + ["kani", id, scope, target] => harnesses.push(ManifestHarness { + kind: HarnessKind::Kani, + id, + scope, + target, + }), + ["verus", id, target] => harnesses.push(ManifestHarness { + kind: HarnessKind::Verus, + id, + scope: "global", + target, + }), + ["claim", status, component, text, links] => { + let status = + ProofStatus::parse(status).expect("proof manifest claim status is invalid"); + let links = parse_links(links); + claims.push(ManifestClaim { + status, + component, + text, + links, + }); + } + ["assumption", component, text] => { + assumptions.push(ManifestAssumption { component, text }) + } + _ => panic!("invalid proof manifest line: {line}"), + } + } + + Self { + boundary, + harnesses, + claims, + assumptions, + } + } + + pub fn boundary(&self) -> &'static str { + self.boundary + } + + pub fn harnesses(&self) -> &[ManifestHarness] { + &self.harnesses + } + + pub fn claims(&self) -> &[ManifestClaim] { + &self.claims + } + + pub fn assumptions(&self) -> &[ManifestAssumption] { + &self.assumptions + } + + pub fn kani_harnesses_for_scope(&self, scope: &str) -> impl Iterator { + self.harnesses + .iter() + .filter(move |harness| harness.kind == HarnessKind::Kani && harness.scope == scope) + } + + pub fn verus_models(&self) -> impl Iterator { + self.harnesses + .iter() + .filter(|harness| harness.kind == HarnessKind::Verus) + } + + pub fn render_claim_markdown(&self) -> String { + let mut output = String::new(); + output.push_str("# Proof Claim Matrix\n\n"); + output.push_str( + "This document is derived from `proofs/manifest.txt` and states the current proof boundary.\n\n", + ); + output.push_str("## Verified Boundary\n\n"); + output.push_str("- "); + output.push_str(self.boundary); + output.push('\n'); + + for status in [ + ProofStatus::Refined, + ProofStatus::Checked, + ProofStatus::Model, + ProofStatus::Runtime, + ProofStatus::OutOfScope, 
+ ] { + let mut first = true; + for claim in self.claims.iter().filter(|claim| claim.status == status) { + if first { + output.push_str("\n## "); + output.push_str(status.heading()); + output.push_str("\n\n"); + first = false; + } + output.push_str("- `"); + output.push_str(claim.component); + output.push_str("`: "); + output.push_str(claim.text); + if !claim.links.is_empty() { + output.push_str(" (proof ids: "); + let mut first_link = true; + for link in claim.links { + if !first_link { + output.push_str(", "); + } + output.push('`'); + output.push_str(link); + output.push('`'); + first_link = false; + } + output.push(')'); + } + output.push('\n'); + } + } + + if !self.assumptions.is_empty() { + output.push_str("\n## Assumptions\n\n"); + for assumption in &self.assumptions { + output.push_str("- `"); + output.push_str(assumption.component); + output.push_str("`: "); + output.push_str(assumption.text); + output.push('\n'); + } + } + + output + } + + pub fn validate(&self) -> Result<(), String> { + let mut harness_ids = Vec::new(); + for harness in &self.harnesses { + if harness_ids.contains(&harness.id) { + return Err(format!( + "duplicate harness id `{}` in proof manifest", + harness.id + )); + } + harness_ids.push(harness.id); + } + + let mut claim_components = Vec::new(); + for claim in &self.claims { + if claim_components.contains(&claim.component) { + return Err(format!( + "duplicate claim component `{}` in proof manifest", + claim.component + )); + } + claim_components.push(claim.component); + + for link in claim.links { + if !harness_ids.contains(link) { + return Err(format!( + "claim `{}` references unknown proof id `{link}`", + claim.component + )); + } + } + + let has_kani = claim.links.iter().any(|link| { + self.harnesses + .iter() + .any(|harness| harness.id == *link && harness.kind == HarnessKind::Kani) + }); + let has_verus = claim.links.iter().any(|link| { + self.harnesses + .iter() + .any(|harness| harness.id == *link && harness.kind == 
HarnessKind::Verus) + }); + + match claim.status { + ProofStatus::Refined => { + if !has_kani || !has_verus { + return Err(format!( + "refined claim `{}` must link both Kani and Verus proofs", + claim.component + )); + } + } + ProofStatus::Checked => { + if !has_kani || has_verus { + return Err(format!( + "checked claim `{}` must link Kani proofs only", + claim.component + )); + } + } + ProofStatus::Model => { + if !has_verus || has_kani { + return Err(format!( + "model claim `{}` must link Verus proofs only", + claim.component + )); + } + } + ProofStatus::Runtime | ProofStatus::OutOfScope => { + if has_kani || has_verus { + return Err(format!( + "{} claim `{}` must not link formal proof ids", + match claim.status { + ProofStatus::Runtime => "runtime", + ProofStatus::OutOfScope => "out_of_scope", + _ => unreachable!(), + }, + claim.component + )); + } + } + } + } + + Ok(()) + } +} + +fn parse_links(raw: &'static str) -> &'static [&'static str] { + let links: Vec<&'static str> = raw + .split(',') + .map(str::trim) + .filter(|link| !link.is_empty()) + .collect(); + Box::leak(links.into_boxed_slice()) +} + +#[cfg(test)] +mod tests { + use super::{ProofStatus, VerificationManifest}; + + #[test] + fn manifest_is_valid() { + VerificationManifest::current().validate().unwrap(); + } + + #[test] + fn rendered_claims_include_refined_section() { + let rendered = VerificationManifest::current().render_claim_markdown(); + assert!(rendered.contains(ProofStatus::Refined.heading())); + } + + #[test] + fn checked_claims_require_kani_only_links() { + let manifest = VerificationManifest::parse( + "kani|k|default|k\nverus|v|proofs/verus/core_model.rs\nclaim|checked|engine.bad|bad claim|v\n", + ); + let error = manifest.validate().unwrap_err(); + assert!(error.contains("must link Kani proofs only")); + } + + #[test] + fn runtime_claims_reject_formal_links() { + let manifest = + VerificationManifest::parse("kani|k|default|k\nclaim|runtime|engine.bad|bad claim|k\n"); + let error = 
manifest.validate().unwrap_err(); + assert!(error.contains("must not link formal proof ids")); + } +} diff --git a/src/proof/mod.rs b/src/proof/mod.rs index 3ac96fb..492d078 100644 --- a/src/proof/mod.rs +++ b/src/proof/mod.rs @@ -1,17 +1,42 @@ -//! Proof-facing assertions and proof-claim surface exported by the crate. +//! Proof-facing manifests, model/refinement traits, and reusable harness helpers. + +#[macro_use] +mod macros; + +pub mod liveness; +pub mod manifest; +pub mod model; +pub mod refinement; use crate::buffer::Buffer; use crate::game::Game; -use crate::types::Seed; +use crate::types::{ReplayStep, Seed}; -/// Human-readable statement of the current proof obligations. -pub const PROOF_CLAIM: &str = include_str!("../../proofs/README.md"); +pub const PROOF_CLAIM: &str = include_str!("../../proofs/claim.md"); +pub const PROOF_MANIFEST_RAW: &str = include_str!("../../proofs/manifest.txt"); pub use crate::verification::{ assert_compact_roundtrip, assert_observation_contracts, assert_transition_contracts, }; +pub use liveness::{ + FairnessWitness, FiniteSupportOutcome, ProbabilisticWitness, TerminationWitness, + assert_finite_support_is_valid, assert_ranked_progress, +}; +pub use manifest::{ + ManifestAssumption, ManifestClaim, ManifestHarness, ProofStatus, VerificationManifest, +}; +pub use model::{ModelGame, RefinementWitness, SafetyWitness, VerifiedGame}; +pub use refinement::{ + assert_model_init_refinement, assert_model_observation_refinement, + assert_model_replay_refinement, assert_model_step_refinement, +}; -/// Runs the canonical generated-game proof surface checks. +/// Returns the parsed proof manifest for this crate. +pub fn verification_manifest() -> &'static VerificationManifest { + VerificationManifest::current() +} + +/// Runs the historical generated-game safety surface checks. 
pub fn assert_generated_game_surface( game: &G, state: &G::State, @@ -26,3 +51,60 @@ pub fn assert_generated_game_surface( assert_compact_roundtrip(game, &first.action); } } + +/// Runs the strengthened safety/init/step proof surface for an explicitly verified game. +pub fn assert_verified_game_safety_surface( + game: &G, + state: &G::State, + actions: &G::JointActionBuf, + params: &G::Params, + seed: Seed, +) { + assert_generated_game_surface(game, state, actions, seed); + assert_model_init_refinement(game, seed, params); + assert_model_observation_refinement(game, state); + assert_model_step_refinement(game, state, actions, seed); +} + +/// Runs the replay/rewind refinement surface for an explicitly verified game. +pub fn assert_verified_game_replay_surface( + game: G, + params: G::Params, + seed: Seed, + trace: &[G::JointActionBuf], +) where + G: VerifiedGame + Clone, + ReplayStep: Default, +{ + assert_model_replay_refinement(game, seed, params, trace); +} + +/// Runs the ranking-based liveness surface for a verified game. +pub fn assert_verified_termination_surface( + game: &G, + state: &G::ModelState, + actions: &G::JointActionBuf, + seed: Seed, +) { + assert_ranked_progress(game, state, actions, seed); +} + +/// Runs the finite-support stochastic surface for a verified game. +pub fn assert_verified_probabilistic_surface( + game: &G, + state: &G::ModelState, + actions: &G::JointActionBuf, +) { + assert_finite_support_is_valid(game, state, actions); +} + +/// Backwards-compatible alias for the safety/init/step surface. +pub fn assert_verified_game_surface( + game: &G, + state: &G::State, + actions: &G::JointActionBuf, + params: &G::Params, + seed: Seed, +) { + assert_verified_game_safety_surface(game, state, actions, params, seed); +} diff --git a/src/proof/model.rs b/src/proof/model.rs new file mode 100644 index 0000000..093ff6e --- /dev/null +++ b/src/proof/model.rs @@ -0,0 +1,116 @@ +//! 
Proof traits that separate runtime semantics from executable reference models. + +use core::fmt::Debug; + +use crate::compact::CompactSpec; +use crate::game::Game; +use crate::rng::DeterministicRng; +use crate::types::{PlayerId, Seed, StepOutcome}; + +/// Safety contracts lifted out of the runtime trait surface. +pub trait SafetyWitness: Game { + fn safety_state_invariant(&self, state: &Self::State) -> bool { + self.state_invariant(state) + } + + fn safety_action_invariant(&self, action: &Self::Action) -> bool { + self.action_invariant(action) + } + + fn safety_player_observation_invariant( + &self, + state: &Self::State, + player: PlayerId, + observation: &Self::Obs, + ) -> bool { + self.player_observation_invariant(state, player, observation) + } + + fn safety_spectator_observation_invariant( + &self, + state: &Self::State, + observation: &Self::Obs, + ) -> bool { + self.spectator_observation_invariant(state, observation) + } + + fn safety_world_view_invariant(&self, state: &Self::State, world: &Self::WorldView) -> bool { + self.world_view_invariant(state, world) + } + + fn safety_transition_postcondition( + &self, + pre: &Self::State, + actions: &Self::JointActionBuf, + post: &Self::State, + outcome: &StepOutcome, + ) -> bool { + self.transition_postcondition(pre, actions, post, outcome) + } +} + +impl SafetyWitness for T {} + +/// Executable reference semantics for a runtime `Game` implementation. 
+pub trait ModelGame: Game { + type ModelState: Clone + Debug + Eq + PartialEq; + type ModelObs: Clone + Debug + Eq + PartialEq; + type ModelWorldView: Clone + Debug + Eq + PartialEq; + + fn model_init_with_params(&self, seed: Seed, params: &Self::Params) -> Self::ModelState; + fn model_is_terminal(&self, state: &Self::ModelState) -> bool; + fn model_players_to_act(&self, state: &Self::ModelState, out: &mut Self::PlayerBuf); + fn model_legal_actions( + &self, + state: &Self::ModelState, + player: PlayerId, + out: &mut Self::ActionBuf, + ); + fn model_observe_player(&self, state: &Self::ModelState, player: PlayerId) -> Self::ModelObs; + fn model_observe_spectator(&self, state: &Self::ModelState) -> Self::ModelObs; + fn model_world_view(&self, state: &Self::ModelState) -> Self::ModelWorldView; + fn model_step_in_place( + &self, + state: &mut Self::ModelState, + actions: &Self::JointActionBuf, + rng: &mut DeterministicRng, + out: &mut StepOutcome, + ); + + fn model_compact_spec_for_params(&self, params: &Self::Params) -> CompactSpec { + self.compact_spec_for_params(params) + } +} + +/// Refinement witness between runtime values and executable model values. 
+pub trait RefinementWitness: ModelGame + SafetyWitness { + fn runtime_state_to_model(&self, state: &Self::State) -> Self::ModelState; + fn runtime_observation_to_model(&self, observation: &Self::Obs) -> Self::ModelObs; + fn runtime_world_view_to_model(&self, world: &Self::WorldView) -> Self::ModelWorldView; + + fn state_refines_model(&self, state: &Self::State, model: &Self::ModelState) -> bool { + self.runtime_state_to_model(state) == *model + } + + fn observation_refines_model(&self, observation: &Self::Obs, model: &Self::ModelObs) -> bool { + self.runtime_observation_to_model(observation) == *model + } + + fn world_view_refines_model( + &self, + world: &Self::WorldView, + model: &Self::ModelWorldView, + ) -> bool { + self.runtime_world_view_to_model(world) == *model + } + + fn compact_spec_refines_model(&self, params: &Self::Params) -> bool { + self.compact_spec_for_params(params) == self.model_compact_spec_for_params(params) + } +} + +/// Explicit marker for games that opt into the stronger proof/refinement surface. +/// +/// This is intentionally not blanket-implemented: a game should opt in only after +/// its verification surface and manifest claim are deliberate. +pub trait VerifiedGame: RefinementWitness {} diff --git a/src/proof/refinement.rs b/src/proof/refinement.rs new file mode 100644 index 0000000..50d672e --- /dev/null +++ b/src/proof/refinement.rs @@ -0,0 +1,169 @@ +//! Helpers that compare runtime game behavior against executable model semantics. 
+ +use crate::buffer::Buffer; +use crate::proof::model::RefinementWitness; +use crate::rng::DeterministicRng; +use crate::session::{FixedHistory, SessionKernel}; +use crate::types::{ReplayStep, Seed, StepOutcome}; + +pub fn assert_model_init_refinement( + game: &G, + seed: Seed, + params: &G::Params, +) { + let state = game.init_with_params(seed, params); + let model = game.model_init_with_params(seed, params); + assert!(game.safety_state_invariant(&state)); + assert!(game.state_refines_model(&state, &model)); + assert_eq!(game.is_terminal(&state), game.model_is_terminal(&model)); + assert!(game.compact_spec_refines_model(params)); + + let mut runtime_players = G::PlayerBuf::default(); + let mut model_players = G::PlayerBuf::default(); + game.players_to_act(&state, &mut runtime_players); + game.model_players_to_act(&model, &mut model_players); + assert_eq!(runtime_players, model_players); + + for player in 0..game.player_count() { + let mut runtime_actions = G::ActionBuf::default(); + let mut model_actions = G::ActionBuf::default(); + game.legal_actions(&state, player, &mut runtime_actions); + game.model_legal_actions(&model, player, &mut model_actions); + assert_eq!(runtime_actions, model_actions); + } +} + +pub fn assert_model_observation_refinement(game: &G, state: &G::State) { + let model = game.runtime_state_to_model(state); + for player in 0..game.player_count() { + let observation = game.observe_player(state, player); + let model_observation = game.model_observe_player(&model, player); + assert!(game.safety_player_observation_invariant(state, player, &observation)); + assert!(game.observation_refines_model(&observation, &model_observation)); + } + + let spectator = game.observe_spectator(state); + let model_spectator = game.model_observe_spectator(&model); + assert!(game.safety_spectator_observation_invariant(state, &spectator)); + assert!(game.observation_refines_model(&spectator, &model_spectator)); + + let world = game.world_view(state); + let model_world 
= game.model_world_view(&model); + assert!(game.safety_world_view_invariant(state, &world)); + assert!(game.world_view_refines_model(&world, &model_world)); +} + +pub fn assert_model_step_refinement( + game: &G, + pre: &G::State, + actions: &G::JointActionBuf, + seed: Seed, +) { + assert!(game.safety_state_invariant(pre)); + for action in actions.as_slice() { + assert!(game.safety_action_invariant(&action.action)); + } + + let mut runtime_state = pre.clone(); + let mut model_state = game.runtime_state_to_model(pre); + let mut runtime_rng = DeterministicRng::from_seed_and_stream(seed, 99); + let mut model_rng = runtime_rng; + let mut runtime_outcome = StepOutcome::::default(); + let mut model_outcome = StepOutcome::::default(); + + game.step_in_place( + &mut runtime_state, + actions, + &mut runtime_rng, + &mut runtime_outcome, + ); + game.model_step_in_place( + &mut model_state, + actions, + &mut model_rng, + &mut model_outcome, + ); + + assert_eq!(runtime_rng, model_rng); + assert_eq!(runtime_outcome, model_outcome); + assert!(game.safety_state_invariant(&runtime_state)); + assert!(game.state_refines_model(&runtime_state, &model_state)); + assert_model_observation_refinement(game, &runtime_state); + assert!(game.safety_transition_postcondition(pre, actions, &runtime_state, &runtime_outcome,)); +} + +pub fn assert_model_replay_refinement( + game: G, + seed: Seed, + params: G::Params, + trace: &[G::JointActionBuf], +) where + G: RefinementWitness + Clone, + ReplayStep: Default, +{ + type ProofHistory = FixedHistory; + + let mut session = + SessionKernel::>::new_with_params(game.clone(), seed, params.clone()); + let mut model_state = game.model_init_with_params(seed, ¶ms); + let mut model_rng = DeterministicRng::from_seed_and_stream(seed, 1); + + for actions in trace { + if session.is_terminal() { + break; + } + let outcome = session.step_with_joint_actions(actions).clone(); + let mut model_outcome = StepOutcome::::default(); + game.model_step_in_place( + &mut 
model_state, + actions, + &mut model_rng, + &mut model_outcome, + ); + model_outcome.tick = session.current_tick(); + assert_eq!(outcome, model_outcome); + assert_eq!(session.rng(), model_rng); + assert!(game.state_refines_model(session.state(), &model_state)); + assert_model_observation_refinement(&game, session.state()); + + let recorded = &session.trace().steps[(session.current_tick() - 1) as usize]; + assert_eq!(recorded.tick, outcome.tick); + assert_eq!(&recorded.actions, actions); + assert_eq!(&recorded.rewards, &outcome.rewards); + assert_eq!(recorded.termination, outcome.termination); + } + + let executed_ticks = session.trace().len() as u64; + let mut target_tick = 0u64; + while target_tick <= executed_ticks { + let restored_state = session + .state_at(target_tick) + .expect("recorded tick must be restorable"); + let fork = session + .fork_at(target_tick) + .expect("recorded tick must produce a rewound fork"); + let mut replay_state = game.model_init_with_params(seed, ¶ms); + let mut replay_rng = DeterministicRng::from_seed_and_stream(seed, 1); + let mut replay_tick = 0usize; + while replay_tick < target_tick as usize { + let mut replay_outcome = StepOutcome::::default(); + game.model_step_in_place( + &mut replay_state, + &trace[replay_tick], + &mut replay_rng, + &mut replay_outcome, + ); + replay_tick += 1; + } + + assert!(game.safety_state_invariant(&restored_state)); + assert!(game.state_refines_model(&restored_state, &replay_state)); + assert_model_observation_refinement(&game, &restored_state); + assert_eq!(fork.current_tick(), target_tick); + assert_eq!(*fork.state(), restored_state); + assert_eq!(fork.rng(), replay_rng); + assert_model_observation_refinement(&game, fork.state()); + + target_tick += 1; + } +} From 4ad917b6985f3425a47353f2b85d187f1d40c813 Mon Sep 17 00:00:00 2001 From: Noah Cashin Date: Wed, 1 Apr 2026 22:25:54 -0400 Subject: [PATCH 11/12] Document code --- src/proof/liveness.rs | 11 +++++++++++ src/proof/macros.rs | 1 + 
src/proof/manifest.rs | 30 ++++++++++++++++++++++++++++++ src/proof/mod.rs | 2 ++ src/proof/model.rs | 25 +++++++++++++++++++++++++ src/proof/refinement.rs | 4 ++++ 6 files changed, 73 insertions(+) diff --git a/src/proof/liveness.rs b/src/proof/liveness.rs index 6340898..2c81c66 100644 --- a/src/proof/liveness.rs +++ b/src/proof/liveness.rs @@ -9,13 +9,16 @@ use crate::types::{StepOutcome, Termination}; /// Ranking-function based termination witness over the executable model. pub trait TerminationWitness: ModelGame { + /// Returns a natural-number rank that must decrease on non-terminal progress steps. fn model_rank(&self, state: &Self::ModelState) -> u64; + /// Returns whether terminal states are exactly the rank-zero states. fn terminal_rank_is_exact(&self, state: &Self::ModelState) -> bool { self.model_is_terminal(state) == (self.model_rank(state) == 0) } } +/// Checks the ranking-function progress obligation for one model transition. pub fn assert_ranked_progress( game: &G, pre: &G::ModelState, @@ -41,6 +44,7 @@ pub fn assert_ranked_progress( /// Declarative fairness witness scaffold for future game-specific obligations. pub trait FairnessWitness: ModelGame { + /// Returns the fairness assumptions required by the game's liveness claims. fn fairness_assumptions(&self) -> &'static [&'static str] { &[] } @@ -49,14 +53,19 @@ pub trait FairnessWitness: ModelGame { /// One weighted model outcome in a finite-support stochastic step. #[derive(Clone, Debug, Default, Eq, Hash, PartialEq)] pub struct FiniteSupportOutcome { + /// Successor state for this support point. pub state: S, + /// Reward buffer emitted for this support point. pub rewards: R, + /// Termination status emitted for this support point. pub termination: Termination, + /// Relative support weight for this outcome. pub weight: u64, } /// Finite-support stochastic witness scaffold for probabilistic liveness proofs. 
pub trait ProbabilisticWitness: ModelGame { + /// Buffer type that stores all finite-support outcomes for one model step. type SupportBuf: Buffer> + Clone + Debug @@ -64,6 +73,7 @@ pub trait ProbabilisticWitness: ModelGame { + Eq + PartialEq; + /// Enumerates the finite support of one model step for the given state and actions. fn model_step_support( &self, state: &Self::ModelState, @@ -72,6 +82,7 @@ pub trait ProbabilisticWitness: ModelGame { ); } +/// Checks that a probabilistic witness exposes a non-empty, positive-weight finite support. pub fn assert_finite_support_is_valid( game: &G, state: &G::ModelState, diff --git a/src/proof/macros.rs b/src/proof/macros.rs index 0aa152e..b3a54f9 100644 --- a/src/proof/macros.rs +++ b/src/proof/macros.rs @@ -1,3 +1,4 @@ +/// Declares the standard Kani refinement harness triplet for a verified game. #[macro_export] macro_rules! declare_refinement_harnesses { ( diff --git a/src/proof/manifest.rs b/src/proof/manifest.rs index 3d97a74..572114d 100644 --- a/src/proof/manifest.rs +++ b/src/proof/manifest.rs @@ -2,6 +2,7 @@ use std::sync::OnceLock; +/// Classification for how strongly a component is covered by the proof system. #[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] pub enum ProofStatus { /// Backed by bounded checks over the Rust implementation. @@ -17,6 +18,7 @@ pub enum ProofStatus { } impl ProofStatus { + /// Parses a manifest status token. fn parse(raw: &str) -> Option { match raw { "checked" => Some(Self::Checked), @@ -28,6 +30,7 @@ impl ProofStatus { } } + /// Returns the markdown heading used for this status in the claim matrix. pub fn heading(self) -> &'static str { match self { Self::Checked => "Implementation-Checked Claims", @@ -39,34 +42,51 @@ impl ProofStatus { } } +/// Kind of verification harness referenced by the proof manifest. #[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] pub enum HarnessKind { + /// A Kani harness over compiled Rust code. Kani, + /// A Verus proof file or model-checking target. 
Verus, } +/// One proof harness entry declared in the manifest. #[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] pub struct ManifestHarness { + /// Verification technology used by the harness. pub kind: HarnessKind, + /// Stable manifest identifier for the harness. pub id: &'static str, + /// Logical scope or component group the harness belongs to. pub scope: &'static str, + /// Concrete target invoked by tooling for this harness. pub target: &'static str, } +/// One claim about a component inside the verified boundary. #[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] pub struct ManifestClaim { + /// Strength of the claim. pub status: ProofStatus, + /// Stable component identifier used in reports. pub component: &'static str, + /// Human-readable statement of what is claimed. pub text: &'static str, + /// Proof harness identifiers that justify the claim. pub links: &'static [&'static str], } +/// One explicit assumption required by a proof claim. #[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] pub struct ManifestAssumption { + /// Component the assumption applies to. pub component: &'static str, + /// Human-readable statement of the assumption. pub text: &'static str, } +/// Parsed proof manifest used by reporting and verification tooling. #[derive(Clone, Debug, Eq, PartialEq)] pub struct VerificationManifest { boundary: &'static str, @@ -76,6 +96,7 @@ pub struct VerificationManifest { } impl VerificationManifest { + /// Returns the crate's statically embedded proof manifest. pub fn current() -> &'static Self { static MANIFEST: OnceLock = OnceLock::new(); MANIFEST.get_or_init(|| { @@ -85,6 +106,7 @@ impl VerificationManifest { }) } + /// Parses a manifest file into a structured representation. pub fn parse(raw: &'static str) -> Self { let mut boundary = "kernel+builtins"; let mut harnesses = Vec::new(); @@ -138,34 +160,41 @@ impl VerificationManifest { } } + /// Returns the declared proof boundary label. 
pub fn boundary(&self) -> &'static str { self.boundary } + /// Returns every declared proof harness. pub fn harnesses(&self) -> &[ManifestHarness] { &self.harnesses } + /// Returns every declared proof claim. pub fn claims(&self) -> &[ManifestClaim] { &self.claims } + /// Returns every explicit assumption listed in the manifest. pub fn assumptions(&self) -> &[ManifestAssumption] { &self.assumptions } + /// Returns the Kani harnesses belonging to one manifest scope. pub fn kani_harnesses_for_scope(&self, scope: &str) -> impl Iterator { self.harnesses .iter() .filter(move |harness| harness.kind == HarnessKind::Kani && harness.scope == scope) } + /// Returns all Verus entries in the manifest. pub fn verus_models(&self) -> impl Iterator { self.harnesses .iter() .filter(|harness| harness.kind == HarnessKind::Verus) } + /// Renders the manifest into the public proof-claim markdown summary. pub fn render_claim_markdown(&self) -> String { let mut output = String::new(); output.push_str("# Proof Claim Matrix\n\n"); @@ -228,6 +257,7 @@ impl VerificationManifest { output } + /// Validates manifest consistency, proof links, and claim/status coherence. pub fn validate(&self) -> Result<(), String> { let mut harness_ids = Vec::new(); for harness in &self.harnesses { diff --git a/src/proof/mod.rs b/src/proof/mod.rs index 492d078..a0cf6de 100644 --- a/src/proof/mod.rs +++ b/src/proof/mod.rs @@ -12,7 +12,9 @@ use crate::buffer::Buffer; use crate::game::Game; use crate::types::{ReplayStep, Seed}; +/// Rendered proof claim matrix generated from the current manifest. pub const PROOF_CLAIM: &str = include_str!("../../proofs/claim.md"); +/// Raw proof manifest used to drive Kani, Verus, and claim reporting. 
pub const PROOF_MANIFEST_RAW: &str = include_str!("../../proofs/manifest.txt"); pub use crate::verification::{ diff --git a/src/proof/model.rs b/src/proof/model.rs index 093ff6e..1fae85f 100644 --- a/src/proof/model.rs +++ b/src/proof/model.rs @@ -9,14 +9,17 @@ use crate::types::{PlayerId, Seed, StepOutcome}; /// Safety contracts lifted out of the runtime trait surface. pub trait SafetyWitness: Game { + /// Returns whether the runtime state satisfies the game's safety invariant. fn safety_state_invariant(&self, state: &Self::State) -> bool { self.state_invariant(state) } + /// Returns whether an action value is valid for safety-oriented proofs. fn safety_action_invariant(&self, action: &Self::Action) -> bool { self.action_invariant(action) } + /// Returns whether a player-facing observation satisfies the declared invariant. fn safety_player_observation_invariant( &self, state: &Self::State, @@ -26,6 +29,7 @@ pub trait SafetyWitness: Game { self.player_observation_invariant(state, player, observation) } + /// Returns whether a spectator observation satisfies the declared invariant. fn safety_spectator_observation_invariant( &self, state: &Self::State, @@ -34,10 +38,12 @@ pub trait SafetyWitness: Game { self.spectator_observation_invariant(state, observation) } + /// Returns whether the world view satisfies the declared invariant. fn safety_world_view_invariant(&self, state: &Self::State, world: &Self::WorldView) -> bool { self.world_view_invariant(state, world) } + /// Returns whether the step satisfied the declared transition postcondition. fn safety_transition_postcondition( &self, pre: &Self::State, @@ -53,22 +59,33 @@ impl SafetyWitness for T {} /// Executable reference semantics for a runtime `Game` implementation. pub trait ModelGame: Game { + /// Model state used by refinement and liveness proofs. type ModelState: Clone + Debug + Eq + PartialEq; + /// Model observation used by refinement and liveness proofs. 
type ModelObs: Clone + Debug + Eq + PartialEq; + /// Model world view used by refinement and liveness proofs. type ModelWorldView: Clone + Debug + Eq + PartialEq; + /// Initializes the model state for a seed and parameter set. fn model_init_with_params(&self, seed: Seed, params: &Self::Params) -> Self::ModelState; + /// Returns whether the model state is terminal. fn model_is_terminal(&self, state: &Self::ModelState) -> bool; + /// Collects the model players that must act from the given state. fn model_players_to_act(&self, state: &Self::ModelState, out: &mut Self::PlayerBuf); + /// Collects the legal actions for one player in the given model state. fn model_legal_actions( &self, state: &Self::ModelState, player: PlayerId, out: &mut Self::ActionBuf, ); + /// Returns the player-facing observation for the model state. fn model_observe_player(&self, state: &Self::ModelState, player: PlayerId) -> Self::ModelObs; + /// Returns the spectator observation for the model state. fn model_observe_spectator(&self, state: &Self::ModelState) -> Self::ModelObs; + /// Returns the world view for the model state. fn model_world_view(&self, state: &Self::ModelState) -> Self::ModelWorldView; + /// Applies one model transition in place using the same action/rng surface as runtime. fn model_step_in_place( &self, state: &mut Self::ModelState, @@ -77,6 +94,7 @@ pub trait ModelGame: Game { out: &mut StepOutcome, ); + /// Returns the compact encoding contract for the given parameters. fn model_compact_spec_for_params(&self, params: &Self::Params) -> CompactSpec { self.compact_spec_for_params(params) } @@ -84,18 +102,24 @@ pub trait ModelGame: Game { /// Refinement witness between runtime values and executable model values. pub trait RefinementWitness: ModelGame + SafetyWitness { + /// Projects a runtime state into the proof model. fn runtime_state_to_model(&self, state: &Self::State) -> Self::ModelState; + /// Projects a runtime observation into the proof model. 
fn runtime_observation_to_model(&self, observation: &Self::Obs) -> Self::ModelObs; + /// Projects a runtime world view into the proof model. fn runtime_world_view_to_model(&self, world: &Self::WorldView) -> Self::ModelWorldView; + /// Returns whether the runtime state matches the provided model state. fn state_refines_model(&self, state: &Self::State, model: &Self::ModelState) -> bool { self.runtime_state_to_model(state) == *model } + /// Returns whether the runtime observation matches the provided model observation. fn observation_refines_model(&self, observation: &Self::Obs, model: &Self::ModelObs) -> bool { self.runtime_observation_to_model(observation) == *model } + /// Returns whether the runtime world view matches the provided model world view. fn world_view_refines_model( &self, world: &Self::WorldView, @@ -104,6 +128,7 @@ pub trait RefinementWitness: ModelGame + SafetyWitness { self.runtime_world_view_to_model(world) == *model } + /// Returns whether the runtime compact schema matches the model compact schema. fn compact_spec_refines_model(&self, params: &Self::Params) -> bool { self.compact_spec_for_params(params) == self.model_compact_spec_for_params(params) } diff --git a/src/proof/refinement.rs b/src/proof/refinement.rs index 50d672e..423f793 100644 --- a/src/proof/refinement.rs +++ b/src/proof/refinement.rs @@ -6,6 +6,7 @@ use crate::rng::DeterministicRng; use crate::session::{FixedHistory, SessionKernel}; use crate::types::{ReplayStep, Seed, StepOutcome}; +/// Checks that runtime initialization agrees with the executable proof model. pub fn assert_model_init_refinement( game: &G, seed: Seed, @@ -33,6 +34,7 @@ pub fn assert_model_init_refinement( } } +/// Checks that runtime observations and world views agree with the proof model. 
pub fn assert_model_observation_refinement(game: &G, state: &G::State) { let model = game.runtime_state_to_model(state); for player in 0..game.player_count() { @@ -53,6 +55,7 @@ pub fn assert_model_observation_refinement(game: &G, state assert!(game.world_view_refines_model(&world, &model_world)); } +/// Checks that one runtime transition agrees with the executable proof model. pub fn assert_model_step_refinement( game: &G, pre: &G::State, @@ -92,6 +95,7 @@ pub fn assert_model_step_refinement( assert!(game.safety_transition_postcondition(pre, actions, &runtime_state, &runtime_outcome,)); } +/// Checks that session replay/rewind semantics agree with repeated model execution. pub fn assert_model_replay_refinement( game: G, seed: Seed, From cdb344100660c7e5e4286b9f0fc439349e6620ee Mon Sep 17 00:00:00 2001 From: Noah Cashin Date: Wed, 1 Apr 2026 23:05:16 -0400 Subject: [PATCH 12/12] Fix bugs --- examples/pong_core.rs | 6 +- src/builtin/platformer/mod.rs | 11 +- src/builtin/platformer/tests.rs | 7 +- src/cli/mod.rs | 325 +++++++++++++++++++++++++------- src/core/env.rs | 175 ++++++++++++++++- src/core/single_player.rs | 8 + src/game.rs | 9 + src/session.rs | 10 + 8 files changed, 473 insertions(+), 78 deletions(-) diff --git a/examples/pong_core.rs b/examples/pong_core.rs index 71256af..4158216 100644 --- a/examples/pong_core.rs +++ b/examples/pong_core.rs @@ -42,7 +42,11 @@ impl Pong { st.bx = W / 2; st.by = H / 2; st.vx = if toward_p1 { -1 } else { 1 }; - st.vy = if (st.s1 + st.s2).is_multiple_of(2) { 1 } else { -1 }; + st.vy = if (st.s1 + st.s2).is_multiple_of(2) { + 1 + } else { + -1 + }; } } diff --git a/src/builtin/platformer/mod.rs b/src/builtin/platformer/mod.rs index 3f1e9ec..95e627d 100644 --- a/src/builtin/platformer/mod.rs +++ b/src/builtin/platformer/mod.rs @@ -93,12 +93,7 @@ impl Default for PlatformerConfig { } impl PlatformerConfig { - fn checked_step_reward( - self, - collected: u8, - finished: bool, - sprained: bool, - ) -> Option { + fn 
checked_step_reward(self, collected: u8, finished: bool, sprained: bool) -> Option { let mut reward = i128::from(self.berry_reward) * i128::from(collected); if finished { reward += i128::from(self.finish_bonus); @@ -371,6 +366,10 @@ impl single_player::SinglePlayerGame for Platformer { "platformer" } + fn params_invariant(&self, params: &Self::Params) -> bool { + params.invariant() + } + fn init_with_params(&self, _seed: Seed, params: &Self::Params) -> Self::State { assert!(params.invariant()); PlatformerState { diff --git a/src/builtin/platformer/tests.rs b/src/builtin/platformer/tests.rs index 5044ded..2723c9e 100644 --- a/src/builtin/platformer/tests.rs +++ b/src/builtin/platformer/tests.rs @@ -182,11 +182,8 @@ fn parameterized_environment_uses_wide_observation_schema() { berry_xs: [1, 6, 11, 16, 21, 26], ..PlatformerConfig::default() }; - let mut env = DefaultEnvironment::::new( - Platformer::default(), - 3, - Observer::Player(0), - ); + let mut env = + DefaultEnvironment::::new(Platformer::default(), 3, Observer::Player(0)); let packet = env.reset_with_params(3, config).unwrap(); assert_eq!(packet.words().len(), 1); assert!(packet.words()[0] > 4095); diff --git a/src/cli/mod.rs b/src/cli/mod.rs index 03ed1d1..7533bd9 100644 --- a/src/cli/mod.rs +++ b/src/cli/mod.rs @@ -18,7 +18,7 @@ use crate::render::{PassivePolicyDriver, RenderConfig, RenderMode, RendererApp, use crate::render::{RealtimeDriver, builtin}; #[cfg(feature = "render")] use crate::session::InteractiveSession; -use crate::{Game, Session, stable_hash}; +use crate::{Game, PlayerAction, Session, stable_hash}; #[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] pub(crate) enum RunMode { @@ -45,14 +45,8 @@ fn resolve_policy_choice( "human" => Err(format!( "unsupported {game_name} policy for replay mode: human" )), - "random" if matches!(mode, RunMode::Play) => Ok(PolicyChoice::Random), - "random" => Err(format!( - "unsupported {game_name} policy for replay mode: random" - )), - "first" if matches!(mode, 
RunMode::Play) => Ok(PolicyChoice::First), - "first" => Err(format!( - "unsupported {game_name} policy for replay mode: first" - )), + "random" => Ok(PolicyChoice::Random), + "first" => Ok(PolicyChoice::First), script if script.starts_with("script:") => parse_script(script) .map(PolicyChoice::Scripted) .map_err(|error| format!("{game_name} script parse error: {error}")), @@ -113,6 +107,7 @@ pub(crate) struct CliConfig { seed: u64, max_steps: usize, policy: String, + policy_explicit: bool, render: bool, render_physics: bool, ticks_per_second: f64, @@ -129,6 +124,7 @@ impl CliConfig { seed: 1, max_steps: 64, policy: "human".to_string(), + policy_explicit: false, render: false, render_physics: false, ticks_per_second: 12.0, @@ -159,6 +155,7 @@ impl CliConfig { config.policy = iter .next() .ok_or_else(|| "missing value after --policy".to_string())?; + config.policy_explicit = true; } "--render" => { config.render = true; @@ -193,6 +190,125 @@ impl CliConfig { Ok(config) } + + fn policy_for_mode(&self, mode: RunMode) -> &str { + if self.policy_explicit { + &self.policy + } else { + match mode { + RunMode::Play => "human", + RunMode::Replay => "first", + } + } + } +} + +#[cfg(feature = "render")] +fn should_stop_at_tick(tick: u64, max_steps: Option) -> bool { + max_steps.is_some_and(|limit| tick as usize >= limit) +} + +fn collect_scripted_joint_actions( + session: &Session, + script: &[G::Action], + position: &mut usize, + game_name: &'static str, +) -> Result +where + G: Game, +{ + let mut players = G::PlayerBuf::default(); + session.game().players_to_act(session.state(), &mut players); + + let mut joint_actions = G::JointActionBuf::default(); + let mut legal_actions = G::ActionBuf::default(); + for &player in players.as_slice() { + legal_actions.clear(); + session + .game() + .legal_actions(session.state(), player, &mut legal_actions); + if legal_actions.as_slice().is_empty() { + return Err(format!( + "{game_name} player {player} has no legal actions in a non-terminal 
state" + )); + } + let Some(action) = script.get(*position).copied() else { + return Err(format!( + "{game_name} scripted policy exhausted at index {}", + *position + )); + }; + if !legal_actions.as_slice().contains(&action) { + return Err(format!( + "{game_name} scripted policy action at index {} is illegal for current state", + *position + )); + } + joint_actions + .push(PlayerAction { player, action }) + .expect("joint action buffer capacity exceeded"); + *position += 1; + } + + Ok(joint_actions) +} + +#[cfg(feature = "render")] +fn validate_scripted_policy( + game: G, + seed: u64, + script: &[G::Action], + max_steps: Option, + game_name: &'static str, +) -> Result<(), String> +where + G: Game + Copy, +{ + let mut session = Session::new(game, seed); + let mut position = 0usize; + while !session.is_terminal() && !should_stop_at_tick(session.current_tick(), max_steps) { + let joint_actions = + collect_scripted_joint_actions(&session, script, &mut position, game_name)?; + session.step_with_joint_actions(&joint_actions); + } + Ok(()) +} + +fn run_scripted_headless_game( + game: G, + seed: u64, + script: &[G::Action], + max_steps: usize, + game_name: &'static str, +) -> Result +where + G: Game + Observe + Copy, + G::Obs: Debug, +{ + let mut session = Session::new(game, seed); + let mut position = 0usize; + while !session.is_terminal() && (session.current_tick() as usize) < max_steps { + let joint_actions = + collect_scripted_joint_actions(&session, script, &mut position, game_name)?; + let reward = { + let outcome = session.step_with_joint_actions(&joint_actions); + outcome.reward_for(0) + }; + let observation = session.game().observe(session.state(), Observer::Player(0)); + let mut compact = G::WordBuf::default(); + session + .game() + .encode_observation(&observation, &mut compact); + println!( + "tick={} reward={} terminal={} compact={:?}", + session.current_tick(), + reward, + session.is_terminal(), + compact.as_slice(), + ); + println!("{observation:#?}"); + } + 
Ok(stable_hash(session.trace())) } fn run_headless_game( @@ -211,15 +327,15 @@ where let mut session = Session::new(game, config.seed); let mut random = RandomPolicy; let mut first = FirstLegalPolicy; - let trace_hash = match resolve_policy_choice(mode, &config.policy, parse_script, game_name)? { - PolicyChoice::Human => run_with_policy(&mut session, config.max_steps, &mut human), - PolicyChoice::Random => run_with_policy(&mut session, config.max_steps, &mut random), - PolicyChoice::First => run_with_policy(&mut session, config.max_steps, &mut first), - PolicyChoice::Scripted(script) => { - let mut scripted = ScriptedPolicy::new_strict(script); - run_with_policy(&mut session, config.max_steps, &mut scripted) - } - }; + let trace_hash = + match resolve_policy_choice(mode, config.policy_for_mode(mode), parse_script, game_name)? { + PolicyChoice::Human => run_with_policy(&mut session, config.max_steps, &mut human), + PolicyChoice::Random => run_with_policy(&mut session, config.max_steps, &mut random), + PolicyChoice::First => run_with_policy(&mut session, config.max_steps, &mut first), + PolicyChoice::Scripted(script) => { + run_scripted_headless_game(game, config.seed, &script, config.max_steps, game_name)? + } + }; println!("trace hash: {trace_hash:016x}"); Ok(()) @@ -367,7 +483,12 @@ fn run_tictactoe_render(config: CliConfig, mode: RunMode) -> Result<(), String> use crate::render::builtin::TicTacToePresenter; let render_config = build_render_config(&config, RenderMode::Observation); - match resolve_policy_choice(mode, &config.policy, parse_tictactoe_script, "tictactoe")? { + match resolve_policy_choice( + mode, + config.policy_for_mode(mode), + parse_tictactoe_script, + "tictactoe", + )? 
{ PolicyChoice::Human => RendererApp::new( render_config, TurnBasedDriver::new(InteractiveSession::new(TicTacToe, config.seed)), @@ -395,16 +516,19 @@ fn run_tictactoe_render(config: CliConfig, mode: RunMode) -> Result<(), String> ) .run_native() .map_err(|error| error.to_string()), - PolicyChoice::Scripted(script) => RendererApp::new( - render_config, - PassivePolicyDriver::new( - InteractiveSession::new(TicTacToe, config.seed), - ScriptedPolicy::new_strict(script), - ), - TicTacToePresenter::default(), - ) - .run_native() - .map_err(|error| error.to_string()), + PolicyChoice::Scripted(script) => { + validate_scripted_policy(TicTacToe, config.seed, &script, None, "tictactoe")?; + RendererApp::new( + render_config, + PassivePolicyDriver::new( + InteractiveSession::new(TicTacToe, config.seed), + ScriptedPolicy::new_strict(script), + ), + TicTacToePresenter::default(), + ) + .run_native() + .map_err(|error| error.to_string()) + } } } @@ -413,7 +537,12 @@ fn run_blackjack_render(config: CliConfig, mode: RunMode) -> Result<(), String> use crate::render::builtin::BlackjackPresenter; let render_config = build_render_config(&config, RenderMode::Observation); - match resolve_policy_choice(mode, &config.policy, parse_blackjack_script, "blackjack")? { + match resolve_policy_choice( + mode, + config.policy_for_mode(mode), + parse_blackjack_script, + "blackjack", + )? 
{ PolicyChoice::Human => RendererApp::new( render_config, TurnBasedDriver::new(InteractiveSession::new(Blackjack, config.seed)), @@ -441,16 +570,19 @@ fn run_blackjack_render(config: CliConfig, mode: RunMode) -> Result<(), String> ) .run_native() .map_err(|error| error.to_string()), - PolicyChoice::Scripted(script) => RendererApp::new( - render_config, - PassivePolicyDriver::new( - InteractiveSession::new(Blackjack, config.seed), - ScriptedPolicy::new_strict(script), - ), - BlackjackPresenter::default(), - ) - .run_native() - .map_err(|error| error.to_string()), + PolicyChoice::Scripted(script) => { + validate_scripted_policy(Blackjack, config.seed, &script, None, "blackjack")?; + RendererApp::new( + render_config, + PassivePolicyDriver::new( + InteractiveSession::new(Blackjack, config.seed), + ScriptedPolicy::new_strict(script), + ), + BlackjackPresenter::default(), + ) + .run_native() + .map_err(|error| error.to_string()) + } } } @@ -464,8 +596,12 @@ fn run_platformer_render(config: CliConfig, mode: RunMode) -> Result<(), String> let render_config = build_render_config(&config, render_mode); let game = Platformer::default(); - let policy_choice = - resolve_policy_choice(mode, &config.policy, parse_platformer_script, "platformer")?; + let policy_choice = resolve_policy_choice( + mode, + config.policy_for_mode(mode), + parse_platformer_script, + "platformer", + )?; if config.render_physics { match policy_choice { @@ -496,16 +632,19 @@ fn run_platformer_render(config: CliConfig, mode: RunMode) -> Result<(), String> ) .run_native() .map_err(|error| error.to_string()), - PolicyChoice::Scripted(script) => RendererApp::new( - render_config, - PassivePolicyDriver::new( - InteractiveSession::new(game, config.seed), - ScriptedPolicy::new_strict(script), - ), - builtin::PlatformerPhysicsPresenter::new(game.config), - ) - .run_native() - .map_err(|error| error.to_string()), + PolicyChoice::Scripted(script) => { + validate_scripted_policy(game, config.seed, &script, None, 
"platformer")?; + RendererApp::new( + render_config, + PassivePolicyDriver::new( + InteractiveSession::new(game, config.seed), + ScriptedPolicy::new_strict(script), + ), + builtin::PlatformerPhysicsPresenter::new(game.config), + ) + .run_native() + .map_err(|error| error.to_string()) + } } } else { match policy_choice { @@ -536,16 +675,19 @@ fn run_platformer_render(config: CliConfig, mode: RunMode) -> Result<(), String> ) .run_native() .map_err(|error| error.to_string()), - PolicyChoice::Scripted(script) => RendererApp::new( - render_config, - PassivePolicyDriver::new( - InteractiveSession::new(game, config.seed), - ScriptedPolicy::new_strict(script), - ), - builtin::PlatformerPresenter::default(), - ) - .run_native() - .map_err(|error| error.to_string()), + PolicyChoice::Scripted(script) => { + validate_scripted_policy(game, config.seed, &script, None, "platformer")?; + RendererApp::new( + render_config, + PassivePolicyDriver::new( + InteractiveSession::new(game, config.seed), + ScriptedPolicy::new_strict(script), + ), + builtin::PlatformerPresenter::default(), + ) + .run_native() + .map_err(|error| error.to_string()) + } } } } @@ -556,7 +698,9 @@ fn print_usage() { println!( " gameengine play [--seed N] [--max-steps N] [--policy human|random|first|script:...]" ); - println!(" gameengine replay [--seed N] [--max-steps N] [--policy script:...]"); + println!( + " gameengine replay [--seed N] [--max-steps N] [--policy first|random|script:...]" + ); println!(" gameengine validate"); println!("available games:"); for descriptor in all_games() { @@ -711,3 +855,58 @@ impl Policy for HumanPlatformer { } } } + +#[cfg(test)] +mod tests { + use super::{ + CliConfig, PolicyChoice, RunMode, parse_tictactoe_script, resolve_policy_choice, + run_scripted_headless_game, + }; + use crate::builtin::{TicTacToe, TicTacToeAction}; + + #[test] + fn replay_defaults_to_first_policy() { + let config = CliConfig::parse(Vec::::new()).unwrap(); + let choice = resolve_policy_choice( + 
RunMode::Replay, + config.policy_for_mode(RunMode::Replay), + parse_tictactoe_script, + "tictactoe", + ) + .unwrap(); + assert!(matches!(choice, PolicyChoice::First)); + } + + #[test] + fn replay_accepts_explicit_random_policy() { + let choice = resolve_policy_choice( + RunMode::Replay, + "random", + parse_tictactoe_script, + "tictactoe", + ) + .unwrap(); + assert!(matches!(choice, PolicyChoice::Random)); + } + + #[test] + fn scripted_headless_run_reports_exhaustion() { + let error = + run_scripted_headless_game(TicTacToe, 1, &[TicTacToeAction(0)], 64, "tictactoe") + .unwrap_err(); + assert!(error.contains("scripted policy exhausted")); + } + + #[test] + fn scripted_headless_run_reports_illegal_action() { + let error = run_scripted_headless_game( + TicTacToe, + 1, + &[TicTacToeAction(0), TicTacToeAction(0)], + 64, + "tictactoe", + ) + .unwrap_err(); + assert!(error.contains("scripted policy action at index 1 is illegal")); + } +} diff --git a/src/core/env.rs b/src/core/env.rs index a44f463..efbf852 100644 --- a/src/core/env.rs +++ b/src/core/env.rs @@ -95,6 +95,11 @@ pub enum EnvError { /// Number of players exposed by the game. player_count: usize, }, + /// Parameter bundle was rejected by the game's parameter invariant. + InvalidParameters { + /// Stable machine-readable game name. + game: &'static str, + }, } impl fmt::Display for EnvError { @@ -132,6 +137,9 @@ impl fmt::Display for EnvError { f, "agent player {player} is outside player range 0..{player_count}" ), + Self::InvalidParameters { game } => { + write!(f, "invalid parameter bundle for game `{game}`") + } } } } @@ -178,17 +186,37 @@ where G: Observe, H: HistoryStore, { + fn validate_params(game: &G, params: &G::Params) -> Result<(), EnvError> { + if game.params_invariant(params) { + Ok(()) + } else { + Err(EnvError::InvalidParameters { game: game.name() }) + } + } + /// Creates a new compact environment initialized with explicit params. 
- pub fn new_with_params(game: G, seed: Seed, observer: Observer, params: G::Params) -> Self { + pub fn try_new_with_params( + game: G, + seed: Seed, + observer: Observer, + params: G::Params, + ) -> Result { + Self::validate_params(&game, &params)?; let agent_player = match observer { Observer::Player(player) => player, Observer::Spectator => 0, }; - Self { + Ok(Self { session: SessionKernel::new_with_params(game, seed, params), observer, agent_player, - } + }) + } + + /// Creates a new compact environment initialized with explicit params. + pub fn new_with_params(game: G, seed: Seed, observer: Observer, params: G::Params) -> Self { + Self::try_new_with_params(game, seed, observer, params) + .expect("invalid parameter bundle for compact environment") } /// Creates a new compact environment. @@ -260,6 +288,7 @@ where seed: Seed, params: G::Params, ) -> Result, EnvError> { + Self::validate_params(self.session.game(), &params)?; self.session.reset_with_params(seed, params); self.encode_current_observation() } @@ -401,6 +430,9 @@ mod regression_tests { #[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)] struct ParamRewardGame; + #[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)] + struct RejectingParamsGame; + impl Game for DemoGame { type Params = u8; type State = DemoState; @@ -834,6 +866,113 @@ mod regression_tests { } } + impl Game for RejectingParamsGame { + type Params = i32; + type State = i32; + type Action = u8; + type Obs = i32; + type WorldView = (); + type PlayerBuf = FixedVec; + type ActionBuf = FixedVec; + type JointActionBuf = FixedVec, 1>; + type RewardBuf = FixedVec; + type WordBuf = FixedVec; + + fn default_params(&self) -> Self::Params { + 0 + } + + fn name(&self) -> &'static str { + "rejecting-params" + } + + fn player_count(&self) -> usize { + 1 + } + + fn params_invariant(&self, params: &Self::Params) -> bool { + *params >= 0 + } + + fn init_with_params(&self, _seed: Seed, params: &Self::Params) -> Self::State { + assert!(*params >= 0); 
+ *params + } + + fn is_terminal(&self, _state: &Self::State) -> bool { + false + } + + fn players_to_act(&self, _state: &Self::State, out: &mut Self::PlayerBuf) { + out.clear(); + out.push(0).unwrap(); + } + + fn legal_actions( + &self, + _state: &Self::State, + _player: PlayerId, + out: &mut Self::ActionBuf, + ) { + out.clear(); + out.push(0).unwrap(); + } + + fn observe_player(&self, state: &Self::State, _player: PlayerId) -> Self::Obs { + *state + } + + fn observe_spectator(&self, state: &Self::State) -> Self::Obs { + *state + } + + fn world_view(&self, _state: &Self::State) -> Self::WorldView {} + + fn step_in_place( + &self, + _state: &mut Self::State, + _joint_actions: &Self::JointActionBuf, + _rng: &mut DeterministicRng, + out: &mut StepOutcome, + ) { + out.rewards + .push(PlayerReward { + player: 0, + reward: 0, + }) + .unwrap(); + } + + fn compact_spec(&self) -> CompactSpec { + CompactSpec { + action_count: 1, + observation_bits: 8, + observation_stream_len: 1, + reward_bits: 1, + min_reward: 0, + max_reward: 0, + reward_offset: 0, + } + } + + fn encode_action(&self, action: &Self::Action) -> u64 { + u64::from(*action) + } + + fn decode_action(&self, encoded: u64) -> Option { + (encoded == 0).then_some(0) + } + + fn encode_player_observation(&self, observation: &Self::Obs, out: &mut Self::WordBuf) { + out.clear(); + out.push(*observation as u64).unwrap(); + } + + fn encode_spectator_observation(&self, observation: &Self::Obs, out: &mut Self::WordBuf) { + self.encode_player_observation(observation, out); + } + } + #[test] fn step_uses_agent_player_reward() { let mut env = DefaultEnvironment::::new(DemoGame, 3, Observer::Player(0)); @@ -911,6 +1050,36 @@ mod regression_tests { assert_eq!(step.reward.raw, 5); assert_eq!(step.reward.encoded, 5); } + + #[test] + fn reset_with_invalid_params_returns_error() { + let mut env = DefaultEnvironment::::new( + RejectingParamsGame, + 1, + Observer::Player(0), + ); + assert_eq!( + env.reset_with_params(1, -1), + 
Err(EnvError::InvalidParameters { + game: "rejecting-params" + }) + ); + } + + #[test] + fn try_new_with_invalid_params_returns_error() { + assert!(matches!( + DefaultEnvironment::::try_new_with_params( + RejectingParamsGame, + 1, + Observer::Player(0), + -1, + ), + Err(EnvError::InvalidParameters { + game: "rejecting-params" + }) + )); + } } #[cfg(kani)] diff --git a/src/core/single_player.rs b/src/core/single_player.rs index addab96..c7046b6 100644 --- a/src/core/single_player.rs +++ b/src/core/single_player.rs @@ -86,6 +86,10 @@ pub trait SinglePlayerGame { fn default_params(&self) -> Self::Params { Self::Params::default() } + /// Returns whether a parameter bundle is valid for `init_with_params`. + fn params_invariant(&self, _params: &Self::Params) -> bool { + true + } /// Initialize deterministic state from a seed and parameter bundle. fn init_with_params(&self, seed: Seed, params: &Self::Params) -> Self::State; /// Whether the state is terminal. @@ -221,6 +225,10 @@ where ::default_params(self) } + fn params_invariant(&self, params: &Self::Params) -> bool { + ::params_invariant(self, params) + } + fn init_with_params(&self, seed: Seed, params: &Self::Params) -> Self::State { ::init_with_params(self, seed, params) } diff --git a/src/game.rs b/src/game.rs index f3eff1a..4e60141 100644 --- a/src/game.rs +++ b/src/game.rs @@ -49,6 +49,15 @@ pub trait Game { Self::Params::default() } + /// Returns whether a parameter bundle is valid for `init_with_params`. + /// + /// Infallible engine APIs may assume this precondition holds. Fallible wrappers + /// such as compact environments can use it to reject malformed runtime input + /// before calling into game initialization. + fn params_invariant(&self, _params: &Self::Params) -> bool { + true + } + /// Initialize deterministic state from a seed and parameter bundle. 
fn init_with_params(&self, seed: Seed, params: &Self::Params) -> Self::State; diff --git a/src/session.rs b/src/session.rs index 64cf2bf..12b4096 100644 --- a/src/session.rs +++ b/src/session.rs @@ -381,6 +381,11 @@ impl> SessionKernel { /// Creates a new session initialized from `seed` and explicit params. pub fn new_with_params(game: G, seed: Seed, params: G::Params) -> Self { + assert!( + game.params_invariant(&params), + "invalid params for game `{}`", + game.name() + ); let state = game.init_with_params(seed, &params); assert!(game.state_invariant(&state)); let rng = DeterministicRng::from_seed_and_stream(seed, 1); @@ -407,6 +412,11 @@ impl> SessionKernel { /// Resets session state/history to `seed` and updates active params. pub fn reset_with_params(&mut self, seed: Seed, params: G::Params) { + assert!( + self.game.params_invariant(&params), + "invalid params for game `{}`", + self.game.name() + ); self.params = params; self.state = self.game.init_with_params(seed, &self.params); self.rng = DeterministicRng::from_seed_and_stream(seed, 1);