diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 72c0810..e189c01 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -20,7 +20,7 @@ jobs: run: cargo test - name: Builtin game tests - run: cargo test --features builtin-games + run: cargo test --features builtin - name: Physics tests run: cargo test --features physics @@ -29,16 +29,16 @@ jobs: run: cargo test --features parallel - name: Builtin physics tests - run: cargo test --features "builtin-games physics" + run: cargo test --features "builtin physics" - name: Rendered builtin tests - run: cargo test --features "render builtin-games physics" + run: cargo test --features "render builtin physics" - name: Render framework check run: cargo check --features render - name: Rendered builtin check - run: cargo check --features "render builtin-games" + run: cargo check --features "render builtin" - name: CLI check run: cargo check --bin gameengine --features cli @@ -53,13 +53,45 @@ jobs: run: cargo check --target wasm32-unknown-unknown --features physics - name: WASM rendered builtin check - run: cargo check --target wasm32-unknown-unknown --features "render builtin-games physics" + run: cargo check --target wasm32-unknown-unknown --features "render builtin physics" + + - name: Install nightly Rust + uses: dtolnay/rust-toolchain@nightly + + - name: Rustdoc coverage gate + run: | + cargo +nightly rustdoc --all-features -- -Z unstable-options --show-coverage --output-format json > /tmp/rustdoc_cov.json + python - <<'PY' + import json, sys + files = json.load(open('/tmp/rustdoc_cov.json')) + with_docs = sum(v.get('with_docs', 0) for v in files.values()) + total = sum(v.get('total', 0) for v in files.values()) + pct = 100.0 if total == 0 else (with_docs * 100.0 / total) + print(f"Rustdoc documented items: {with_docs}/{total} ({pct:.2f}%)") + if with_docs != total: + print('Rustdoc coverage gate failed (<100.0%).', file=sys.stderr) + sys.exit(1) + PY - name: Clippy - run: cargo clippy 
--all-targets --all-features -- -D warnings + run: cargo +stable clippy --all-targets --all-features -- -D warnings + + - name: Install pinned Verus release + run: | + curl -fsSL "https://github.com/verus-lang/verus/releases/download/release%2F0.2026.03.28.3390e9a/verus-0.2026.03.28.3390e9a-x86-linux.zip" -o /tmp/verus.zip + unzip -q /tmp/verus.zip -d /tmp + rm -rf ./verus_binary + mv /tmp/verus-x86-linux ./verus_binary + chmod +x ./verus_binary/verus + + - name: Install Verus-required Rust toolchain + run: rustup toolchain install 1.94.0-x86_64-unknown-linux-gnu + + - name: Verus model checks + run: REQUIRE_VERUS=1 bash scripts/run-verus.sh - name: Bench compile - run: cargo bench --no-run --features "builtin-games physics" + run: cargo bench --no-run --features "builtin physics" kani: runs-on: ubuntu-latest @@ -72,4 +104,4 @@ jobs: with: kani-version: "0.67.0" command: bash - args: "scripts/run-kani.sh" + args: "-lc 'VERIFICATION_MODE=kani-only scripts/run-verification.sh'" diff --git a/.gitignore b/.gitignore index ea8c4bf..0175d93 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,3 @@ /target +/verus_binary +/infotheory diff --git a/Cargo.lock b/Cargo.lock index d59ed49..066b599 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -697,7 +697,7 @@ dependencies = [ [[package]] name = "gameengine" -version = "0.1.2" +version = "0.2.0" dependencies = [ "bytemuck", "criterion", diff --git a/Cargo.toml b/Cargo.toml index d968154..6e48f30 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "gameengine" -version = "0.1.2" +version = "0.2.0" edition = "2024" autobins = false license = "ISC" @@ -14,8 +14,8 @@ unexpected_cfgs = { level = "warn", check-cfg = ['cfg(kani)'] } [features] default = [] physics = [] -builtin-games = [] -cli = ["builtin-games"] +builtin = [] +cli = ["builtin"] parallel = ["dep:rayon"] render = [ "dep:bytemuck", @@ -51,10 +51,20 @@ criterion = { version = "0.5.1", default-features = false, features = ["cargo_be [[bin]] name = 
"gameengine" -path = "src/main.rs" +path = "src/bin/gameengine.rs" required-features = ["cli"] [[bench]] name = "step_throughput" harness = false -required-features = ["builtin-games"] +required-features = ["builtin"] + +[[bench]] +name = "kernel_hotpaths" +harness = false +required-features = ["builtin"] + +[[example]] +name = "perf_probe" +path = "examples/perf_probe.rs" +required-features = ["builtin"] diff --git a/README.md b/README.md index 99e9df4..aba0dfa 100644 --- a/README.md +++ b/README.md @@ -1,9 +1,8 @@ # Infotheory Game Engine -`gameengine` is a deterministic, replayable, proof-oriented and object-oriented game engine core for games treated as -mathematical objects. +`gameengine` is a deterministic, replayable, proof-oriented game/simulation kernel for games treated as mathematical objects. -The kernel is designed around the idea that a game is just: +The kernel is designed around: `(seed, state, joint_actions) -> (new_state, reward, observations, termination)` @@ -15,8 +14,6 @@ Everything else is layered on top: - machine control is just another action source, - replay and rollback are exact because the kernel is deterministic. -Thus, you can implement a game which is mathematically proven to not have logic bugs if you prove the invariants on it -- deterministically, anywhere, including in a browser. - ## What It Is For This crate is meant for: @@ -25,12 +22,10 @@ This crate is meant for: - AIT and AI experiments, - simulation-heavy search workloads such as MCTS, - scientific or benchmark environments that need replay fidelity, -- games that benefit from formal reasoning about correctness. -- simulated physical environments +- games that benefit from formal reasoning about correctness, +- simulated physical environments. -The target audience is broader than traditional game development. The engine is intended to be -useful to computer scientists, mathematicians, ML/AI researchers, and anyone who needs portable, -auditable, replayable environments. 
+The target audience is broader than traditional game development: computer scientists, mathematicians, ML/AI researchers, and anyone who needs portable, auditable, replayable environments. ## Design Principles @@ -38,121 +33,166 @@ auditable, replayable environments. - Deterministic seeded PRNG only. No wall-clock time inside the game core. - Tick-based simulation. Rendering speed and simulation speed are decoupled. - Fixed-capacity buffers in the proof-critical path. Hot stepping stays allocation-free. -- Replay, rewind, and fork are first-class. Rollback netcode can be built on exact state recovery. -- Physics is engine-owned, auditable, and provable -- you may define invariants and prove them with Kani, inheriting the proven correctness and determinism of the Engine. -- Rendering is additive. A UI can never change the game’s mathematical semantics. Rendering is a function performed upon the observations of a state. +- Replay, rewind, and fork are first-class. +- Physics is engine-owned, auditable, and deterministic. +- Rendering is additive. A UI cannot change game semantics. +- One canonical observation type per game (`type Obs`), with player/spectator viewpoints encoded from that shared schema. + +## Authoring Ergonomics + +The core `Game` trait remains available for full control, but single-player environments now have an ergonomic adapter: + +- `core::single_player::SinglePlayerGame` + +It removes repeated single-player plumbing: + +- no manual `player_count = 1` wiring, +- no manual `players_to_act` wiring, +- no manual joint-action extraction boilerplate, +- canonical fixed-capacity reward/joint-action buffer wiring is engine-owned. + +This is the intended path for Pong-class ports where the handwritten core should stay close to game math. 
+ +Minimal compilable example: + +```bash +cargo run --example pong_core +``` + +## Environment Interface + +`core::env::Environment` exposes an infotheory-compatible compact interface: + +- `reset(seed)` +- `reset_with_params(seed, params)` +- `step(action_bits) -> EnvStep { observation_bits, reward, terminated, truncated }` + +Compact constraints are canonical and centralized in `CompactSpec`: + +- observation word count/bit-width validation, +- reward range validation, +- reward bit-width validation. ## Formal Verification Scope -The core engine and builtin reference environments are set up for Kani-based verification. +The core engine and builtin reference environments are set up for Kani and Verus checks. -The proof surface covers: +Current proof surface includes: - fixed-capacity buffers, -- compact codecs, +- compact codec constraints and round-trip properties, - PRNG determinism, -- rollback/replay restoration, -- game-specific invariants for builtin games, +- rollback/replay restoration properties, +- builtin game invariants in the harness matrix, - engine-owned 2D physics invariants, -- platformer/environment synchronization. +- manifest-driven Kani/Verus proof registration, +- executable model/refinement scaffolding for verified games, +- Verus replay/observation/liveness models. -The render stack is intentionally **outside** the proof claim. The claim is that the game kernel and -physics kernel are the mathematical source of truth; the GUI is a derived interface that consumes -verified state. I am not sure if that would be possible to prove. -If anyone would like to suggest a provable rendering method, I would DEFINITELY be open to consideration. +The machine-readable proof boundary lives in [`proofs/manifest.txt`](proofs/manifest.txt). 
+Claims are intentionally split by status: -Run the current proof matrix with: +- `refined`: backed by Verus model laws and Kani refinement checks, +- `checked`: bounded Kani proofs over the Rust implementation, +- `model`: Verus-only model claims, +- `runtime`: tested/benchmarked behavior, +- `out_of_scope`: explicitly outside the formal boundary. -```bash -bash scripts/run-kani.sh -``` +Games only opt into the stronger surface explicitly: -## Feature Graph +- implement `proof::ModelGame` and `proof::RefinementWitness`, +- add an explicit `impl proof::VerifiedGame for MyGame {}`, +- register the claim and harness ids in `proofs/manifest.txt`. -- `default = []` - - minimal headless library kernel -- `physics` - - engine-owned deterministic 2D physics types and proofs -- `builtin-games` - - reference environments only -- `cli` - - opt-in command-line binary (`gameengine`), depends on `builtin-games` -- `parallel` - - batch-simulation helpers for independent runs -- `render` - - additive `wgpu`-based render/runtime layer +Render/runtime behavior is validated by tests and benchmarks; the GPU/driver stack is intentionally outside full formal proof scope. 
-Recommended combinations: +Run the integrated verification matrix with: + +```bash +bash scripts/run-verification.sh +``` -- headless kernel only: +Run Verus checks directly: ```bash -cargo test +bash scripts/run-verus.sh ``` -- builtin reference environments: +Pin and auto-fetch the CI Verus binary: ```bash -cargo test --features builtin-games +AUTO_FETCH_VERUS=1 REQUIRE_VERUS=1 bash scripts/run-verus.sh ``` -- builtin games plus physics: +Render the human-readable claim matrix from the manifest: ```bash -cargo test --features "builtin-games physics" +bash scripts/render-proof-claim.sh ``` -- playable/rendered reference environments: +## Feature Graph + +- `default = []` + - minimal headless kernel +- `physics` + - engine-owned deterministic 2D physics +- `builtin` + - builtin reference environments +- `cli` + - command-line binary (`gameengine`), depends on `builtin` +- `parallel` + - batch simulation helpers for independent runs +- `render` + - additive render/runtime layer + +Recommended combinations: ```bash -cargo test --features "render builtin-games physics" +# headless kernel only +cargo test + +# builtin reference environments +cargo test --features builtin + +# builtin games plus physics +cargo test --features "builtin physics" + +# playable/rendered reference environments +cargo test --features "render builtin physics" ``` ## Builtin Reference Games - `TicTacToe` - - observation-complete turn-based game with deterministic seeded opponent behavior - `Blackjack` - - hidden-information card game with seeded shuffle/opponent policy - `Platformer` - - simple physics-backed 2D environment with rewards, jump risk, and an oracle physics view - -These are reference environments, not privileged engine special-cases. They exist both as examples -of how to implement games with the kernel and as useful ready-made environments for experiments. -Use these as references for how to implement formal verification, how to render a Game Object, etc. 
+These are reference environments, not privileged engine special-cases. They demonstrate deterministic game authoring, proof hooks, compact encoding, and render adapters. ## Rendering Model -The render layer is deliberately wrapper-first, not engine-first. +The render layer is wrapper-first, not kernel-first. -- `--render` means: render the intended observation/UI path. -- `--render-physics` means: render an explicit oracle/developer view of the underlying physics environment. +- `--render`: intended player observation/UI path +- `--render-physics`: oracle/developer view of the physics environment -That oracle view can reveal more than the player should see. It is useful for debugging, -demonstrations, teaching, and understanding the environment, but it should not be confused with the -fair observation channel. +The oracle path can reveal information the player should not see. It exists for debugging, teaching, and diagnostics. Because the kernel is tick-based, the same game can be: -- trained as fast as it can be computed, +- trained at compute speed, - replayed exactly, -- slowed down to human-readable speed, -- or rendered live while an AI policy controls the actions. - -`--render-physics` will work only on games which use the built-in Physics engine, and will only show that physical environment. Obviously not all games will use 2D physics at all. - -`--render` must be implemented manually atop of raw Inputs/Observations -- the library provides 2D Game Rendering abstractions for this, +- slowed for human-readable pacing, +- or rendered live with AI-driven actions. ## CLI The CLI is available when `cli` is enabled. -`cli` automatically enables `builtin-games`. 
```bash cargo run --features cli -- list cargo run --features cli -- play tictactoe --policy human -cargo run --features cli -- play blackjack --policy script:hit,stand +cargo run --features cli -- replay blackjack --policy script:hit,stand cargo run --features "cli physics render" -- play platformer --render cargo run --features "cli physics render" -- play platformer --render-physics --debug-overlay ``` @@ -170,7 +210,7 @@ Useful flags: ## Rollback And Replay -`SessionKernel` and `FixedHistory` support: +`SessionKernel`, `DynamicHistory`, and `FixedHistory` support: - exact trace recording, - `rewind_to(tick)`, @@ -178,37 +218,21 @@ Useful flags: - `state_at(tick)`, - `fork_at(tick)`. -That makes the engine a clean basis for rollback netcode, deterministic multiplayer simulation, -offline search, and reproducible experiments. +This supports rollback netcode, deterministic multiplayer simulation, offline search, and reproducible experiments. ## WASM -The core library is written to remain WASM-compatible. The headless kernel and feature graph are -kept portable, and the render stack is structured so it can compile for WebAssembly. -It doesn't just compile for WebAssembly, it works! Try the demos at https://infotheory.tech +The core library is WASM-compatible. The headless kernel remains portable, and the render stack is structured to compile for WebAssembly. ## Project Direction -The kernel is intentionally shaped to be compatible with [Infotheory](https://github.com/turtle261/infotheory)'s AIXI interfaces: +The kernel is intentionally shaped to be compatible with Infotheory AIXI interfaces: -- `u64` compact actions/observations, +- compact `u64` actions/observations, - `i64` rewards, - deterministic seeded execution, -- zero hidden time, -- replayable state transitions. - -Though this may very well be useful for other AI/RL usecases for what is now obvious reasons, given you read this far. - -More creatively, this may be useful for Reservoir Computer design. 
- -You may even call this the "Infotheory Game Engine" - - -3D Physics engine and Rendering is a goal. It's in the works. - -Intended for games of all types, arbitrarily -- whether it be a mere coinflip, card games, board games, a 3D spaceflight simulation, or a massively multiplayer FPS. +- replayable transitions. ## License -- This is free software, given with the ISC License. This applies to the Software and all associated documentation ("this software"). -- Contributing to this specific repository means you agree to submit all contributions under the same Licensing arrangement. -- Don't forget to add your Copyright notice to the LICENSE file. + +This project uses the ISC License (see `LICENSE`). diff --git a/benches/kernel_hotpaths.rs b/benches/kernel_hotpaths.rs new file mode 100644 index 0000000..2c45c7b --- /dev/null +++ b/benches/kernel_hotpaths.rs @@ -0,0 +1,189 @@ +#![cfg(feature = "builtin")] + +use criterion::{Criterion, criterion_group, criterion_main}; +use gameengine::builtin::{Blackjack, BlackjackAction, TicTacToe, TicTacToeAction}; +#[cfg(feature = "physics")] +use gameengine::builtin::{Platformer, PlatformerAction}; +use gameengine::{PlayerAction, Session}; + +fn bench_tictactoe_kernel_step(c: &mut Criterion) { + c.bench_function("tictactoe_session_step_kernel", |b| { + let mut session = Session::new(TicTacToe, 7); + let script = [ + PlayerAction { + player: 0, + action: TicTacToeAction(0), + }, + PlayerAction { + player: 0, + action: TicTacToeAction(4), + }, + PlayerAction { + player: 0, + action: TicTacToeAction(8), + }, + PlayerAction { + player: 0, + action: TicTacToeAction(2), + }, + ]; + let mut index = 0usize; + b.iter(|| { + if session.is_terminal() || session.current_tick() >= 200 { + session.reset(7); + index = 0; + } + let action = &script[index % script.len()]; + index += 1; + let outcome = session.step(std::slice::from_ref(action)); + criterion::black_box(outcome.reward_for(0)); + }) + }); +} + +fn bench_tictactoe_checked_step(c: &mut 
Criterion) { + c.bench_function("tictactoe_session_step_checked", |b| { + let mut session = Session::new(TicTacToe, 7); + let script = [ + PlayerAction { + player: 0, + action: TicTacToeAction(0), + }, + PlayerAction { + player: 0, + action: TicTacToeAction(4), + }, + PlayerAction { + player: 0, + action: TicTacToeAction(8), + }, + PlayerAction { + player: 0, + action: TicTacToeAction(2), + }, + ]; + let mut index = 0usize; + b.iter(|| { + if session.is_terminal() || session.current_tick() >= 200 { + session.reset(7); + index = 0; + } + let action = &script[index % script.len()]; + index += 1; + let outcome = session.step_checked(std::slice::from_ref(action)); + criterion::black_box(outcome.reward_for(0)); + }) + }); +} + +fn bench_blackjack_kernel_step(c: &mut Criterion) { + c.bench_function("blackjack_session_step_kernel", |b| { + let mut session = Session::new(Blackjack, 11); + let script = [ + PlayerAction { + player: 0, + action: BlackjackAction::Hit, + }, + PlayerAction { + player: 0, + action: BlackjackAction::Stand, + }, + ]; + let mut index = 0usize; + b.iter(|| { + if session.is_terminal() || session.current_tick() >= 200 { + session.reset(11); + index = 0; + } + let action = &script[index % script.len()]; + index += 1; + let outcome = session.step(std::slice::from_ref(action)); + criterion::black_box(outcome.reward_for(0)); + }) + }); +} + +#[cfg(feature = "physics")] +fn bench_platformer_kernel_step(c: &mut Criterion) { + c.bench_function("platformer_session_step_kernel", |b| { + let mut session = Session::new(Platformer::default(), 5); + let script = [ + PlayerAction { + player: 0, + action: PlatformerAction::Right, + }, + PlayerAction { + player: 0, + action: PlatformerAction::Jump, + }, + PlayerAction { + player: 0, + action: PlatformerAction::Right, + }, + PlayerAction { + player: 0, + action: PlatformerAction::Left, + }, + ]; + let mut index = 0usize; + b.iter(|| { + if session.is_terminal() || session.current_tick() >= 200 { + session.reset(5); + 
index = 0; + } + let action = &script[index % script.len()]; + index += 1; + let outcome = session.step(std::slice::from_ref(action)); + criterion::black_box(outcome.reward_for(0)); + }) + }); +} + +#[cfg(feature = "physics")] +fn bench_platformer_rewind_kernel(c: &mut Criterion) { + c.bench_function("platformer_rewind_kernel", |b| { + b.iter(|| { + let mut session = Session::new(Platformer::default(), 5); + let actions = [ + PlayerAction { + player: 0, + action: PlatformerAction::Right, + }, + PlayerAction { + player: 0, + action: PlatformerAction::Jump, + }, + PlayerAction { + player: 0, + action: PlatformerAction::Right, + }, + PlayerAction { + player: 0, + action: PlatformerAction::Right, + }, + ]; + for action in &actions { + session.step(std::slice::from_ref(action)); + } + criterion::black_box(session.rewind_to(2)); + }) + }); +} + +#[cfg(feature = "physics")] +criterion_group!( + benches, + bench_tictactoe_kernel_step, + bench_tictactoe_checked_step, + bench_blackjack_kernel_step, + bench_platformer_kernel_step, + bench_platformer_rewind_kernel +); +#[cfg(not(feature = "physics"))] +criterion_group!( + benches, + bench_tictactoe_kernel_step, + bench_tictactoe_checked_step, + bench_blackjack_kernel_step +); +criterion_main!(benches); diff --git a/benches/step_throughput.rs b/benches/step_throughput.rs index 6e206fe..e6ea031 100644 --- a/benches/step_throughput.rs +++ b/benches/step_throughput.rs @@ -1,9 +1,9 @@ -#![cfg(feature = "builtin-games")] +#![cfg(feature = "builtin")] use criterion::{Criterion, criterion_group, criterion_main}; -use gameengine::games::{Blackjack, BlackjackAction, TicTacToe, TicTacToeAction}; +use gameengine::builtin::{Blackjack, BlackjackAction, TicTacToe, TicTacToeAction}; #[cfg(feature = "physics")] -use gameengine::games::{Platformer, PlatformerAction}; +use gameengine::builtin::{Platformer, PlatformerAction}; use gameengine::{PlayerAction, Session}; fn bench_tictactoe(c: &mut Criterion) { diff --git a/examples/perf_probe.rs 
b/examples/perf_probe.rs new file mode 100644 index 0000000..bf9b928 --- /dev/null +++ b/examples/perf_probe.rs @@ -0,0 +1,135 @@ +use std::env; + +#[cfg(feature = "builtin")] +use gameengine::builtin::{Blackjack, BlackjackAction, TicTacToe, TicTacToeAction}; +#[cfg(all(feature = "builtin", feature = "physics"))] +use gameengine::builtin::{Platformer, PlatformerAction}; +#[cfg(feature = "builtin")] +use gameengine::{PlayerAction, Session, stable_hash}; + +#[cfg(feature = "builtin")] +fn run_tictactoe(iterations: u64) -> u64 { + let mut session = Session::new(TicTacToe, 7); + let script = [ + PlayerAction { + player: 0, + action: TicTacToeAction(0), + }, + PlayerAction { + player: 0, + action: TicTacToeAction(4), + }, + PlayerAction { + player: 0, + action: TicTacToeAction(8), + }, + PlayerAction { + player: 0, + action: TicTacToeAction(2), + }, + ]; + + let mut digest = 0u64; + for index in 0..iterations { + if session.is_terminal() || session.current_tick() >= 200 { + session.reset(7); + } + let action = &script[(index as usize) % script.len()]; + let outcome = session.step(std::slice::from_ref(action)); + digest = digest.wrapping_add(outcome.reward_for(0) as u64); + digest ^= session.current_tick(); + } + digest ^ stable_hash(session.trace()) +} + +#[cfg(feature = "builtin")] +fn run_blackjack(iterations: u64) -> u64 { + let mut session = Session::new(Blackjack, 11); + let script = [ + PlayerAction { + player: 0, + action: BlackjackAction::Hit, + }, + PlayerAction { + player: 0, + action: BlackjackAction::Stand, + }, + ]; + + let mut digest = 0u64; + for index in 0..iterations { + if session.is_terminal() || session.current_tick() >= 200 { + session.reset(11); + } + let action = &script[(index as usize) % script.len()]; + let outcome = session.step(std::slice::from_ref(action)); + digest = digest.wrapping_add(outcome.reward_for(0) as u64); + digest ^= session.current_tick(); + } + digest ^ stable_hash(session.trace()) +} + +#[cfg(all(feature = "builtin", feature 
= "physics"))] +fn run_platformer(iterations: u64) -> u64 { + let mut session = Session::new(Platformer::default(), 5); + let script = [ + PlayerAction { + player: 0, + action: PlatformerAction::Right, + }, + PlayerAction { + player: 0, + action: PlatformerAction::Jump, + }, + PlayerAction { + player: 0, + action: PlatformerAction::Right, + }, + PlayerAction { + player: 0, + action: PlatformerAction::Left, + }, + ]; + + let mut digest = 0u64; + for index in 0..iterations { + if session.is_terminal() || session.current_tick() >= 200 { + session.reset(5); + } + let action = &script[(index as usize) % script.len()]; + let outcome = session.step(std::slice::from_ref(action)); + digest = digest.wrapping_add(outcome.reward_for(0) as u64); + digest ^= session.current_tick(); + } + digest ^ stable_hash(session.trace()) +} + +#[cfg(feature = "builtin")] +fn main() { + let mut args = env::args().skip(1); + let game = args.next().unwrap_or_else(|| "platformer".to_string()); + let iterations = args + .next() + .and_then(|value| value.parse::<u64>().ok()) + .unwrap_or(2_000_000); + + let digest = match game.as_str() { + "tictactoe" => run_tictactoe(iterations), + "blackjack" => run_blackjack(iterations), + #[cfg(feature = "physics")] + "platformer" => run_platformer(iterations), + _ => { + eprintln!("unknown game '{game}', expected tictactoe|blackjack|platformer"); + std::process::exit(2); + } + }; + + println!("game={game} iterations={iterations} digest={digest:016x}"); +} + +#[cfg(not(feature = "builtin"))] +fn main() { + let _ = env::args(); + eprintln!("perf_probe requires the builtin feature"); + std::process::exit(1); +} diff --git a/examples/pong_core.rs b/examples/pong_core.rs new file mode 100644 index 0000000..4158216 --- /dev/null +++ b/examples/pong_core.rs @@ -0,0 +1,150 @@ +use gameengine::core::single_player::{self, SinglePlayerGame, SinglePlayerRewardBuf}; +use gameengine::{ + Buffer, DeterministicRng, FixedVec, PlayerId, Seed, Session, StepOutcome, Termination, +}; 
+ +const W: i16 = 40; +const H: i16 = 20; +const P: i16 = 2; +const WIN: u8 = 5; +const ACTIONS: [Act; 3] = [Act::Stay, Act::Up, Act::Down]; + +#[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)] +enum Act { + #[default] + Stay, + Up, + Down, +} + +#[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)] +struct St { + p1: i16, + p2: i16, + bx: i16, + by: i16, + vx: i16, + vy: i16, + s1: u8, + s2: u8, + done: bool, + winner: Option<PlayerId>, +} + +#[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)] +struct Pong; + +impl Pong { + fn clamp(y: i16) -> i16 { + y.clamp(P, H - 1 - P) + } + fn reset_ball(st: &mut St, toward_p1: bool) { + st.bx = W / 2; + st.by = H / 2; + st.vx = if toward_p1 { -1 } else { 1 }; + st.vy = if (st.s1 + st.s2).is_multiple_of(2) { + 1 + } else { + -1 + }; + } +} + +impl SinglePlayerGame for Pong { + type Params = (); + type State = St; + type Action = Act; + type Obs = St; + type WorldView = St; + type ActionBuf = FixedVec; + type WordBuf = FixedVec; + + fn name(&self) -> &'static str { + "pong-core" + } + fn init_with_params(&self, _seed: Seed, _params: &()) -> St { + St { + p1: H / 2, + p2: H / 2, + bx: W / 2, + by: H / 2, + vx: 1, + vy: 1, + ..St::default() + } + } + fn is_terminal(&self, st: &St) -> bool { + st.done + } + fn legal_actions(&self, _st: &St, out: &mut Self::ActionBuf) { + out.clear(); + out.extend_from_slice(&ACTIONS).unwrap(); + } + fn observe_player(&self, st: &St) -> St { + *st + } + fn world_view(&self, st: &St) -> St { + *st + } + fn step_in_place( + &self, + st: &mut St, + action: Option<Act>, + _rng: &mut DeterministicRng, + out: &mut StepOutcome<SinglePlayerRewardBuf>, + ) { + if st.done { + out.termination = Termination::Terminal { winner: st.winner }; + single_player::push_reward(&mut out.rewards, 0); + return; + } + let dy = match action.unwrap_or(Act::Stay) { + Act::Stay => 0, + Act::Up => -1, + Act::Down => 1, + }; + st.p1 = Self::clamp(st.p1 + dy); + st.p2 = Self::clamp(st.p2 + (st.by > st.p2) as i16 - (st.by < st.p2) as i16); +
st.bx += st.vx; + st.by += st.vy; + if st.by <= 0 || st.by >= H - 1 { + st.by = st.by.clamp(0, H - 1); + st.vy = -st.vy; + } + let mut reward = 0; + if st.bx <= 1 && (st.by - st.p1).abs() <= P { + st.vx = 1; + } else if st.bx >= W - 2 && (st.by - st.p2).abs() <= P { + st.vx = -1; + } else if st.bx < 0 { + st.s2 += 1; + reward = -1; + Self::reset_ball(st, false); + } else if st.bx >= W { + st.s1 += 1; + reward = 1; + Self::reset_ball(st, true); + } + if st.s1 >= WIN || st.s2 >= WIN { + st.done = true; + st.winner = Some(if st.s1 > st.s2 { 0 } else { 1 }); + out.termination = Termination::Terminal { winner: st.winner }; + } else { + out.termination = Termination::Ongoing; + } + single_player::push_reward(&mut out.rewards, reward); + } +} + +fn main() { + let mut session = Session::new(Pong, 7); + while !session.is_terminal() && session.current_tick() < 64 { + session.step(&[]); + } + println!( + "tick={} score={} - {}", + session.current_tick(), + session.state().s1, + session.state().s2 + ); +} diff --git a/proofs/README.md b/proofs/README.md index cc440d5..2c4f372 100644 --- a/proofs/README.md +++ b/proofs/README.md @@ -9,14 +9,26 @@ This crate treats Kani as part of the engine, not an afterthought. ## Local Commands ```bash -bash scripts/run-kani.sh +bash scripts/run-verification.sh ``` -The script runs the proof surface harness-by-harness across three verified layers: +Run Verus model checks directly: + +```bash +bash scripts/run-verus.sh +``` + +Pin and auto-fetch the exact Verus release used by CI: + +```bash +AUTO_FETCH_VERUS=1 REQUIRE_VERUS=1 bash scripts/run-verus.sh +``` + +The unified script runs tests, checks, clippy, bench compilation, Kani harnesses, and Verus model checks across three verified layers: - the default headless kernel, -- the `builtin-games` reference environments, -- the `builtin-games + physics` platformer/physics surface. +- the `builtin` reference environments, +- the `builtin + physics` platformer/physics surface. 
This keeps failures isolated and avoids monolithic proof runs that are harder to diagnose. @@ -26,16 +38,22 @@ exploring an unbounded rejection loop. ## What Is Verified -- Fixed-capacity buffer behavior in [`src/buffer.rs`](/home/theo/dev/gameengine/src/buffer.rs) -- Reward and replay encoding primitives in [`src/types.rs`](/home/theo/dev/gameengine/src/types.rs) -- Compact reward codec soundness in [`src/compact.rs`](/home/theo/dev/gameengine/src/compact.rs) -- PRNG replay/fork determinism in [`src/rng.rs`](/home/theo/dev/gameengine/src/rng.rs) -- Rollback and replay restoration in [`src/session.rs`](/home/theo/dev/gameengine/src/session.rs) -- Game-specific properties in the builtin game modules when `builtin-games` is enabled +See [`proofs/manifest.txt`](manifest.txt) for the machine-readable proof boundary and +[`proofs/claim.md`](claim.md) for the rendered human-readable matrix. + +- Fixed-capacity buffer behavior in [`src/buffer.rs`](../src/buffer.rs) +- Reward and replay encoding primitives in [`src/types.rs`](../src/types.rs) +- Compact reward codec soundness in [`src/compact.rs`](../src/compact.rs) +- PRNG replay/fork determinism in [`src/rng.rs`](../src/rng.rs) +- Rollback and replay restoration in [`src/session.rs`](../src/session.rs) +- Game-specific properties in the builtin game modules when `builtin` is enabled - Physics invariants for the engine-owned 2D world and the platformer environment when - `builtin-games` and `physics` are enabled -- The render stack is intentionally outside the proof claim; it consumes verified game state but - does not participate in the Kani surface + `builtin` and `physics` are enabled +- Verus model lemmas in [`proofs/verus/session_refinement.rs`](verus/session_refinement.rs) + and [`proofs/verus/liveness_model.rs`](verus/liveness_model.rs) + for replay fold refinement, canonical observation-schema constraints, and liveness scaffolding +- Render/input/runtime behavior is covered by tests and benchmarks; it is not 
currently + claimed as fully formally verified ## Verification Pattern For New Games @@ -47,12 +65,15 @@ exploring an unbounded rejection loop. - `world_view_invariant` - `transition_postcondition` 2. Add runtime tests for determinism, replay, compact codecs, and rollback if the game uses sessions. -3. Add `#[cfg(kani)]` proof harnesses in the game module. -4. Call the shared helpers in [`src/verification.rs`](/home/theo/dev/gameengine/src/verification.rs) for transition and observation contracts. -5. If the game exposes a compact codec, prove action round-trips and reward range correctness. -6. If the game uses the `physics` feature, prove the world invariant before and after every step. -7. If the game is a first-party reference environment, gate it behind `builtin-games` and add its - harnesses to [`scripts/run-kani.sh`](/home/theo/dev/gameengine/scripts/run-kani.sh). +3. Implement the proof-layer traits in [`src/proof/model.rs`](../src/proof/model.rs) when the + game opts into executable model/refinement checks. + Add an explicit `impl proof::VerifiedGame for MyGame {}` only after the stronger surface is intentional. +4. Add `#[cfg(kani)]` proof harnesses in the game module, preferably through the proof macros. +5. Call the shared helpers in [`src/verification.rs`](../src/verification.rs) for transition and observation contracts. +6. If the game exposes a compact codec, prove action round-trips and reward range correctness. +7. If the game uses the `physics` feature, prove the world invariant before and after every step. +8. If the game is a first-party reference environment, register its claims and harnesses in + [`proofs/manifest.txt`](manifest.txt) so the verification scripts and claim docs stay aligned. 
## Acceptance Rule @@ -60,7 +81,7 @@ A new first-party game is only "verified" when: - the runtime test suite passes, - the Kani harnesses pass in the default feature set, -- the Kani harnesses pass in `--features builtin-games` if it is a builtin reference game, -- the Kani harnesses pass in `--features "builtin-games physics"` if the game uses the physics subsystem, +- the Kani harnesses pass in `--features builtin` if it is a builtin reference game, +- the Kani harnesses pass in `--features "builtin physics"` if the game uses the physics subsystem, - rollback/fork determinism is covered, - compact encoding is covered when applicable. diff --git a/proofs/claim.md b/proofs/claim.md new file mode 100644 index 0000000..fe573a7 --- /dev/null +++ b/proofs/claim.md @@ -0,0 +1,40 @@ +# Proof Claim Matrix + +This document is derived from `proofs/manifest.txt` and states the current proof boundary. + +## Verified Boundary + +- kernel+builtins + +## Refined Claims + +- `builtin.tictactoe`: TicTacToe now has an executable model/refinement surface tying runtime init, step, replay, and liveness scaffolding to the proof framework. (proof ids: `ttt_model_init_refines_runtime`, `ttt_model_step_refines_runtime`, `ttt_model_replay_refines_runtime`, `ranked_progress_holds_for_opening_move`, `probabilistic_support_is_finite_and_nonempty`, `session_refinement`, `liveness_model`) + +## Implementation-Checked Claims + +- `engine.buffer`: Fixed-capacity vectors preserve prefix order and bit-word toggling remains sound. (proof ids: `fixed_vec_push_preserves_prefix_order`, `bit_words_round_trip`) +- `engine.compact`: Compact reward round-trips and schema/bit-width enforcement hold for the implementation helpers. (proof ids: `compact_reward_round_trip`, `compact_observation_words_match_schema`, `compact_reward_bit_width_is_enforced`) +- `engine.rng`: Reference RNG constructor and replay properties hold for the Rust implementation on the verified cases. 
(proof ids: `rng_state_sanitization_is_total`, `seeded_stream_constructor_handles_reference_cases`, `next_u64_is_repeatable_for_reference_states`) +- `engine.session`: Bounded rewind restoration and replay storage helpers hold for the Rust implementation. (proof ids: `replay_trace_records_steps`, `rewind_restores_prior_state`) +- `engine.env`: The compact environment rejects invalid observation/reward encodings instead of silently accepting them. (proof ids: `env_rejects_invalid_observation_words`, `env_rejects_reward_encoding_that_exceeds_bit_width`) +- `builtin.blackjack`: Blackjack maintains the existing bounded seeded safety/protocol proof surface. (proof ids: `concrete_seed_shuffle_is_a_full_permutation`, `player_observation_hides_opponent_hand_before_terminal`, `initial_observation_contracts_hold_for_concrete_seed`, `stand_action_replays_deterministically_for_seed_17`, `hand_evaluation_matches_busted_flag`) +- `builtin.platformer`: Platformer maintains the existing bounded default-config physics and safety proof surface. (proof ids: `wall_clamps_hold_for_all_edge_positions`, `jump_reward_is_bounded`, `initial_observation_and_world_contracts_hold`, `berry_mask_tracks_trigger_activation`, `clamping_keeps_body_in_bounds`, `oracle_view_matches_world_storage`) + +## Model-Only Claims + +- `engine.replay-laws`: Replay and canonical observation schema laws are proved at the Verus model level. (proof ids: `session_refinement`) +- `engine.liveness-laws`: Ranking-based termination and finite-support stochastic scaffolding are specified at the Verus model level. (proof ids: `liveness_model`) + +## Runtime-Tested Claims + +- `render.runtime`: Render/runtime behavior remains tested and benchmarked rather than formally proved. + +## Out Of Scope + +- `gpu.os`: GPU, OS windowing, and host graphics stacks remain outside the formal proof boundary. 
+ +## Assumptions + +- `builtin.blackjack`: Current bounded blackjack proofs are tied to concrete seeds and representative hands; they are not universal over all shuffled decks. +- `builtin.platformer`: Current bounded platformer proofs cover the default-config safety surface; full refinement proofs for parameterized physics games remain future work. +- `builtin.tictactoe`: The new liveness claims are about ranking/probabilistic scaffolding on representative traces, not an end-to-end universal fairness proof. diff --git a/proofs/future_game_template.md b/proofs/future_game_template.md index 6070e51..5dff107 100644 --- a/proofs/future_game_template.md +++ b/proofs/future_game_template.md @@ -3,15 +3,16 @@ Use this checklist when adding a new builtin or first-party game. If the game is intended to ship as a first-party reference environment, gate it behind the -`builtin-games` feature. Rendering stays outside the proof claim; only the pure game kernel, +`builtin` feature. Rendering stays outside direct GPU +proof scope; only the pure game kernel, world view, compact codec, and physics hooks belong in the verification checklist. ## Runtime Checklist -- Add a deterministic smoke test from `init(seed)` through a fixed action trace. +- Add a deterministic smoke test from `init_with_params(seed, &params)` through a fixed action trace. - Add a replay equivalence test using `Session::state_at`, `rewind_to`, and `fork_at`. - Add a no-allocation hot-path test for direct `step_in_place`. -- Add compact codec round-trip tests if the game implements `CompactGame`. +- Add compact codec round-trip tests for the game action/observation codec hooks. ## `Game` Hook Checklist @@ -24,6 +25,20 @@ Implement and document: - `world_view_invariant` - `transition_postcondition` +For single-player games, prefer implementing `core::single_player::SinglePlayerGame` and let the engine provide the `Game` adapter wiring. 
+ +## Proof-Layer Checklist + +If the game should participate in the stronger verified surface, also implement: + +- `proof::ModelGame` +- `proof::RefinementWitness` +- `proof::VerifiedGame` +- `proof::TerminationWitness` when a ranking argument exists +- `proof::ProbabilisticWitness` when the game has finite-support stochastic choices + +Register all Kani and Verus links in `proofs/manifest.txt`. + ## Kani Harness Skeleton ```rust @@ -33,30 +48,28 @@ mod proofs { use crate::buffer::FixedVec; use crate::types::PlayerAction; - #[kani::proof] - fn transition_contract_holds_for_representative_step() { - let game = MyGame::default(); - let state = game.init(1); - let mut actions = FixedVec::<PlayerAction<MyAction>, 1>::default(); - actions.push(PlayerAction { player: 0, action: MyAction::Default }).unwrap(); - crate::verification::assert_transition_contracts(&game, &state, &actions, 1); - } - - #[kani::proof] - fn observation_contract_holds_for_initial_state() { - let game = MyGame::default(); - let state = game.init(1); - crate::verification::assert_observation_contracts(&game, &state); - } - - #[kani::proof] - fn compact_round_trip_holds() { - let game = MyGame::default(); - crate::verification::assert_compact_roundtrip(&game, &MyAction::Default); - } + crate::declare_refinement_harnesses!( + game = MyGame::default(), + params = MyGame::default().default_params(), + seed = 1, + actions = { + let mut actions = FixedVec::<PlayerAction<MyAction>, 1>::default(); + actions.push(PlayerAction { player: 0, action: MyAction::Default }).unwrap(); + actions + }, + init = mygame_init_refines_runtime, + step = mygame_step_refines_runtime, + replay = mygame_replay_refines_runtime, + ); } ``` +Outside the proof module, add: + +```rust +impl crate::proof::VerifiedGame for MyGame {} +``` + If your game uses shuffle-heavy setup or rejection-sampled RNG, keep Kani harness seeds concrete unless you have a separately bounded proof wrapper for that RNG path. 
## Extra Checks For Physics Games diff --git a/proofs/manifest.txt b/proofs/manifest.txt new file mode 100644 index 0000000..0ea31c4 --- /dev/null +++ b/proofs/manifest.txt @@ -0,0 +1,55 @@ +# kind|... +boundary|kernel+builtins + +kani|bit_words_round_trip|default|bit_words_round_trip +kani|fixed_vec_push_preserves_prefix_order|default|fixed_vec_push_preserves_prefix_order +kani|compact_reward_round_trip|default|compact_reward_round_trip +kani|compact_observation_words_match_schema|default|compact_observation_words_match_schema +kani|compact_reward_bit_width_is_enforced|default|compact_reward_bit_width_is_enforced +kani|step_outcome_reward_lookup_defaults_to_zero|default|step_outcome_reward_lookup_defaults_to_zero +kani|env_rejects_invalid_observation_words|default|env_rejects_invalid_observation_words +kani|env_rejects_reward_encoding_that_exceeds_bit_width|default|env_rejects_reward_encoding_that_exceeds_bit_width +kani|replay_trace_records_steps|default|replay_trace_records_steps +kani|rng_state_sanitization_is_total|default|rng_state_sanitization_is_total +kani|seeded_stream_constructor_handles_reference_cases|default|seeded_stream_constructor_handles_reference_cases +kani|next_u64_is_repeatable_for_reference_states|default|next_u64_is_repeatable_for_reference_states +kani|rewind_restores_prior_state|default|rewind_restores_prior_state +kani|concrete_seed_shuffle_is_a_full_permutation|builtin|concrete_seed_shuffle_is_a_full_permutation +kani|player_observation_hides_opponent_hand_before_terminal|builtin|player_observation_hides_opponent_hand_before_terminal +kani|initial_observation_contracts_hold_for_concrete_seed|builtin|initial_observation_contracts_hold_for_concrete_seed +kani|stand_action_replays_deterministically_for_seed_17|builtin|stand_action_replays_deterministically_for_seed_17 +kani|hand_evaluation_matches_busted_flag|builtin|hand_evaluation_matches_busted_flag +kani|legal_actions_are_exactly_empty_cells|builtin|legal_actions_are_exactly_empty_cells 
+kani|invalid_move_never_mutates_board|builtin|invalid_move_never_mutates_board +kani|ttt_model_init_refines_runtime|builtin|ttt_model_init_refines_runtime +kani|ttt_model_step_refines_runtime|builtin|ttt_model_step_refines_runtime +kani|ttt_model_replay_refines_runtime|builtin|ttt_model_replay_refines_runtime +kani|ranked_progress_holds_for_opening_move|builtin|ranked_progress_holds_for_opening_move +kani|probabilistic_support_is_finite_and_nonempty|builtin|probabilistic_support_is_finite_and_nonempty +kani|clamping_keeps_body_in_bounds|builtin+physics|clamping_keeps_body_in_bounds +kani|oracle_view_matches_world_storage|builtin+physics|oracle_view_matches_world_storage +kani|wall_clamps_hold_for_all_edge_positions|builtin+physics|wall_clamps_hold_for_all_edge_positions +kani|jump_reward_is_bounded|builtin+physics|jump_reward_is_bounded +kani|initial_observation_and_world_contracts_hold|builtin+physics|initial_observation_and_world_contracts_hold +kani|berry_mask_tracks_trigger_activation|builtin+physics|berry_mask_tracks_trigger_activation + +verus|core_model|proofs/verus/core_model.rs +verus|session_refinement|proofs/verus/session_refinement.rs +verus|liveness_model|proofs/verus/liveness_model.rs + +claim|checked|engine.buffer|Fixed-capacity vectors preserve prefix order and bit-word toggling remains sound.|fixed_vec_push_preserves_prefix_order,bit_words_round_trip +claim|checked|engine.compact|Compact reward round-trips and schema/bit-width enforcement hold for the implementation helpers.|compact_reward_round_trip,compact_observation_words_match_schema,compact_reward_bit_width_is_enforced +claim|checked|engine.rng|Reference RNG constructor and replay properties hold for the Rust implementation on the verified cases.|rng_state_sanitization_is_total,seeded_stream_constructor_handles_reference_cases,next_u64_is_repeatable_for_reference_states +claim|checked|engine.session|Bounded rewind restoration and replay storage helpers hold for the Rust 
implementation.|replay_trace_records_steps,rewind_restores_prior_state +claim|checked|engine.env|The compact environment rejects invalid observation/reward encodings instead of silently accepting them.|env_rejects_invalid_observation_words,env_rejects_reward_encoding_that_exceeds_bit_width +claim|model|engine.replay-laws|Replay and canonical observation schema laws are proved at the Verus model level.|session_refinement +claim|model|engine.liveness-laws|Ranking-based termination and finite-support stochastic scaffolding are specified at the Verus model level.|liveness_model +claim|refined|builtin.tictactoe|TicTacToe now has an executable model/refinement surface tying runtime init, step, replay, and liveness scaffolding to the proof framework.|ttt_model_init_refines_runtime,ttt_model_step_refines_runtime,ttt_model_replay_refines_runtime,ranked_progress_holds_for_opening_move,probabilistic_support_is_finite_and_nonempty,session_refinement,liveness_model +claim|checked|builtin.blackjack|Blackjack maintains the existing bounded seeded safety/protocol proof surface.|concrete_seed_shuffle_is_a_full_permutation,player_observation_hides_opponent_hand_before_terminal,initial_observation_contracts_hold_for_concrete_seed,stand_action_replays_deterministically_for_seed_17,hand_evaluation_matches_busted_flag +claim|checked|builtin.platformer|Platformer maintains the existing bounded default-config physics and safety proof surface.|wall_clamps_hold_for_all_edge_positions,jump_reward_is_bounded,initial_observation_and_world_contracts_hold,berry_mask_tracks_trigger_activation,clamping_keeps_body_in_bounds,oracle_view_matches_world_storage +claim|runtime|render.runtime|Render/runtime behavior remains tested and benchmarked rather than formally proved.| +claim|out_of_scope|gpu.os|GPU, OS windowing, and host graphics stacks remain outside the formal proof boundary.| + +assumption|builtin.blackjack|Current bounded blackjack proofs are tied to concrete seeds and representative hands; 
they are not universal over all shuffled decks. +assumption|builtin.platformer|Current bounded platformer proofs cover the default-config safety surface; full refinement proofs for parameterized physics games remain future work. +assumption|builtin.tictactoe|The new liveness claims are about ranking/probabilistic scaffolding on representative traces, not an end-to-end universal fairness proof. diff --git a/proofs/verus/core_model.rs b/proofs/verus/core_model.rs new file mode 100644 index 0000000..aafdc5c --- /dev/null +++ b/proofs/verus/core_model.rs @@ -0,0 +1,41 @@ +use vstd::prelude::*; + +verus! { + +pub trait DeterministicTransition { + type State; + type Action; + + spec fn step(state: Self::State, action: Self::Action) -> Self::State; +} + +pub proof fn deterministic_step_reflexive<T: DeterministicTransition>( + state: T::State, + action: T::Action, +) + ensures + T::step(state, action) == T::step(state, action), +{ +} + +pub trait ReplayModel { + type State; + type Action; + + spec fn init() -> Self::State; + spec fn apply(state: Self::State, action: Self::Action) -> Self::State; + spec fn replay(log: Seq<Self::Action>) -> Self::State; + + proof fn replay_prefix_axiom(log: Seq<Self::Action>, next: Self::Action) + ensures + Self::replay(log.push(next)) == Self::apply(Self::replay(log), next); +} + +pub proof fn replay_prefix_is_refinement<T: ReplayModel>(log: Seq<T::Action>, next: T::Action) + ensures + T::replay(log.push(next)) == T::apply(T::replay(log), next), +{ + T::replay_prefix_axiom(log, next); +} + +} // verus! diff --git a/proofs/verus/liveness_model.rs b/proofs/verus/liveness_model.rs new file mode 100644 index 0000000..589790d --- /dev/null +++ b/proofs/verus/liveness_model.rs @@ -0,0 +1,64 @@ +use vstd::prelude::*; + +verus! 
{ + +pub trait RankedTransitionModel { + type State; + type Action; + + spec fn step(state: Self::State, action: Self::Action) -> Self::State; + spec fn terminal(state: Self::State) -> bool; + spec fn rank(state: Self::State) -> nat; + + proof fn terminal_rank_axiom(state: Self::State) + ensures + Self::terminal(state) <==> Self::rank(state) == 0; + + proof fn rank_decreases_axiom(state: Self::State, action: Self::Action) + requires + !Self::terminal(state) + ensures + Self::terminal(Self::step(state, action)) + || Self::rank(Self::step(state, action)) < Self::rank(state); +} + +pub proof fn ranked_progress_is_well_founded<M: RankedTransitionModel>( + state: M::State, + action: M::Action, +) + requires + !M::terminal(state) + ensures + M::terminal(M::step(state, action)) || M::rank(M::step(state, action)) < M::rank(state), +{ + M::rank_decreases_axiom(state, action); +} + +pub trait FiniteSupportModel { + type State; + type Action; + + spec fn support(state: Self::State, action: Self::Action) -> Seq<(nat, Self::State)>; + + proof fn support_nonempty_axiom(state: Self::State, action: Self::Action) + ensures + Self::support(state, action).len() > 0; + + proof fn support_positive_weights_axiom(state: Self::State, action: Self::Action, index: int) + requires + 0 <= index < Self::support(state, action).len() + ensures + Self::support(state, action)[index].0 > 0; +} + +pub proof fn finite_support_has_positive_mass<M: FiniteSupportModel>( + state: M::State, + action: M::Action, +) + ensures + M::support(state, action).len() > 0, +{ + M::support_nonempty_axiom(state, action); +} + +} // verus! diff --git a/proofs/verus/session_refinement.rs b/proofs/verus/session_refinement.rs new file mode 100644 index 0000000..8eacb69 --- /dev/null +++ b/proofs/verus/session_refinement.rs @@ -0,0 +1,123 @@ +use vstd::prelude::*; + +verus! 
{ + +pub trait KernelReplayModel { + type State; + type Action; + + spec fn init(seed: nat) -> Self::State; + spec fn step(state: Self::State, action: Self::Action) -> Self::State; + spec fn replay(seed: nat, actions: Seq<Self::Action>) -> Self::State; + spec fn replay_from(state: Self::State, actions: Seq<Self::Action>) -> Self::State; + + proof fn replay_from_empty_axiom(state: Self::State) + ensures + Self::replay_from(state, Seq::<Self::Action>::empty()) == state; + + proof fn replay_from_step_axiom( + state: Self::State, + prefix: Seq<Self::Action>, + next: Self::Action, + ) + ensures + Self::replay_from(state, prefix.push(next)) + == Self::step(Self::replay_from(state, prefix), next); + + proof fn replay_is_from_init_axiom(seed: nat, actions: Seq<Self::Action>) + ensures + Self::replay(seed, actions) == Self::replay_from(Self::init(seed), actions); +} + +pub proof fn replay_empty_refines_init<M: KernelReplayModel>(seed: nat) + ensures + M::replay(seed, Seq::<M::Action>::empty()) == M::init(seed), +{ + M::replay_is_from_init_axiom(seed, Seq::<M::Action>::empty()); + M::replay_from_empty_axiom(M::init(seed)); +} + +pub proof fn replay_refines_left_fold<M: KernelReplayModel>( + seed: nat, + prefix: Seq<M::Action>, + next: M::Action, +) + ensures + M::replay(seed, prefix.push(next)) == M::step(M::replay(seed, prefix), next), +{ + M::replay_is_from_init_axiom(seed, prefix.push(next)); + M::replay_from_step_axiom(M::init(seed), prefix, next); + M::replay_is_from_init_axiom(seed, prefix); +} + +pub proof fn replay_singleton_refines_one_step<M: KernelReplayModel>( + seed: nat, + action: M::Action, +) + ensures + M::replay(seed, Seq::<M::Action>::empty().push(action)) + == M::step(M::init(seed), action), +{ + replay_refines_left_fold::<M>(seed, Seq::<M::Action>::empty(), action); + replay_empty_refines_init::<M>(seed); +} + +pub proof fn replay_from_prefix_state_refines_left_fold<M: KernelReplayModel>( + seed: nat, + prefix: Seq<M::Action>, + suffix_prefix: Seq<M::Action>, + next: M::Action, +) + ensures + M::replay_from(M::replay(seed, prefix), suffix_prefix.push(next)) + == M::step(M::replay_from(M::replay(seed, prefix), suffix_prefix), next), +{ + M::replay_from_step_axiom(M::replay(seed, prefix), 
suffix_prefix, next); +} + +pub trait ObservationModel { + type State; + type Obs; + + spec fn observe(state: Self::State, who: int) -> Self::Obs; + spec fn observer_is_valid(who: int) -> bool; + spec fn obs_well_formed(obs: Self::Obs) -> bool; + spec fn obs_schema_id(obs: Self::Obs) -> nat; + spec fn canonical_schema_id() -> nat; + + proof fn observation_totality_axiom(state: Self::State, who: int) + requires + Self::observer_is_valid(who) + ensures + Self::obs_well_formed(Self::observe(state, who)), + Self::obs_schema_id(Self::observe(state, who)) == Self::canonical_schema_id(); +} + +pub proof fn canonical_observation_schema_for_any_view<M: ObservationModel>( + state: M::State, + who_a: int, + who_b: int, +) + requires + M::observer_is_valid(who_a), + M::observer_is_valid(who_b) + ensures + M::obs_well_formed(M::observe(state, who_a)), + M::obs_well_formed(M::observe(state, who_b)), + M::obs_schema_id(M::observe(state, who_a)) == M::obs_schema_id(M::observe(state, who_b)), +{ + M::observation_totality_axiom(state, who_a); + M::observation_totality_axiom(state, who_b); +} + +pub proof fn canonical_schema_matches_declared_id<M: ObservationModel>(state: M::State, who: int) + requires + M::observer_is_valid(who) + ensures + M::obs_well_formed(M::observe(state, who)), + M::obs_schema_id(M::observe(state, who)) == M::canonical_schema_id(), +{ + M::observation_totality_axiom(state, who); +} + +} // verus! diff --git a/scripts/render-proof-claim.sh b/scripts/render-proof-claim.sh new file mode 100644 index 0000000..f526e23 --- /dev/null +++ b/scripts/render-proof-claim.sh @@ -0,0 +1,65 @@ +#!/usr/bin/env bash +set -euo pipefail + +ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." 
&& pwd)" +MANIFEST_FILE="${ROOT_DIR}/proofs/manifest.txt" +OUTPUT_FILE="${1:-${ROOT_DIR}/proofs/claim.md}" + +heading_for_status() { + case "$1" in + refined) echo "Refined Claims" ;; + checked) echo "Implementation-Checked Claims" ;; + model) echo "Model-Only Claims" ;; + runtime) echo "Runtime-Tested Claims" ;; + out_of_scope) echo "Out Of Scope" ;; + *) return 1 ;; + esac +} + +{ + echo "# Proof Claim Matrix" + echo + echo "This document is derived from \`proofs/manifest.txt\` and states the current proof boundary." + echo + echo "## Verified Boundary" + echo + awk -F'|' '$1 == "boundary" { printf("- %s\n", $2) }' "$MANIFEST_FILE" + + for status in refined checked model runtime out_of_scope; do + section="$(heading_for_status "$status")" + entries="$( + awk -F'|' -v status="$status" ' + $1 == "claim" && $2 == status { + printf("- `%s`: %s", $3, $4) + if (NF >= 5 && length($5) > 0) { + printf(" (proof ids: ") + n = split($5, links, ",") + for (i = 1; i <= n; i++) { + gsub(/^ +| +$/, "", links[i]) + if (i > 1) { + printf(", ") + } + printf("`%s`", links[i]) + } + printf(")") + } + printf("\n") + } + ' "$MANIFEST_FILE" + )" + if [[ -n "$entries" ]]; then + echo + echo "## ${section}" + echo + printf '%s\n' "$entries" + fi + done + + assumptions="$(awk -F'|' '$1 == "assumption" { printf("- `%s`: %s\n", $2, $3) }' "$MANIFEST_FILE")" + if [[ -n "$assumptions" ]]; then + echo + echo "## Assumptions" + echo + printf '%s\n' "$assumptions" + fi +} > "$OUTPUT_FILE" diff --git a/scripts/run-kani.sh b/scripts/run-kani.sh deleted file mode 100644 index e2bb987..0000000 --- a/scripts/run-kani.sh +++ /dev/null @@ -1,67 +0,0 @@ -#!/usr/bin/env bash -set -euo pipefail - -COMMON_HARNESSES=( - bit_words_round_trip - fixed_vec_push_preserves_prefix_order - compact_reward_round_trip - step_outcome_reward_lookup_defaults_to_zero - replay_trace_records_steps - rng_state_sanitization_is_total - seeded_stream_constructor_handles_reference_cases - 
next_u64_is_repeatable_for_reference_states - rewind_restores_prior_state -) - -BUILTIN_GAME_HARNESSES=( - concrete_seed_shuffle_is_a_full_permutation - player_observation_hides_opponent_hand_before_terminal - initial_observation_contracts_hold_for_concrete_seed - stand_action_replays_deterministically_for_seed_17 - hand_evaluation_matches_busted_flag - legal_actions_are_exactly_empty_cells - invalid_move_never_mutates_board -) - -PHYSICS_HARNESSES=( - clamping_keeps_body_in_bounds - oracle_view_matches_world_storage - wall_clamps_hold_for_all_edge_positions - jump_reward_is_bounded - initial_observation_and_world_contracts_hold - berry_mask_tracks_trigger_activation -) - -run_harnesses() { - local label="$1" - shift - local -a extra_args=("$@") - - for harness in "${COMMON_HARNESSES[@]}"; do - echo "Running Kani 0.67.0 ${label} harness: ${harness}" - cargo kani --lib "${extra_args[@]}" --harness "${harness}" - done -} - -run_builtin_harnesses() { - local label="$1" - shift - local -a extra_args=("$@") - - for harness in "${BUILTIN_GAME_HARNESSES[@]}"; do - echo "Running Kani 0.67.0 ${label} harness: ${harness}" - cargo kani --lib "${extra_args[@]}" --harness "${harness}" - done -} - -echo "Running Kani 0.67.0 on the default headless kernel" -run_harnesses "default" - -echo "Running Kani 0.67.0 on builtin non-physics games" -run_builtin_harnesses "builtin-games" --features builtin-games - -echo "Running Kani 0.67.0 on builtin physics games" -for harness in "${PHYSICS_HARNESSES[@]}"; do - echo "Running Kani 0.67.0 builtin-games+physics harness: ${harness}" - cargo kani --lib --features "builtin-games physics" --harness "${harness}" -done diff --git a/scripts/run-perf.sh b/scripts/run-perf.sh new file mode 100755 index 0000000..5c5b3e4 --- /dev/null +++ b/scripts/run-perf.sh @@ -0,0 +1,34 @@ +#!/usr/bin/env bash +set -euo pipefail + +ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." 
&& pwd)" +cd "$ROOT_DIR" + +GAME="${1:-platformer}" +ITERATIONS="${2:-2000000}" +FEATURES="${FEATURES:-builtin physics}" +DATA_FILE="${PERF_DATA_FILE:-/var/tmp/gameengine-perf.data}" + +export TMPDIR="${TMPDIR:-/tmp}" + +if ! command -v perf >/dev/null 2>&1; then + echo "perf is not installed" + exit 1 +fi + +echo "[perf] Building perf probe example" +cargo build --release --example perf_probe --features "$FEATURES" + +BIN="target/release/examples/perf_probe" +if [[ ! -x "$BIN" ]]; then + echo "missing perf probe binary: $BIN" + exit 1 +fi + +echo "[perf] perf stat ($GAME, iterations=$ITERATIONS)" +perf stat -e cycles,instructions,branches,branch-misses,cache-references,cache-misses \ + "$BIN" "$GAME" "$ITERATIONS" + +echo "[perf] perf record/report ($GAME, iterations=$ITERATIONS)" +perf record -g -o "$DATA_FILE" "$BIN" "$GAME" "$ITERATIONS" +perf report --stdio -i "$DATA_FILE" --sort=dso,symbol | head -n 120 diff --git a/scripts/run-verification.sh b/scripts/run-verification.sh new file mode 100755 index 0000000..271f304 --- /dev/null +++ b/scripts/run-verification.sh @@ -0,0 +1,71 @@ +#!/usr/bin/env bash +set -euo pipefail + +ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" +cd "$ROOT_DIR" + +export TMPDIR="${TMPDIR:-/tmp}" +MODE="${VERIFICATION_MODE:-full}" +MANIFEST_FILE="${ROOT_DIR}/proofs/manifest.txt" + +run_kani_scope() { + local scope="$1" + shift + local -a extra_args=("$@") + + while IFS='|' read -r kind id harness_scope target; do + [[ -z "${kind:-}" || "${kind:0:1}" == "#" ]] && continue + if [[ "$kind" == "kani" && "$harness_scope" == "$scope" ]]; then + echo "[kani] Running ${scope} harness: ${id}" + cargo kani --lib "${extra_args[@]}" --harness "${target}" + fi + done < "$MANIFEST_FILE" +} + +run_kani_matrix() { + if ! 
command -v cargo-kani >/dev/null 2>&1; then + echo "[kani] cargo-kani not found; skipping Kani matrix" + return 0 + fi + + echo "[kani] default headless kernel" + run_kani_scope "default" + + echo "[kani] builtin reference games" + run_kani_scope "builtin" --features builtin + + echo "[kani] builtin + physics games" + run_kani_scope "builtin+physics" --features "builtin physics" +} + +if [[ "$MODE" != "kani-only" ]]; then + echo "[verify] Running test and check matrix" + cargo test + cargo test --features builtin + cargo test --features "builtin physics" + cargo test --features parallel + cargo test --features "render builtin physics" + cargo check --features render + cargo check --features "render builtin" + cargo check --bin gameengine --features cli + cargo check --bin gameengine --features "cli physics render" + cargo check --target wasm32-unknown-unknown + cargo check --target wasm32-unknown-unknown --features physics + cargo check --target wasm32-unknown-unknown --features "render builtin physics" + cargo clippy --all-targets --all-features -- -D warnings + cargo bench --no-run --features "builtin physics" +fi + +run_kani_matrix + +if [[ "${RUN_VERUS:-1}" == "1" ]]; then + echo "[verus] Running Verus model checks" + bash scripts/run-verus.sh +fi + +if [[ "${RUN_PERF:-0}" == "1" ]]; then + echo "[perf] Running perf profile script" + bash scripts/run-perf.sh +fi + +echo "[verify] Completed successfully" diff --git a/scripts/run-verus.sh b/scripts/run-verus.sh new file mode 100755 index 0000000..529f34c --- /dev/null +++ b/scripts/run-verus.sh @@ -0,0 +1,100 @@ +#!/usr/bin/env bash +set -euo pipefail + +ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." 
&& pwd)" +cd "$ROOT_DIR" +MANIFEST_FILE="${ROOT_DIR}/proofs/manifest.txt" + +REQUIRE_VERUS="${REQUIRE_VERUS:-0}" +AUTO_FETCH_VERUS="${AUTO_FETCH_VERUS:-0}" +VERUS_RELEASE_URL="${VERUS_RELEASE_URL:-https://github.com/verus-lang/verus/releases/download/release%2F0.2026.03.28.3390e9a/verus-0.2026.03.28.3390e9a-x86-linux.zip}" + +bootstrap_verus_binary() { + local archive_path + archive_path="$(mktemp /tmp/verus-release.XXXXXX.zip)" + local extract_dir + extract_dir="$(mktemp -d /tmp/verus-release.XXXXXX)" + + echo "[verus] downloading pinned release archive" + curl -fsSL "$VERUS_RELEASE_URL" -o "$archive_path" + unzip -q "$archive_path" -d "$extract_dir" + + local extracted + extracted="$(find "$extract_dir" -mindepth 1 -maxdepth 1 -type d | head -n 1)" + if [[ -z "$extracted" || ! -x "$extracted/verus" ]]; then + echo "[verus] archive did not contain an executable verus directory" >&2 + return 1 + fi + + rm -rf ./verus_binary + mv "$extracted" ./verus_binary + chmod +x ./verus_binary/verus + echo "[verus] installed pinned release into ./verus_binary" +} + +resolve_verus_bin() { + local requested="${VERUS_BIN:-}" + local -a candidates=() + + if [[ -n "$requested" ]]; then + candidates+=("$requested") + else + candidates+=("./verus_binary/verus" "./verus_binary" "verus") + fi + + local candidate + for candidate in "${candidates[@]}"; do + if [[ -d "$candidate" && -x "$candidate/verus" ]]; then + echo "$candidate/verus" + return 0 + fi + if [[ -x "$candidate" ]]; then + echo "$candidate" + return 0 + fi + if command -v "$candidate" >/dev/null 2>&1; then + command -v "$candidate" + return 0 + fi + done + + return 1 +} + +if ! 
VERUS_BIN_PATH="$(resolve_verus_bin)"; then + if [[ "$AUTO_FETCH_VERUS" == "1" || "$REQUIRE_VERUS" == "1" ]]; then + bootstrap_verus_binary + VERUS_BIN_PATH="$(resolve_verus_bin)" + fi +fi + +if [[ -z "${VERUS_BIN_PATH:-}" ]]; then + if [[ "$REQUIRE_VERUS" == "1" ]]; then + echo "[verus] required but no Verus binary was found (checked VERUS_BIN, ./verus_binary/verus, ./verus_binary, PATH, optional bootstrap)" >&2 + exit 1 + fi + echo "[verus] no Verus binary found; skipping Verus model checks" + exit 0 +fi + +mapfile -t verus_models < <( + while IFS='|' read -r kind _id path; do + [[ -z "${kind:-}" || "${kind:0:1}" == "#" ]] && continue + if [[ "$kind" == "verus" ]]; then + printf '%s\n' "$path" + fi + done < "$MANIFEST_FILE" | sort +) + +if [[ ${#verus_models[@]} -eq 0 ]]; then + echo "[verus] no Verus model files found under proofs/verus" + exit 0 +fi + +echo "[verus] Using Verus binary: $VERUS_BIN_PATH" +for model in "${verus_models[@]}"; do + echo "[verus] Checking $model" + "$VERUS_BIN_PATH" "$model" --crate-type=lib +done + +echo "[verus] Completed successfully" diff --git a/src/bin/gameengine.rs b/src/bin/gameengine.rs new file mode 100644 index 0000000..cff967e --- /dev/null +++ b/src/bin/gameengine.rs @@ -0,0 +1,6 @@ +fn main() { + if let Err(error) = gameengine::cli::run_from_env() { + eprintln!("{error}"); + std::process::exit(1); + } +} diff --git a/src/buffer.rs b/src/buffer.rs index cde65f4..46d228b 100644 --- a/src/buffer.rs +++ b/src/buffer.rs @@ -1,28 +1,41 @@ +//! Fixed-capacity buffer utilities used to avoid heap allocations in core loops. + use core::fmt; use core::hash::{Hash, Hasher}; -use core::mem::MaybeUninit; use core::ops::{Deref, DerefMut}; +/// Error returned when attempting to push past fixed capacity. #[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)] pub struct CapacityError { + /// Maximum capacity of the destination buffer. pub capacity: usize, } +/// Minimal fixed-capacity buffer interface. 
pub trait Buffer { + /// Item stored by this buffer. type Item; + /// Maximum number of items this buffer can hold. const CAPACITY: usize; + /// Removes all items from the buffer. fn clear(&mut self); + /// Returns the current number of items. fn len(&self) -> usize; + /// Appends one item when capacity permits. fn push(&mut self, item: Self::Item) -> Result<(), CapacityError>; + /// Returns the populated immutable slice. fn as_slice(&self) -> &[Self::Item]; + /// Returns the populated mutable slice. fn as_mut_slice(&mut self) -> &mut [Self::Item]; + /// Returns whether the buffer has zero items. fn is_empty(&self) -> bool { self.len() == 0 } + /// Extends the buffer by cloning all items from `items`. fn extend_from_slice(&mut self, items: &[Self::Item]) -> Result<(), CapacityError> where Self::Item: Clone, @@ -34,6 +47,7 @@ pub trait Buffer { } } +/// Array-backed fixed-capacity vector. #[derive(Clone)] pub struct FixedVec { data: [T; N], @@ -41,65 +55,68 @@ pub struct FixedVec { } pub(crate) fn default_array() -> [T; N] { - let mut data = [const { MaybeUninit::::uninit() }; N]; - let mut index = 0usize; - while index < N { - data[index].write(T::default()); - index += 1; - } - // SAFETY: - // Every slot in `data` is initialized exactly once in the loop above, - // and `MaybeUninit` has the same layout as `T`. - unsafe { (&data as *const [MaybeUninit; N] as *const [T; N]).read() } + core::array::from_fn(|_| T::default()) } impl FixedVec { + /// Clears all elements. pub fn clear(&mut self) { self.len = 0; } + /// Returns current length. pub const fn len(&self) -> usize { self.len } + /// Returns compile-time capacity. pub const fn capacity(&self) -> usize { N } + /// Returns `true` when `len == 0`. pub const fn is_empty(&self) -> bool { self.len == 0 } + /// Returns the populated immutable slice. pub fn as_slice(&self) -> &[T] { &self.data[..self.len] } + /// Returns the populated mutable slice. 
pub fn as_mut_slice(&mut self) -> &mut [T] { &mut self.data[..self.len] } + /// Returns the first element when present. pub fn first(&self) -> Option<&T> { self.as_slice().first() } + /// Returns an immutable element reference by index. pub fn get(&self, index: usize) -> Option<&T> { self.as_slice().get(index) } + /// Returns a mutable element reference by index. pub fn get_mut(&mut self, index: usize) -> Option<&mut T> { self.as_mut_slice().get_mut(index) } + /// Iterates over populated elements. pub fn iter(&self) -> core::slice::Iter<'_, T> { self.as_slice().iter() } } impl FixedVec { + /// Creates an empty fixed-capacity vector. pub fn new() -> Self { Self::default() } + /// Pushes one element when capacity permits. pub fn push(&mut self, item: T) -> Result<(), CapacityError> { if self.len == N { return Err(CapacityError { capacity: N }); @@ -181,25 +198,30 @@ impl Hash for FixedVec { } impl FixedVec { + /// Returns whether `value` exists in the populated slice. pub fn contains(&self, value: &T) -> bool { self.as_slice().contains(value) } } +/// Fixed-size bitset backed by `N` machine words. #[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] pub struct BitWords { words: [u64; N], } impl BitWords { + /// Returns immutable access to backing words. pub const fn words(&self) -> &[u64; N] { &self.words } + /// Clears all bits. pub fn clear_all(&mut self) { self.words.fill(0); } + /// Sets `bit` when it falls within capacity. pub fn set_bit(&mut self, bit: usize) { let word = bit / 64; let offset = bit % 64; @@ -208,6 +230,7 @@ impl BitWords { } } + /// Clears `bit` when it falls within capacity. pub fn clear_bit(&mut self, bit: usize) { let word = bit / 64; let offset = bit % 64; @@ -216,6 +239,7 @@ impl BitWords { } } + /// Tests whether `bit` is set. 
pub fn test_bit(&self, bit: usize) -> bool { let word = bit / 64; let offset = bit % 64; diff --git a/src/builtin/blackjack/mod.rs b/src/builtin/blackjack/mod.rs new file mode 100644 index 0000000..810a643 --- /dev/null +++ b/src/builtin/blackjack/mod.rs @@ -0,0 +1,499 @@ +//! Builtin deterministic blackjack environment and compact observation codecs. + +use crate::buffer::FixedVec; +use crate::compact::{CompactSpec, decode_enum_action, encode_enum_action}; +use crate::core::cards::{ + BlackjackValue, evaluate_blackjack_hand, fill_standard_deck_52, + is_standard_deck_52_permutation, pack_cards_nibbles, +}; +use crate::core::single_player::{self, SinglePlayerRewardBuf}; +use crate::rng::DeterministicRng; +use crate::types::{PlayerId, Seed, StepOutcome, Termination}; +use crate::verification::reward_and_terminal_postcondition; +const MAX_HAND_CARDS: usize = 12; +const DECK_SIZE: usize = 52; +const BLACKJACK_ACTION_ORDER: [BlackjackAction; 2] = [BlackjackAction::Hit, BlackjackAction::Stand]; + +/// Player action in the blackjack round. +#[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)] +pub enum BlackjackAction { + /// Draw one additional card. + #[default] + Hit, + /// End the player turn and let the opponent resolve. + Stand, +} + +/// High-level stage of a blackjack round. +#[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)] +pub enum BlackjackPhase { + /// Waiting for the player-controlled action. + #[default] + PlayerTurn, + /// Opponent policy is resolving draws. + OpponentTurn, + /// Round is completed. + Terminal, +} + +/// Evaluated value of a blackjack hand. +pub type HandValue = BlackjackValue; + +/// Full deterministic blackjack state including shuffled deck. +#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] +pub struct BlackjackState { + /// Shuffled full deck represented as rank codes 1..=13. + pub deck: [u8; DECK_SIZE], + /// Index of the next card to draw from `deck`. + pub next_card: u8, + /// Player-held cards. 
+ pub player_cards: [u8; MAX_HAND_CARDS], + /// Number of valid entries in `player_cards`. + pub player_len: u8, + /// Opponent-held cards. + pub opponent_cards: [u8; MAX_HAND_CARDS], + /// Number of valid entries in `opponent_cards`. + pub opponent_len: u8, + /// Current game phase. + pub phase: BlackjackPhase, + /// Winner id if terminal with a winner. + pub winner: Option, +} + +impl Default for BlackjackState { + fn default() -> Self { + Self { + deck: [0; DECK_SIZE], + next_card: 0, + player_cards: [0; MAX_HAND_CARDS], + player_len: 0, + opponent_cards: [0; MAX_HAND_CARDS], + opponent_len: 0, + phase: BlackjackPhase::PlayerTurn, + winner: None, + } + } +} + +/// Canonical blackjack observation shared across viewpoints. +#[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)] +pub struct BlackjackObservation { + /// Current game phase. + pub phase: BlackjackPhase, + /// True if the round has completed. + pub terminal: bool, + /// Winner id if terminal with a winner. + pub winner: Option, + /// Player cards visible to the observer. + pub player_cards: [u8; MAX_HAND_CARDS], + /// Number of valid entries in `player_cards`. + pub player_len: u8, + /// Evaluated player hand value. + pub player_value: HandValue, + /// Opponent cards visible to the observer. + pub opponent_cards: [u8; MAX_HAND_CARDS], + /// Number of valid entries in `opponent_cards` that are visible. + pub opponent_visible_len: u8, + /// Total opponent card count, including hidden cards. + pub opponent_card_count: u8, + /// Evaluated opponent hand value when available. + pub opponent_value: HandValue, +} + +/// Full world/debug view type. +pub type BlackjackWorldView = BlackjackObservation; + +/// Builtin blackjack environment. 
+#[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)] +pub struct Blackjack; + +impl Blackjack { + fn evaluate_hand(cards: &[u8], len: u8) -> HandValue { + let mut hand = [0u8; MAX_HAND_CARDS]; + let max_len = MAX_HAND_CARDS.min(cards.len()); + hand[..max_len].copy_from_slice(&cards[..max_len]); + evaluate_blackjack_hand(&hand, len) + } + + fn fill_deck(deck: &mut [u8; DECK_SIZE]) { + fill_standard_deck_52(deck); + } + + fn draw_card(state: &mut BlackjackState) -> u8 { + let card = state.deck[state.next_card as usize]; + state.next_card += 1; + card + } + + fn push_player_card(state: &mut BlackjackState, card: u8) { + state.player_cards[state.player_len as usize] = card; + state.player_len += 1; + } + + fn push_opponent_card(state: &mut BlackjackState, card: u8) { + state.opponent_cards[state.opponent_len as usize] = card; + state.opponent_len += 1; + } + + fn player_value(state: &BlackjackState) -> HandValue { + Self::evaluate_hand(&state.player_cards, state.player_len) + } + + fn opponent_value(state: &BlackjackState) -> HandValue { + Self::evaluate_hand(&state.opponent_cards, state.opponent_len) + } + + fn resolve_terminal(state: &mut BlackjackState) -> i64 { + let player = Self::player_value(state); + let opponent = Self::opponent_value(state); + state.phase = BlackjackPhase::Terminal; + let (reward, winner) = if player.busted { + (-1, Some(1)) + } else if opponent.busted || player.total > opponent.total { + (1, Some(0)) + } else if player.total < opponent.total { + (-1, Some(1)) + } else { + (0, None) + }; + state.winner = winner; + reward + } + + fn resolve_opponent_turn(state: &mut BlackjackState, rng: &mut DeterministicRng) -> i64 { + state.phase = BlackjackPhase::OpponentTurn; + loop { + let value = Self::opponent_value(state); + if value.busted || value.total == 21 { + break; + } + let hit = rng.gen_range(2) == 0; + if !hit { + break; + } + if state.next_card as usize >= DECK_SIZE { + break; + } + let card = Self::draw_card(state); + 
Self::push_opponent_card(state, card); + } + Self::resolve_terminal(state) + } + + fn pack_cards(cards: &[u8; MAX_HAND_CARDS], len: u8) -> u64 { + pack_cards_nibbles(cards, len) + } + + fn winner_code(winner: Option) -> u64 { + match winner { + None => 0, + Some(0) => 1, + Some(_) => 2, + } + } + + fn phase_code(phase: BlackjackPhase) -> u64 { + match phase { + BlackjackPhase::PlayerTurn => 0, + BlackjackPhase::OpponentTurn => 1, + BlackjackPhase::Terminal => 2, + } + } + + fn encode_observation_with_header( + observation: &BlackjackObservation, + header: u64, + opponent_len: u8, + out: &mut FixedVec, + ) { + out.clear(); + out.push(header).unwrap(); + out.push(Self::pack_cards( + &observation.player_cards, + observation.player_len, + )) + .unwrap(); + out.push(Self::pack_cards(&observation.opponent_cards, opponent_len)) + .unwrap(); + out.push(0).unwrap(); + } +} + +impl single_player::SinglePlayerGame for Blackjack { + type Params = (); + type State = BlackjackState; + type Action = BlackjackAction; + type Obs = BlackjackObservation; + type WorldView = BlackjackWorldView; + type ActionBuf = FixedVec; + type WordBuf = FixedVec; + + fn name(&self) -> &'static str { + "blackjack" + } + + fn init_with_params(&self, seed: Seed, _params: &Self::Params) -> Self::State { + let mut rng = DeterministicRng::from_seed_and_stream(seed, 0); + let mut deck = [0u8; DECK_SIZE]; + Self::fill_deck(&mut deck); + rng.shuffle(&mut deck); + + let mut state = BlackjackState { + deck, + next_card: 0, + player_cards: [0; MAX_HAND_CARDS], + player_len: 0, + opponent_cards: [0; MAX_HAND_CARDS], + opponent_len: 0, + phase: BlackjackPhase::PlayerTurn, + winner: None, + }; + + let player_card_1 = Self::draw_card(&mut state); + Self::push_player_card(&mut state, player_card_1); + let opponent_card_1 = Self::draw_card(&mut state); + Self::push_opponent_card(&mut state, opponent_card_1); + let player_card_2 = Self::draw_card(&mut state); + Self::push_player_card(&mut state, player_card_2); + let 
opponent_card_2 = Self::draw_card(&mut state); + Self::push_opponent_card(&mut state, opponent_card_2); + state + } + + fn is_terminal(&self, state: &Self::State) -> bool { + matches!(state.phase, BlackjackPhase::Terminal) + } + + fn legal_actions(&self, state: &Self::State, out: &mut Self::ActionBuf) { + out.clear(); + if self.is_terminal(state) { + return; + } + let value = Self::player_value(state); + if value.total >= 21 { + out.push(BlackjackAction::Stand).unwrap(); + } else { + out.push(BlackjackAction::Hit).unwrap(); + out.push(BlackjackAction::Stand).unwrap(); + } + } + + fn observe_player(&self, state: &Self::State) -> Self::Obs { + let terminal = self.is_terminal(state); + let opponent_visible_len = if terminal { state.opponent_len } else { 0 }; + let mut opponent_cards = [0u8; MAX_HAND_CARDS]; + if terminal { + opponent_cards = state.opponent_cards; + } + BlackjackObservation { + phase: state.phase, + terminal, + winner: state.winner, + player_cards: state.player_cards, + player_len: state.player_len, + player_value: Self::player_value(state), + opponent_cards, + opponent_visible_len, + opponent_card_count: state.opponent_len, + opponent_value: HandValue::default(), + } + } + + fn observe_spectator(&self, state: &Self::State) -> Self::Obs { + BlackjackObservation { + phase: state.phase, + terminal: self.is_terminal(state), + winner: state.winner, + player_cards: state.player_cards, + player_len: state.player_len, + player_value: Self::player_value(state), + opponent_cards: state.opponent_cards, + opponent_visible_len: state.opponent_len, + opponent_card_count: state.opponent_len, + opponent_value: Self::opponent_value(state), + } + } + + fn world_view(&self, state: &Self::State) -> Self::WorldView { + self.observe_spectator(state) + } + + fn step_in_place( + &self, + state: &mut Self::State, + action: Option, + rng: &mut DeterministicRng, + out: &mut StepOutcome, + ) { + let reward = if self.is_terminal(state) { + out.termination = Termination::Terminal 
{ + winner: state.winner, + }; + 0 + } else if let Some(action) = action { + let player_value = Self::player_value(state); + let legal = if player_value.total >= 21 { + matches!(action, BlackjackAction::Stand) + } else { + true + }; + if !legal { + state.phase = BlackjackPhase::Terminal; + state.winner = Some(1); + out.termination = Termination::Terminal { winner: Some(1) }; + -1 + } else { + match action { + BlackjackAction::Hit => { + let card = Self::draw_card(state); + Self::push_player_card(state, card); + let updated = Self::player_value(state); + if updated.busted { + state.phase = BlackjackPhase::Terminal; + state.winner = Some(1); + out.termination = Termination::Terminal { winner: Some(1) }; + -1 + } else if updated.total == 21 { + let reward = Self::resolve_opponent_turn(state, rng); + out.termination = Termination::Terminal { + winner: state.winner, + }; + reward + } else { + 0 + } + } + BlackjackAction::Stand => { + let reward = Self::resolve_opponent_turn(state, rng); + out.termination = Termination::Terminal { + winner: state.winner, + }; + reward + } + } + } + } else { + state.phase = BlackjackPhase::Terminal; + state.winner = Some(1); + out.termination = Termination::Terminal { winner: Some(1) }; + -1 + }; + + single_player::push_reward(&mut out.rewards, reward); + if !self.is_terminal(state) { + out.termination = Termination::Ongoing; + } + } + + fn state_invariant(&self, state: &Self::State) -> bool { + if state.player_len < 2 + || state.opponent_len < 2 + || usize::from(state.player_len) > MAX_HAND_CARDS + || usize::from(state.opponent_len) > MAX_HAND_CARDS + || usize::from(state.next_card) > DECK_SIZE + || !is_standard_deck_52_permutation(&state.deck) + { + return false; + } + if self.is_terminal(state) { + let mut resolved = *state; + Self::resolve_terminal(&mut resolved); + resolved.winner == state.winner + } else { + true + } + } + + fn player_observation_invariant(&self, state: &Self::State, observation: &Self::Obs) -> bool { + if 
self.is_terminal(state) { + observation.opponent_visible_len == state.opponent_len + && observation.opponent_cards == state.opponent_cards + } else { + if observation.opponent_visible_len != 0 { + return false; + } + for index in 0..MAX_HAND_CARDS { + if observation.opponent_cards[index] != 0 { + return false; + } + } + true + } + } + + fn transition_postcondition( + &self, + _pre: &Self::State, + _action: Option, + post: &Self::State, + outcome: &StepOutcome, + ) -> bool { + reward_and_terminal_postcondition( + outcome.reward_for(0), + -1, + 1, + post.phase == BlackjackPhase::Terminal, + outcome.is_terminal(), + ) + } + + fn compact_spec(&self) -> CompactSpec { + CompactSpec { + action_count: 2, + observation_bits: 64, + observation_stream_len: 4, + reward_bits: 2, + min_reward: -1, + max_reward: 1, + reward_offset: 1, + } + } + + fn encode_action(&self, action: &Self::Action) -> u64 { + encode_enum_action(*action, &BLACKJACK_ACTION_ORDER) + } + + fn decode_action(&self, encoded: u64) -> Option { + decode_enum_action(encoded, &BLACKJACK_ACTION_ORDER) + } + + fn encode_player_observation(&self, observation: &Self::Obs, out: &mut Self::WordBuf) { + let header = Self::phase_code(observation.phase) + | ((observation.terminal as u64) << 4) + | ((u64::from(observation.player_len)) << 8) + | ((u64::from(observation.player_value.total)) << 12) + | ((observation.player_value.soft as u64) << 20) + | ((u64::from(observation.opponent_card_count)) << 24) + | ((u64::from(observation.opponent_visible_len)) << 28) + | (Self::winner_code(observation.winner) << 32); + Self::encode_observation_with_header( + observation, + header, + observation.opponent_visible_len, + out, + ); + } + + fn encode_spectator_observation(&self, observation: &Self::Obs, out: &mut Self::WordBuf) { + let header = Self::phase_code(observation.phase) + | ((observation.terminal as u64) << 4) + | ((u64::from(observation.player_len)) << 8) + | ((u64::from(observation.player_value.total)) << 12) + | 
((observation.player_value.soft as u64) << 20) + | ((u64::from(observation.opponent_card_count)) << 24) + | ((u64::from(observation.opponent_value.total)) << 28) + | (Self::winner_code(observation.winner) << 36); + Self::encode_observation_with_header( + observation, + header, + observation.opponent_visible_len, + out, + ); + } +} + +#[cfg(test)] +mod tests; + +#[cfg(kani)] +mod proofs; diff --git a/src/builtin/blackjack/proofs.rs b/src/builtin/blackjack/proofs.rs new file mode 100644 index 0000000..bbfd396 --- /dev/null +++ b/src/builtin/blackjack/proofs.rs @@ -0,0 +1,72 @@ +use super::{Blackjack, BlackjackAction, BlackjackPhase, HandValue, MAX_HAND_CARDS}; +use crate::buffer::FixedVec; +use crate::game::Game; +use crate::types::PlayerAction; + +#[kani::proof] +#[kani::unwind(64)] +fn concrete_seed_shuffle_is_a_full_permutation() { + let state = Blackjack.init(11); + let mut counts = [0u8; 14]; + for card in state.deck { + counts[card as usize] += 1; + } + let mut rank = 1usize; + while rank <= 13 { + assert_eq!(counts[rank], 4); + rank += 1; + } +} + +#[kani::proof] +#[kani::unwind(64)] +fn player_observation_hides_opponent_hand_before_terminal() { + let state = Blackjack.init(11); + let observation = Blackjack.observe_player(&state, 0); + if state.phase != BlackjackPhase::Terminal { + assert_eq!(observation.opponent_visible_len, 0); + } +} + +#[kani::proof] +#[kani::unwind(64)] +fn initial_observation_contracts_hold_for_concrete_seed() { + let game = Blackjack; + let state = game.init(11); + crate::verification::assert_observation_contracts(&game, &state); +} + +#[kani::proof] +#[kani::unwind(64)] +fn stand_action_replays_deterministically_for_seed_17() { + let state = Blackjack.init(17); + let mut actions = FixedVec::, 1>::default(); + actions + .push(PlayerAction { + player: 0, + action: BlackjackAction::Stand, + }) + .unwrap(); + crate::verification::assert_transition_contracts(&Blackjack, &state, &actions, 17); +} + +#[kani::proof] +#[kani::unwind(32)] +fn 
hand_evaluation_matches_busted_flag() { + let len: u8 = kani::any(); + kani::assume(len <= MAX_HAND_CARDS as u8); + let mut cards = [1u8; MAX_HAND_CARDS]; + for card in &mut cards { + *card = kani::any(); + kani::assume((1..=13).contains(card)); + } + let value = Blackjack::evaluate_hand(&cards, len); + assert_eq!( + value, + HandValue { + total: value.total, + soft: value.soft, + busted: value.total > 21, + } + ); +} diff --git a/src/builtin/blackjack/tests.rs b/src/builtin/blackjack/tests.rs new file mode 100644 index 0000000..fd3c716 --- /dev/null +++ b/src/builtin/blackjack/tests.rs @@ -0,0 +1,140 @@ +use super::*; +use crate::game::Game; +use crate::policy::{FirstLegalPolicy, RandomPolicy}; +use crate::session::Session; +use crate::types::PlayerAction; +use crate::verification::{ + assert_compact_roundtrip, assert_observation_contracts, assert_transition_contracts, +}; + +fn state_from_hands(player: &[u8], opponent: &[u8]) -> BlackjackState { + let mut state = BlackjackState { + deck: [0; DECK_SIZE], + next_card: 0, + player_cards: [0; MAX_HAND_CARDS], + player_len: 0, + opponent_cards: [0; MAX_HAND_CARDS], + opponent_len: 0, + phase: BlackjackPhase::PlayerTurn, + winner: None, + }; + Blackjack::fill_deck(&mut state.deck); + for &card in player { + Blackjack::push_player_card(&mut state, card); + } + for &card in opponent { + Blackjack::push_opponent_card(&mut state, card); + } + state +} + +#[test] +fn hand_value_handles_soft_aces() { + assert_eq!( + Blackjack::evaluate_hand(&[1, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 2), + HandValue { + total: 21, + soft: true, + busted: false, + } + ); + assert_eq!( + Blackjack::evaluate_hand(&[1, 1, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0], 3), + HandValue { + total: 21, + soft: true, + busted: false, + } + ); + assert_eq!( + Blackjack::evaluate_hand(&[1, 1, 10, 10, 0, 0, 0, 0, 0, 0, 0, 0], 4), + HandValue { + total: 22, + soft: false, + busted: true, + } + ); +} + +#[test] +fn shuffled_deck_is_a_full_permutation() { + let state = 
Blackjack.init(11); + let mut counts = [0u8; 14]; + for card in state.deck { + counts[card as usize] += 1; + } + let mut rank = 1usize; + while rank <= 13 { + assert_eq!(counts[rank], 4, "rank {rank} should appear four times"); + rank += 1; + } + assert_observation_contracts(&Blackjack, &state); +} + +#[test] +fn showdown_matrix_is_correct() { + let mut player_win = state_from_hands(&[10, 10], &[9, 9]); + assert_eq!(Blackjack::resolve_terminal(&mut player_win), 1); + assert_eq!(player_win.winner, Some(0)); + + let mut opponent_win = state_from_hands(&[10, 8], &[10, 9]); + assert_eq!(Blackjack::resolve_terminal(&mut opponent_win), -1); + assert_eq!(opponent_win.winner, Some(1)); + + let mut push = state_from_hands(&[10, 7], &[9, 8]); + assert_eq!(Blackjack::resolve_terminal(&mut push), 0); + assert_eq!(push.winner, None); +} + +#[test] +fn seeded_round_trip_is_reproducible() { + let mut left = Session::new(Blackjack, 11); + let mut right = Session::new(Blackjack, 11); + let action = [PlayerAction { + player: 0, + action: BlackjackAction::Hit, + }]; + let left_outcome = left.step(&action).clone(); + let right_outcome = right.step(&action).clone(); + assert_eq!(left.state(), right.state()); + assert_eq!(left_outcome, right_outcome); +} + +#[test] +fn verification_helpers_hold_for_player_hit() { + let game = Blackjack; + let state = game.init(11); + let mut actions = FixedVec::, 1>::default(); + actions + .push(PlayerAction { + player: 0, + action: BlackjackAction::Hit, + }) + .unwrap(); + assert_transition_contracts(&game, &state, &actions, 11); + assert_compact_roundtrip(&game, &BlackjackAction::Hit); +} + +#[test] +fn seeded_sessions_preserve_invariants_across_policies() { + for seed in 1..=256 { + let mut first = FirstLegalPolicy; + let mut random = RandomPolicy; + + let mut first_session = Session::new(Blackjack, seed); + assert!(Blackjack.state_invariant(first_session.state())); + let mut first_policies: [&mut dyn crate::policy::Policy; 1] = [&mut first]; + while 
!first_session.is_terminal() && first_session.current_tick() < 16 { + first_session.step_with_policies(&mut first_policies); + } + assert!(Blackjack.state_invariant(first_session.state())); + + let mut random_session = Session::new(Blackjack, seed); + assert!(Blackjack.state_invariant(random_session.state())); + let mut random_policies: [&mut dyn crate::policy::Policy; 1] = [&mut random]; + while !random_session.is_terminal() && random_session.current_tick() < 16 { + random_session.step_with_policies(&mut random_policies); + } + assert!(Blackjack.state_invariant(random_session.state())); + } +} diff --git a/src/builtin/mod.rs b/src/builtin/mod.rs new file mode 100644 index 0000000..fc3298c --- /dev/null +++ b/src/builtin/mod.rs @@ -0,0 +1,11 @@ +//! Builtin game implementations shipped with the engine. + +pub mod blackjack; +#[cfg(feature = "physics")] +pub mod platformer; +pub mod tictactoe; + +pub use blackjack::*; +#[cfg(feature = "physics")] +pub use platformer::*; +pub use tictactoe::*; diff --git a/src/builtin/platformer/mod.rs b/src/builtin/platformer/mod.rs new file mode 100644 index 0000000..95e627d --- /dev/null +++ b/src/builtin/platformer/mod.rs @@ -0,0 +1,587 @@ +//! Builtin deterministic platformer environment backed by fixed-capacity physics. 
+ +use crate::buffer::{Buffer, FixedVec}; +use crate::compact::{CompactSpec, decode_enum_action, encode_enum_action}; +use crate::core::single_player::{self, SinglePlayerRewardBuf}; +use crate::math::{Aabb2, StrictF64, Vec2}; +use crate::physics::{ + BodyKind, PhysicsBody2d, PhysicsWorld2d, collect_actor_trigger_contacts, + set_trigger_mask_deferred, +}; +use crate::rng::DeterministicRng; +use crate::types::{PlayerId, Reward, Seed, StepOutcome, Termination}; +use crate::verification::reward_and_terminal_postcondition; + +const BERRY_COUNT: usize = 6; +const PLAYER_BODY_ID: u16 = 1; +const FIRST_BERRY_BODY_ID: u16 = 10; +const PLATFORMER_BODIES: usize = 1 + BERRY_COUNT; +const PLATFORMER_CONTACTS: usize = PLATFORMER_BODIES * (PLATFORMER_BODIES - 1) / 2; +const ALL_BERRIES_MASK: u8 = 0b00_111111; +const PLATFORMER_Y_SHIFT: u8 = 8; +const PLATFORMER_REMAINING_BERRIES_SHIFT: u8 = 16; +const PLATFORMER_TERMINAL_SHIFT: u8 = 22; +const PLATFORMER_OBSERVATION_BITS: u8 = PLATFORMER_TERMINAL_SHIFT + 1; +const PLATFORMER_ACTION_ORDER: [PlatformerAction; 4] = [ + PlatformerAction::Stay, + PlatformerAction::Left, + PlatformerAction::Right, + PlatformerAction::Jump, +]; + +mod world; +use world::berry_views; +pub use world::{BerryView, PlatformerWorldView}; + +/// Player action in the platformer world. +#[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)] +pub enum PlatformerAction { + /// Keep current horizontal position. + #[default] + Stay, + /// Move left by one tile if possible. + Left, + /// Move right by one tile if possible. + Right, + /// Jump upward by configured jump delta. + Jump, +} + +/// Parameter set for the deterministic platformer environment. +#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] +pub struct PlatformerConfig { + /// Arena width in tile units. + pub width: u8, + /// Arena height in tile units. + pub height: u8, + /// Player body width in tile units. + pub player_width: u8, + /// Player body height in tile units. 
+ pub player_height: u8, + /// Vertical displacement applied by `Jump`. + pub jump_delta: u8, + /// Shared berry y-coordinate. + pub berry_y: u8, + /// Sorted berry x-coordinates. + pub berry_xs: [u8; BERRY_COUNT], + /// Numerator for jump-sprain Bernoulli penalty. + pub sprain_numerator: u64, + /// Denominator for jump-sprain Bernoulli penalty. + pub sprain_denominator: u64, + /// Reward added when collecting one berry. + pub berry_reward: Reward, + /// Bonus reward added when all berries are collected. + pub finish_bonus: Reward, +} + +impl Default for PlatformerConfig { + fn default() -> Self { + Self { + width: 12, + height: 3, + player_width: 1, + player_height: 1, + jump_delta: 1, + berry_y: 2, + berry_xs: [1, 3, 5, 7, 9, 11], + sprain_numerator: 1, + sprain_denominator: 10, + berry_reward: 1, + finish_bonus: 10, + } + } +} + +impl PlatformerConfig { + fn checked_step_reward(self, collected: u8, finished: bool, sprained: bool) -> Option { + let mut reward = i128::from(self.berry_reward) * i128::from(collected); + if finished { + reward += i128::from(self.finish_bonus); + } + if sprained { + reward -= 1; + } + if reward < i128::from(Reward::MIN) || reward > i128::from(Reward::MAX) { + return None; + } + Some(reward as Reward) + } + + fn reward_bounds(self) -> Option<(Reward, Reward)> { + let mut min_reward = Reward::MAX; + let mut max_reward = Reward::MIN; + let mut collected = 0u8; + while collected <= BERRY_COUNT as u8 { + for finished in [false, true] { + if finished && collected == 0 { + continue; + } + for sprained in [false, true] { + let reward = self.checked_step_reward(collected, finished, sprained)?; + min_reward = min_reward.min(reward); + max_reward = max_reward.max(reward); + } + } + collected += 1; + } + Some((min_reward, max_reward)) + } + + fn compact_spec(self) -> Option { + let (min_reward, max_reward) = self.reward_bounds()?; + let reward_span = i128::from(max_reward) - i128::from(min_reward); + if reward_span < 0 || reward_span > 
i128::from(u64::MAX) { + return None; + } + let reward_offset = -i128::from(min_reward); + if reward_offset < i128::from(Reward::MIN) || reward_offset > i128::from(Reward::MAX) { + return None; + } + + let reward_bits = if reward_span == 0 { + 1 + } else { + (u64::BITS - (reward_span as u64).leading_zeros()) as u8 + }; + + Some(CompactSpec { + action_count: 4, + observation_bits: PLATFORMER_OBSERVATION_BITS, + observation_stream_len: 1, + reward_bits, + min_reward, + max_reward, + reward_offset: reward_offset as Reward, + }) + } + + /// Returns the axis-aligned world bounds. + pub fn arena_bounds(self) -> Aabb2 { + Aabb2::new( + Vec2::new(StrictF64::new(0.0), StrictF64::new(0.0)), + Vec2::new( + StrictF64::new(self.width as f64), + StrictF64::new(self.height as f64), + ), + ) + } + + /// Returns player half-extents used by physics body creation. + pub fn player_half_extents(self) -> Vec2 { + Vec2::new( + StrictF64::new(self.player_width as f64 / 2.0), + StrictF64::new(self.player_height as f64 / 2.0), + ) + } + + /// Converts tile coordinates to player-center world coordinates. + pub fn player_center(self, x: u8, y: u8) -> Vec2 { + Vec2::new( + StrictF64::new(x as f64 + self.player_width as f64 / 2.0), + StrictF64::new(y as f64 + self.player_height as f64 / 2.0), + ) + } + + /// Returns center position for berry `index`. + pub fn berry_center(self, index: usize) -> Vec2 { + Vec2::new( + StrictF64::new(self.berry_xs[index] as f64 + 0.5), + StrictF64::new(self.berry_y as f64), + ) + } + + /// Validates internal consistency and geometric constraints. 
+ pub fn invariant(self) -> bool { + if self.width == 0 + || self.height == 0 + || self.player_width == 0 + || self.player_height == 0 + || self.player_width > self.width + || self.player_height > self.height + || self.jump_delta >= self.height + || self.sprain_denominator == 0 + || self.sprain_numerator > self.sprain_denominator + || self.berry_y >= self.height + || self.compact_spec().is_none() + { + return false; + } + + let mut index = 1usize; + while index < self.berry_xs.len() { + if self.berry_xs[index - 1] >= self.berry_xs[index] + || self.berry_xs[index] >= self.width + { + return false; + } + index += 1; + } + + true + } +} + +/// Full platformer state. +#[derive(Clone, Debug, Eq, Hash, PartialEq)] +pub struct PlatformerState { + /// Active immutable configuration for this episode. + pub config: PlatformerConfig, + /// Physics simulation world containing player and berries. + pub world: PhysicsWorld2d, + /// Bitset of still-active berries. + pub remaining_berries: u8, +} + +/// Canonical player/spectator observation. +#[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)] +pub struct PlatformerObservation { + /// Player x tile coordinate. + pub x: u8, + /// Player y tile coordinate. + pub y: u8, + /// Bitset of still-active berries. + pub remaining_berries: u8, + /// True when all berries have been collected. + pub terminal: bool, + /// Winner id in terminal states. + pub winner: Option, +} + +impl Default for PlatformerState { + fn default() -> Self { + let game = Platformer::default(); + let params = ::default_params(&game); + ::init_with_params(&game, 0, ¶ms) + } +} + +/// Builtin deterministic platformer environment. +#[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)] +pub struct Platformer { + /// Environment configuration. + pub config: PlatformerConfig, +} + +impl Platformer { + /// Creates a platformer game with validated configuration. 
+ pub fn new(config: PlatformerConfig) -> Self { + assert!(config.invariant(), "invalid platformer config"); + Self { config } + } + + fn player_body(state: &PlatformerState) -> &PhysicsBody2d { + state.world.require_body(PLAYER_BODY_ID) + } + + fn player_position(state: &PlatformerState) -> (u8, u8) { + let player = Self::player_body(state); + let min = player.aabb().min; + let x = min.x.to_f64(); + let y = min.y.to_f64(); + debug_assert!(x >= 0.0 && y >= 0.0); + (x as u8, y as u8) + } + + fn is_terminal_state(state: &PlatformerState) -> bool { + state.remaining_berries == 0 + } + + fn winner(state: &PlatformerState) -> Option { + Self::is_terminal_state(state).then_some(0) + } + + fn sync_berries(&self, state: &mut PlatformerState) { + set_trigger_mask_deferred( + &mut state.world, + FIRST_BERRY_BODY_ID, + BERRY_COUNT, + u64::from(state.remaining_berries), + ); + } + + fn collect_berries_from_contacts(state: &mut PlatformerState) -> (u8, bool) { + let was_non_terminal = state.remaining_berries != 0; + let mut remaining = u64::from(state.remaining_berries); + let collected = collect_actor_trigger_contacts( + &mut state.world, + PLAYER_BODY_ID, + FIRST_BERRY_BODY_ID, + BERRY_COUNT, + &mut remaining, + ); + state.remaining_berries = remaining as u8; + (collected, was_non_terminal && state.remaining_berries == 0) + } + + fn observation_from_state(state: &PlatformerState) -> PlatformerObservation { + let (x, y) = Self::player_position(state); + PlatformerObservation { + x, + y, + remaining_berries: state.remaining_berries, + terminal: Self::is_terminal_state(state), + winner: Self::winner(state), + } + } + + fn build_world( + config: PlatformerConfig, + ) -> PhysicsWorld2d { + let mut world = PhysicsWorld2d::new(config.arena_bounds()); + world.add_body_deferred(PhysicsBody2d { + id: PLAYER_BODY_ID, + kind: BodyKind::Kinematic, + position: config.player_center(0, 0), + half_extents: config.player_half_extents(), + active: true, + }); + for index in 0..BERRY_COUNT { + 
world.add_body_deferred(PhysicsBody2d { + id: FIRST_BERRY_BODY_ID + index as u16, + kind: BodyKind::Trigger, + position: config.berry_center(index), + half_extents: Vec2::new(StrictF64::new(0.0), StrictF64::new(0.0)), + active: true, + }); + } + world.refresh_contacts(); + world + } +} + +impl single_player::SinglePlayerGame for Platformer { + type Params = PlatformerConfig; + type State = PlatformerState; + type Action = PlatformerAction; + type Obs = PlatformerObservation; + type WorldView = PlatformerWorldView; + type ActionBuf = FixedVec; + type WordBuf = FixedVec; + + fn default_params(&self) -> Self::Params { + self.config + } + + fn name(&self) -> &'static str { + "platformer" + } + + fn params_invariant(&self, params: &Self::Params) -> bool { + params.invariant() + } + + fn init_with_params(&self, _seed: Seed, params: &Self::Params) -> Self::State { + assert!(params.invariant()); + PlatformerState { + config: *params, + world: Self::build_world(*params), + remaining_berries: ALL_BERRIES_MASK, + } + } + + fn is_terminal(&self, state: &Self::State) -> bool { + Self::is_terminal_state(state) + } + + fn legal_actions(&self, state: &Self::State, out: &mut Self::ActionBuf) { + out.clear(); + if self.is_terminal(state) { + return; + } + out.extend_from_slice(&PLATFORMER_ACTION_ORDER).unwrap(); + } + + fn observe_player(&self, state: &Self::State) -> Self::Obs { + Self::observation_from_state(state) + } + + fn world_view(&self, state: &Self::State) -> Self::WorldView { + PlatformerWorldView { + config: state.config, + physics: state.world.clone(), + berries: berry_views(state.config, state.remaining_berries), + } + } + + fn step_in_place( + &self, + state: &mut Self::State, + action: Option, + rng: &mut DeterministicRng, + out: &mut StepOutcome, + ) { + let action = action.unwrap_or(PlatformerAction::Stay); + + if self.is_terminal(state) { + out.termination = Termination::Terminal { + winner: Self::winner(state), + }; + single_player::push_reward(&mut out.rewards, 
0); + } else { + let config = state.config; + let (current_x, _) = Self::player_position(state); + let (x, y, sprained) = match action { + PlatformerAction::Stay => (current_x, 0, false), + PlatformerAction::Left => (current_x.saturating_sub(1), 0, false), + PlatformerAction::Right => ( + if current_x + config.player_width < config.width { + current_x + 1 + } else { + current_x + }, + 0, + false, + ), + PlatformerAction::Jump => { + let sprained = + rng.gen_bool_ratio(config.sprain_numerator, config.sprain_denominator); + (current_x, config.jump_delta, sprained) + } + }; + + state + .world + .set_body_position_deferred(PLAYER_BODY_ID, config.player_center(x, y)); + state.world.refresh_contacts(); + let (collected, finished) = Self::collect_berries_from_contacts(state); + self.sync_berries(state); + state.world.step(); + + let reward = config + .checked_step_reward(collected, finished, sprained) + .expect("validated platformer config produced an out-of-range reward"); + single_player::push_reward(&mut out.rewards, reward); + out.termination = if self.is_terminal(state) { + Termination::Terminal { + winner: Self::winner(state), + } + } else { + Termination::Ongoing + }; + } + } + + fn state_invariant(&self, state: &Self::State) -> bool { + if !state.config.invariant() + || state.remaining_berries & !ALL_BERRIES_MASK != 0 + || !state.world.invariant() + || state.world.bodies.len() != PLATFORMER_BODIES + { + return false; + } + + let player = Self::player_body(state); + if player.kind != BodyKind::Kinematic + || !player.active + || player.half_extents != state.config.player_half_extents() + { + return false; + } + + let (x, y) = Self::player_position(state); + if x + state.config.player_width > state.config.width || y > state.config.jump_delta { + return false; + } + + for index in 0..BERRY_COUNT { + let berry = state.world.require_body(FIRST_BERRY_BODY_ID + index as u16); + let expected_active = state.remaining_berries & (1u8 << index) != 0; + if berry.kind != 
BodyKind::Trigger + || berry.position != state.config.berry_center(index) + || berry.active != expected_active + { + return false; + } + } + + true + } + + fn player_observation_invariant(&self, state: &Self::State, observation: &Self::Obs) -> bool { + observation == &Self::observation_from_state(state) + } + + fn spectator_observation_invariant( + &self, + state: &Self::State, + observation: &Self::Obs, + ) -> bool { + observation == &Self::observation_from_state(state) + } + + fn world_view_invariant(&self, state: &Self::State, world: &Self::WorldView) -> bool { + if world.config != state.config || world.physics != state.world { + return false; + } + + let mut index = 0usize; + while index < world.berries.len() { + let berry = world.berries[index]; + if berry.id != FIRST_BERRY_BODY_ID + index as u16 + || berry.x != state.config.berry_xs[index] + || berry.y != state.config.berry_y + || berry.collected != ((state.remaining_berries & (1u8 << index)) == 0) + { + return false; + } + index += 1; + } + + true + } + + fn transition_postcondition( + &self, + pre: &Self::State, + _action: Option, + post: &Self::State, + outcome: &StepOutcome, + ) -> bool { + if pre.remaining_berries == 0 { + return post == pre && outcome.reward_for(0) == 0 && outcome.is_terminal(); + } + let Some((min_reward, max_reward)) = post.config.reward_bounds() else { + return false; + }; + reward_and_terminal_postcondition( + outcome.reward_for(0), + min_reward, + max_reward, + post.remaining_berries == 0, + outcome.is_terminal(), + ) + } + + fn compact_spec(&self) -> CompactSpec { + self.compact_spec_for_params(&self.config) + } + + fn compact_spec_for_params(&self, params: &Self::Params) -> CompactSpec { + params + .compact_spec() + .expect("invalid platformer config cannot produce compact spec") + } + + fn encode_action(&self, action: &Self::Action) -> u64 { + encode_enum_action(*action, &PLATFORMER_ACTION_ORDER) + } + + fn decode_action(&self, encoded: u64) -> Option { + 
decode_enum_action(encoded, &PLATFORMER_ACTION_ORDER) + } + + fn encode_player_observation(&self, observation: &Self::Obs, out: &mut Self::WordBuf) { + out.clear(); + let packed = u64::from(observation.x) + | (u64::from(observation.y) << PLATFORMER_Y_SHIFT) + | (u64::from(observation.remaining_berries) << PLATFORMER_REMAINING_BERRIES_SHIFT) + | ((observation.terminal as u64) << PLATFORMER_TERMINAL_SHIFT); + out.push(packed).unwrap(); + } +} + +#[cfg(test)] +mod tests; + +#[cfg(kani)] +mod proofs; diff --git a/src/builtin/platformer/proofs.rs b/src/builtin/platformer/proofs.rs new file mode 100644 index 0000000..1ef9df0 --- /dev/null +++ b/src/builtin/platformer/proofs.rs @@ -0,0 +1,59 @@ +use super::{ALL_BERRIES_MASK, PLAYER_BODY_ID, Platformer, PlatformerAction, PlatformerState}; +use crate::buffer::FixedVec; +use crate::game::Game; +use crate::types::PlayerAction; + +#[kani::proof] +#[kani::unwind(64)] +fn wall_clamps_hold_for_all_edge_positions() { + let game = Platformer::default(); + let mut state = PlatformerState::default(); + let x: u8 = kani::any(); + kani::assume(x < game.config.width); + state + .world + .set_body_position(PLAYER_BODY_ID, game.config.player_center(x, 0)); + let mut rng = crate::rng::DeterministicRng::from_seed(1); + let mut outcome = + crate::types::StepOutcome::>::default(); + let mut actions = FixedVec::, 1>::default(); + actions + .push(PlayerAction { + player: 0, + action: PlatformerAction::Left, + }) + .unwrap(); + game.step_in_place(&mut state, &actions, &mut rng, &mut outcome); + assert!(game.observe_spectator(&state).x < game.config.width); +} + +#[kani::proof] +#[kani::unwind(64)] +fn jump_reward_is_bounded() { + let state = Platformer::default().init(1); + let mut actions = FixedVec::, 1>::default(); + actions + .push(PlayerAction { + player: 0, + action: PlatformerAction::Jump, + }) + .unwrap(); + crate::verification::assert_transition_contracts(&Platformer::default(), &state, &actions, 1); +} + +#[kani::proof] 
+#[kani::unwind(64)] +fn initial_observation_and_world_contracts_hold() { + let game = Platformer::default(); + let state = game.init(1); + crate::verification::assert_observation_contracts(&game, &state); +} + +#[kani::proof] +#[kani::unwind(64)] +fn berry_mask_tracks_trigger_activation() { + let mut state = PlatformerState::default(); + state.remaining_berries = ALL_BERRIES_MASK ^ 0b000001; + Platformer::default().sync_berries(&mut state); + assert!(!state.world.require_body(super::FIRST_BERRY_BODY_ID).active); +} diff --git a/src/builtin/platformer/tests.rs b/src/builtin/platformer/tests.rs new file mode 100644 index 0000000..2723c9e --- /dev/null +++ b/src/builtin/platformer/tests.rs @@ -0,0 +1,190 @@ +use super::*; +use crate::core::env::DefaultEnvironment; +use crate::core::observe::Observer; +use crate::game::Game; +use crate::session::Session; +use crate::types::{PlayerAction, PlayerReward}; +use crate::verification::{ + assert_compact_roundtrip, assert_observation_contracts, assert_transition_contracts, +}; + +#[test] +fn movement_clamps_at_walls() { + let game = Platformer::default(); + let mut state = game.init(1); + let mut rng = DeterministicRng::from_seed_and_stream(1, 1); + let mut outcome = StepOutcome::>::default(); + let mut actions = FixedVec::, 1>::default(); + actions + .push(PlayerAction { + player: 0, + action: PlatformerAction::Left, + }) + .unwrap(); + game.step_in_place(&mut state, &actions, &mut rng, &mut outcome); + assert_eq!(game.observe_spectator(&state).x, 0); + + state + .world + .set_body_position(PLAYER_BODY_ID, game.config.player_center(11, 0)); + outcome.clear(); + actions.clear(); + actions + .push(PlayerAction { + player: 0, + action: PlatformerAction::Right, + }) + .unwrap(); + game.step_in_place(&mut state, &actions, &mut rng, &mut outcome); + assert_eq!(game.observe_spectator(&state).x, 11); +} + +#[test] +fn berry_collection_is_idempotent() { + let game = Platformer::default(); + let mut state = game.init(1); + state + 
.world + .set_body_position(PLAYER_BODY_ID, game.config.player_center(1, 0)); + let mut rng = DeterministicRng::from_seed_and_stream(1, 1); + let mut outcome = StepOutcome::>::default(); + let mut actions = FixedVec::, 1>::default(); + actions + .push(PlayerAction { + player: 0, + action: PlatformerAction::Jump, + }) + .unwrap(); + + game.step_in_place(&mut state, &actions, &mut rng, &mut outcome); + let remaining = state.remaining_berries; + outcome.clear(); + game.step_in_place(&mut state, &actions, &mut rng, &mut outcome); + assert_eq!(state.remaining_berries, remaining); +} + +#[test] +fn final_berry_terminates_with_bonus() { + let game = Platformer::default(); + let mut state = game.init(9); + state.remaining_berries = 1u8 << 5; + game.sync_berries(&mut state); + state + .world + .set_body_position(PLAYER_BODY_ID, game.config.player_center(11, 0)); + let mut rng = DeterministicRng::from_seed_and_stream(9, 1); + let mut outcome = StepOutcome::>::default(); + let mut actions = FixedVec::, 1>::default(); + actions + .push(PlayerAction { + player: 0, + action: PlatformerAction::Jump, + }) + .unwrap(); + game.step_in_place(&mut state, &actions, &mut rng, &mut outcome); + assert!(game.is_terminal(&state)); + assert!(outcome.reward_for(0) >= 10); +} + +#[test] +fn seeded_sessions_replay_exactly() { + let mut left = Session::new(Platformer::default(), 3); + let mut right = Session::new(Platformer::default(), 3); + let actions = [ + PlayerAction { + player: 0, + action: PlatformerAction::Right, + }, + PlayerAction { + player: 0, + action: PlatformerAction::Jump, + }, + PlayerAction { + player: 0, + action: PlatformerAction::Right, + }, + ]; + for action in actions { + left.step(std::slice::from_ref(&action)); + right.step(std::slice::from_ref(&action)); + } + assert_eq!(left.trace(), right.trace()); + assert_eq!(left.state(), right.state()); +} + +#[test] +fn verification_helpers_hold_for_jump() { + let game = Platformer::default(); + let state = game.init(3); + let 
mut actions = FixedVec::, 1>::default(); + actions + .push(PlayerAction { + player: 0, + action: PlatformerAction::Jump, + }) + .unwrap(); + assert_transition_contracts(&game, &state, &actions, 3); + assert_observation_contracts(&game, &state); + assert_compact_roundtrip(&game, &PlatformerAction::Jump); +} + +#[test] +fn physics_world_tracks_actor_and_berries() { + let state = Platformer::default().init(3); + let world = Platformer::default().world_view(&state); + assert_eq!(world.physics.bodies.len(), PLATFORMER_BODIES); + assert!(world.physics.invariant()); +} + +#[test] +fn parameterized_rewards_update_transition_and_compact_contracts() { + let mut config = PlatformerConfig { + sprain_numerator: 0, + berry_reward: 4, + finish_bonus: 30, + ..PlatformerConfig::default() + }; + config.berry_y = config.jump_delta; + let game = Platformer::default(); + let spec = game.compact_spec_for_params(&config); + + let mut state = game.init_with_params(1, &config); + state.remaining_berries = 1; + game.sync_berries(&mut state); + state + .world + .set_body_position(PLAYER_BODY_ID, config.player_center(config.berry_xs[0], 0)); + + let mut rng = DeterministicRng::from_seed_and_stream(1, 1); + let mut outcome = StepOutcome::>::default(); + let mut actions = FixedVec::, 1>::default(); + actions + .push(PlayerAction { + player: 0, + action: PlatformerAction::Jump, + }) + .unwrap(); + + game.step_in_place(&mut state, &actions, &mut rng, &mut outcome); + + assert_eq!(outcome.reward_for(0), 34); + assert!(spec.max_reward >= 34); + assert!(spec.try_encode_reward(34).is_ok()); +} + +#[test] +fn parameterized_environment_uses_wide_observation_schema() { + let config = PlatformerConfig { + width: 40, + height: 10, + jump_delta: 7, + berry_y: 7, + berry_xs: [1, 6, 11, 16, 21, 26], + ..PlatformerConfig::default() + }; + let mut env = + DefaultEnvironment::::new(Platformer::default(), 3, Observer::Player(0)); + let packet = env.reset_with_params(3, config).unwrap(); + 
assert_eq!(packet.words().len(), 1); + assert!(packet.words()[0] > 4095); +} diff --git a/src/builtin/platformer/world.rs b/src/builtin/platformer/world.rs new file mode 100644 index 0000000..7ca96c7 --- /dev/null +++ b/src/builtin/platformer/world.rs @@ -0,0 +1,76 @@ +//! Platformer world/debug view types and physics oracle adapter. + +use crate::game::Game; +use crate::math::{Aabb2, StrictF64}; +use crate::physics::{Contact2d, PhysicsBody2d, PhysicsOracleView2d, PhysicsWorld2d}; + +use super::{ + BERRY_COUNT, FIRST_BERRY_BODY_ID, PLATFORMER_BODIES, PLATFORMER_CONTACTS, Platformer, + PlatformerConfig, +}; + +/// Render/debug view of one berry. +#[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)] +pub struct BerryView { + /// Stable body id in the physics world. + pub id: u16, + /// Berry x tile coordinate. + pub x: u8, + /// Berry y tile coordinate. + pub y: u8, + /// Whether this berry has already been collected. + pub collected: bool, +} + +/// World-level debug view combining config, physics and berry metadata. +#[derive(Clone, Debug, Eq, Hash, PartialEq)] +pub struct PlatformerWorldView { + /// Environment configuration used for this world. + pub config: PlatformerConfig, + /// Physics snapshot. + pub physics: PhysicsWorld2d, + /// Berry metadata for rendering and inspection. 
+ pub berries: [BerryView; BERRY_COUNT], +} + +pub(super) fn berry_views( + config: PlatformerConfig, + remaining_berries: u8, +) -> [BerryView; BERRY_COUNT] { + let mut berries = [BerryView::default(); BERRY_COUNT]; + let mut index = 0usize; + while index < BERRY_COUNT { + berries[index] = BerryView { + id: FIRST_BERRY_BODY_ID + index as u16, + x: config.berry_xs[index], + y: config.berry_y, + collected: (remaining_berries & (1u8 << index)) == 0, + }; + index += 1; + } + berries +} + +impl Default for PlatformerWorldView { + fn default() -> Self { + Platformer::default().world_view(&Platformer::default().init(0)) + } +} + +impl PhysicsOracleView2d for PlatformerWorldView { + fn bounds(&self) -> Aabb2 { + self.physics.bounds() + } + + fn tick(&self) -> u64 { + self.physics.tick() + } + + fn bodies(&self) -> &[PhysicsBody2d] { + self.physics.bodies() + } + + fn contacts(&self) -> &[Contact2d] { + self.physics.contacts() + } +} diff --git a/src/builtin/tictactoe/mod.rs b/src/builtin/tictactoe/mod.rs new file mode 100644 index 0000000..bd8a565 --- /dev/null +++ b/src/builtin/tictactoe/mod.rs @@ -0,0 +1,507 @@ +//! Builtin deterministic tic-tac-toe environment and compact encoding. + +use crate::buffer::FixedVec; +use crate::compact::CompactSpec; +use crate::core::single_player::{self, SinglePlayerRewardBuf}; +use crate::proof::{ + FairnessWitness, FiniteSupportOutcome, ModelGame, ProbabilisticWitness, RefinementWitness, + TerminationWitness, VerifiedGame, +}; +use crate::rng::DeterministicRng; +use crate::types::{PlayerId, Seed, StepOutcome, Termination}; +use crate::verification::reward_and_terminal_postcondition; + +const WIN_LINES: [(usize, usize, usize); 8] = [ + (0, 1, 2), + (3, 4, 5), + (6, 7, 8), + (0, 3, 6), + (1, 4, 7), + (2, 5, 8), + (0, 4, 8), + (2, 4, 6), +]; + +/// Cell state on the 3x3 board. +#[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)] +pub enum TicTacToeCell { + /// Empty cell. + #[default] + Empty, + /// Player-controlled mark. 
+ Player, + /// Opponent mark. + Opponent, +} + +/// Compact action selecting one board cell. +#[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)] +pub struct TicTacToeAction(pub u8); + +/// Complete deterministic game state. +#[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)] +pub struct TicTacToeState { + /// Board occupancy. + pub board: [TicTacToeCell; 9], + /// Terminal-state flag. + pub terminal: bool, + /// Winner id when terminal with a winner. + pub winner: Option, +} + +/// Canonical tic-tac-toe observation type. +pub type TicTacToeObservation = TicTacToeState; +/// World/debug view type. +pub type TicTacToeWorldView = TicTacToeState; + +/// Builtin deterministic tic-tac-toe environment. +#[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)] +pub struct TicTacToe; + +impl TicTacToe { + fn find_winner(board: &[TicTacToeCell; 9]) -> Option { + for (a, b, c) in WIN_LINES { + let cells = (board[a], board[b], board[c]); + if cells + == ( + TicTacToeCell::Player, + TicTacToeCell::Player, + TicTacToeCell::Player, + ) + { + return Some(0); + } + if cells + == ( + TicTacToeCell::Opponent, + TicTacToeCell::Opponent, + TicTacToeCell::Opponent, + ) + { + return Some(1); + } + } + None + } + + fn is_full(board: &[TicTacToeCell; 9]) -> bool { + let mut index = 0usize; + while index < board.len() { + if board[index] == TicTacToeCell::Empty { + return false; + } + index += 1; + } + true + } + + fn decode_action_index(action: Option) -> Option { + action.map(|action: TicTacToeAction| action.0 as usize) + } + + fn action_is_legal(state: &TicTacToeState, index: usize) -> bool { + index < state.board.len() && state.board[index] == TicTacToeCell::Empty + } + + fn apply_mark( + state: &mut TicTacToeState, + index: usize, + mark: TicTacToeCell, + ) -> Option> { + state.board[index] = mark; + let winner = Self::find_winner(&state.board); + if winner.is_some() || Self::is_full(&state.board) { + state.terminal = true; + state.winner = winner; + Some(winner) 
+ } else { + None + } + } + + fn sample_opponent_action(state: &TicTacToeState, rng: &mut DeterministicRng) -> usize { + let mut empty_positions = [0usize; 9]; + let mut empty_len = 0usize; + let mut index = 0usize; + while index < state.board.len() { + if state.board[index] == TicTacToeCell::Empty { + empty_positions[empty_len] = index; + empty_len += 1; + } + index += 1; + } + empty_positions[rng.gen_range(empty_len)] + } + + fn reward_from_terminal_winner(winner: Option) -> i64 { + match winner { + Some(0) => 2, + Some(_) => -2, + None => 1, + } + } + + fn model_step( + state: &mut TicTacToeState, + action: Option, + rng: &mut DeterministicRng, + ) -> i64 { + if state.terminal { + return 0; + } + match Self::decode_action_index(action) { + Some(index) if Self::action_is_legal(state, index) => { + Self::resolve_turn(state, index, rng) + } + _ => -3, + } + } + + fn resolve_turn( + state: &mut TicTacToeState, + action_index: usize, + rng: &mut DeterministicRng, + ) -> i64 { + if let Some(winner) = Self::apply_mark(state, action_index, TicTacToeCell::Player) { + return Self::reward_from_terminal_winner(winner); + } + + let opponent_index = Self::sample_opponent_action(state, rng); + if let Some(winner) = Self::apply_mark(state, opponent_index, TicTacToeCell::Opponent) { + return Self::reward_from_terminal_winner(winner); + } + + 0 + } + + fn termination_from_state(state: &TicTacToeState) -> Termination { + if state.terminal { + Termination::Terminal { + winner: state.winner, + } + } else { + Termination::Ongoing + } + } + + /// Packs board cells into a two-bit-per-cell `u64` representation. 
+ pub fn packed_board(board: &[TicTacToeCell; 9]) -> u64 { + let mut packed = 0u64; + let mut index = 0usize; + while index < board.len() { + let value = match board[index] { + TicTacToeCell::Empty => 0, + TicTacToeCell::Player => 1, + TicTacToeCell::Opponent => 2, + }; + packed |= value << (index * 2); + index += 1; + } + packed + } + + fn empty_cell_count(state: &TicTacToeState) -> u64 { + let mut empty = 0u64; + let mut index = 0usize; + while index < state.board.len() { + if state.board[index] == TicTacToeCell::Empty { + empty += 1; + } + index += 1; + } + empty + } + + fn push_support_outcome( + out: &mut FixedVec, 9>, + state: TicTacToeState, + reward: i64, + weight: u64, + ) { + let mut rewards = SinglePlayerRewardBuf::default(); + single_player::push_reward(&mut rewards, reward); + out.push(FiniteSupportOutcome { + termination: Self::termination_from_state(&state), + state, + rewards, + weight, + }) + .unwrap(); + } +} + +impl single_player::SinglePlayerGame for TicTacToe { + type Params = (); + type State = TicTacToeState; + type Action = TicTacToeAction; + type Obs = TicTacToeObservation; + type WorldView = TicTacToeWorldView; + type ActionBuf = FixedVec; + type WordBuf = FixedVec; + + fn name(&self) -> &'static str { + "tictactoe" + } + + fn init_with_params(&self, _seed: Seed, _params: &Self::Params) -> Self::State { + TicTacToeState::default() + } + + fn is_terminal(&self, state: &Self::State) -> bool { + state.terminal + } + + fn legal_actions(&self, state: &Self::State, out: &mut Self::ActionBuf) { + out.clear(); + if state.terminal { + return; + } + let mut index = 0usize; + while index < state.board.len() { + if state.board[index] == TicTacToeCell::Empty { + out.push(TicTacToeAction(index as u8)).unwrap(); + } + index += 1; + } + } + + fn observe_player(&self, state: &Self::State) -> Self::Obs { + *state + } + + fn world_view(&self, state: &Self::State) -> Self::WorldView { + *state + } + + fn step_in_place( + &self, + state: &mut Self::State, + 
action: Option, + rng: &mut DeterministicRng, + out: &mut StepOutcome, + ) { + let reward = Self::model_step(state, action, rng); + + single_player::push_reward(&mut out.rewards, reward); + out.termination = Self::termination_from_state(state); + } + + fn compact_spec(&self) -> CompactSpec { + CompactSpec { + action_count: 9, + observation_bits: 18, + observation_stream_len: 1, + reward_bits: 3, + min_reward: -3, + max_reward: 2, + reward_offset: 3, + } + } + + fn encode_action(&self, action: &Self::Action) -> u64 { + u64::from(action.0) + } + + fn decode_action(&self, encoded: u64) -> Option { + (encoded < 9).then_some(TicTacToeAction(encoded as u8)) + } + + fn encode_player_observation(&self, observation: &Self::Obs, out: &mut Self::WordBuf) { + out.clear(); + out.push(Self::packed_board(&observation.board)).unwrap(); + } + + fn state_invariant(&self, state: &Self::State) -> bool { + let winner = Self::find_winner(&state.board); + let full = Self::is_full(&state.board); + state.terminal == (winner.is_some() || full) + && (state.winner == winner || (winner.is_none() && state.winner.is_none())) + } + + fn action_invariant(&self, action: &Self::Action) -> bool { + action.0 < 9 + } + + fn transition_postcondition( + &self, + pre: &Self::State, + _action: Option, + post: &Self::State, + outcome: &StepOutcome, + ) -> bool { + if pre.terminal { + return post == pre && outcome.reward_for(0) == 0 && outcome.is_terminal(); + } + reward_and_terminal_postcondition( + outcome.reward_for(0), + -3, + 2, + post.terminal, + outcome.is_terminal(), + ) + } +} + +impl ModelGame for TicTacToe { + type ModelState = TicTacToeState; + type ModelObs = TicTacToeObservation; + type ModelWorldView = TicTacToeWorldView; + + fn model_init_with_params(&self, _seed: Seed, _params: &Self::Params) -> Self::ModelState { + TicTacToeState::default() + } + + fn model_is_terminal(&self, state: &Self::ModelState) -> bool { + state.terminal + } + + fn model_players_to_act(&self, state: 
&Self::ModelState, out: &mut Self::PlayerBuf) { + out.clear(); + if !state.terminal { + out.push(0).unwrap(); + } + } + + fn model_legal_actions( + &self, + state: &Self::ModelState, + _player: PlayerId, + out: &mut Self::ActionBuf, + ) { + out.clear(); + if state.terminal { + return; + } + let mut index = 0usize; + while index < state.board.len() { + if state.board[index] == TicTacToeCell::Empty { + out.push(TicTacToeAction(index as u8)).unwrap(); + } + index += 1; + } + } + + fn model_observe_player(&self, state: &Self::ModelState, _player: PlayerId) -> Self::ModelObs { + *state + } + + fn model_observe_spectator(&self, state: &Self::ModelState) -> Self::ModelObs { + *state + } + + fn model_world_view(&self, state: &Self::ModelState) -> Self::ModelWorldView { + *state + } + + fn model_step_in_place( + &self, + state: &mut Self::ModelState, + actions: &Self::JointActionBuf, + rng: &mut DeterministicRng, + out: &mut StepOutcome, + ) { + let action = actions + .as_slice() + .iter() + .find(|candidate| candidate.player == 0) + .map(|candidate| candidate.action); + let reward = Self::model_step(state, action, rng); + single_player::push_reward(&mut out.rewards, reward); + out.termination = Self::termination_from_state(state); + } +} + +impl RefinementWitness for TicTacToe { + fn runtime_state_to_model(&self, state: &Self::State) -> Self::ModelState { + *state + } + + fn runtime_observation_to_model(&self, observation: &Self::Obs) -> Self::ModelObs { + *observation + } + + fn runtime_world_view_to_model(&self, world: &Self::WorldView) -> Self::ModelWorldView { + *world + } +} + +impl VerifiedGame for TicTacToe {} + +impl TerminationWitness for TicTacToe { + fn model_rank(&self, state: &Self::ModelState) -> u64 { + if state.terminal { + 0 + } else { + Self::empty_cell_count(state) + } + } +} + +impl FairnessWitness for TicTacToe {} + +impl ProbabilisticWitness for TicTacToe { + type SupportBuf = FixedVec, 9>; + + fn model_step_support( + &self, + state: 
&Self::ModelState, + actions: &Self::JointActionBuf, + out: &mut Self::SupportBuf, + ) { + out.clear(); + let action = actions + .as_slice() + .iter() + .find(|candidate| candidate.player == 0) + .map(|candidate| candidate.action); + + if state.terminal { + Self::push_support_outcome(out, *state, 0, 1); + return; + } + + let Some(action_index) = Self::decode_action_index(action) else { + Self::push_support_outcome(out, *state, -3, 1); + return; + }; + if !Self::action_is_legal(state, action_index) { + Self::push_support_outcome(out, *state, -3, 1); + return; + } + + let mut player_state = *state; + if let Some(winner) = + Self::apply_mark(&mut player_state, action_index, TicTacToeCell::Player) + { + Self::push_support_outcome( + out, + player_state, + Self::reward_from_terminal_winner(winner), + 1, + ); + return; + } + + let mut index = 0usize; + while index < player_state.board.len() { + if player_state.board[index] == TicTacToeCell::Empty { + let mut branch = player_state; + let reward = if let Some(winner) = + Self::apply_mark(&mut branch, index, TicTacToeCell::Opponent) + { + Self::reward_from_terminal_winner(winner) + } else { + 0 + }; + Self::push_support_outcome(out, branch, reward, 1); + } + index += 1; + } + } +} + +#[cfg(test)] +mod tests; + +#[cfg(kani)] +mod proofs; diff --git a/src/builtin/tictactoe/proofs.rs b/src/builtin/tictactoe/proofs.rs new file mode 100644 index 0000000..473da47 --- /dev/null +++ b/src/builtin/tictactoe/proofs.rs @@ -0,0 +1,113 @@ +use super::{TicTacToe, TicTacToeAction, TicTacToeCell, TicTacToeState}; +use crate::buffer::FixedVec; +use crate::game::Game; +use crate::proof::{assert_finite_support_is_valid, assert_ranked_progress}; +use crate::session::{FixedHistory, SessionKernel}; +use crate::types::PlayerAction; + +fn action(cell: u8) -> FixedVec, 1> { + let mut actions = FixedVec::, 1>::default(); + actions + .push(PlayerAction { + player: 0, + action: TicTacToeAction(cell), + }) + .unwrap(); + actions +} + 
+crate::declare_refinement_harnesses!( + game = TicTacToe, + params = (), + seed = 7, + actions = action(0), + trace = [action(0), action(0)], + init = ttt_model_init_refines_runtime, + step = ttt_model_step_refines_runtime, + replay = ttt_model_replay_refines_runtime, +); + +#[kani::proof] +#[kani::unwind(16)] +fn legal_actions_are_exactly_empty_cells() { + let encoded: u32 = kani::any(); + kani::assume(encoded < 3u32.pow(9)); + let mut board = [TicTacToeCell::Empty; 9]; + let mut value = encoded; + for cell in &mut board { + *cell = match value % 3 { + 0 => TicTacToeCell::Empty, + 1 => TicTacToeCell::Player, + _ => TicTacToeCell::Opponent, + }; + value /= 3; + } + let winner = TicTacToe::find_winner(&board); + let terminal = winner.is_some() || TicTacToe::is_full(&board); + let state = TicTacToeState { + board, + terminal, + winner, + }; + let mut legal = FixedVec::::default(); + TicTacToe.legal_actions(&state, 0, &mut legal); + let mut legal_count = 0usize; + let mut legal_index = 0usize; + while legal_index < legal.len() { + let action = legal.as_slice()[legal_index]; + assert_eq!(state.board[action.0 as usize], TicTacToeCell::Empty); + legal_count += 1; + legal_index += 1; + } + + let mut empty_count = 0usize; + let mut board_index = 0usize; + while board_index < state.board.len() { + if state.board[board_index] == TicTacToeCell::Empty { + if !terminal { + assert!( + legal + .as_slice() + .contains(&TicTacToeAction(board_index as u8)) + ); + } + empty_count += 1; + } + board_index += 1; + } + assert_eq!(legal_count, if terminal { 0 } else { empty_count }); +} + +#[kani::proof] +#[kani::unwind(16)] +fn invalid_move_never_mutates_board() { + type ProofSession = SessionKernel>; + + let mut session = ProofSession::new(TicTacToe, 1); + session.step(&[PlayerAction { + player: 0, + action: TicTacToeAction(0), + }]); + let mut actions = FixedVec::, 1>::default(); + actions + .push(PlayerAction { + player: 0, + action: TicTacToeAction(0), + }) + .unwrap(); + let before 
= *session.state(); + session.step_with_joint_actions(&actions); + assert_eq!(*session.state(), before); +} + +#[kani::proof] +#[kani::unwind(16)] +fn ranked_progress_holds_for_opening_move() { + assert_ranked_progress(&TicTacToe, &TicTacToeState::default(), &action(0), 7); +} + +#[kani::proof] +#[kani::unwind(16)] +fn probabilistic_support_is_finite_and_nonempty() { + assert_finite_support_is_valid(&TicTacToe, &TicTacToeState::default(), &action(0)); +} diff --git a/src/builtin/tictactoe/tests.rs b/src/builtin/tictactoe/tests.rs new file mode 100644 index 0000000..3eaf243 --- /dev/null +++ b/src/builtin/tictactoe/tests.rs @@ -0,0 +1,82 @@ +use super::*; +use crate::game::Game; +use crate::session::Session; +use crate::types::PlayerAction; +use crate::verification::{ + assert_compact_roundtrip, assert_observation_contracts, assert_transition_contracts, +}; + +#[test] +fn illegal_move_preserves_state_and_penalizes() { + let mut session = Session::new(TicTacToe, 7); + session.step(&[PlayerAction { + player: 0, + action: TicTacToeAction(0), + }]); + let before = *session.state(); + let outcome = session.step(&[PlayerAction { + player: 0, + action: TicTacToeAction(0), + }]); + assert_eq!(outcome.reward_for(0), -3); + assert_eq!(session.state(), &before); +} + +#[test] +fn legal_actions_match_empty_cells_exhaustively() { + let game = TicTacToe; + for encoded in 0..3u32.pow(9) { + let mut board = [TicTacToeCell::Empty; 9]; + let mut value = encoded; + for cell in &mut board { + *cell = match value % 3 { + 0 => TicTacToeCell::Empty, + 1 => TicTacToeCell::Player, + _ => TicTacToeCell::Opponent, + }; + value /= 3; + } + let winner = TicTacToe::find_winner(&board); + let terminal = winner.is_some() || TicTacToe::is_full(&board); + let state = TicTacToeState { + board, + terminal, + winner, + }; + let mut legal = FixedVec::::default(); + game.legal_actions(&state, 0, &mut legal); + let expected: Vec<_> = if terminal { + Vec::new() + } else { + state + .board + .iter() + 
.enumerate() + .filter_map(|(index, cell)| { + (*cell == TicTacToeCell::Empty).then_some(TicTacToeAction(index as u8)) + }) + .collect() + }; + assert_eq!( + legal.as_slice(), + expected.as_slice(), + "encoded board state {encoded}" + ); + assert_observation_contracts(&game, &state); + } +} + +#[test] +fn verification_helpers_hold_for_opening_move() { + let game = TicTacToe; + let state = game.init(7); + let mut actions = FixedVec::, 1>::default(); + actions + .push(PlayerAction { + player: 0, + action: TicTacToeAction(0), + }) + .unwrap(); + assert_transition_contracts(&game, &state, &actions, 7); + assert_compact_roundtrip(&game, &TicTacToeAction(0)); +} diff --git a/src/main.rs b/src/cli/mod.rs similarity index 50% rename from src/main.rs rename to src/cli/mod.rs index e82814b..7533bd9 100644 --- a/src/main.rs +++ b/src/cli/mod.rs @@ -1,30 +1,70 @@ +//! Command-line entrypoints for listing, playing, replaying, and validating games. + use std::env; +use std::fmt::Debug; use std::io::{self, Write}; -#[cfg(feature = "render")] -use gameengine::InteractiveSession; -use gameengine::buffer::Buffer; -use gameengine::games::{Blackjack, BlackjackAction, TicTacToe, TicTacToeAction}; +use crate::buffer::Buffer; +#[cfg(feature = "builtin")] +use crate::builtin::{Blackjack, BlackjackAction, TicTacToe, TicTacToeAction}; #[cfg(feature = "physics")] -use gameengine::games::{Platformer, PlatformerAction}; -use gameengine::policy::{FirstLegalPolicy, Policy, RandomPolicy, ScriptedPolicy}; +use crate::builtin::{Platformer, PlatformerAction}; +use crate::core::observe::{Observe, Observer}; +use crate::policy::{FirstLegalPolicy, Policy, RandomPolicy, ScriptedPolicy}; +use crate::registry::{all_games, find_game}; #[cfg(feature = "render")] -use gameengine::render::{ - PassivePolicyDriver, RenderConfig, RenderMode, RendererApp, TurnBasedDriver, -}; +use crate::render::{PassivePolicyDriver, RenderConfig, RenderMode, RendererApp, TurnBasedDriver}; #[cfg(all(feature = "render", feature = 
"physics"))] -use gameengine::render::{RealtimeDriver, builtin}; -use gameengine::{CompactGame, Game, Session, stable_hash}; +use crate::render::{RealtimeDriver, builtin}; +#[cfg(feature = "render")] +use crate::session::InteractiveSession; +use crate::{Game, PlayerAction, Session, stable_hash}; -fn main() { - if let Err(error) = run() { - eprintln!("{error}"); - std::process::exit(1); +#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] +pub(crate) enum RunMode { + Play, + Replay, +} + +#[derive(Debug)] +enum PolicyChoice { + Human, + Random, + First, + Scripted(Vec), +} + +fn resolve_policy_choice( + mode: RunMode, + policy: &str, + parse_script: fn(&str) -> Result, String>, + game_name: &'static str, +) -> Result, String> { + match policy { + "human" if matches!(mode, RunMode::Play) => Ok(PolicyChoice::Human), + "human" => Err(format!( + "unsupported {game_name} policy for replay mode: human" + )), + "random" => Ok(PolicyChoice::Random), + "first" => Ok(PolicyChoice::First), + script if script.starts_with("script:") => parse_script(script) + .map(PolicyChoice::Scripted) + .map_err(|error| format!("{game_name} script parse error: {error}")), + other => Err(format!("unsupported {game_name} policy: {other}")), } } -fn run() -> Result<(), String> { - let mut args = env::args().skip(1); +/// Runs the CLI using process command-line arguments. +pub fn run_from_env() -> Result<(), String> { + run_from_args(env::args().skip(1)) +} + +/// Runs the CLI using a supplied argument iterator. 
+pub fn run_from_args(args: I) -> Result<(), String> +where + I: IntoIterator, +{ + let mut args = args.into_iter(); let Some(command) = args.next() else { print_usage(); return Ok(()); @@ -32,35 +72,42 @@ fn run() -> Result<(), String> { match command.as_str() { "list" => { - println!("tictactoe"); - println!("blackjack"); - #[cfg(feature = "physics")] - println!("platformer"); + for descriptor in all_games() { + println!("{}", descriptor.name); + } Ok(()) } - "play" | "replay" => { + "play" => { let game = args .next() - .ok_or_else(|| "missing game name for play/replay".to_string())?; + .ok_or_else(|| "missing game name for play".to_string())?; let config = CliConfig::parse(args)?; - match game.as_str() { - "tictactoe" => run_tictactoe(config), - "blackjack" => run_blackjack(config), - #[cfg(feature = "physics")] - "platformer" => run_platformer(config), - _ => Err(format!("unknown game: {game}")), - } + run_descriptor(&game, config, RunMode::Play) + } + "replay" => { + let game = args + .next() + .ok_or_else(|| "missing game name for replay".to_string())?; + let config = CliConfig::parse(args)?; + run_descriptor(&game, config, RunMode::Replay) } "validate" => run_validation_smoke(), _ => Err(format!("unknown command: {command}")), } } +fn run_descriptor(game_name: &str, config: CliConfig, mode: RunMode) -> Result<(), String> { + let descriptor = find_game(game_name).ok_or_else(|| format!("unknown game: {game_name}"))?; + (descriptor.runner)(config, mode) +} + +/// Parsed command-line execution configuration. 
#[derive(Clone, Debug)] -struct CliConfig { +pub(crate) struct CliConfig { seed: u64, max_steps: usize, policy: String, + policy_explicit: bool, render: bool, render_physics: bool, ticks_per_second: f64, @@ -77,6 +124,7 @@ impl CliConfig { seed: 1, max_steps: 64, policy: "human".to_string(), + policy_explicit: false, render: false, render_physics: false, ticks_per_second: 12.0, @@ -107,6 +155,7 @@ impl CliConfig { config.policy = iter .next() .ok_or_else(|| "missing value after --policy".to_string())?; + config.policy_explicit = true; } "--render" => { config.render = true; @@ -141,120 +190,246 @@ impl CliConfig { Ok(config) } + + fn policy_for_mode(&self, mode: RunMode) -> &str { + if self.policy_explicit { + &self.policy + } else { + match mode { + RunMode::Play => "human", + RunMode::Replay => "first", + } + } + } +} + +#[cfg(feature = "render")] +fn should_stop_at_tick(tick: u64, max_steps: Option) -> bool { + max_steps.is_some_and(|limit| tick as usize >= limit) +} + +fn collect_scripted_joint_actions( + session: &Session, + script: &[G::Action], + position: &mut usize, + game_name: &'static str, +) -> Result +where + G: Game, +{ + let mut players = G::PlayerBuf::default(); + session.game().players_to_act(session.state(), &mut players); + + let mut joint_actions = G::JointActionBuf::default(); + let mut legal_actions = G::ActionBuf::default(); + for &player in players.as_slice() { + legal_actions.clear(); + session + .game() + .legal_actions(session.state(), player, &mut legal_actions); + if legal_actions.as_slice().is_empty() { + return Err(format!( + "{game_name} player {player} has no legal actions in a non-terminal state" + )); + } + let Some(action) = script.get(*position).copied() else { + return Err(format!( + "{game_name} scripted policy exhausted at index {}", + *position + )); + }; + if !legal_actions.as_slice().contains(&action) { + return Err(format!( + "{game_name} scripted policy action at index {} is illegal for current state", + *position + )); 
+ } + joint_actions + .push(PlayerAction { player, action }) + .expect("joint action buffer capacity exceeded"); + *position += 1; + } + + Ok(joint_actions) +} + +#[cfg(feature = "render")] +fn validate_scripted_policy( + game: G, + seed: u64, + script: &[G::Action], + max_steps: Option, + game_name: &'static str, +) -> Result<(), String> +where + G: Game + Copy, +{ + let mut session = Session::new(game, seed); + let mut position = 0usize; + while !session.is_terminal() && !should_stop_at_tick(session.current_tick(), max_steps) { + let joint_actions = + collect_scripted_joint_actions(&session, script, &mut position, game_name)?; + session.step_with_joint_actions(&joint_actions); + } + Ok(()) +} + +fn run_scripted_headless_game( + game: G, + seed: u64, + script: &[G::Action], + max_steps: usize, + game_name: &'static str, +) -> Result +where + G: Game + Observe + Copy, + G::Obs: Debug, +{ + let mut session = Session::new(game, seed); + let mut position = 0usize; + while !session.is_terminal() && (session.current_tick() as usize) < max_steps { + let joint_actions = + collect_scripted_joint_actions(&session, script, &mut position, game_name)?; + let reward = { + let outcome = session.step_with_joint_actions(&joint_actions); + outcome.reward_for(0) + }; + let observation = session.game().observe(session.state(), Observer::Player(0)); + let mut compact = G::WordBuf::default(); + session + .game() + .encode_observation(&observation, &mut compact); + println!( + "tick={} reward={} terminal={} compact={:?}", + session.current_tick(), + reward, + session.is_terminal(), + compact.as_slice(), + ); + println!("{observation:#?}"); + } + Ok(stable_hash(session.trace())) } -fn run_tictactoe(config: CliConfig) -> Result<(), String> { +fn run_headless_game( + game: G, + config: &CliConfig, + mode: RunMode, + mut human: H, + parse_script: fn(&str) -> Result, String>, + game_name: &'static str, +) -> Result<(), String> +where + G: Game + Observe + Copy, + G::Obs: Debug, + H: Policy, 
+{ + let mut session = Session::new(game, config.seed); + let mut random = RandomPolicy; + let mut first = FirstLegalPolicy; + let trace_hash = + match resolve_policy_choice(mode, config.policy_for_mode(mode), parse_script, game_name)? { + PolicyChoice::Human => run_with_policy(&mut session, config.max_steps, &mut human), + PolicyChoice::Random => run_with_policy(&mut session, config.max_steps, &mut random), + PolicyChoice::First => run_with_policy(&mut session, config.max_steps, &mut first), + PolicyChoice::Scripted(script) => { + run_scripted_headless_game(game, config.seed, &script, config.max_steps, game_name)? + } + }; + + println!("trace hash: {trace_hash:016x}"); + Ok(()) +} + +pub(crate) fn run_tictactoe(config: CliConfig, mode: RunMode) -> Result<(), String> { if config.render_physics { return Err("tictactoe does not support --render-physics".to_string()); } #[cfg(feature = "render")] if config.render { - return run_tictactoe_render(config); + return run_tictactoe_render(config, mode); } if config.render { return Err("the crate was built without the render feature".to_string()); } - let game = TicTacToe; - let mut session = Session::new(game, config.seed); - let mut human = HumanTicTacToe; - let mut random = RandomPolicy; - let mut first = FirstLegalPolicy; - let mut scripted = ScriptedPolicy::new(parse_tictactoe_script(&config.policy)); - let trace_hash = match config.policy.as_str() { - "human" => run_with_policy(&mut session, config.max_steps, &mut human), - "random" => run_with_policy(&mut session, config.max_steps, &mut random), - "first" => run_with_policy(&mut session, config.max_steps, &mut first), - policy if policy.starts_with("script:") => { - run_with_policy(&mut session, config.max_steps, &mut scripted) - } - other => return Err(format!("unsupported tictactoe policy: {other}")), - }; - println!("trace hash: {trace_hash:016x}"); - Ok(()) + run_headless_game( + TicTacToe, + &config, + mode, + HumanTicTacToe, + parse_tictactoe_script, + 
"tictactoe", + ) } -fn run_blackjack(config: CliConfig) -> Result<(), String> { +pub(crate) fn run_blackjack(config: CliConfig, mode: RunMode) -> Result<(), String> { if config.render_physics { return Err("blackjack does not support --render-physics".to_string()); } #[cfg(feature = "render")] if config.render { - return run_blackjack_render(config); + return run_blackjack_render(config, mode); } if config.render { return Err("the crate was built without the render feature".to_string()); } - let game = Blackjack; - let mut session = Session::new(game, config.seed); - let mut human = HumanBlackjack; - let mut random = RandomPolicy; - let mut first = FirstLegalPolicy; - let mut scripted = ScriptedPolicy::new(parse_blackjack_script(&config.policy)); - let trace_hash = match config.policy.as_str() { - "human" => run_with_policy(&mut session, config.max_steps, &mut human), - "random" => run_with_policy(&mut session, config.max_steps, &mut random), - "first" => run_with_policy(&mut session, config.max_steps, &mut first), - policy if policy.starts_with("script:") => { - run_with_policy(&mut session, config.max_steps, &mut scripted) - } - other => return Err(format!("unsupported blackjack policy: {other}")), - }; - println!("trace hash: {trace_hash:016x}"); - Ok(()) + run_headless_game( + Blackjack, + &config, + mode, + HumanBlackjack, + parse_blackjack_script, + "blackjack", + ) } #[cfg(feature = "physics")] -fn run_platformer(config: CliConfig) -> Result<(), String> { +pub(crate) fn run_platformer(config: CliConfig, mode: RunMode) -> Result<(), String> { #[cfg(feature = "render")] if config.render || config.render_physics { - return run_platformer_render(config); + return run_platformer_render(config, mode); } if config.render || config.render_physics { return Err("the crate was built without the render feature".to_string()); } - let game = Platformer::default(); - let mut session = Session::new(game, config.seed); - let mut human = HumanPlatformer; - let mut random = 
RandomPolicy; - let mut first = FirstLegalPolicy; - let mut scripted = ScriptedPolicy::new(parse_platformer_script(&config.policy)); - let trace_hash = match config.policy.as_str() { - "human" => run_with_policy(&mut session, config.max_steps, &mut human), - "random" => run_with_policy(&mut session, config.max_steps, &mut random), - "first" => run_with_policy(&mut session, config.max_steps, &mut first), - policy if policy.starts_with("script:") => { - run_with_policy(&mut session, config.max_steps, &mut scripted) - } - other => return Err(format!("unsupported platformer policy: {other}")), - }; - println!("trace hash: {trace_hash:016x}"); - Ok(()) + run_headless_game( + Platformer::default(), + &config, + mode, + HumanPlatformer, + parse_platformer_script, + "platformer", + ) } fn run_with_policy(session: &mut Session, max_steps: usize, policy: &mut P) -> u64 where - G: Game + CompactGame + Copy, + G: Game + Observe + Copy, + G::Obs: Debug, P: Policy, { let mut policies: [&mut dyn Policy; 1] = [policy]; while !session.is_terminal() && (session.current_tick() as usize) < max_steps { - let outcome = session.step_with_policies(&mut policies).clone(); - let spectator = session.spectator_observation(); + let reward = { + let outcome = session.step_with_policies(&mut policies); + outcome.reward_for(0) + }; + let observation = session.game().observe(session.state(), Observer::Player(0)); let mut compact = G::WordBuf::default(); session .game() - .encode_spectator_observation(&spectator, &mut compact); + .encode_observation(&observation, &mut compact); println!( "tick={} reward={} terminal={} compact={:?}", session.current_tick(), - outcome.reward_for(0), + reward, session.is_terminal(), compact.as_slice(), ); - println!("{spectator:#?}"); + println!("{observation:#?}"); } stable_hash(session.trace()) } @@ -304,19 +479,24 @@ fn build_render_config(config: &CliConfig, mode: RenderMode) -> RenderConfig { } #[cfg(feature = "render")] -fn run_tictactoe_render(config: 
CliConfig) -> Result<(), String> { - use gameengine::render::builtin::TicTacToePresenter; +fn run_tictactoe_render(config: CliConfig, mode: RunMode) -> Result<(), String> { + use crate::render::builtin::TicTacToePresenter; let render_config = build_render_config(&config, RenderMode::Observation); - match config.policy.as_str() { - "human" => RendererApp::new( + match resolve_policy_choice( + mode, + config.policy_for_mode(mode), + parse_tictactoe_script, + "tictactoe", + )? { + PolicyChoice::Human => RendererApp::new( render_config, TurnBasedDriver::new(InteractiveSession::new(TicTacToe, config.seed)), TicTacToePresenter::default(), ) .run_native() .map_err(|error| error.to_string()), - "random" => RendererApp::new( + PolicyChoice::Random => RendererApp::new( render_config, PassivePolicyDriver::new( InteractiveSession::new(TicTacToe, config.seed), @@ -326,7 +506,7 @@ fn run_tictactoe_render(config: CliConfig) -> Result<(), String> { ) .run_native() .map_err(|error| error.to_string()), - "first" => RendererApp::new( + PolicyChoice::First => RendererApp::new( render_config, PassivePolicyDriver::new( InteractiveSession::new(TicTacToe, config.seed), @@ -336,34 +516,41 @@ fn run_tictactoe_render(config: CliConfig) -> Result<(), String> { ) .run_native() .map_err(|error| error.to_string()), - policy if policy.starts_with("script:") => RendererApp::new( - render_config, - PassivePolicyDriver::new( - InteractiveSession::new(TicTacToe, config.seed), - ScriptedPolicy::new(parse_tictactoe_script(&config.policy)), - ), - TicTacToePresenter::default(), - ) - .run_native() - .map_err(|error| error.to_string()), - other => Err(format!("unsupported tictactoe policy: {other}")), + PolicyChoice::Scripted(script) => { + validate_scripted_policy(TicTacToe, config.seed, &script, None, "tictactoe")?; + RendererApp::new( + render_config, + PassivePolicyDriver::new( + InteractiveSession::new(TicTacToe, config.seed), + ScriptedPolicy::new_strict(script), + ), + 
TicTacToePresenter::default(), + ) + .run_native() + .map_err(|error| error.to_string()) + } } } #[cfg(feature = "render")] -fn run_blackjack_render(config: CliConfig) -> Result<(), String> { - use gameengine::render::builtin::BlackjackPresenter; +fn run_blackjack_render(config: CliConfig, mode: RunMode) -> Result<(), String> { + use crate::render::builtin::BlackjackPresenter; let render_config = build_render_config(&config, RenderMode::Observation); - match config.policy.as_str() { - "human" => RendererApp::new( + match resolve_policy_choice( + mode, + config.policy_for_mode(mode), + parse_blackjack_script, + "blackjack", + )? { + PolicyChoice::Human => RendererApp::new( render_config, TurnBasedDriver::new(InteractiveSession::new(Blackjack, config.seed)), BlackjackPresenter::default(), ) .run_native() .map_err(|error| error.to_string()), - "random" => RendererApp::new( + PolicyChoice::Random => RendererApp::new( render_config, PassivePolicyDriver::new( InteractiveSession::new(Blackjack, config.seed), @@ -373,7 +560,7 @@ fn run_blackjack_render(config: CliConfig) -> Result<(), String> { ) .run_native() .map_err(|error| error.to_string()), - "first" => RendererApp::new( + PolicyChoice::First => RendererApp::new( render_config, PassivePolicyDriver::new( InteractiveSession::new(Blackjack, config.seed), @@ -383,33 +570,42 @@ fn run_blackjack_render(config: CliConfig) -> Result<(), String> { ) .run_native() .map_err(|error| error.to_string()), - policy if policy.starts_with("script:") => RendererApp::new( - render_config, - PassivePolicyDriver::new( - InteractiveSession::new(Blackjack, config.seed), - ScriptedPolicy::new(parse_blackjack_script(&config.policy)), - ), - BlackjackPresenter::default(), - ) - .run_native() - .map_err(|error| error.to_string()), - other => Err(format!("unsupported blackjack policy: {other}")), + PolicyChoice::Scripted(script) => { + validate_scripted_policy(Blackjack, config.seed, &script, None, "blackjack")?; + RendererApp::new( + 
render_config, + PassivePolicyDriver::new( + InteractiveSession::new(Blackjack, config.seed), + ScriptedPolicy::new_strict(script), + ), + BlackjackPresenter::default(), + ) + .run_native() + .map_err(|error| error.to_string()) + } } } #[cfg(all(feature = "render", feature = "physics"))] -fn run_platformer_render(config: CliConfig) -> Result<(), String> { - let mode = if config.render_physics { +fn run_platformer_render(config: CliConfig, mode: RunMode) -> Result<(), String> { + let render_mode = if config.render_physics { RenderMode::OracleWorld } else { RenderMode::Observation }; - let render_config = build_render_config(&config, mode); + let render_config = build_render_config(&config, render_mode); let game = Platformer::default(); + let policy_choice = resolve_policy_choice( + mode, + config.policy_for_mode(mode), + parse_platformer_script, + "platformer", + )?; + if config.render_physics { - match config.policy.as_str() { - "human" => RendererApp::new( + match policy_choice { + PolicyChoice::Human => RendererApp::new( render_config, RealtimeDriver::new( InteractiveSession::new(game, config.seed), @@ -419,14 +615,14 @@ fn run_platformer_render(config: CliConfig) -> Result<(), String> { ) .run_native() .map_err(|error| error.to_string()), - "random" => RendererApp::new( + PolicyChoice::Random => RendererApp::new( render_config, PassivePolicyDriver::new(InteractiveSession::new(game, config.seed), RandomPolicy), builtin::PlatformerPhysicsPresenter::new(game.config), ) .run_native() .map_err(|error| error.to_string()), - "first" => RendererApp::new( + PolicyChoice::First => RendererApp::new( render_config, PassivePolicyDriver::new( InteractiveSession::new(game, config.seed), @@ -436,21 +632,23 @@ fn run_platformer_render(config: CliConfig) -> Result<(), String> { ) .run_native() .map_err(|error| error.to_string()), - policy if policy.starts_with("script:") => RendererApp::new( - render_config, - PassivePolicyDriver::new( - InteractiveSession::new(game, 
config.seed), - ScriptedPolicy::new(parse_platformer_script(&config.policy)), - ), - builtin::PlatformerPhysicsPresenter::new(game.config), - ) - .run_native() - .map_err(|error| error.to_string()), - other => Err(format!("unsupported platformer policy: {other}")), + PolicyChoice::Scripted(script) => { + validate_scripted_policy(game, config.seed, &script, None, "platformer")?; + RendererApp::new( + render_config, + PassivePolicyDriver::new( + InteractiveSession::new(game, config.seed), + ScriptedPolicy::new_strict(script), + ), + builtin::PlatformerPhysicsPresenter::new(game.config), + ) + .run_native() + .map_err(|error| error.to_string()) + } } } else { - match config.policy.as_str() { - "human" => RendererApp::new( + match policy_choice { + PolicyChoice::Human => RendererApp::new( render_config, RealtimeDriver::new( InteractiveSession::new(game, config.seed), @@ -460,14 +658,14 @@ fn run_platformer_render(config: CliConfig) -> Result<(), String> { ) .run_native() .map_err(|error| error.to_string()), - "random" => RendererApp::new( + PolicyChoice::Random => RendererApp::new( render_config, PassivePolicyDriver::new(InteractiveSession::new(game, config.seed), RandomPolicy), builtin::PlatformerPresenter::default(), ) .run_native() .map_err(|error| error.to_string()), - "first" => RendererApp::new( + PolicyChoice::First => RendererApp::new( render_config, PassivePolicyDriver::new( InteractiveSession::new(game, config.seed), @@ -477,17 +675,19 @@ fn run_platformer_render(config: CliConfig) -> Result<(), String> { ) .run_native() .map_err(|error| error.to_string()), - policy if policy.starts_with("script:") => RendererApp::new( - render_config, - PassivePolicyDriver::new( - InteractiveSession::new(game, config.seed), - ScriptedPolicy::new(parse_platformer_script(&config.policy)), - ), - builtin::PlatformerPresenter::default(), - ) - .run_native() - .map_err(|error| error.to_string()), - other => Err(format!("unsupported platformer policy: {other}")), + 
PolicyChoice::Scripted(script) => { + validate_scripted_policy(game, config.seed, &script, None, "platformer")?; + RendererApp::new( + render_config, + PassivePolicyDriver::new( + InteractiveSession::new(game, config.seed), + ScriptedPolicy::new_strict(script), + ), + builtin::PlatformerPresenter::default(), + ) + .run_native() + .map_err(|error| error.to_string()) + } } } } @@ -498,8 +698,14 @@ fn print_usage() { println!( " gameengine play [--seed N] [--max-steps N] [--policy human|random|first|script:...]" ); - println!(" gameengine replay [--seed N] [--max-steps N] [--policy script:...]"); + println!( + " gameengine replay [--seed N] [--max-steps N] [--policy first|random|script:...]" + ); println!(" gameengine validate"); + println!("available games:"); + for descriptor in all_games() { + println!(" - {}", descriptor.name); + } println!("optional rendering flags:"); println!(" --render"); println!(" --render-physics"); @@ -516,11 +722,11 @@ fn prompt(message: &str) -> io::Result { Ok(input) } -fn parse_tictactoe_script(spec: &str) -> Vec { +fn parse_tictactoe_script(spec: &str) -> Result, String> { parse_script(spec, |token| token.parse::().ok().map(TicTacToeAction)) } -fn parse_blackjack_script(spec: &str) -> Vec { +fn parse_blackjack_script(spec: &str) -> Result, String> { parse_script(spec, |token| match token.to_ascii_lowercase().as_str() { "hit" | "h" => Some(BlackjackAction::Hit), "stand" | "s" => Some(BlackjackAction::Stand), @@ -529,7 +735,7 @@ fn parse_blackjack_script(spec: &str) -> Vec { } #[cfg(feature = "physics")] -fn parse_platformer_script(spec: &str) -> Vec { +fn parse_platformer_script(spec: &str) -> Result, String> { parse_script(spec, |token| match token.to_ascii_lowercase().as_str() { "stay" | "s" => Some(PlatformerAction::Stay), "left" | "l" => Some(PlatformerAction::Left), @@ -539,17 +745,25 @@ fn parse_platformer_script(spec: &str) -> Vec { }) } -fn parse_script(spec: &str, parser: F) -> Vec +fn parse_script(spec: &str, parser: F) -> 
Result, String> where F: Fn(&str) -> Option, { let Some(script) = spec.strip_prefix("script:") else { - return Vec::new(); + return Ok(Vec::new()); }; - script - .split(',') - .filter_map(|token| parser(token.trim())) - .collect() + + let mut actions = Vec::new(); + for (index, token) in script.split(',').enumerate() { + let trimmed = token.trim(); + if trimmed.is_empty() { + return Err(format!("empty action token at position {index}")); + } + let action = parser(trimmed) + .ok_or_else(|| format!("invalid action token at position {index}: {trimmed}"))?; + actions.push(action); + } + Ok(actions) } struct HumanTicTacToe; @@ -560,9 +774,9 @@ impl Policy for HumanTicTacToe { _game: &TicTacToe, _state: &::State, _player: usize, - _observation: &::PlayerObservation, + _observation: &::Obs, legal_actions: &[::Action], - _rng: &mut gameengine::DeterministicRng, + _rng: &mut crate::DeterministicRng, ) -> ::Action { loop { let input = prompt("choose move [0-8]: ").expect("stdin prompt failed"); @@ -585,9 +799,9 @@ impl Policy for HumanBlackjack { _game: &Blackjack, _state: &::State, _player: usize, - _observation: &::PlayerObservation, + _observation: &::Obs, legal_actions: &[::Action], - _rng: &mut gameengine::DeterministicRng, + _rng: &mut crate::DeterministicRng, ) -> ::Action { loop { let input = prompt("choose action [hit/stand]: ").expect("stdin prompt failed"); @@ -617,9 +831,9 @@ impl Policy for HumanPlatformer { _game: &Platformer, _state: &::State, _player: usize, - _observation: &::PlayerObservation, + _observation: &::Obs, legal_actions: &[::Action], - _rng: &mut gameengine::DeterministicRng, + _rng: &mut crate::DeterministicRng, ) -> ::Action { loop { let input = @@ -641,3 +855,58 @@ impl Policy for HumanPlatformer { } } } + +#[cfg(test)] +mod tests { + use super::{ + CliConfig, PolicyChoice, RunMode, parse_tictactoe_script, resolve_policy_choice, + run_scripted_headless_game, + }; + use crate::builtin::{TicTacToe, TicTacToeAction}; + + #[test] + fn 
replay_defaults_to_first_policy() { + let config = CliConfig::parse(Vec::::new()).unwrap(); + let choice = resolve_policy_choice( + RunMode::Replay, + config.policy_for_mode(RunMode::Replay), + parse_tictactoe_script, + "tictactoe", + ) + .unwrap(); + assert!(matches!(choice, PolicyChoice::First)); + } + + #[test] + fn replay_accepts_explicit_random_policy() { + let choice = resolve_policy_choice( + RunMode::Replay, + "random", + parse_tictactoe_script, + "tictactoe", + ) + .unwrap(); + assert!(matches!(choice, PolicyChoice::Random)); + } + + #[test] + fn scripted_headless_run_reports_exhaustion() { + let error = + run_scripted_headless_game(TicTacToe, 1, &[TicTacToeAction(0)], 64, "tictactoe") + .unwrap_err(); + assert!(error.contains("scripted policy exhausted")); + } + + #[test] + fn scripted_headless_run_reports_illegal_action() { + let error = run_scripted_headless_game( + TicTacToe, + 1, + &[TicTacToeAction(0), TicTacToeAction(0)], + 64, + "tictactoe", + ) + .unwrap_err(); + assert!(error.contains("scripted policy action at index 1 is illegal")); + } +} diff --git a/src/compact.rs b/src/compact.rs index addce2f..0b44254 100644 --- a/src/compact.rs +++ b/src/compact.rs @@ -1,19 +1,131 @@ -use crate::buffer::Buffer; -use crate::game::Game; -use crate::types::{PlayerId, Reward}; +//! Compact encoding specifications and validation helpers. +use core::fmt; + +use crate::types::Reward; + +/// Structured compact codec errors. +#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] +pub enum CompactError { + /// Reward was outside declared compact range. + RewardOutOfRange { + /// Input reward value. + reward: Reward, + /// Minimum allowed reward. + min_reward: Reward, + /// Maximum allowed reward. + max_reward: Reward, + }, + /// Encoded reward decoded outside declared compact range. + EncodedRewardOutOfRange { + /// Encoded compact reward value. + encoded: u64, + /// Minimum allowed reward. + min_reward: Reward, + /// Maximum allowed reward. 
+ max_reward: Reward, + }, + /// Encoded reward exceeded declared compact bit width. + RewardEncodingExceedsBitWidth { + /// Encoded compact reward value. + encoded: u64, + /// Declared compact reward bit width. + reward_bits: u8, + }, + /// Observation word stream length differs from declared schema. + ObservationLengthMismatch { + /// Actual number of observation words emitted. + actual_len: usize, + /// Declared number of observation words. + expected_len: usize, + }, + /// Observation word exceeded declared observation bit width. + ObservationWordOutOfRange { + /// Word index in observation stream. + index: usize, + /// Actual encoded word value. + word: u64, + /// Maximum representable word value for the schema. + max_word: u64, + }, + /// Encoded action had no valid decoding. + InvalidActionEncoding { + /// Encoded action value. + encoded: u64, + }, +} + +impl fmt::Display for CompactError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::RewardOutOfRange { + reward, + min_reward, + max_reward, + } => write!( + f, + "reward {reward} is outside compact range [{min_reward}, {max_reward}]" + ), + Self::EncodedRewardOutOfRange { + encoded, + min_reward, + max_reward, + } => write!( + f, + "encoded reward {encoded} decodes outside compact range [{min_reward}, {max_reward}]" + ), + Self::RewardEncodingExceedsBitWidth { + encoded, + reward_bits, + } => write!( + f, + "encoded reward {encoded} exceeds declared reward bit width {reward_bits}" + ), + Self::ObservationLengthMismatch { + actual_len, + expected_len, + } => write!( + f, + "observation stream length {actual_len} does not match declared length {expected_len}" + ), + Self::ObservationWordOutOfRange { + index, + word, + max_word, + } => write!( + f, + "observation word {index} has value {word}, exceeding schema maximum {max_word}" + ), + Self::InvalidActionEncoding { encoded } => { + write!(f, "invalid action encoding {encoded}") + } + } + } +} + +impl std::error::Error for 
CompactError {} + +/// Compact schema descriptor for action/observation/reward encoding. #[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] pub struct CompactSpec { + /// Number of legal compact action values. pub action_count: u64, + /// Bit width of one observation word. pub observation_bits: u8, + /// Number of observation words emitted per observation. pub observation_stream_len: usize, + /// Bit width of encoded reward. pub reward_bits: u8, + /// Minimum reward value. pub min_reward: Reward, + /// Maximum reward value. pub max_reward: Reward, + /// Signed offset used for reward encoding. pub reward_offset: Reward, } impl CompactSpec { + /// Maximum representable value for one observation word. pub fn max_observation_value(&self) -> u64 { if self.observation_bits == 0 { 0 @@ -24,61 +136,209 @@ impl CompactSpec { } } + /// Maximum representable compact reward value from declared bit width. + pub fn max_reward_value(&self) -> u64 { + if self.reward_bits == 0 { + 0 + } else if self.reward_bits >= 64 { + u64::MAX + } else { + (1u64 << self.reward_bits) - 1 + } + } + + /// Validates one encoded reward against declared reward bit width. + pub fn validate_encoded_reward_bits(&self, encoded: u64) -> Result<(), CompactError> { + if encoded > self.max_reward_value() { + return Err(CompactError::RewardEncodingExceedsBitWidth { + encoded, + reward_bits: self.reward_bits, + }); + } + Ok(()) + } + + /// Validates a full observation stream against declared shape and bit bounds. 
+ pub fn validate_observation_words(&self, words: &[u64]) -> Result<(), CompactError> { + if words.len() != self.observation_stream_len { + return Err(CompactError::ObservationLengthMismatch { + actual_len: words.len(), + expected_len: self.observation_stream_len, + }); + } + + let max_word = self.max_observation_value(); + let mut index = 0usize; + while index < words.len() { + let word = words[index]; + if word > max_word { + return Err(CompactError::ObservationWordOutOfRange { + index, + word, + max_word, + }); + } + index += 1; + } + Ok(()) + } + + /// Encode reward and panic on out-of-range input. pub fn encode_reward(&self, reward: Reward) -> u64 { - debug_assert!(reward >= self.min_reward); - debug_assert!(reward <= self.max_reward); - (reward + self.reward_offset) as u64 + self.try_encode_reward(reward) + .expect("reward out of compact range") } + /// Decode reward and panic on out-of-range encoded input. pub fn decode_reward(&self, encoded: u64) -> Reward { - (encoded as Reward) - self.reward_offset + self.try_decode_reward(encoded) + .expect("encoded reward out of compact range") } - pub fn reward_range_is_sound(&self) -> bool { - self.min_reward <= self.max_reward - && self.encode_reward(self.min_reward) <= self.encode_reward(self.max_reward) + /// Checked reward encoder. + pub fn try_encode_reward(&self, reward: Reward) -> Result { + if reward < self.min_reward || reward > self.max_reward { + return Err(CompactError::RewardOutOfRange { + reward, + min_reward: self.min_reward, + max_reward: self.max_reward, + }); + } + let encoded = i128::from(reward) + i128::from(self.reward_offset); + if !(0..=i128::from(u64::MAX)).contains(&encoded) { + return Err(CompactError::RewardOutOfRange { + reward, + min_reward: self.min_reward, + max_reward: self.max_reward, + }); + } + let encoded = encoded as u64; + self.validate_encoded_reward_bits(encoded)?; + Ok(encoded) + } + + /// Checked reward decoder. 
+ pub fn try_decode_reward(&self, encoded: u64) -> Result { + self.validate_encoded_reward_bits(encoded)?; + let decoded = i128::from(encoded) - i128::from(self.reward_offset); + if decoded < i128::from(self.min_reward) || decoded > i128::from(self.max_reward) { + return Err(CompactError::EncodedRewardOutOfRange { + encoded, + min_reward: self.min_reward, + max_reward: self.max_reward, + }); + } + if decoded < i128::from(Reward::MIN) || decoded > i128::from(Reward::MAX) { + return Err(CompactError::EncodedRewardOutOfRange { + encoded, + min_reward: self.min_reward, + max_reward: self.max_reward, + }); + } + Ok(decoded as Reward) } -} -pub trait CompactGame: Game { - fn compact_spec(&self) -> CompactSpec; - fn encode_action(&self, action: &Self::Action) -> u64; - fn decode_action(&self, encoded: u64) -> Option; - fn encode_player_observation( - &self, - observation: &Self::PlayerObservation, - out: &mut Self::WordBuf, - ); - - fn encode_spectator_observation( - &self, - observation: &Self::SpectatorObservation, - out: &mut Self::WordBuf, - ) { - let _ = observation; - out.clear(); - } - - fn encode_player_view(&self, state: &Self::State, player: PlayerId, out: &mut Self::WordBuf) { - let observation = self.observe_player(state, player); - self.encode_player_observation(&observation, out); - } - - fn compact_invariant(&self, words: &Self::WordBuf) -> bool { - let spec = self.compact_spec(); - if words.len() != spec.observation_stream_len { + /// Validate internal reward-range consistency. 
+ pub fn reward_range_is_sound(&self) -> bool { + if self.min_reward > self.max_reward { return false; } - let max_value = spec.max_observation_value(); - let slice = words.as_slice(); - let mut index = 0usize; - while index < slice.len() { - if slice[index] > max_value { - return false; - } - index += 1; + let Ok(min_encoded) = self.try_encode_reward(self.min_reward) else { + return false; + }; + let Ok(max_encoded) = self.try_encode_reward(self.max_reward) else { + return false; + }; + min_encoded <= max_encoded + && self.try_decode_reward(min_encoded).ok() == Some(self.min_reward) + && self.try_decode_reward(max_encoded).ok() == Some(self.max_reward) + } +} + +/// Encode one finite enum action using an explicit canonical action table. +pub fn encode_enum_action(action: T, action_table: &[T]) -> u64 +where + T: Copy + Eq, +{ + let mut index = 0usize; + while index < action_table.len() { + if action_table[index] == action { + return index as u64; } - true + index += 1; + } + panic!("action missing from compact action table"); +} + +/// Decode one finite enum action using an explicit canonical action table. 
+pub fn decode_enum_action(encoded: u64, action_table: &[T]) -> Option +where + T: Copy, +{ + action_table.get(encoded as usize).copied() +} + +#[cfg(test)] +mod tests { + use super::CompactSpec; + + #[test] + fn try_decode_reward_rejects_large_values_without_overflow() { + let spec = CompactSpec { + action_count: 2, + observation_bits: 8, + observation_stream_len: 1, + reward_bits: 2, + min_reward: -1, + max_reward: 1, + reward_offset: 1, + }; + assert!(spec.try_decode_reward(u64::MAX).is_err()); + } + + #[test] + fn try_encode_reward_handles_negative_ranges() { + let spec = CompactSpec { + action_count: 2, + observation_bits: 8, + observation_stream_len: 1, + reward_bits: 3, + min_reward: -3, + max_reward: 2, + reward_offset: 3, + }; + assert_eq!(spec.try_encode_reward(-3).unwrap(), 0); + assert_eq!(spec.try_encode_reward(2).unwrap(), 5); + } + + #[test] + fn observation_stream_validation_catches_shape_errors() { + let spec = CompactSpec { + action_count: 2, + observation_bits: 3, + observation_stream_len: 2, + reward_bits: 2, + min_reward: 0, + max_reward: 1, + reward_offset: 0, + }; + assert!(spec.validate_observation_words(&[1, 7]).is_ok()); + assert!(spec.validate_observation_words(&[1]).is_err()); + assert!(spec.validate_observation_words(&[1, 8]).is_err()); + } + + #[test] + fn reward_bit_width_is_enforced() { + let spec = CompactSpec { + action_count: 2, + observation_bits: 8, + observation_stream_len: 1, + reward_bits: 2, + min_reward: 0, + max_reward: 3, + reward_offset: 0, + }; + assert!(spec.try_decode_reward(4).is_err()); } } @@ -103,4 +363,37 @@ mod proofs { assert_eq!(spec.decode_reward(encoded), reward); assert!(spec.reward_range_is_sound()); } + + #[kani::proof] + fn compact_observation_words_match_schema() { + let spec = CompactSpec { + action_count: 2, + observation_bits: 3, + observation_stream_len: 1, + reward_bits: 2, + min_reward: 0, + max_reward: 1, + reward_offset: 0, + }; + let word: u64 = kani::any(); + if word <= 
spec.max_observation_value() { + assert!(spec.validate_observation_words(&[word]).is_ok()); + } else { + assert!(spec.validate_observation_words(&[word]).is_err()); + } + } + + #[kani::proof] + fn compact_reward_bit_width_is_enforced() { + let spec = CompactSpec { + action_count: 2, + observation_bits: 1, + observation_stream_len: 1, + reward_bits: 2, + min_reward: 0, + max_reward: 3, + reward_offset: 0, + }; + assert!(spec.try_decode_reward(4).is_err()); + } } diff --git a/src/core/cards.rs b/src/core/cards.rs new file mode 100644 index 0000000..bdb7696 --- /dev/null +++ b/src/core/cards.rs @@ -0,0 +1,87 @@ +//! Shared card/deck helpers for card-based builtin environments. + +/// A compact summary of blackjack hand value semantics. +#[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)] +pub struct BlackjackValue { + /// Total score after soft-ace normalization. + pub total: u8, + /// Whether the hand still contains at least one soft ace. + pub soft: bool, + /// Whether the hand score exceeds 21. + pub busted: bool, +} + +/// Evaluate a blackjack hand from rank values in `[1, 13]`. +pub fn evaluate_blackjack_hand( + cards: &[u8; MAX_CARDS], + len: u8, +) -> BlackjackValue { + let mut total = 0u8; + let mut aces = 0u8; + let limit = len as usize; + let mut index = 0usize; + while index < MAX_CARDS && index < limit { + let card = cards[index]; + match card { + 1 => { + total = total.saturating_add(11); + aces += 1; + } + 11..=13 => total = total.saturating_add(10), + value => total = total.saturating_add(value), + } + index += 1; + } + for _ in 0..MAX_CARDS { + if total <= 21 || aces == 0 { + break; + } + total -= 10; + aces -= 1; + } + BlackjackValue { + total, + soft: aces > 0, + busted: total > 21, + } +} + +/// Fill a 52-card deck using ranks `[1, 13]` with four suits per rank. 
+pub fn fill_standard_deck_52(deck: &mut [u8; 52]) { + let mut index = 0usize; + for _ in 0..4 { + for rank in 1..=13 { + deck[index] = rank; + index += 1; + } + } +} + +/// Returns true when `deck` is a full 52-card rank multiset with four of each rank 1..=13. +pub fn is_standard_deck_52_permutation(deck: &[u8; 52]) -> bool { + let mut counts = [0u8; 14]; + for card in deck { + if !(1..=13).contains(card) { + return false; + } + counts[*card as usize] += 1; + } + for count in counts.iter().skip(1) { + if *count != 4 { + return false; + } + } + true +} + +/// Pack cards as 4-bit nibbles into a single `u64`. +pub fn pack_cards_nibbles(cards: &[u8; MAX_CARDS], len: u8) -> u64 { + let mut packed = 0u64; + let limit = len as usize; + let mut index = 0usize; + while index < MAX_CARDS && index < limit { + packed |= u64::from(cards[index]) << (index * 4); + index += 1; + } + packed +} diff --git a/src/core/env.rs b/src/core/env.rs new file mode 100644 index 0000000..efbf852 --- /dev/null +++ b/src/core/env.rs @@ -0,0 +1,1338 @@ +//! Compact environment wrapper for infotheory-compatible stepping. + +use core::fmt; + +use crate::buffer::{Buffer, FixedVec}; +use crate::compact::CompactError; +use crate::core::observe::{Observe, Observer}; +use crate::session::{HistoryStore, SessionKernel}; +use crate::types::{PlayerAction, PlayerId, Reward, Seed}; + +/// Compact observation packet represented as fixed-capacity machine words. +#[derive(Clone, Debug, Default, Eq, Hash, PartialEq)] +pub struct BitPacket { + words: FixedVec, +} + +impl BitPacket { + /// Returns the currently populated word slice. + pub fn words(&self) -> &[u64] { + self.words.as_slice() + } + + /// Clears all packet words. + pub fn clear(&mut self) { + self.words.clear(); + } + + fn push_word(&mut self, word: u64) { + self.words.push(word).expect("bit packet capacity exceeded"); + } +} + +/// Reward emitted by the environment in raw and compact-encoded form. 
+#[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)] +pub struct CompactReward { + /// Raw reward value from game semantics. + pub raw: Reward, + /// Compactly encoded reward value according to `CompactSpec`. + pub encoded: u64, +} + +/// One environment step result with compact observation and reward. +#[derive(Clone, Debug, Eq, Hash, PartialEq)] +pub struct EnvStep { + /// Encoded observation packet after the step. + pub observation_bits: BitPacket, + /// Raw and compact reward representation. + pub reward: CompactReward, + /// True if the episode has reached terminal state. + pub terminated: bool, + /// True if the episode was truncated externally. + pub truncated: bool, +} + +/// Errors produced by compact environment reset/step operations. +#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] +pub enum EnvError { + /// Step was requested after the session already terminated. + SessionTerminated, + /// Action bit pattern does not decode into a legal action value. + InvalidActionEncoding { + /// Raw encoded action word. + encoded: u64, + }, + /// Observation encoding exceeded configured packet capacity. + ObservationOverflow { + /// Number of words requested by the game encoder. + actual_words: usize, + /// Maximum words accepted by this environment wrapper. + max_words: usize, + }, + /// Observation stream violated the compact schema constraints. + InvalidObservationEncoding { + /// Canonical compact constraint violation details. + reason: CompactError, + }, + /// Reward cannot be represented by the configured compact reward range. + RewardOutOfRange { + /// Raw out-of-range reward. + reward: Reward, + /// Minimum representable reward. + min: Reward, + /// Maximum representable reward. + max: Reward, + }, + /// Reward encoding violated compact schema constraints. + InvalidRewardEncoding { + /// Canonical compact constraint violation details. + reason: CompactError, + }, + /// Selected agent player id is outside game player range. 
+ InvalidAgentPlayer { + /// Requested player id. + player: PlayerId, + /// Number of players exposed by the game. + player_count: usize, + }, + /// Parameter bundle was rejected by the game's parameter invariant. + InvalidParameters { + /// Stable machine-readable game name. + game: &'static str, + }, +} + +impl fmt::Display for EnvError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::SessionTerminated => write!(f, "cannot step a terminal session"), + Self::InvalidActionEncoding { encoded } => { + write!(f, "invalid compact action encoding: {encoded}") + } + Self::ObservationOverflow { + actual_words, + max_words, + } => { + write!( + f, + "observation packet requires {actual_words} words but maximum is {max_words}" + ) + } + Self::InvalidObservationEncoding { reason } => { + write!(f, "observation does not satisfy compact schema: {reason}") + } + Self::RewardOutOfRange { reward, min, max } => { + write!( + f, + "reward {reward} is outside compact spec range [{min}, {max}]" + ) + } + Self::InvalidRewardEncoding { reason } => { + write!(f, "reward does not satisfy compact schema: {reason}") + } + Self::InvalidAgentPlayer { + player, + player_count, + } => write!( + f, + "agent player {player} is outside player range 0..{player_count}" + ), + Self::InvalidParameters { game } => { + write!(f, "invalid parameter bundle for game `{game}`") + } + } + } +} + +impl std::error::Error for EnvError {} + +/// Minimal infotheory-compatible compact environment interface. +pub trait InfotheoryEnvironment { + /// Parameter bundle used to initialize/reset environment state. + type Params; + + /// Resets environment state and returns initial compact observation. + fn reset_seed(&mut self, seed: Seed) -> Result, EnvError>; + + /// Resets environment state from explicit params and returns compact observation. 
+ fn reset_seed_with_params( + &mut self, + seed: Seed, + params: Self::Params, + ) -> Result, EnvError>; + + /// Steps environment using compact action bits. + fn step_bits(&mut self, action_bits: u64) -> Result, EnvError>; +} + +/// Generic environment adapter over `SessionKernel` and compact codecs. +#[derive(Clone, Debug)] +pub struct Environment +where + G: Observe, + H: HistoryStore, +{ + session: SessionKernel, + observer: Observer, + agent_player: PlayerId, +} + +/// Default environment alias with dynamic history and packet capacity. +pub type DefaultEnvironment = + Environment, MAX_WORDS>; + +impl Environment +where + G: Observe, + H: HistoryStore, +{ + fn validate_params(game: &G, params: &G::Params) -> Result<(), EnvError> { + if game.params_invariant(params) { + Ok(()) + } else { + Err(EnvError::InvalidParameters { game: game.name() }) + } + } + + /// Creates a new compact environment initialized with explicit params. + pub fn try_new_with_params( + game: G, + seed: Seed, + observer: Observer, + params: G::Params, + ) -> Result { + Self::validate_params(&game, ¶ms)?; + let agent_player = match observer { + Observer::Player(player) => player, + Observer::Spectator => 0, + }; + Ok(Self { + session: SessionKernel::new_with_params(game, seed, params), + observer, + agent_player, + }) + } + + /// Creates a new compact environment initialized with explicit params. + pub fn new_with_params(game: G, seed: Seed, observer: Observer, params: G::Params) -> Self { + Self::try_new_with_params(game, seed, observer, params) + .expect("invalid parameter bundle for compact environment") + } + + /// Creates a new compact environment. + pub fn new(game: G, seed: Seed, observer: Observer) -> Self { + let params = game.default_params(); + Self::new_with_params(game, seed, observer, params) + } + + /// Returns immutable access to the underlying session kernel. 
+ pub fn session(&self) -> &SessionKernel { + &self.session + } + + /// Returns mutable access to the underlying session kernel. + pub fn session_mut(&mut self) -> &mut SessionKernel { + &mut self.session + } + + /// Returns current observer viewpoint. + pub fn observer(&self) -> Observer { + self.observer + } + + /// Sets observer viewpoint used for future observation encodes. + pub fn set_observer(&mut self, observer: Observer) { + self.observer = observer; + if let Observer::Player(player) = observer { + self.agent_player = player; + } + } + + /// Returns the player id controlled by compact `step()` actions. + pub fn agent_player(&self) -> PlayerId { + self.agent_player + } + + fn validate_player(&self, player: PlayerId) -> Result<(), EnvError> { + let player_count = self.session.game().player_count(); + if player >= player_count { + return Err(EnvError::InvalidAgentPlayer { + player, + player_count, + }); + } + Ok(()) + } + + fn validate_observer(&self) -> Result<(), EnvError> { + if let Observer::Player(player) = self.observer { + self.validate_player(player)?; + } + Ok(()) + } + + /// Sets the player id controlled by compact `step()` actions. + pub fn set_agent_player(&mut self, player: PlayerId) { + self.agent_player = player; + } + + /// Resets session state and returns initial compact observation. + pub fn reset(&mut self, seed: Seed) -> Result, EnvError> { + self.session.reset(seed); + self.encode_current_observation() + } + + /// Resets state from explicit params and returns initial compact observation. + pub fn reset_with_params( + &mut self, + seed: Seed, + params: G::Params, + ) -> Result, EnvError> { + Self::validate_params(self.session.game(), ¶ms)?; + self.session.reset_with_params(seed, params); + self.encode_current_observation() + } + + /// Steps the environment from an encoded action value. 
+ pub fn step(&mut self, action_bits: u64) -> Result, EnvError> { + if self.session.is_terminal() { + return Err(EnvError::SessionTerminated); + } + + let Some(action) = self.session.game().decode_action(action_bits) else { + return Err(EnvError::InvalidActionEncoding { + encoded: action_bits, + }); + }; + + self.validate_player(self.agent_player)?; + + let mut actions = G::JointActionBuf::default(); + actions + .push(PlayerAction { + player: self.agent_player, + action, + }) + .expect("joint action buffer capacity exceeded"); + + let (reward, terminated) = { + let outcome = self.session.step_with_joint_actions(&actions); + (outcome.reward_for(self.agent_player), outcome.is_terminal()) + }; + + let spec = self.session.compact_spec(); + let encoded_reward = spec + .try_encode_reward(reward) + .map_err(|reason| match reason { + CompactError::RewardOutOfRange { .. } => EnvError::RewardOutOfRange { + reward, + min: spec.min_reward, + max: spec.max_reward, + }, + other => EnvError::InvalidRewardEncoding { reason: other }, + })?; + + Ok(EnvStep { + observation_bits: self.encode_current_observation()?, + reward: CompactReward { + raw: reward, + encoded: encoded_reward, + }, + terminated, + truncated: false, + }) + } + + /// Encodes current observation into a bounded compact packet. 
+ pub fn encode_current_observation(&self) -> Result, EnvError> { + self.validate_observer()?; + + let mut encoded = G::WordBuf::default(); + self.session + .game() + .observe_and_encode(self.session.state(), self.observer, &mut encoded); + if encoded.len() > MAX_WORDS { + return Err(EnvError::ObservationOverflow { + actual_words: encoded.len(), + max_words: MAX_WORDS, + }); + } + self.session + .compact_spec() + .validate_observation_words(encoded.as_slice()) + .map_err(|reason| EnvError::InvalidObservationEncoding { reason })?; + + let mut packet = BitPacket::default(); + for &word in encoded.as_slice() { + packet.push_word(word); + } + Ok(packet) + } +} + +impl InfotheoryEnvironment for Environment +where + G: Observe, + H: HistoryStore, +{ + type Params = G::Params; + + /// Resets environment and emits initial packet. + fn reset_seed(&mut self, seed: Seed) -> Result, EnvError> { + self.reset(seed) + } + + /// Resets environment from explicit params and emits initial packet. + fn reset_seed_with_params( + &mut self, + seed: Seed, + params: Self::Params, + ) -> Result, EnvError> { + self.reset_with_params(seed, params) + } + + /// Steps environment with compact action bits. 
+ fn step_bits(&mut self, action_bits: u64) -> Result, EnvError> { + self.step(action_bits) + } +} + +#[cfg(test)] +mod regression_tests { + use super::{DefaultEnvironment, EnvError, Observer}; + use crate::buffer::FixedVec; + use crate::compact::CompactSpec; + use crate::game::Game; + use crate::rng::DeterministicRng; + use crate::types::{PlayerAction, PlayerId, PlayerReward, Seed, StepOutcome, Termination}; + + #[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)] + struct DemoGame; + + #[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)] + struct DemoState { + terminal: bool, + marker: u8, + } + + #[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)] + enum DemoAction { + #[default] + Step, + } + + #[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)] + struct BadObservationGame; + + #[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)] + struct BadRewardGame; + + #[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)] + struct ParamRewardGame; + + #[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)] + struct RejectingParamsGame; + + impl Game for DemoGame { + type Params = u8; + type State = DemoState; + type Action = DemoAction; + type Obs = u8; + type WorldView = u8; + type PlayerBuf = FixedVec; + type ActionBuf = FixedVec; + type JointActionBuf = FixedVec, 2>; + type RewardBuf = FixedVec; + type WordBuf = FixedVec; + + fn default_params(&self) -> Self::Params { + 0 + } + + fn name(&self) -> &'static str { + "demo" + } + + fn player_count(&self) -> usize { + 2 + } + + fn init_with_params(&self, _seed: Seed, params: &Self::Params) -> Self::State { + DemoState { + terminal: false, + marker: *params, + } + } + + fn is_terminal(&self, state: &Self::State) -> bool { + state.terminal + } + + fn players_to_act(&self, state: &Self::State, out: &mut Self::PlayerBuf) { + out.clear(); + if !state.terminal { + out.push(0).unwrap(); + out.push(1).unwrap(); + } + } + + fn legal_actions(&self, state: &Self::State, player: PlayerId, 
out: &mut Self::ActionBuf) { + out.clear(); + if !state.terminal && player < 2 { + out.push(DemoAction::Step).unwrap(); + } + } + + fn observe_player(&self, _state: &Self::State, player: PlayerId) -> Self::Obs { + player as u8 + } + + fn observe_spectator(&self, _state: &Self::State) -> Self::Obs { + 99 + } + + fn world_view(&self, _state: &Self::State) -> Self::WorldView { + 0 + } + + fn step_in_place( + &self, + state: &mut Self::State, + _joint_actions: &Self::JointActionBuf, + _rng: &mut DeterministicRng, + out: &mut StepOutcome, + ) { + out.rewards + .push(PlayerReward { + player: 0, + reward: 10, + }) + .unwrap(); + out.rewards + .push(PlayerReward { + player: 1, + reward: 20, + }) + .unwrap(); + state.terminal = true; + out.termination = Termination::Terminal { winner: Some(0) }; + } + + fn compact_spec(&self) -> CompactSpec { + CompactSpec { + action_count: 1, + observation_bits: 64, + observation_stream_len: 1, + reward_bits: 6, + min_reward: 0, + max_reward: 63, + reward_offset: 0, + } + } + + fn encode_action(&self, action: &Self::Action) -> u64 { + match action { + DemoAction::Step => 0, + } + } + + fn decode_action(&self, encoded: u64) -> Option { + (encoded == 0).then_some(DemoAction::Step) + } + + fn encode_player_observation(&self, observation: &Self::Obs, out: &mut Self::WordBuf) { + out.clear(); + out.push(100 + u64::from(*observation)).unwrap(); + } + + fn encode_spectator_observation(&self, observation: &Self::Obs, out: &mut Self::WordBuf) { + out.clear(); + out.push(200 + u64::from(*observation)).unwrap(); + } + } + + impl Game for BadObservationGame { + type Params = (); + type State = (); + type Action = u8; + type Obs = u8; + type WorldView = (); + type PlayerBuf = FixedVec; + type ActionBuf = FixedVec; + type JointActionBuf = FixedVec, 1>; + type RewardBuf = FixedVec; + type WordBuf = FixedVec; + + fn name(&self) -> &'static str { + "bad-observation" + } + + fn player_count(&self) -> usize { + 1 + } + + fn init_with_params(&self, _seed: 
Seed, _params: &Self::Params) -> Self::State {} + + fn is_terminal(&self, _state: &Self::State) -> bool { + false + } + + fn players_to_act(&self, _state: &Self::State, out: &mut Self::PlayerBuf) { + out.clear(); + out.push(0).unwrap(); + } + + fn legal_actions( + &self, + _state: &Self::State, + _player: PlayerId, + out: &mut Self::ActionBuf, + ) { + out.clear(); + out.push(0).unwrap(); + } + + fn observe_player(&self, _state: &Self::State, _player: PlayerId) -> Self::Obs { + 8 + } + + fn observe_spectator(&self, _state: &Self::State) -> Self::Obs { + 8 + } + + fn world_view(&self, _state: &Self::State) -> Self::WorldView {} + + fn step_in_place( + &self, + _state: &mut Self::State, + _joint_actions: &Self::JointActionBuf, + _rng: &mut DeterministicRng, + out: &mut StepOutcome, + ) { + out.rewards + .push(PlayerReward { + player: 0, + reward: 0, + }) + .unwrap(); + } + + fn compact_spec(&self) -> CompactSpec { + CompactSpec { + action_count: 1, + observation_bits: 3, + observation_stream_len: 1, + reward_bits: 1, + min_reward: 0, + max_reward: 0, + reward_offset: 0, + } + } + + fn encode_action(&self, action: &Self::Action) -> u64 { + u64::from(*action) + } + + fn decode_action(&self, encoded: u64) -> Option { + (encoded == 0).then_some(0) + } + + fn encode_player_observation(&self, observation: &Self::Obs, out: &mut Self::WordBuf) { + out.clear(); + out.push(u64::from(*observation)).unwrap(); + } + + fn encode_spectator_observation(&self, observation: &Self::Obs, out: &mut Self::WordBuf) { + self.encode_player_observation(observation, out); + } + } + + impl Game for BadRewardGame { + type Params = (); + type State = bool; + type Action = u8; + type Obs = u8; + type WorldView = (); + type PlayerBuf = FixedVec; + type ActionBuf = FixedVec; + type JointActionBuf = FixedVec, 1>; + type RewardBuf = FixedVec; + type WordBuf = FixedVec; + + fn name(&self) -> &'static str { + "bad-reward" + } + + fn player_count(&self) -> usize { + 1 + } + + fn init_with_params(&self, 
_seed: Seed, _params: &Self::Params) -> Self::State { + false + } + + fn is_terminal(&self, state: &Self::State) -> bool { + *state + } + + fn players_to_act(&self, state: &Self::State, out: &mut Self::PlayerBuf) { + out.clear(); + if !*state { + out.push(0).unwrap(); + } + } + + fn legal_actions(&self, state: &Self::State, _player: PlayerId, out: &mut Self::ActionBuf) { + out.clear(); + if !*state { + out.push(0).unwrap(); + } + } + + fn observe_player(&self, _state: &Self::State, _player: PlayerId) -> Self::Obs { + 0 + } + + fn observe_spectator(&self, _state: &Self::State) -> Self::Obs { + 0 + } + + fn world_view(&self, _state: &Self::State) -> Self::WorldView {} + + fn step_in_place( + &self, + state: &mut Self::State, + _joint_actions: &Self::JointActionBuf, + _rng: &mut DeterministicRng, + out: &mut StepOutcome, + ) { + out.rewards + .push(PlayerReward { + player: 0, + reward: 3, + }) + .unwrap(); + *state = true; + out.termination = Termination::Terminal { winner: Some(0) }; + } + + fn compact_spec(&self) -> CompactSpec { + CompactSpec { + action_count: 1, + observation_bits: 1, + observation_stream_len: 1, + reward_bits: 1, + min_reward: 0, + max_reward: 3, + reward_offset: 0, + } + } + + fn encode_action(&self, action: &Self::Action) -> u64 { + u64::from(*action) + } + + fn decode_action(&self, encoded: u64) -> Option { + (encoded == 0).then_some(0) + } + + fn encode_player_observation(&self, observation: &Self::Obs, out: &mut Self::WordBuf) { + out.clear(); + out.push(u64::from(*observation)).unwrap(); + } + + fn encode_spectator_observation(&self, observation: &Self::Obs, out: &mut Self::WordBuf) { + self.encode_player_observation(observation, out); + } + } + + impl Game for ParamRewardGame { + type Params = u8; + type State = u8; + type Action = u8; + type Obs = u8; + type WorldView = (); + type PlayerBuf = FixedVec; + type ActionBuf = FixedVec; + type JointActionBuf = FixedVec, 1>; + type RewardBuf = FixedVec; + type WordBuf = FixedVec; + + fn 
default_params(&self) -> Self::Params { + 0 + } + + fn name(&self) -> &'static str { + "param-reward" + } + + fn player_count(&self) -> usize { + 1 + } + + fn init_with_params(&self, _seed: Seed, params: &Self::Params) -> Self::State { + *params + } + + fn is_terminal(&self, _state: &Self::State) -> bool { + false + } + + fn players_to_act(&self, _state: &Self::State, out: &mut Self::PlayerBuf) { + out.clear(); + out.push(0).unwrap(); + } + + fn legal_actions( + &self, + _state: &Self::State, + _player: PlayerId, + out: &mut Self::ActionBuf, + ) { + out.clear(); + out.push(0).unwrap(); + } + + fn observe_player(&self, state: &Self::State, _player: PlayerId) -> Self::Obs { + *state + } + + fn observe_spectator(&self, state: &Self::State) -> Self::Obs { + *state + } + + fn world_view(&self, _state: &Self::State) -> Self::WorldView {} + + fn step_in_place( + &self, + state: &mut Self::State, + _joint_actions: &Self::JointActionBuf, + _rng: &mut DeterministicRng, + out: &mut StepOutcome, + ) { + out.rewards + .push(PlayerReward { + player: 0, + reward: i64::from(*state), + }) + .unwrap(); + } + + fn compact_spec(&self) -> CompactSpec { + CompactSpec { + action_count: 1, + observation_bits: 8, + observation_stream_len: 1, + reward_bits: 1, + min_reward: 0, + max_reward: 0, + reward_offset: 0, + } + } + + fn compact_spec_for_params(&self, params: &Self::Params) -> CompactSpec { + let max_reward = i64::from(*params); + let reward_bits = if max_reward == 0 { + 1 + } else { + (u64::BITS - (max_reward as u64).leading_zeros()) as u8 + }; + CompactSpec { + action_count: 1, + observation_bits: 8, + observation_stream_len: 1, + reward_bits, + min_reward: 0, + max_reward, + reward_offset: 0, + } + } + + fn encode_action(&self, action: &Self::Action) -> u64 { + u64::from(*action) + } + + fn decode_action(&self, encoded: u64) -> Option { + (encoded == 0).then_some(0) + } + + fn encode_player_observation(&self, observation: &Self::Obs, out: &mut Self::WordBuf) { + out.clear(); + 
out.push(u64::from(*observation)).unwrap(); + } + + fn encode_spectator_observation(&self, observation: &Self::Obs, out: &mut Self::WordBuf) { + self.encode_player_observation(observation, out); + } + } + + impl Game for RejectingParamsGame { + type Params = i32; + type State = i32; + type Action = u8; + type Obs = i32; + type WorldView = (); + type PlayerBuf = FixedVec; + type ActionBuf = FixedVec; + type JointActionBuf = FixedVec, 1>; + type RewardBuf = FixedVec; + type WordBuf = FixedVec; + + fn default_params(&self) -> Self::Params { + 0 + } + + fn name(&self) -> &'static str { + "rejecting-params" + } + + fn player_count(&self) -> usize { + 1 + } + + fn params_invariant(&self, params: &Self::Params) -> bool { + *params >= 0 + } + + fn init_with_params(&self, _seed: Seed, params: &Self::Params) -> Self::State { + assert!(*params >= 0); + *params + } + + fn is_terminal(&self, _state: &Self::State) -> bool { + false + } + + fn players_to_act(&self, _state: &Self::State, out: &mut Self::PlayerBuf) { + out.clear(); + out.push(0).unwrap(); + } + + fn legal_actions( + &self, + _state: &Self::State, + _player: PlayerId, + out: &mut Self::ActionBuf, + ) { + out.clear(); + out.push(0).unwrap(); + } + + fn observe_player(&self, state: &Self::State, _player: PlayerId) -> Self::Obs { + *state + } + + fn observe_spectator(&self, state: &Self::State) -> Self::Obs { + *state + } + + fn world_view(&self, _state: &Self::State) -> Self::WorldView {} + + fn step_in_place( + &self, + _state: &mut Self::State, + _joint_actions: &Self::JointActionBuf, + _rng: &mut DeterministicRng, + out: &mut StepOutcome, + ) { + out.rewards + .push(PlayerReward { + player: 0, + reward: 0, + }) + .unwrap(); + } + + fn compact_spec(&self) -> CompactSpec { + CompactSpec { + action_count: 1, + observation_bits: 8, + observation_stream_len: 1, + reward_bits: 1, + min_reward: 0, + max_reward: 0, + reward_offset: 0, + } + } + + fn encode_action(&self, action: &Self::Action) -> u64 { + u64::from(*action) 
+ } + + fn decode_action(&self, encoded: u64) -> Option { + (encoded == 0).then_some(0) + } + + fn encode_player_observation(&self, observation: &Self::Obs, out: &mut Self::WordBuf) { + out.clear(); + out.push(*observation as u64).unwrap(); + } + + fn encode_spectator_observation(&self, observation: &Self::Obs, out: &mut Self::WordBuf) { + self.encode_player_observation(observation, out); + } + } + + #[test] + fn step_uses_agent_player_reward() { + let mut env = DefaultEnvironment::::new(DemoGame, 3, Observer::Player(0)); + env.set_agent_player(1); + let step = env.step(0).unwrap(); + assert_eq!(step.reward.raw, 20); + assert_eq!(step.reward.encoded, 20); + } + + #[test] + fn stepping_terminal_session_returns_error() { + let mut env = DefaultEnvironment::::new(DemoGame, 3, Observer::Player(0)); + env.step(0).unwrap(); + assert_eq!(env.step(0), Err(EnvError::SessionTerminated)); + } + + #[test] + fn spectator_observations_use_spectator_encoder() { + let env = DefaultEnvironment::::new(DemoGame, 3, Observer::Spectator); + let packet = env.encode_current_observation().unwrap(); + assert_eq!(packet.words(), &[299]); + } + + #[test] + fn reset_with_params_updates_session_seed_params_state() { + let mut env = DefaultEnvironment::::new(DemoGame, 3, Observer::Player(0)); + assert_eq!(env.session().state().marker, 0); + env.reset_with_params(11, 42).unwrap(); + assert_eq!(env.session().current_tick(), 0); + assert_eq!(env.session().state().marker, 42); + } + + #[test] + fn observation_schema_violations_are_rejected() { + let env = DefaultEnvironment::::new( + BadObservationGame, + 1, + Observer::Player(0), + ); + assert!(matches!( + env.encode_current_observation(), + Err(EnvError::InvalidObservationEncoding { .. }) + )); + } + + #[test] + fn reward_bit_width_violations_are_rejected() { + let mut env = + DefaultEnvironment::::new(BadRewardGame, 1, Observer::Player(0)); + assert!(matches!( + env.step(0), + Err(EnvError::InvalidRewardEncoding { .. 
}) + )); + } + + #[test] + fn observation_rejects_out_of_range_player_observer() { + let mut env = DefaultEnvironment::::new(DemoGame, 3, Observer::Player(0)); + env.set_observer(Observer::Player(7)); + assert_eq!( + env.encode_current_observation(), + Err(EnvError::InvalidAgentPlayer { + player: 7, + player_count: 2, + }) + ); + } + + #[test] + fn reward_encoding_uses_active_session_params() { + let mut env = + DefaultEnvironment::::new(ParamRewardGame, 1, Observer::Player(0)); + env.reset_with_params(1, 5).unwrap(); + let step = env.step(0).unwrap(); + assert_eq!(step.reward.raw, 5); + assert_eq!(step.reward.encoded, 5); + } + + #[test] + fn reset_with_invalid_params_returns_error() { + let mut env = DefaultEnvironment::::new( + RejectingParamsGame, + 1, + Observer::Player(0), + ); + assert_eq!( + env.reset_with_params(1, -1), + Err(EnvError::InvalidParameters { + game: "rejecting-params" + }) + ); + } + + #[test] + fn try_new_with_invalid_params_returns_error() { + assert!(matches!( + DefaultEnvironment::::try_new_with_params( + RejectingParamsGame, + 1, + Observer::Player(0), + -1, + ), + Err(EnvError::InvalidParameters { + game: "rejecting-params" + }) + )); + } +} + +#[cfg(kani)] +mod proofs { + use super::{DefaultEnvironment, EnvError, Observer}; + use crate::buffer::FixedVec; + use crate::compact::CompactSpec; + use crate::game::Game; + use crate::rng::DeterministicRng; + use crate::types::{PlayerAction, PlayerId, PlayerReward, Seed, StepOutcome, Termination}; + + #[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)] + struct ObservationViolationGame; + + #[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)] + struct RewardBitsViolationGame; + + impl Game for ObservationViolationGame { + type Params = (); + type State = (); + type Action = u8; + type Obs = u8; + type WorldView = (); + type PlayerBuf = FixedVec; + type ActionBuf = FixedVec; + type JointActionBuf = FixedVec, 1>; + type RewardBuf = FixedVec; + type WordBuf = FixedVec; + + fn 
name(&self) -> &'static str { + "observation-violation" + } + + fn player_count(&self) -> usize { + 1 + } + + fn init_with_params(&self, _seed: Seed, _params: &Self::Params) -> Self::State {} + + fn is_terminal(&self, _state: &Self::State) -> bool { + false + } + + fn players_to_act(&self, _state: &Self::State, out: &mut Self::PlayerBuf) { + out.clear(); + out.push(0).unwrap(); + } + + fn legal_actions( + &self, + _state: &Self::State, + _player: PlayerId, + out: &mut Self::ActionBuf, + ) { + out.clear(); + out.push(0).unwrap(); + } + + fn observe_player(&self, _state: &Self::State, _player: PlayerId) -> Self::Obs { + 8 + } + + fn observe_spectator(&self, _state: &Self::State) -> Self::Obs { + 8 + } + + fn world_view(&self, _state: &Self::State) -> Self::WorldView {} + + fn step_in_place( + &self, + _state: &mut Self::State, + _joint_actions: &Self::JointActionBuf, + _rng: &mut DeterministicRng, + out: &mut StepOutcome, + ) { + out.rewards + .push(PlayerReward { + player: 0, + reward: 0, + }) + .unwrap(); + } + + fn compact_spec(&self) -> CompactSpec { + CompactSpec { + action_count: 1, + observation_bits: 3, + observation_stream_len: 1, + reward_bits: 1, + min_reward: 0, + max_reward: 0, + reward_offset: 0, + } + } + + fn encode_action(&self, action: &Self::Action) -> u64 { + u64::from(*action) + } + + fn decode_action(&self, encoded: u64) -> Option { + (encoded == 0).then_some(0) + } + + fn encode_player_observation(&self, observation: &Self::Obs, out: &mut Self::WordBuf) { + out.clear(); + out.push(u64::from(*observation)).unwrap(); + } + + fn encode_spectator_observation(&self, observation: &Self::Obs, out: &mut Self::WordBuf) { + self.encode_player_observation(observation, out); + } + } + + impl Game for RewardBitsViolationGame { + type Params = (); + type State = bool; + type Action = u8; + type Obs = u8; + type WorldView = (); + type PlayerBuf = FixedVec; + type ActionBuf = FixedVec; + type JointActionBuf = FixedVec, 1>; + type RewardBuf = FixedVec; + type 
WordBuf = FixedVec; + + fn name(&self) -> &'static str { + "reward-violation" + } + + fn player_count(&self) -> usize { + 1 + } + + fn init_with_params(&self, _seed: Seed, _params: &Self::Params) -> Self::State { + false + } + + fn is_terminal(&self, state: &Self::State) -> bool { + *state + } + + fn players_to_act(&self, state: &Self::State, out: &mut Self::PlayerBuf) { + out.clear(); + if !*state { + out.push(0).unwrap(); + } + } + + fn legal_actions(&self, state: &Self::State, _player: PlayerId, out: &mut Self::ActionBuf) { + out.clear(); + if !*state { + out.push(0).unwrap(); + } + } + + fn observe_player(&self, _state: &Self::State, _player: PlayerId) -> Self::Obs { + 0 + } + + fn observe_spectator(&self, _state: &Self::State) -> Self::Obs { + 0 + } + + fn world_view(&self, _state: &Self::State) -> Self::WorldView {} + + fn step_in_place( + &self, + state: &mut Self::State, + _joint_actions: &Self::JointActionBuf, + _rng: &mut DeterministicRng, + out: &mut StepOutcome, + ) { + out.rewards + .push(PlayerReward { + player: 0, + reward: 3, + }) + .unwrap(); + *state = true; + out.termination = Termination::Terminal { winner: Some(0) }; + } + + fn compact_spec(&self) -> CompactSpec { + CompactSpec { + action_count: 1, + observation_bits: 1, + observation_stream_len: 1, + reward_bits: 1, + min_reward: 0, + max_reward: 3, + reward_offset: 0, + } + } + + fn encode_action(&self, action: &Self::Action) -> u64 { + u64::from(*action) + } + + fn decode_action(&self, encoded: u64) -> Option { + (encoded == 0).then_some(0) + } + + fn encode_player_observation(&self, observation: &Self::Obs, out: &mut Self::WordBuf) { + out.clear(); + out.push(u64::from(*observation)).unwrap(); + } + + fn encode_spectator_observation(&self, observation: &Self::Obs, out: &mut Self::WordBuf) { + self.encode_player_observation(observation, out); + } + } + + #[kani::proof] + fn env_rejects_invalid_observation_words() { + let env = DefaultEnvironment::::new( + ObservationViolationGame, + 1, + 
Observer::Player(0), + ); + assert!(matches!( + env.encode_current_observation(), + Err(EnvError::InvalidObservationEncoding { .. }) + )); + } + + #[kani::proof] + fn env_rejects_reward_encoding_that_exceeds_bit_width() { + let mut env = DefaultEnvironment::::new( + RewardBitsViolationGame, + 1, + Observer::Player(0), + ); + assert!(matches!( + env.step(0), + Err(EnvError::InvalidRewardEncoding { .. }) + )); + } +} + +#[cfg(all(test, feature = "builtin"))] +mod tests { + use super::{DefaultEnvironment, Observer}; + use crate::builtin::{TicTacToe, TicTacToeAction}; + use crate::game::Game; + + #[test] + fn env_wrapper_emits_compact_observations() { + let mut env = DefaultEnvironment::::new(TicTacToe, 7, Observer::Player(0)); + let initial = env.encode_current_observation().unwrap(); + assert_eq!(initial.words(), &[0]); + + let action = TicTacToe.encode_action(&TicTacToeAction(0)); + let step = env.step(action).unwrap(); + assert_eq!(step.observation_bits.words().len(), 1); + } +} diff --git a/src/core/mod.rs b/src/core/mod.rs new file mode 100644 index 0000000..7095a0a --- /dev/null +++ b/src/core/mod.rs @@ -0,0 +1,21 @@ +//! Core engine helpers shared across environments and adapters. 
+ +pub mod cards; +pub mod env; +pub mod observe; +pub mod single_player; +pub mod stepper; + +pub use crate::buffer::{BitWords, Buffer, CapacityError, FixedVec}; +pub use crate::compact::CompactSpec; +pub use crate::core::single_player::SinglePlayerGame; +pub use crate::game::Game; +pub use crate::rng::{DeterministicRng, SplitMix64}; +pub use crate::session::{ + DynamicHistory, FixedHistory, HistorySnapshot, HistoryStore, InteractiveSession, Session, + SessionKernel, +}; +pub use crate::types::{ + DynamicReplayTrace, PlayerAction, PlayerId, PlayerReward, ReplayStep, ReplayTrace, Reward, + Seed, StepOutcome, Termination, Tick, stable_hash, +}; diff --git a/src/core/observe.rs b/src/core/observe.rs new file mode 100644 index 0000000..f6ac94c --- /dev/null +++ b/src/core/observe.rs @@ -0,0 +1,67 @@ +//! Observation adapter trait and viewpoint selection types. + +use crate::game::Game; +use crate::types::PlayerId; + +/// Viewpoint used when requesting an observation. +#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] +pub enum Observer { + /// Player-local, potentially partial-information observation. + Player(PlayerId), + /// Full spectator observation. + Spectator, +} + +/// Adapter trait for producing and encoding generic observations. +pub trait Observe: Game { + /// Builds an observation for the selected viewpoint. + fn observe(&self, state: &Self::State, who: Observer) -> Self::Obs; + + /// Encodes an observation into the compact word stream. + fn encode_observation(&self, observation: &Self::Obs, out: &mut Self::WordBuf); + + /// Encodes an observation with explicit viewpoint context. + fn encode_observation_for( + &self, + who: Observer, + observation: &Self::Obs, + out: &mut Self::WordBuf, + ) { + let _ = who; + self.encode_observation(observation, out); + } + + /// Convenience helper to observe and encode in one call. 
+ fn observe_and_encode(&self, state: &Self::State, who: Observer, out: &mut Self::WordBuf) { + let observation = self.observe(state, who); + self.encode_observation_for(who, &observation, out); + } +} + +impl Observe for G +where + G: Game, +{ + fn observe(&self, state: &Self::State, who: Observer) -> Self::Obs { + match who { + Observer::Player(player) => self.observe_player(state, player), + Observer::Spectator => self.observe_spectator(state), + } + } + + fn encode_observation(&self, observation: &Self::Obs, out: &mut Self::WordBuf) { + self.encode_player_observation(observation, out); + } + + fn encode_observation_for( + &self, + who: Observer, + observation: &Self::Obs, + out: &mut Self::WordBuf, + ) { + match who { + Observer::Player(_) => self.encode_player_observation(observation, out), + Observer::Spectator => self.encode_spectator_observation(observation, out), + } + } +} diff --git a/src/core/single_player.rs b/src/core/single_player.rs new file mode 100644 index 0000000..c7046b6 --- /dev/null +++ b/src/core/single_player.rs @@ -0,0 +1,352 @@ +//! Reusable helpers and authoring adapter for deterministic single-player games. + +use core::fmt::Debug; +use core::hash::Hash; + +use crate::buffer::{Buffer, FixedVec}; +use crate::compact::{CompactError, CompactSpec}; +use crate::game::Game; +use crate::rng::DeterministicRng; +use crate::types::{PlayerAction, PlayerId, PlayerReward, Reward, Seed, StepOutcome}; + +/// Canonical acting player id used by single-player environments. +pub const SOLO_PLAYER: PlayerId = 0; + +/// Canonical fixed-capacity player buffer for single-player games. +pub type SinglePlayerBuf = FixedVec; +/// Canonical fixed-capacity joint-action buffer for single-player games. +pub type SinglePlayerJointActionBuf = FixedVec, 1>; +/// Canonical fixed-capacity reward buffer for single-player games. +pub type SinglePlayerRewardBuf = FixedVec; + +/// Returns true when `player` can act in a non-terminal single-player state. 
+pub const fn can_act(player: PlayerId, terminal: bool) -> bool { + player == SOLO_PLAYER && !terminal +} + +/// Clears and emits the single acting player when the state is ongoing. +pub fn write_players_to_act(out: &mut B, terminal: bool) +where + B: Buffer, +{ + out.clear(); + if !terminal { + out.push(SOLO_PLAYER).unwrap(); + } +} + +/// Returns the first action assigned to the single acting player. +pub fn first_action(joint_actions: &[PlayerAction]) -> Option { + for candidate in joint_actions { + if candidate.player == SOLO_PLAYER { + return Some(candidate.action); + } + } + None +} + +/// Appends one reward entry for the single acting player. +pub fn push_reward(out: &mut B, reward: Reward) +where + B: Buffer, +{ + out.push(PlayerReward { + player: SOLO_PLAYER, + reward, + }) + .unwrap(); +} + +/// Ergonomic authoring trait for deterministic single-player games. +/// +/// Implement this trait to avoid repeating boilerplate for: +/// +/// - player-id dispatch (`player_count = 1`, `players_to_act`, legality gating), +/// - joint-action extraction (`Option` from one-player action stream), +/// - fixed-capacity reward and joint-action buffer wiring. +pub trait SinglePlayerGame { + /// Parameter bundle used to initialize/reset game state. + type Params: Clone + Debug + Default + Eq + Hash + PartialEq; + /// Concrete game state. + type State: Clone + Debug + Default + Eq + Hash + PartialEq; + /// Atomic action type. + type Action: Clone + Copy + Debug + Default + Eq + Hash + PartialEq; + /// Canonical observation type. + type Obs: Clone + Debug + Default + Eq + PartialEq; + /// Render/debug world view. + type WorldView: Clone + Debug + Default + Eq + PartialEq; + /// Buffer type for legal actions. + type ActionBuf: Buffer + Clone + Debug + Default + Eq + Hash + PartialEq; + /// Buffer type for compact observation words. + type WordBuf: Buffer + Clone + Debug + Default + Eq + Hash + PartialEq; + + /// Stable machine-readable game name. 
+ fn name(&self) -> &'static str; + /// Returns default parameter bundle used by `init` and `SessionKernel::new`. + fn default_params(&self) -> Self::Params { + Self::Params::default() + } + /// Returns whether a parameter bundle is valid for `init_with_params`. + fn params_invariant(&self, _params: &Self::Params) -> bool { + true + } + /// Initialize deterministic state from a seed and parameter bundle. + fn init_with_params(&self, seed: Seed, params: &Self::Params) -> Self::State; + /// Whether the state is terminal. + fn is_terminal(&self, state: &Self::State) -> bool; + /// Emit legal actions for the single acting player in the current state. + fn legal_actions(&self, state: &Self::State, out: &mut Self::ActionBuf); + /// Build player observation. + fn observe_player(&self, state: &Self::State) -> Self::Obs; + /// Build spectator observation. + fn observe_spectator(&self, state: &Self::State) -> Self::Obs { + self.observe_player(state) + } + /// Build world/debug view. + fn world_view(&self, state: &Self::State) -> Self::WorldView; + /// Apply one transition in-place from an optional single-player action. + fn step_in_place( + &self, + state: &mut Self::State, + action: Option, + rng: &mut DeterministicRng, + out: &mut StepOutcome, + ); + + /// Compact codec descriptor for actions, observations, and rewards. + fn compact_spec(&self) -> CompactSpec { + CompactSpec { + action_count: 0, + observation_bits: 0, + observation_stream_len: 0, + reward_bits: 1, + min_reward: 0, + max_reward: 0, + reward_offset: 0, + } + } + + /// Compact codec descriptor for an explicit parameter bundle. + fn compact_spec_for_params(&self, _params: &Self::Params) -> CompactSpec { + self.compact_spec() + } + + /// Encode an action into compact integer representation. + fn encode_action(&self, _action: &Self::Action) -> u64 { + 0 + } + + /// Decode a compact action value. 
+ fn decode_action(&self, _encoded: u64) -> Option { + None + } + + /// Checked action decoding helper that yields a structured error. + fn decode_action_checked(&self, encoded: u64) -> Result { + self.decode_action(encoded) + .ok_or(CompactError::InvalidActionEncoding { encoded }) + } + + /// Encode a player observation into compact words. + fn encode_player_observation(&self, _observation: &Self::Obs, out: &mut Self::WordBuf) { + out.clear(); + } + + /// Encode a spectator observation into compact words. + fn encode_spectator_observation(&self, observation: &Self::Obs, out: &mut Self::WordBuf) { + self.encode_player_observation(observation, out); + } + + /// State invariant used by checked stepping and proof helpers. + fn state_invariant(&self, _state: &Self::State) -> bool { + true + } + + /// Action invariant used by checked stepping and proof helpers. + fn action_invariant(&self, _action: &Self::Action) -> bool { + true + } + + /// Invariant for player observations. + fn player_observation_invariant(&self, _state: &Self::State, _observation: &Self::Obs) -> bool { + true + } + + /// Invariant for spectator observations. + fn spectator_observation_invariant( + &self, + _state: &Self::State, + _observation: &Self::Obs, + ) -> bool { + true + } + + /// Invariant for world/debug views. + fn world_view_invariant(&self, _state: &Self::State, _world: &Self::WorldView) -> bool { + true + } + + /// Transition postcondition checked in instrumented stepping. 
+ fn transition_postcondition( + &self, + _pre: &Self::State, + _action: Option, + _post: &Self::State, + _outcome: &StepOutcome, + ) -> bool { + true + } +} + +impl Game for T +where + T: SinglePlayerGame, +{ + type Params = T::Params; + type State = T::State; + type Action = T::Action; + type Obs = T::Obs; + type WorldView = T::WorldView; + type PlayerBuf = SinglePlayerBuf; + type ActionBuf = T::ActionBuf; + type JointActionBuf = SinglePlayerJointActionBuf; + type RewardBuf = SinglePlayerRewardBuf; + type WordBuf = T::WordBuf; + + fn name(&self) -> &'static str { + ::name(self) + } + + fn player_count(&self) -> usize { + 1 + } + + fn default_params(&self) -> Self::Params { + ::default_params(self) + } + + fn params_invariant(&self, params: &Self::Params) -> bool { + ::params_invariant(self, params) + } + + fn init_with_params(&self, seed: Seed, params: &Self::Params) -> Self::State { + ::init_with_params(self, seed, params) + } + + fn is_terminal(&self, state: &Self::State) -> bool { + ::is_terminal(self, state) + } + + fn players_to_act(&self, state: &Self::State, out: &mut Self::PlayerBuf) { + write_players_to_act(out, self.is_terminal(state)); + } + + fn legal_actions(&self, state: &Self::State, player: PlayerId, out: &mut Self::ActionBuf) { + out.clear(); + if !can_act(player, self.is_terminal(state)) { + return; + } + ::legal_actions(self, state, out); + } + + fn observe_player(&self, state: &Self::State, _player: PlayerId) -> Self::Obs { + ::observe_player(self, state) + } + + fn observe_spectator(&self, state: &Self::State) -> Self::Obs { + ::observe_spectator(self, state) + } + + fn world_view(&self, state: &Self::State) -> Self::WorldView { + ::world_view(self, state) + } + + fn step_in_place( + &self, + state: &mut Self::State, + joint_actions: &Self::JointActionBuf, + rng: &mut DeterministicRng, + out: &mut StepOutcome, + ) { + ::step_in_place( + self, + state, + first_action(joint_actions.as_slice()), + rng, + out, + ); + } + + fn compact_spec(&self) 
-> CompactSpec { + ::compact_spec(self) + } + + fn compact_spec_for_params(&self, params: &Self::Params) -> CompactSpec { + ::compact_spec_for_params(self, params) + } + + fn encode_action(&self, action: &Self::Action) -> u64 { + ::encode_action(self, action) + } + + fn decode_action(&self, encoded: u64) -> Option { + ::decode_action(self, encoded) + } + + fn decode_action_checked(&self, encoded: u64) -> Result { + ::decode_action_checked(self, encoded) + } + + fn encode_player_observation(&self, observation: &Self::Obs, out: &mut Self::WordBuf) { + ::encode_player_observation(self, observation, out) + } + + fn encode_spectator_observation(&self, observation: &Self::Obs, out: &mut Self::WordBuf) { + ::encode_spectator_observation(self, observation, out) + } + + fn state_invariant(&self, state: &Self::State) -> bool { + ::state_invariant(self, state) + } + + fn action_invariant(&self, action: &Self::Action) -> bool { + ::action_invariant(self, action) + } + + fn player_observation_invariant( + &self, + state: &Self::State, + _player: PlayerId, + observation: &Self::Obs, + ) -> bool { + ::player_observation_invariant(self, state, observation) + } + + fn spectator_observation_invariant( + &self, + state: &Self::State, + observation: &Self::Obs, + ) -> bool { + ::spectator_observation_invariant(self, state, observation) + } + + fn world_view_invariant(&self, state: &Self::State, world: &Self::WorldView) -> bool { + ::world_view_invariant(self, state, world) + } + + fn transition_postcondition( + &self, + pre: &Self::State, + actions: &Self::JointActionBuf, + post: &Self::State, + outcome: &StepOutcome, + ) -> bool { + ::transition_postcondition( + self, + pre, + first_action(actions.as_slice()), + post, + outcome, + ) + } +} diff --git a/src/core/stepper.rs b/src/core/stepper.rs new file mode 100644 index 0000000..a280fef --- /dev/null +++ b/src/core/stepper.rs @@ -0,0 +1,39 @@ +//! Session stepping adapters for checked and unchecked execution paths. 
+ +use crate::game::Game; +use crate::session::{HistoryStore, SessionKernel}; +use crate::types::StepOutcome; + +/// Minimal wrapper that executes unchecked kernel steps. +pub struct KernelStepper<'a, G: Game, H: HistoryStore> { + session: &'a mut SessionKernel, +} + +impl<'a, G: Game, H: HistoryStore> KernelStepper<'a, G, H> { + /// Creates an unchecked stepper over a session kernel. + pub fn new(session: &'a mut SessionKernel) -> Self { + Self { session } + } + + /// Applies one joint-action step. + pub fn step(&mut self, actions: &G::JointActionBuf) -> &StepOutcome { + self.session.step_with_joint_actions(actions) + } +} + +/// Wrapper that executes checked kernel steps with contract assertions. +pub struct CheckedStepper<'a, G: Game, H: HistoryStore> { + session: &'a mut SessionKernel, +} + +impl<'a, G: Game, H: HistoryStore> CheckedStepper<'a, G, H> { + /// Creates a checked stepper over a session kernel. + pub fn new(session: &'a mut SessionKernel) -> Self { + Self { session } + } + + /// Applies one checked joint-action step. + pub fn step(&mut self, actions: &G::JointActionBuf) -> &StepOutcome { + self.session.step_with_joint_actions_checked(actions) + } +} diff --git a/src/game.rs b/src/game.rs index 7f8e1c7..4e60141 100644 --- a/src/game.rs +++ b/src/game.rs @@ -1,18 +1,33 @@ +//! Core game trait defining state transitions, observations, and compact codecs. + use core::fmt::Debug; use core::hash::Hash; use crate::buffer::Buffer; +use crate::compact::{CompactError, CompactSpec}; use crate::rng::DeterministicRng; use crate::types::{PlayerAction, PlayerId, PlayerReward, Seed, StepOutcome}; +/// Deterministic game contract used by the session kernel. +/// +/// Implementations provide pure state transition logic plus compact codec hooks +/// for actions and observations. pub trait Game { + /// Parameter bundle used to initialize/reset game state. + type Params: Clone + Debug + Default + Eq + Hash + PartialEq; + /// Concrete game state. 
type State: Clone + Debug + Default + Eq + Hash + PartialEq; + /// Atomic player action type. type Action: Clone + Copy + Debug + Default + Eq + Hash + PartialEq; - type PlayerObservation: Clone + Debug + Default + Eq + PartialEq; - type SpectatorObservation: Clone + Debug + Default + Eq + PartialEq; + /// Canonical observation type shared across all viewpoints. + type Obs: Clone + Debug + Default + Eq + PartialEq; + /// Render/debug world view type. type WorldView: Clone + Debug + Default + Eq + PartialEq; + /// Buffer type for active-player lists. type PlayerBuf: Buffer + Clone + Debug + Default + Eq + Hash + PartialEq; + /// Buffer type for legal actions. type ActionBuf: Buffer + Clone + Debug + Default + Eq + Hash + PartialEq; + /// Buffer type for joint actions. type JointActionBuf: Buffer> + Clone + Debug @@ -20,18 +35,50 @@ pub trait Game { + Eq + Hash + PartialEq; + /// Buffer type for per-player rewards. type RewardBuf: Buffer + Clone + Debug + Default + Eq + Hash + PartialEq; + /// Buffer type for compact observation words. type WordBuf: Buffer + Clone + Debug + Default + Eq + Hash + PartialEq; + /// Stable machine-readable game name. fn name(&self) -> &'static str; + /// Total number of players in the game. fn player_count(&self) -> usize; - fn init(&self, seed: Seed) -> Self::State; + /// Returns default parameter bundle used by `init` and `SessionKernel::new`. + fn default_params(&self) -> Self::Params { + Self::Params::default() + } + + /// Returns whether a parameter bundle is valid for `init_with_params`. + /// + /// Infallible engine APIs may assume this precondition holds. Fallible wrappers + /// such as compact environments can use it to reject malformed runtime input + /// before calling into game initialization. + fn params_invariant(&self, _params: &Self::Params) -> bool { + true + } + + /// Initialize deterministic state from a seed and parameter bundle. 
+ fn init_with_params(&self, seed: Seed, params: &Self::Params) -> Self::State; + + /// Initialize the deterministic state from a seed. + fn init(&self, seed: Seed) -> Self::State { + let params = self.default_params(); + self.init_with_params(seed, ¶ms) + } + /// Whether the state is terminal. fn is_terminal(&self, state: &Self::State) -> bool; + /// Emit active players for the current tick. fn players_to_act(&self, state: &Self::State, out: &mut Self::PlayerBuf); + /// Emit legal actions for a player in the current state. fn legal_actions(&self, state: &Self::State, player: PlayerId, out: &mut Self::ActionBuf); - fn observe_player(&self, state: &Self::State, player: PlayerId) -> Self::PlayerObservation; - fn observe_spectator(&self, state: &Self::State) -> Self::SpectatorObservation; + /// Build a player-scoped observation. + fn observe_player(&self, state: &Self::State, player: PlayerId) -> Self::Obs; + /// Build a spectator observation. + fn observe_spectator(&self, state: &Self::State) -> Self::Obs; + /// Build a world/debug view consumed by render and tooling. fn world_view(&self, state: &Self::State) -> Self::WorldView; + /// Apply one transition in-place. fn step_in_place( &self, state: &mut Self::State, @@ -40,35 +87,100 @@ pub trait Game { out: &mut StepOutcome, ); + /// Compact codec descriptor for actions, observations and rewards. + fn compact_spec(&self) -> CompactSpec { + CompactSpec { + action_count: 0, + observation_bits: 0, + observation_stream_len: 0, + reward_bits: 1, + min_reward: 0, + max_reward: 0, + reward_offset: 0, + } + } + + /// Compact codec descriptor for an explicit parameter bundle. + fn compact_spec_for_params(&self, _params: &Self::Params) -> CompactSpec { + self.compact_spec() + } + + /// Encode an action into its compact integer representation. + fn encode_action(&self, _action: &Self::Action) -> u64 { + 0 + } + + /// Decode a compact action value. 
+ fn decode_action(&self, _encoded: u64) -> Option { + None + } + + /// Checked action decoding helper that yields a structured error. + fn decode_action_checked(&self, encoded: u64) -> Result { + self.decode_action(encoded) + .ok_or(CompactError::InvalidActionEncoding { encoded }) + } + + /// Encode a player observation into compact words. + fn encode_player_observation(&self, observation: &Self::Obs, out: &mut Self::WordBuf) { + let _ = observation; + out.clear(); + } + + /// Encode a spectator observation into compact words. + fn encode_spectator_observation(&self, observation: &Self::Obs, out: &mut Self::WordBuf) { + let _ = observation; + out.clear(); + } + + /// Convenience helper that observes a player and encodes the result. + fn encode_player_view(&self, state: &Self::State, player: PlayerId, out: &mut Self::WordBuf) { + let observation = self.observe_player(state, player); + self.encode_player_observation(&observation, out); + } + + /// Validate compact observation shape against the declared compact spec. + fn compact_invariant(&self, words: &Self::WordBuf) -> bool { + self.compact_spec() + .validate_observation_words(words.as_slice()) + .is_ok() + } + + /// State invariant used by checked stepping and proof helpers. fn state_invariant(&self, _state: &Self::State) -> bool { true } + /// Action invariant used by checked stepping and proof helpers. fn action_invariant(&self, _action: &Self::Action) -> bool { true } + /// Invariant for player observations. fn player_observation_invariant( &self, _state: &Self::State, _player: PlayerId, - _observation: &Self::PlayerObservation, + _observation: &Self::Obs, ) -> bool { true } + /// Invariant for spectator observations. fn spectator_observation_invariant( &self, _state: &Self::State, - _observation: &Self::SpectatorObservation, + _observation: &Self::Obs, ) -> bool { true } + /// Invariant for world/debug views. 
fn world_view_invariant(&self, _state: &Self::State, _world: &Self::WorldView) -> bool { true } + /// Transition postcondition checked in instrumented stepping. fn transition_postcondition( &self, _pre: &Self::State, @@ -79,10 +191,12 @@ pub trait Game { true } + /// Maximum supported player count from buffer capacity. fn max_players(&self) -> usize { ::CAPACITY } + /// Convenience legality query backed by `legal_actions`. fn is_action_legal( &self, state: &Self::State, diff --git a/src/games/blackjack.rs b/src/games/blackjack.rs deleted file mode 100644 index ef45635..0000000 --- a/src/games/blackjack.rs +++ /dev/null @@ -1,791 +0,0 @@ -use crate::buffer::FixedVec; -use crate::compact::{CompactGame, CompactSpec}; -use crate::game::Game; -use crate::rng::DeterministicRng; -use crate::types::{PlayerAction, PlayerId, PlayerReward, Seed, StepOutcome, Termination}; -const MAX_HAND_CARDS: usize = 12; -const DECK_SIZE: usize = 52; - -#[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)] -pub enum BlackjackAction { - #[default] - Hit, - Stand, -} - -#[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)] -pub enum BlackjackPhase { - #[default] - PlayerTurn, - OpponentTurn, - Terminal, -} - -#[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)] -pub struct HandValue { - pub total: u8, - pub soft: bool, - pub busted: bool, -} - -#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] -pub struct BlackjackState { - pub deck: [u8; DECK_SIZE], - pub next_card: u8, - pub player_cards: [u8; MAX_HAND_CARDS], - pub player_len: u8, - pub opponent_cards: [u8; MAX_HAND_CARDS], - pub opponent_len: u8, - pub phase: BlackjackPhase, - pub winner: Option, -} - -impl Default for BlackjackState { - fn default() -> Self { - Self { - deck: [0; DECK_SIZE], - next_card: 0, - player_cards: [0; MAX_HAND_CARDS], - player_len: 0, - opponent_cards: [0; MAX_HAND_CARDS], - opponent_len: 0, - phase: BlackjackPhase::PlayerTurn, - winner: None, - } - } -} - -#[derive(Clone, Copy, Debug, 
Default, Eq, Hash, PartialEq)] -pub struct BlackjackObservation { - pub phase: BlackjackPhase, - pub terminal: bool, - pub winner: Option, - pub player_cards: [u8; MAX_HAND_CARDS], - pub player_len: u8, - pub player_value: HandValue, - pub opponent_cards: [u8; MAX_HAND_CARDS], - pub opponent_visible_len: u8, - pub opponent_card_count: u8, -} - -#[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)] -pub struct BlackjackSpectatorObservation { - pub phase: BlackjackPhase, - pub terminal: bool, - pub winner: Option, - pub player_cards: [u8; MAX_HAND_CARDS], - pub player_len: u8, - pub player_value: HandValue, - pub opponent_cards: [u8; MAX_HAND_CARDS], - pub opponent_len: u8, - pub opponent_value: HandValue, -} - -pub type BlackjackWorldView = BlackjackSpectatorObservation; - -#[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)] -pub struct Blackjack; - -impl Blackjack { - fn evaluate_hand(cards: &[u8], len: u8) -> HandValue { - let mut total = 0u8; - let mut aces = 0u8; - let limit = len as usize; - let mut index = 0usize; - let max_len = MAX_HAND_CARDS.min(cards.len()); - while index < max_len { - if index >= limit { - break; - } - let card = cards[index]; - match card { - 1 => { - total = total.saturating_add(11); - aces += 1; - } - 11..=13 => total = total.saturating_add(10), - value => total = total.saturating_add(value), - } - index += 1; - } - for _ in 0..MAX_HAND_CARDS { - if total <= 21 || aces == 0 { - break; - } - total -= 10; - aces -= 1; - } - HandValue { - total, - soft: aces > 0, - busted: total > 21, - } - } - - fn fill_deck(deck: &mut [u8; DECK_SIZE]) { - let mut index = 0usize; - for _ in 0..4 { - for rank in 1..=13 { - deck[index] = rank; - index += 1; - } - } - } - - fn draw_card(state: &mut BlackjackState) -> u8 { - let card = state.deck[state.next_card as usize]; - state.next_card += 1; - card - } - - fn push_player_card(state: &mut BlackjackState, card: u8) { - state.player_cards[state.player_len as usize] = card; - state.player_len 
+= 1; - } - - fn push_opponent_card(state: &mut BlackjackState, card: u8) { - state.opponent_cards[state.opponent_len as usize] = card; - state.opponent_len += 1; - } - - fn player_value(state: &BlackjackState) -> HandValue { - Self::evaluate_hand(&state.player_cards, state.player_len) - } - - fn opponent_value(state: &BlackjackState) -> HandValue { - Self::evaluate_hand(&state.opponent_cards, state.opponent_len) - } - - fn resolve_terminal(state: &mut BlackjackState) -> i64 { - let player = Self::player_value(state); - let opponent = Self::opponent_value(state); - state.phase = BlackjackPhase::Terminal; - let (reward, winner) = if player.busted { - (-1, Some(1)) - } else if opponent.busted || player.total > opponent.total { - (1, Some(0)) - } else if player.total < opponent.total { - (-1, Some(1)) - } else { - (0, None) - }; - state.winner = winner; - reward - } - - fn resolve_opponent_turn(state: &mut BlackjackState, rng: &mut DeterministicRng) -> i64 { - state.phase = BlackjackPhase::OpponentTurn; - loop { - let value = Self::opponent_value(state); - if value.busted || value.total == 21 { - break; - } - let hit = rng.gen_range(2) == 0; - if !hit { - break; - } - if state.next_card as usize >= DECK_SIZE { - break; - } - let card = Self::draw_card(state); - Self::push_opponent_card(state, card); - } - Self::resolve_terminal(state) - } - - fn pack_cards(cards: &[u8; MAX_HAND_CARDS], len: u8) -> u64 { - let mut packed = 0u64; - let limit = len as usize; - let mut index = 0usize; - while index < MAX_HAND_CARDS { - if index >= limit { - break; - } - packed |= u64::from(cards[index]) << (index * 4); - index += 1; - } - packed - } -} - -impl Game for Blackjack { - type State = BlackjackState; - type Action = BlackjackAction; - type PlayerObservation = BlackjackObservation; - type SpectatorObservation = BlackjackSpectatorObservation; - type WorldView = BlackjackWorldView; - type PlayerBuf = FixedVec; - type ActionBuf = FixedVec; - type JointActionBuf = FixedVec, 1>; - 
type RewardBuf = FixedVec; - type WordBuf = FixedVec; - - fn name(&self) -> &'static str { - "blackjack" - } - - fn player_count(&self) -> usize { - 1 - } - - fn init(&self, seed: Seed) -> Self::State { - let mut rng = DeterministicRng::from_seed_and_stream(seed, 0); - let mut deck = [0u8; DECK_SIZE]; - Self::fill_deck(&mut deck); - rng.shuffle(&mut deck); - - let mut state = BlackjackState { - deck, - next_card: 0, - player_cards: [0; MAX_HAND_CARDS], - player_len: 0, - opponent_cards: [0; MAX_HAND_CARDS], - opponent_len: 0, - phase: BlackjackPhase::PlayerTurn, - winner: None, - }; - - let player_card_1 = Self::draw_card(&mut state); - Self::push_player_card(&mut state, player_card_1); - let opponent_card_1 = Self::draw_card(&mut state); - Self::push_opponent_card(&mut state, opponent_card_1); - let player_card_2 = Self::draw_card(&mut state); - Self::push_player_card(&mut state, player_card_2); - let opponent_card_2 = Self::draw_card(&mut state); - Self::push_opponent_card(&mut state, opponent_card_2); - state - } - - fn is_terminal(&self, state: &Self::State) -> bool { - matches!(state.phase, BlackjackPhase::Terminal) - } - - fn players_to_act(&self, state: &Self::State, out: &mut Self::PlayerBuf) { - out.clear(); - if !self.is_terminal(state) { - out.push(0).unwrap(); - } - } - - fn legal_actions(&self, state: &Self::State, player: PlayerId, out: &mut Self::ActionBuf) { - out.clear(); - if player != 0 || self.is_terminal(state) { - return; - } - let value = Self::player_value(state); - if value.total >= 21 { - out.push(BlackjackAction::Stand).unwrap(); - } else { - out.push(BlackjackAction::Hit).unwrap(); - out.push(BlackjackAction::Stand).unwrap(); - } - } - - fn observe_player(&self, state: &Self::State, _player: PlayerId) -> Self::PlayerObservation { - let terminal = self.is_terminal(state); - let opponent_visible_len = if terminal { state.opponent_len } else { 0 }; - let mut opponent_cards = [0u8; MAX_HAND_CARDS]; - if terminal { - opponent_cards = 
state.opponent_cards; - } - BlackjackObservation { - phase: state.phase, - terminal, - winner: state.winner, - player_cards: state.player_cards, - player_len: state.player_len, - player_value: Self::player_value(state), - opponent_cards, - opponent_visible_len, - opponent_card_count: state.opponent_len, - } - } - - fn observe_spectator(&self, state: &Self::State) -> Self::SpectatorObservation { - BlackjackSpectatorObservation { - phase: state.phase, - terminal: self.is_terminal(state), - winner: state.winner, - player_cards: state.player_cards, - player_len: state.player_len, - player_value: Self::player_value(state), - opponent_cards: state.opponent_cards, - opponent_len: state.opponent_len, - opponent_value: Self::opponent_value(state), - } - } - - fn world_view(&self, state: &Self::State) -> Self::WorldView { - self.observe_spectator(state) - } - - fn step_in_place( - &self, - state: &mut Self::State, - joint_actions: &Self::JointActionBuf, - rng: &mut DeterministicRng, - out: &mut StepOutcome, - ) { - let actions = joint_actions.as_slice(); - let mut action = None; - let mut index = 0usize; - while index < actions.len() { - let candidate = &actions[index]; - if candidate.player == 0 { - action = Some(candidate.action); - break; - } - index += 1; - } - - let reward = if self.is_terminal(state) { - out.termination = Termination::Terminal { - winner: state.winner, - }; - 0 - } else if let Some(action) = action { - let player_value = Self::player_value(state); - let legal = if player_value.total >= 21 { - matches!(action, BlackjackAction::Stand) - } else { - true - }; - if !legal { - state.phase = BlackjackPhase::Terminal; - state.winner = Some(1); - out.termination = Termination::Terminal { winner: Some(1) }; - -1 - } else { - match action { - BlackjackAction::Hit => { - let card = Self::draw_card(state); - Self::push_player_card(state, card); - let updated = Self::player_value(state); - if updated.busted { - state.phase = BlackjackPhase::Terminal; - state.winner 
= Some(1); - out.termination = Termination::Terminal { winner: Some(1) }; - -1 - } else if updated.total == 21 { - let reward = Self::resolve_opponent_turn(state, rng); - out.termination = Termination::Terminal { - winner: state.winner, - }; - reward - } else { - 0 - } - } - BlackjackAction::Stand => { - let reward = Self::resolve_opponent_turn(state, rng); - out.termination = Termination::Terminal { - winner: state.winner, - }; - reward - } - } - } - } else { - state.phase = BlackjackPhase::Terminal; - state.winner = Some(1); - out.termination = Termination::Terminal { winner: Some(1) }; - -1 - }; - - out.rewards - .push(PlayerReward { player: 0, reward }) - .unwrap(); - if !self.is_terminal(state) { - out.termination = Termination::Ongoing; - } - } - - fn state_invariant(&self, state: &Self::State) -> bool { - if state.player_len < 2 - || state.opponent_len < 2 - || usize::from(state.player_len) > MAX_HAND_CARDS - || usize::from(state.opponent_len) > MAX_HAND_CARDS - || usize::from(state.next_card) > DECK_SIZE - { - return false; - } - let mut counts = [0u8; 14]; - for index in 0..DECK_SIZE { - let card = state.deck[index]; - if !(1..=13).contains(&card) { - return false; - } - counts[card as usize] += 1; - } - let mut rank = 1usize; - while rank <= 13 { - if counts[rank] != 4 { - return false; - } - rank += 1; - } - if self.is_terminal(state) { - let mut resolved = *state; - Self::resolve_terminal(&mut resolved); - resolved.winner == state.winner - } else { - true - } - } - - fn player_observation_invariant( - &self, - state: &Self::State, - _player: PlayerId, - observation: &Self::PlayerObservation, - ) -> bool { - if self.is_terminal(state) { - observation.opponent_visible_len == state.opponent_len - && observation.opponent_cards == state.opponent_cards - } else { - if observation.opponent_visible_len != 0 { - return false; - } - for index in 0..MAX_HAND_CARDS { - if observation.opponent_cards[index] != 0 { - return false; - } - } - true - } - } - - fn 
transition_postcondition( - &self, - _pre: &Self::State, - _actions: &Self::JointActionBuf, - post: &Self::State, - outcome: &StepOutcome, - ) -> bool { - matches!(outcome.reward_for(0), -1..=1) - && (post.phase == BlackjackPhase::Terminal) == outcome.is_terminal() - } -} - -impl CompactGame for Blackjack { - fn compact_spec(&self) -> CompactSpec { - CompactSpec { - action_count: 2, - observation_bits: 64, - observation_stream_len: 4, - reward_bits: 2, - min_reward: -1, - max_reward: 1, - reward_offset: 1, - } - } - - fn encode_action(&self, action: &Self::Action) -> u64 { - match action { - BlackjackAction::Hit => 0, - BlackjackAction::Stand => 1, - } - } - - fn decode_action(&self, encoded: u64) -> Option { - match encoded { - 0 => Some(BlackjackAction::Hit), - 1 => Some(BlackjackAction::Stand), - _ => None, - } - } - - fn encode_player_observation( - &self, - observation: &Self::PlayerObservation, - out: &mut Self::WordBuf, - ) { - out.clear(); - let winner_code = match observation.winner { - None => 0, - Some(0) => 1, - Some(_) => 2, - }; - let phase = match observation.phase { - BlackjackPhase::PlayerTurn => 0u64, - BlackjackPhase::OpponentTurn => 1, - BlackjackPhase::Terminal => 2, - }; - let header = phase - | ((observation.terminal as u64) << 4) - | ((u64::from(observation.player_len)) << 8) - | ((u64::from(observation.player_value.total)) << 12) - | ((observation.player_value.soft as u64) << 20) - | ((u64::from(observation.opponent_card_count)) << 24) - | ((u64::from(observation.opponent_visible_len)) << 28) - | ((winner_code as u64) << 32); - out.push(header).unwrap(); - out.push(Self::pack_cards( - &observation.player_cards, - observation.player_len, - )) - .unwrap(); - out.push(Self::pack_cards( - &observation.opponent_cards, - observation.opponent_visible_len, - )) - .unwrap(); - out.push(0).unwrap(); - } - - fn encode_spectator_observation( - &self, - observation: &Self::SpectatorObservation, - out: &mut Self::WordBuf, - ) { - out.clear(); - let 
winner_code = match observation.winner { - None => 0, - Some(0) => 1, - Some(_) => 2, - }; - let phase = match observation.phase { - BlackjackPhase::PlayerTurn => 0u64, - BlackjackPhase::OpponentTurn => 1, - BlackjackPhase::Terminal => 2, - }; - let header = phase - | ((observation.terminal as u64) << 4) - | ((u64::from(observation.player_len)) << 8) - | ((u64::from(observation.player_value.total)) << 12) - | ((observation.player_value.soft as u64) << 20) - | ((u64::from(observation.opponent_len)) << 24) - | ((u64::from(observation.opponent_value.total)) << 28) - | ((winner_code as u64) << 36); - out.push(header).unwrap(); - out.push(Self::pack_cards( - &observation.player_cards, - observation.player_len, - )) - .unwrap(); - out.push(Self::pack_cards( - &observation.opponent_cards, - observation.opponent_len, - )) - .unwrap(); - out.push(0).unwrap(); - } -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::policy::{FirstLegalPolicy, RandomPolicy}; - use crate::session::Session; - use crate::verification::{ - assert_compact_roundtrip, assert_observation_contracts, assert_transition_contracts, - }; - - fn state_from_hands(player: &[u8], opponent: &[u8]) -> BlackjackState { - let mut state = BlackjackState { - deck: [0; DECK_SIZE], - next_card: 0, - player_cards: [0; MAX_HAND_CARDS], - player_len: 0, - opponent_cards: [0; MAX_HAND_CARDS], - opponent_len: 0, - phase: BlackjackPhase::PlayerTurn, - winner: None, - }; - Blackjack::fill_deck(&mut state.deck); - for &card in player { - Blackjack::push_player_card(&mut state, card); - } - for &card in opponent { - Blackjack::push_opponent_card(&mut state, card); - } - state - } - - #[test] - fn hand_value_handles_soft_aces() { - assert_eq!( - Blackjack::evaluate_hand(&[1, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 2), - HandValue { - total: 21, - soft: true, - busted: false, - } - ); - assert_eq!( - Blackjack::evaluate_hand(&[1, 1, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0], 3), - HandValue { - total: 21, - soft: true, - busted: false, - 
} - ); - assert_eq!( - Blackjack::evaluate_hand(&[1, 1, 10, 10, 0, 0, 0, 0, 0, 0, 0, 0], 4), - HandValue { - total: 22, - soft: false, - busted: true, - } - ); - } - - #[test] - fn shuffled_deck_is_a_full_permutation() { - let state = Blackjack.init(11); - let mut counts = [0u8; 14]; - for card in state.deck { - counts[card as usize] += 1; - } - let mut rank = 1usize; - while rank <= 13 { - assert_eq!(counts[rank], 4, "rank {rank} should appear four times"); - rank += 1; - } - assert_observation_contracts(&Blackjack, &state); - } - - #[test] - fn showdown_matrix_is_correct() { - let mut player_win = state_from_hands(&[10, 10], &[9, 9]); - assert_eq!(Blackjack::resolve_terminal(&mut player_win), 1); - assert_eq!(player_win.winner, Some(0)); - - let mut opponent_win = state_from_hands(&[10, 8], &[10, 9]); - assert_eq!(Blackjack::resolve_terminal(&mut opponent_win), -1); - assert_eq!(opponent_win.winner, Some(1)); - - let mut push = state_from_hands(&[10, 7], &[9, 8]); - assert_eq!(Blackjack::resolve_terminal(&mut push), 0); - assert_eq!(push.winner, None); - } - - #[test] - fn seeded_round_trip_is_reproducible() { - let mut left = Session::new(Blackjack, 11); - let mut right = Session::new(Blackjack, 11); - let action = [PlayerAction { - player: 0, - action: BlackjackAction::Hit, - }]; - let left_outcome = left.step(&action).clone(); - let right_outcome = right.step(&action).clone(); - assert_eq!(left.state(), right.state()); - assert_eq!(left_outcome, right_outcome); - } - - #[test] - fn verification_helpers_hold_for_player_hit() { - let game = Blackjack; - let state = game.init(11); - let mut actions = FixedVec::, 1>::default(); - actions - .push(PlayerAction { - player: 0, - action: BlackjackAction::Hit, - }) - .unwrap(); - assert_transition_contracts(&game, &state, &actions, 11); - assert_compact_roundtrip(&game, &BlackjackAction::Hit); - } - - #[test] - fn seeded_sessions_preserve_invariants_across_policies() { - for seed in 1..=256 { - let mut first = 
FirstLegalPolicy; - let mut random = RandomPolicy; - - let mut first_session = Session::new(Blackjack, seed); - assert!(Blackjack.state_invariant(first_session.state())); - let mut first_policies: [&mut dyn crate::policy::Policy; 1] = [&mut first]; - while !first_session.is_terminal() && first_session.current_tick() < 16 { - first_session.step_with_policies(&mut first_policies); - } - assert!(Blackjack.state_invariant(first_session.state())); - - let mut random_session = Session::new(Blackjack, seed); - assert!(Blackjack.state_invariant(random_session.state())); - let mut random_policies: [&mut dyn crate::policy::Policy; 1] = [&mut random]; - while !random_session.is_terminal() && random_session.current_tick() < 16 { - random_session.step_with_policies(&mut random_policies); - } - assert!(Blackjack.state_invariant(random_session.state())); - } - } -} - -#[cfg(kani)] -mod proofs { - use super::{Blackjack, BlackjackAction, BlackjackPhase, HandValue, MAX_HAND_CARDS}; - use crate::buffer::FixedVec; - use crate::game::Game; - use crate::types::PlayerAction; - - #[kani::proof] - #[kani::unwind(64)] - fn concrete_seed_shuffle_is_a_full_permutation() { - let state = Blackjack.init(11); - let mut counts = [0u8; 14]; - for card in state.deck { - counts[card as usize] += 1; - } - let mut rank = 1usize; - while rank <= 13 { - assert_eq!(counts[rank], 4); - rank += 1; - } - } - - #[kani::proof] - #[kani::unwind(64)] - fn player_observation_hides_opponent_hand_before_terminal() { - let state = Blackjack.init(11); - let observation = Blackjack.observe_player(&state, 0); - if state.phase != BlackjackPhase::Terminal { - assert_eq!(observation.opponent_visible_len, 0); - } - } - - #[kani::proof] - #[kani::unwind(64)] - fn initial_observation_contracts_hold_for_concrete_seed() { - let game = Blackjack; - let state = game.init(11); - crate::verification::assert_observation_contracts(&game, &state); - } - - #[kani::proof] - #[kani::unwind(64)] - fn 
stand_action_replays_deterministically_for_seed_17() { - let state = Blackjack.init(17); - let mut actions = FixedVec::, 1>::default(); - actions - .push(PlayerAction { - player: 0, - action: BlackjackAction::Stand, - }) - .unwrap(); - crate::verification::assert_transition_contracts(&Blackjack, &state, &actions, 17); - } - - #[kani::proof] - #[kani::unwind(32)] - fn hand_evaluation_matches_busted_flag() { - let len: u8 = kani::any(); - kani::assume(len <= MAX_HAND_CARDS as u8); - let mut cards = [1u8; MAX_HAND_CARDS]; - for card in &mut cards { - *card = kani::any(); - kani::assume((1..=13).contains(card)); - } - let value = Blackjack::evaluate_hand(&cards, len); - assert_eq!( - value, - HandValue { - total: value.total, - soft: value.soft, - busted: value.total > 21, - } - ); - } -} diff --git a/src/games/mod.rs b/src/games/mod.rs deleted file mode 100644 index d92d2c0..0000000 --- a/src/games/mod.rs +++ /dev/null @@ -1,17 +0,0 @@ -pub mod blackjack; -#[cfg(feature = "physics")] -pub mod platformer; -pub mod tictactoe; - -pub use blackjack::{ - Blackjack, BlackjackAction, BlackjackObservation, BlackjackPhase, - BlackjackSpectatorObservation, BlackjackWorldView, -}; -#[cfg(feature = "physics")] -pub use platformer::{ - BerryView, Platformer, PlatformerAction, PlatformerConfig, PlatformerObservation, - PlatformerWorldView, -}; -pub use tictactoe::{ - TicTacToe, TicTacToeAction, TicTacToeCell, TicTacToeObservation, TicTacToeWorldView, -}; diff --git a/src/games/platformer.rs b/src/games/platformer.rs deleted file mode 100644 index 050eaeb..0000000 --- a/src/games/platformer.rs +++ /dev/null @@ -1,763 +0,0 @@ -use crate::buffer::{FixedVec, default_array}; -use crate::compact::{CompactGame, CompactSpec}; -use crate::game::Game; -use crate::math::{Aabb2, StrictF64, Vec2}; -use crate::physics::{BodyKind, Contact2d, PhysicsBody2d, PhysicsOracleView2d, PhysicsWorld2d}; -use crate::rng::DeterministicRng; -use crate::types::{PlayerAction, PlayerId, PlayerReward, Reward, 
Seed, StepOutcome, Termination}; - -const BERRY_COUNT: usize = 6; -const PLAYER_BODY_ID: u16 = 1; -const FIRST_BERRY_BODY_ID: u16 = 10; -const PLATFORMER_BODIES: usize = 1 + BERRY_COUNT; -const PLATFORMER_CONTACTS: usize = PLATFORMER_BODIES * (PLATFORMER_BODIES - 1) / 2; -const ALL_BERRIES_MASK: u8 = 0b00_111111; - -#[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)] -pub enum PlatformerAction { - #[default] - Stay, - Left, - Right, - Jump, -} - -#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] -pub struct PlatformerConfig { - pub width: u8, - pub height: u8, - pub player_width: u8, - pub player_height: u8, - pub jump_delta: u8, - pub berry_y: u8, - pub berry_xs: [u8; BERRY_COUNT], - pub sprain_numerator: u64, - pub sprain_denominator: u64, - pub berry_reward: Reward, - pub finish_bonus: Reward, -} - -impl Default for PlatformerConfig { - fn default() -> Self { - Self { - width: 12, - height: 3, - player_width: 1, - player_height: 1, - jump_delta: 1, - berry_y: 2, - berry_xs: [1, 3, 5, 7, 9, 11], - sprain_numerator: 1, - sprain_denominator: 10, - berry_reward: 1, - finish_bonus: 10, - } - } -} - -impl PlatformerConfig { - pub fn arena_bounds(self) -> Aabb2 { - Aabb2::new( - Vec2::new(StrictF64::new(0.0), StrictF64::new(0.0)), - Vec2::new( - StrictF64::new(self.width as f64), - StrictF64::new(self.height as f64), - ), - ) - } - - pub fn player_half_extents(self) -> Vec2 { - Vec2::new( - StrictF64::new(self.player_width as f64 / 2.0), - StrictF64::new(self.player_height as f64 / 2.0), - ) - } - - pub fn player_center(self, x: u8, y: u8) -> Vec2 { - Vec2::new( - StrictF64::new(x as f64 + self.player_width as f64 / 2.0), - StrictF64::new(y as f64 + self.player_height as f64 / 2.0), - ) - } - - pub fn berry_center(self, index: usize) -> Vec2 { - Vec2::new( - StrictF64::new(self.berry_xs[index] as f64 + 0.5), - StrictF64::new(self.berry_y as f64), - ) - } - - pub fn invariant(self) -> bool { - if self.width == 0 - || self.height == 0 - || self.player_width == 0 
- || self.player_height == 0 - || self.player_width > self.width - || self.player_height > self.height - || self.jump_delta >= self.height - || self.sprain_denominator == 0 - || self.sprain_numerator > self.sprain_denominator - || self.berry_y >= self.height - { - return false; - } - - let mut index = 1usize; - while index < self.berry_xs.len() { - if self.berry_xs[index - 1] >= self.berry_xs[index] - || self.berry_xs[index] >= self.width - { - return false; - } - index += 1; - } - - true - } -} - -#[derive(Clone, Debug, Eq, Hash, PartialEq)] -pub struct PlatformerState { - pub world: PhysicsWorld2d, - pub remaining_berries: u8, -} - -#[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)] -pub struct PlatformerObservation { - pub x: u8, - pub y: u8, - pub remaining_berries: u8, - pub terminal: bool, - pub winner: Option, -} - -#[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)] -pub struct BerryView { - pub id: u16, - pub x: u8, - pub y: u8, - pub collected: bool, -} - -#[derive(Clone, Debug, Eq, Hash, PartialEq)] -pub struct PlatformerWorldView { - pub config: PlatformerConfig, - pub physics: PhysicsWorld2d, - pub berries: [BerryView; BERRY_COUNT], -} - -impl Default for PlatformerState { - fn default() -> Self { - Platformer::default().init(0) - } -} - -impl Default for PlatformerWorldView { - fn default() -> Self { - Platformer::default().world_view(&Platformer::default().init(0)) - } -} - -impl PhysicsOracleView2d for PlatformerWorldView { - fn bounds(&self) -> Aabb2 { - self.physics.bounds() - } - - fn tick(&self) -> u64 { - self.physics.tick() - } - - fn bodies(&self) -> &[PhysicsBody2d] { - self.physics.bodies() - } - - fn contacts(&self) -> &[Contact2d] { - self.physics.contacts() - } -} - -#[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)] -pub struct Platformer { - pub config: PlatformerConfig, -} - -impl Platformer { - pub fn new(config: PlatformerConfig) -> Self { - assert!(config.invariant(), "invalid platformer config"); - Self { 
config } - } - - fn player_body(state: &PlatformerState) -> &PhysicsBody2d { - state.world.require_body(PLAYER_BODY_ID) - } - - fn player_position(&self, state: &PlatformerState) -> (u8, u8) { - let player = Self::player_body(state); - let min = player.aabb().min; - let x = min.x.to_f64(); - let y = min.y.to_f64(); - debug_assert!(x >= 0.0 && y >= 0.0); - (x as u8, y as u8) - } - - fn is_terminal_state(state: &PlatformerState) -> bool { - state.remaining_berries == 0 - } - - fn winner(state: &PlatformerState) -> Option { - Self::is_terminal_state(state).then_some(0) - } - - fn sync_berries(&self, state: &mut PlatformerState) { - for index in 0..BERRY_COUNT { - let berry_id = FIRST_BERRY_BODY_ID + index as u16; - state - .world - .set_body_active_deferred(berry_id, state.remaining_berries & (1u8 << index) != 0); - } - } - - fn collect_berries_from_contacts(&self, state: &mut PlatformerState) -> Reward { - let mut reward = 0; - for index in 0..BERRY_COUNT { - let berry_bit = 1u8 << index; - let berry_id = FIRST_BERRY_BODY_ID + index as u16; - if state.remaining_berries & berry_bit != 0 - && state.world.has_contact(PLAYER_BODY_ID, berry_id) - { - state.remaining_berries &= !berry_bit; - state.world.set_body_active(berry_id, false); - reward += self.config.berry_reward; - } - } - if state.remaining_berries == 0 { - reward += self.config.finish_bonus; - } - reward - } - - fn observation_from_state(&self, state: &PlatformerState) -> PlatformerObservation { - let (x, y) = self.player_position(state); - PlatformerObservation { - x, - y, - remaining_berries: state.remaining_berries, - terminal: Self::is_terminal_state(state), - winner: Self::winner(state), - } - } - - fn build_world(&self) -> PhysicsWorld2d { - let mut world = PhysicsWorld2d::new(self.config.arena_bounds()); - world.add_body_deferred(PhysicsBody2d { - id: PLAYER_BODY_ID, - kind: BodyKind::Kinematic, - position: self.config.player_center(0, 0), - half_extents: self.config.player_half_extents(), - active: 
true, - }); - for index in 0..BERRY_COUNT { - world.add_body_deferred(PhysicsBody2d { - id: FIRST_BERRY_BODY_ID + index as u16, - kind: BodyKind::Trigger, - position: self.config.berry_center(index), - half_extents: Vec2::new(StrictF64::new(0.0), StrictF64::new(0.0)), - active: true, - }); - } - world.refresh_contacts(); - world - } -} - -impl Game for Platformer { - type State = PlatformerState; - type Action = PlatformerAction; - type PlayerObservation = PlatformerObservation; - type SpectatorObservation = PlatformerObservation; - type WorldView = PlatformerWorldView; - type PlayerBuf = FixedVec; - type ActionBuf = FixedVec; - type JointActionBuf = FixedVec, 1>; - type RewardBuf = FixedVec; - type WordBuf = FixedVec; - - fn name(&self) -> &'static str { - "platformer" - } - - fn player_count(&self) -> usize { - 1 - } - - fn init(&self, _seed: Seed) -> Self::State { - assert!(self.config.invariant()); - PlatformerState { - world: self.build_world(), - remaining_berries: ALL_BERRIES_MASK, - } - } - - fn is_terminal(&self, state: &Self::State) -> bool { - Self::is_terminal_state(state) - } - - fn players_to_act(&self, state: &Self::State, out: &mut Self::PlayerBuf) { - out.clear(); - if !self.is_terminal(state) { - out.push(0).unwrap(); - } - } - - fn legal_actions(&self, state: &Self::State, player: PlayerId, out: &mut Self::ActionBuf) { - out.clear(); - if player != 0 || self.is_terminal(state) { - return; - } - out.push(PlatformerAction::Stay).unwrap(); - out.push(PlatformerAction::Left).unwrap(); - out.push(PlatformerAction::Right).unwrap(); - out.push(PlatformerAction::Jump).unwrap(); - } - - fn observe_player(&self, state: &Self::State, _player: PlayerId) -> Self::PlayerObservation { - self.observation_from_state(state) - } - - fn observe_spectator(&self, state: &Self::State) -> Self::SpectatorObservation { - self.observation_from_state(state) - } - - fn world_view(&self, state: &Self::State) -> Self::WorldView { - let mut berries = default_array::(); - let 
mut index = 0usize; - while index < BERRY_COUNT { - berries[index] = BerryView { - id: FIRST_BERRY_BODY_ID + index as u16, - x: self.config.berry_xs[index], - y: self.config.berry_y, - collected: (state.remaining_berries & (1u8 << index)) == 0, - }; - index += 1; - } - PlatformerWorldView { - config: self.config, - physics: state.world.clone(), - berries, - } - } - - fn step_in_place( - &self, - state: &mut Self::State, - joint_actions: &Self::JointActionBuf, - rng: &mut DeterministicRng, - out: &mut StepOutcome, - ) { - let actions = joint_actions.as_slice(); - let mut action = PlatformerAction::Stay; - let mut action_index = 0usize; - while action_index < actions.len() { - let candidate = &actions[action_index]; - if candidate.player == 0 { - action = candidate.action; - break; - } - action_index += 1; - } - - let mut reward = 0; - if self.is_terminal(state) { - out.termination = Termination::Terminal { - winner: Self::winner(state), - }; - } else { - let (current_x, _) = self.player_position(state); - let (x, y) = match action { - PlatformerAction::Stay => (current_x, 0), - PlatformerAction::Left => (current_x.saturating_sub(1), 0), - PlatformerAction::Right => ( - if current_x + self.config.player_width < self.config.width { - current_x + 1 - } else { - current_x - }, - 0, - ), - PlatformerAction::Jump => { - if rng.gen_bool_ratio( - self.config.sprain_numerator, - self.config.sprain_denominator, - ) { - reward -= 1; - } - (current_x, self.config.jump_delta) - } - }; - - state - .world - .set_body_position_deferred(PLAYER_BODY_ID, self.config.player_center(x, y)); - state.world.refresh_contacts(); - reward += self.collect_berries_from_contacts(state); - self.sync_berries(state); - state.world.step(); - - out.termination = if self.is_terminal(state) { - Termination::Terminal { - winner: Self::winner(state), - } - } else { - Termination::Ongoing - }; - } - - out.rewards - .push(PlayerReward { player: 0, reward }) - .unwrap(); - } - - fn state_invariant(&self, 
state: &Self::State) -> bool { - if !self.config.invariant() - || state.remaining_berries & !ALL_BERRIES_MASK != 0 - || !state.world.invariant() - || state.world.bodies.len() != PLATFORMER_BODIES - { - return false; - } - - let player = Self::player_body(state); - if player.kind != BodyKind::Kinematic - || !player.active - || player.half_extents != self.config.player_half_extents() - { - return false; - } - - let (x, y) = self.player_position(state); - if x + self.config.player_width > self.config.width || y > self.config.jump_delta { - return false; - } - - for index in 0..BERRY_COUNT { - let berry = state.world.require_body(FIRST_BERRY_BODY_ID + index as u16); - let expected_active = state.remaining_berries & (1u8 << index) != 0; - if berry.kind != BodyKind::Trigger - || berry.position != self.config.berry_center(index) - || berry.active != expected_active - { - return false; - } - } - - true - } - - fn player_observation_invariant( - &self, - state: &Self::State, - _player: PlayerId, - observation: &Self::PlayerObservation, - ) -> bool { - observation == &self.observation_from_state(state) - } - - fn spectator_observation_invariant( - &self, - state: &Self::State, - observation: &Self::SpectatorObservation, - ) -> bool { - observation == &self.observation_from_state(state) - } - - fn world_view_invariant(&self, state: &Self::State, world: &Self::WorldView) -> bool { - if world.config != self.config || world.physics != state.world { - return false; - } - - let mut index = 0usize; - while index < world.berries.len() { - let berry = world.berries[index]; - if berry.id != FIRST_BERRY_BODY_ID + index as u16 - || berry.x != self.config.berry_xs[index] - || berry.y != self.config.berry_y - || berry.collected != ((state.remaining_berries & (1u8 << index)) == 0) - { - return false; - } - index += 1; - } - - true - } - - fn transition_postcondition( - &self, - _pre: &Self::State, - _actions: &Self::JointActionBuf, - post: &Self::State, - outcome: &StepOutcome, - ) -> bool 
{ - matches!(outcome.reward_for(0), -1..=11) - && (post.remaining_berries == 0) == outcome.is_terminal() - } -} - -impl CompactGame for Platformer { - fn compact_spec(&self) -> CompactSpec { - CompactSpec { - action_count: 4, - observation_bits: 12, - observation_stream_len: 1, - reward_bits: 4, - min_reward: -1, - max_reward: 11, - reward_offset: 1, - } - } - - fn encode_action(&self, action: &Self::Action) -> u64 { - match action { - PlatformerAction::Stay => 0, - PlatformerAction::Left => 1, - PlatformerAction::Right => 2, - PlatformerAction::Jump => 3, - } - } - - fn decode_action(&self, encoded: u64) -> Option { - match encoded { - 0 => Some(PlatformerAction::Stay), - 1 => Some(PlatformerAction::Left), - 2 => Some(PlatformerAction::Right), - 3 => Some(PlatformerAction::Jump), - _ => None, - } - } - - fn encode_player_observation( - &self, - observation: &Self::PlayerObservation, - out: &mut Self::WordBuf, - ) { - out.clear(); - let packed = u64::from(observation.x) - | (u64::from(observation.y) << 4) - | (u64::from(observation.remaining_berries) << 5) - | ((observation.terminal as u64) << 11); - out.push(packed).unwrap(); - } - - fn encode_spectator_observation( - &self, - observation: &Self::SpectatorObservation, - out: &mut Self::WordBuf, - ) { - self.encode_player_observation(observation, out); - } -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::session::Session; - use crate::verification::{ - assert_compact_roundtrip, assert_observation_contracts, assert_transition_contracts, - }; - - #[test] - fn movement_clamps_at_walls() { - let game = Platformer::default(); - let mut state = game.init(1); - let mut rng = DeterministicRng::from_seed_and_stream(1, 1); - let mut outcome = StepOutcome::>::default(); - let mut actions = FixedVec::, 1>::default(); - actions - .push(PlayerAction { - player: 0, - action: PlatformerAction::Left, - }) - .unwrap(); - game.step_in_place(&mut state, &actions, &mut rng, &mut outcome); - 
assert_eq!(game.observe_spectator(&state).x, 0); - - state - .world - .set_body_position(PLAYER_BODY_ID, game.config.player_center(11, 0)); - outcome.clear(); - actions.clear(); - actions - .push(PlayerAction { - player: 0, - action: PlatformerAction::Right, - }) - .unwrap(); - game.step_in_place(&mut state, &actions, &mut rng, &mut outcome); - assert_eq!(game.observe_spectator(&state).x, 11); - } - - #[test] - fn berry_collection_is_idempotent() { - let game = Platformer::default(); - let mut state = game.init(1); - state - .world - .set_body_position(PLAYER_BODY_ID, game.config.player_center(1, 0)); - let mut rng = DeterministicRng::from_seed_and_stream(1, 1); - let mut outcome = StepOutcome::>::default(); - let mut actions = FixedVec::, 1>::default(); - actions - .push(PlayerAction { - player: 0, - action: PlatformerAction::Jump, - }) - .unwrap(); - - game.step_in_place(&mut state, &actions, &mut rng, &mut outcome); - let remaining = state.remaining_berries; - outcome.clear(); - game.step_in_place(&mut state, &actions, &mut rng, &mut outcome); - assert_eq!(state.remaining_berries, remaining); - } - - #[test] - fn final_berry_terminates_with_bonus() { - let game = Platformer::default(); - let mut state = game.init(9); - state.remaining_berries = 1u8 << 5; - game.sync_berries(&mut state); - state - .world - .set_body_position(PLAYER_BODY_ID, game.config.player_center(11, 0)); - let mut rng = DeterministicRng::from_seed_and_stream(9, 1); - let mut outcome = StepOutcome::>::default(); - let mut actions = FixedVec::, 1>::default(); - actions - .push(PlayerAction { - player: 0, - action: PlatformerAction::Jump, - }) - .unwrap(); - game.step_in_place(&mut state, &actions, &mut rng, &mut outcome); - assert!(game.is_terminal(&state)); - assert!(outcome.reward_for(0) >= 10); - } - - #[test] - fn seeded_sessions_replay_exactly() { - let mut left = Session::new(Platformer::default(), 3); - let mut right = Session::new(Platformer::default(), 3); - let actions = [ - 
PlayerAction { - player: 0, - action: PlatformerAction::Right, - }, - PlayerAction { - player: 0, - action: PlatformerAction::Jump, - }, - PlayerAction { - player: 0, - action: PlatformerAction::Right, - }, - ]; - for action in actions { - left.step(std::slice::from_ref(&action)); - right.step(std::slice::from_ref(&action)); - } - assert_eq!(left.trace(), right.trace()); - assert_eq!(left.state(), right.state()); - } - - #[test] - fn verification_helpers_hold_for_jump() { - let game = Platformer::default(); - let state = game.init(3); - let mut actions = FixedVec::, 1>::default(); - actions - .push(PlayerAction { - player: 0, - action: PlatformerAction::Jump, - }) - .unwrap(); - assert_transition_contracts(&game, &state, &actions, 3); - assert_observation_contracts(&game, &state); - assert_compact_roundtrip(&game, &PlatformerAction::Jump); - } - - #[test] - fn physics_world_tracks_actor_and_berries() { - let state = Platformer::default().init(3); - let world = Platformer::default().world_view(&state); - assert_eq!(world.physics.bodies.len(), PLATFORMER_BODIES); - assert!(world.physics.invariant()); - } -} - -#[cfg(kani)] -mod proofs { - use super::{ALL_BERRIES_MASK, PLAYER_BODY_ID, Platformer, PlatformerAction, PlatformerState}; - use crate::buffer::FixedVec; - use crate::game::Game; - use crate::types::PlayerAction; - - #[kani::proof] - #[kani::unwind(64)] - fn wall_clamps_hold_for_all_edge_positions() { - let game = Platformer::default(); - let mut state = PlatformerState::default(); - let x: u8 = kani::any(); - kani::assume(x < game.config.width); - state - .world - .set_body_position(PLAYER_BODY_ID, game.config.player_center(x, 0)); - let mut rng = crate::rng::DeterministicRng::from_seed(1); - let mut outcome = - crate::types::StepOutcome::>::default(); - let mut actions = FixedVec::, 1>::default(); - actions - .push(PlayerAction { - player: 0, - action: PlatformerAction::Left, - }) - .unwrap(); - game.step_in_place(&mut state, &actions, &mut rng, &mut 
outcome); - assert!(game.observe_spectator(&state).x < game.config.width); - } - - #[kani::proof] - #[kani::unwind(64)] - fn jump_reward_is_bounded() { - let state = Platformer::default().init(1); - let mut actions = FixedVec::, 1>::default(); - actions - .push(PlayerAction { - player: 0, - action: PlatformerAction::Jump, - }) - .unwrap(); - crate::verification::assert_transition_contracts( - &Platformer::default(), - &state, - &actions, - 1, - ); - } - - #[kani::proof] - #[kani::unwind(64)] - fn initial_observation_and_world_contracts_hold() { - let game = Platformer::default(); - let state = game.init(1); - crate::verification::assert_observation_contracts(&game, &state); - } - - #[kani::proof] - #[kani::unwind(64)] - fn berry_mask_tracks_trigger_activation() { - let mut state = PlatformerState::default(); - state.remaining_berries = ALL_BERRIES_MASK ^ 0b000001; - Platformer::default().sync_berries(&mut state); - assert!(!state.world.require_body(super::FIRST_BERRY_BODY_ID).active); - } -} diff --git a/src/games/tictactoe.rs b/src/games/tictactoe.rs deleted file mode 100644 index 905e32c..0000000 --- a/src/games/tictactoe.rs +++ /dev/null @@ -1,468 +0,0 @@ -use crate::buffer::FixedVec; -use crate::compact::{CompactGame, CompactSpec}; -use crate::game::Game; -use crate::rng::DeterministicRng; -use crate::types::{PlayerAction, PlayerId, PlayerReward, Seed, StepOutcome, Termination}; - -const WIN_LINES: [(usize, usize, usize); 8] = [ - (0, 1, 2), - (3, 4, 5), - (6, 7, 8), - (0, 3, 6), - (1, 4, 7), - (2, 5, 8), - (0, 4, 8), - (2, 4, 6), -]; - -#[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)] -pub enum TicTacToeCell { - #[default] - Empty, - Player, - Opponent, -} - -#[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)] -pub struct TicTacToeAction(pub u8); - -#[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)] -pub struct TicTacToeState { - pub board: [TicTacToeCell; 9], - pub terminal: bool, - pub winner: Option, -} - -pub type 
TicTacToeObservation = TicTacToeState; -pub type TicTacToeWorldView = TicTacToeState; - -#[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)] -pub struct TicTacToe; - -impl TicTacToe { - fn find_winner(board: &[TicTacToeCell; 9]) -> Option { - for (a, b, c) in WIN_LINES { - let cells = (board[a], board[b], board[c]); - if cells - == ( - TicTacToeCell::Player, - TicTacToeCell::Player, - TicTacToeCell::Player, - ) - { - return Some(0); - } - if cells - == ( - TicTacToeCell::Opponent, - TicTacToeCell::Opponent, - TicTacToeCell::Opponent, - ) - { - return Some(1); - } - } - None - } - - fn is_full(board: &[TicTacToeCell; 9]) -> bool { - let mut index = 0usize; - while index < board.len() { - if board[index] == TicTacToeCell::Empty { - return false; - } - index += 1; - } - true - } - - pub fn packed_board(board: &[TicTacToeCell; 9]) -> u64 { - let mut packed = 0u64; - let mut index = 0usize; - while index < board.len() { - let value = match board[index] { - TicTacToeCell::Empty => 0, - TicTacToeCell::Player => 1, - TicTacToeCell::Opponent => 2, - }; - packed |= value << (index * 2); - index += 1; - } - packed - } -} - -impl Game for TicTacToe { - type State = TicTacToeState; - type Action = TicTacToeAction; - type PlayerObservation = TicTacToeObservation; - type SpectatorObservation = TicTacToeObservation; - type WorldView = TicTacToeWorldView; - type PlayerBuf = FixedVec; - type ActionBuf = FixedVec; - type JointActionBuf = FixedVec, 1>; - type RewardBuf = FixedVec; - type WordBuf = FixedVec; - - fn name(&self) -> &'static str { - "tictactoe" - } - - fn player_count(&self) -> usize { - 1 - } - - fn init(&self, _seed: Seed) -> Self::State { - TicTacToeState::default() - } - - fn is_terminal(&self, state: &Self::State) -> bool { - state.terminal - } - - fn players_to_act(&self, state: &Self::State, out: &mut Self::PlayerBuf) { - out.clear(); - if !state.terminal { - out.push(0).unwrap(); - } - } - - fn legal_actions(&self, state: &Self::State, player: PlayerId, 
out: &mut Self::ActionBuf) { - out.clear(); - if player != 0 || state.terminal { - return; - } - let mut index = 0usize; - while index < state.board.len() { - if state.board[index] == TicTacToeCell::Empty { - out.push(TicTacToeAction(index as u8)).unwrap(); - } - index += 1; - } - } - - fn observe_player(&self, state: &Self::State, _player: PlayerId) -> Self::PlayerObservation { - *state - } - - fn observe_spectator(&self, state: &Self::State) -> Self::SpectatorObservation { - *state - } - - fn world_view(&self, state: &Self::State) -> Self::WorldView { - *state - } - - fn step_in_place( - &self, - state: &mut Self::State, - joint_actions: &Self::JointActionBuf, - rng: &mut DeterministicRng, - out: &mut StepOutcome, - ) { - let mut action = None; - let actions = joint_actions.as_slice(); - let mut action_index = 0usize; - while action_index < actions.len() { - let candidate = &actions[action_index]; - if candidate.player == 0 { - action = Some(candidate.action.0 as usize); - break; - } - action_index += 1; - } - - let reward = if state.terminal { - out.termination = Termination::Terminal { - winner: state.winner, - }; - 0 - } else if let Some(index) = action { - if index >= 9 || state.board[index] != TicTacToeCell::Empty { - -3 - } else { - state.board[index] = TicTacToeCell::Player; - if let Some(winner) = Self::find_winner(&state.board) { - state.terminal = true; - state.winner = Some(winner); - out.termination = Termination::Terminal { - winner: state.winner, - }; - 2 - } else if Self::is_full(&state.board) { - state.terminal = true; - state.winner = None; - out.termination = Termination::Terminal { winner: None }; - 1 - } else { - let mut empty_positions = [0usize; 9]; - let mut empty_len = 0usize; - let mut cell_index = 0usize; - while cell_index < state.board.len() { - if state.board[cell_index] == TicTacToeCell::Empty { - empty_positions[empty_len] = cell_index; - empty_len += 1; - } - cell_index += 1; - } - let opponent_index = 
empty_positions[rng.gen_range(empty_len)]; - state.board[opponent_index] = TicTacToeCell::Opponent; - if let Some(winner) = Self::find_winner(&state.board) { - state.terminal = true; - state.winner = Some(winner); - out.termination = Termination::Terminal { - winner: state.winner, - }; - -2 - } else if Self::is_full(&state.board) { - state.terminal = true; - state.winner = None; - out.termination = Termination::Terminal { winner: None }; - 1 - } else { - 0 - } - } - } - } else { - -3 - }; - - out.rewards - .push(PlayerReward { player: 0, reward }) - .unwrap(); - if !state.terminal { - out.termination = Termination::Ongoing; - } - } - - fn state_invariant(&self, state: &Self::State) -> bool { - let winner = Self::find_winner(&state.board); - let full = Self::is_full(&state.board); - state.terminal == (winner.is_some() || full) - && (state.winner == winner || (winner.is_none() && state.winner.is_none())) - } - - fn action_invariant(&self, action: &Self::Action) -> bool { - action.0 < 9 - } - - fn transition_postcondition( - &self, - pre: &Self::State, - _actions: &Self::JointActionBuf, - post: &Self::State, - outcome: &StepOutcome, - ) -> bool { - if pre.terminal { - return post == pre && outcome.reward_for(0) == 0 && outcome.is_terminal(); - } - let reward = outcome.reward_for(0); - matches!(reward, -3..=2) && (!post.terminal || outcome.is_terminal()) - } -} - -impl CompactGame for TicTacToe { - fn compact_spec(&self) -> CompactSpec { - CompactSpec { - action_count: 9, - observation_bits: 18, - observation_stream_len: 1, - reward_bits: 3, - min_reward: -3, - max_reward: 2, - reward_offset: 3, - } - } - - fn encode_action(&self, action: &Self::Action) -> u64 { - u64::from(action.0) - } - - fn decode_action(&self, encoded: u64) -> Option { - (encoded < 9).then_some(TicTacToeAction(encoded as u8)) - } - - fn encode_player_observation( - &self, - observation: &Self::PlayerObservation, - out: &mut Self::WordBuf, - ) { - out.clear(); - 
out.push(Self::packed_board(&observation.board)).unwrap(); - } - - fn encode_spectator_observation( - &self, - observation: &Self::SpectatorObservation, - out: &mut Self::WordBuf, - ) { - self.encode_player_observation(observation, out); - } -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::session::Session; - use crate::verification::{ - assert_compact_roundtrip, assert_observation_contracts, assert_transition_contracts, - }; - - #[test] - fn illegal_move_preserves_state_and_penalizes() { - let mut session = Session::new(TicTacToe, 7); - session.step(&[PlayerAction { - player: 0, - action: TicTacToeAction(0), - }]); - let before = *session.state(); - let outcome = session.step(&[PlayerAction { - player: 0, - action: TicTacToeAction(0), - }]); - assert_eq!(outcome.reward_for(0), -3); - assert_eq!(session.state(), &before); - } - - #[test] - fn legal_actions_match_empty_cells_exhaustively() { - let game = TicTacToe; - for encoded in 0..3u32.pow(9) { - let mut board = [TicTacToeCell::Empty; 9]; - let mut value = encoded; - for cell in &mut board { - *cell = match value % 3 { - 0 => TicTacToeCell::Empty, - 1 => TicTacToeCell::Player, - _ => TicTacToeCell::Opponent, - }; - value /= 3; - } - let winner = TicTacToe::find_winner(&board); - let terminal = winner.is_some() || TicTacToe::is_full(&board); - let state = TicTacToeState { - board, - terminal, - winner, - }; - let mut legal = FixedVec::::default(); - game.legal_actions(&state, 0, &mut legal); - let expected: Vec<_> = if terminal { - Vec::new() - } else { - state - .board - .iter() - .enumerate() - .filter_map(|(index, cell)| { - (*cell == TicTacToeCell::Empty).then_some(TicTacToeAction(index as u8)) - }) - .collect() - }; - assert_eq!( - legal.as_slice(), - expected.as_slice(), - "encoded board state {encoded}" - ); - assert_observation_contracts(&game, &state); - } - } - - #[test] - fn verification_helpers_hold_for_opening_move() { - let game = TicTacToe; - let state = game.init(7); - let mut actions 
= FixedVec::, 1>::default(); - actions - .push(PlayerAction { - player: 0, - action: TicTacToeAction(0), - }) - .unwrap(); - assert_transition_contracts(&game, &state, &actions, 7); - assert_compact_roundtrip(&game, &TicTacToeAction(0)); - } -} - -#[cfg(kani)] -mod proofs { - use super::{TicTacToe, TicTacToeAction, TicTacToeCell, TicTacToeState}; - use crate::buffer::FixedVec; - use crate::game::Game; - use crate::session::{FixedHistory, SessionKernel}; - use crate::types::PlayerAction; - - #[kani::proof] - #[kani::unwind(16)] - fn legal_actions_are_exactly_empty_cells() { - let encoded: u32 = kani::any(); - kani::assume(encoded < 3u32.pow(9)); - let mut board = [TicTacToeCell::Empty; 9]; - let mut value = encoded; - for cell in &mut board { - *cell = match value % 3 { - 0 => TicTacToeCell::Empty, - 1 => TicTacToeCell::Player, - _ => TicTacToeCell::Opponent, - }; - value /= 3; - } - let winner = TicTacToe::find_winner(&board); - let terminal = winner.is_some() || TicTacToe::is_full(&board); - let state = TicTacToeState { - board, - terminal, - winner, - }; - let mut legal = FixedVec::::default(); - TicTacToe.legal_actions(&state, 0, &mut legal); - let mut legal_count = 0usize; - let mut legal_index = 0usize; - while legal_index < legal.len() { - let action = legal.as_slice()[legal_index]; - assert_eq!(state.board[action.0 as usize], TicTacToeCell::Empty); - legal_count += 1; - legal_index += 1; - } - - let mut empty_count = 0usize; - let mut board_index = 0usize; - while board_index < state.board.len() { - if state.board[board_index] == TicTacToeCell::Empty { - if !terminal { - assert!( - legal - .as_slice() - .contains(&TicTacToeAction(board_index as u8)) - ); - } - empty_count += 1; - } - board_index += 1; - } - assert_eq!(legal_count, if terminal { 0 } else { empty_count }); - } - - #[kani::proof] - #[kani::unwind(16)] - fn invalid_move_never_mutates_board() { - type ProofSession = SessionKernel>; - - let mut session = ProofSession::new(TicTacToe, 1); - 
session.step(&[PlayerAction { - player: 0, - action: TicTacToeAction(0), - }]); - let mut actions = FixedVec::, 1>::default(); - actions - .push(PlayerAction { - player: 0, - action: TicTacToeAction(0), - }) - .unwrap(); - let before = *session.state(); - session.step_with_joint_actions(&actions); - assert_eq!(*session.state(), before); - } -} diff --git a/src/lib.rs b/src/lib.rs index 3ef5e68..025d114 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,8 +1,17 @@ +//! Deterministic game engine core with compact codecs, verification hooks, and render adapters. + +pub mod core; +pub mod proof; +#[cfg(feature = "builtin")] +pub mod registry; + pub mod buffer; +#[cfg(feature = "builtin")] +pub mod builtin; +#[cfg(feature = "cli")] +pub mod cli; pub mod compact; pub mod game; -#[cfg(feature = "builtin-games")] -pub mod games; pub mod math; #[cfg(feature = "parallel")] pub mod parallel; @@ -17,7 +26,8 @@ pub mod types; pub mod verification; pub use buffer::{BitWords, Buffer, CapacityError, FixedVec}; -pub use compact::{CompactGame, CompactSpec}; +pub use compact::CompactSpec; +pub use core::single_player::SinglePlayerGame; pub use game::Game; pub use policy::{FirstLegalPolicy, FnPolicy, Policy, RandomPolicy, ScriptedPolicy}; pub use rng::{DeterministicRng, SplitMix64}; diff --git a/src/math.rs b/src/math.rs index 61fcca5..19260c9 100644 --- a/src/math.rs +++ b/src/math.rs @@ -1,13 +1,19 @@ +//! Deterministic math primitives used by simulation and rendering layers. + use std::cmp::Ordering; use std::ops::{Add, AddAssign, Div, Mul, Sub, SubAssign}; +/// 2D vector. #[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)] pub struct Vec2 { + /// X coordinate. pub x: T, + /// Y coordinate. pub y: T, } impl Vec2 { + /// Creates a 2D vector. pub const fn new(x: T, y: T) -> Self { Self { x, y } } @@ -55,14 +61,19 @@ where } } +/// 3D vector. #[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)] pub struct Vec3 { + /// X coordinate. pub x: T, + /// Y coordinate. 
pub y: T, + /// Z coordinate. pub z: T, } impl Vec3 { + /// Creates a 3D vector. pub const fn new(x: T, y: T, z: T) -> Self { Self { x, y, z } } @@ -90,9 +101,12 @@ where } } +/// Axis-aligned bounding box in 2D. #[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)] pub struct Aabb2 { + /// Minimum corner. pub min: Vec2, + /// Maximum corner. pub max: Vec2, } @@ -100,10 +114,12 @@ impl Aabb2 where T: Copy + Ord, { + /// Creates a 2D AABB. pub const fn new(min: Vec2, max: Vec2) -> Self { Self { min, max } } + /// Returns whether `point` is inside or on bounds. pub fn contains(&self, point: Vec2) -> bool { point.x >= self.min.x && point.x <= self.max.x @@ -111,6 +127,7 @@ where && point.y <= self.max.y } + /// Returns whether this AABB intersects `other`. pub fn intersects(&self, other: &Self) -> bool { self.min.x <= other.max.x && self.max.x >= other.min.x @@ -119,9 +136,12 @@ where } } +/// Axis-aligned bounding box in 3D. #[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)] pub struct Aabb3 { + /// Minimum corner. pub min: Vec3, + /// Maximum corner. pub max: Vec3, } @@ -129,10 +149,12 @@ impl Aabb3 where T: Copy + Ord, { + /// Creates a 3D AABB. pub const fn new(min: Vec3, max: Vec3) -> Self { Self { min, max } } + /// Returns whether `point` is inside or on bounds. pub fn contains(&self, point: Vec3) -> bool { point.x >= self.min.x && point.x <= self.max.x @@ -142,6 +164,7 @@ where && point.z <= self.max.z } + /// Returns whether this AABB intersects `other`. pub fn intersects(&self, other: &Self) -> bool { self.min.x <= other.max.x && self.max.x >= other.min.x @@ -152,30 +175,36 @@ where } } +/// Fixed-point numeric wrapper with `FRACTION_BITS` fractional bits. #[derive(Clone, Copy, Debug, Default, Eq, Hash, Ord, PartialEq, PartialOrd)] pub struct Fixed { raw: i64, } impl Fixed { + /// Creates a fixed-point value from raw representation. pub const fn from_raw(raw: i64) -> Self { Self { raw } } + /// Creates a fixed-point value from integer input. 
pub const fn from_int(value: i64) -> Self { Self { raw: value << FRACTION_BITS, } } + /// Returns raw fixed-point representation. pub const fn raw(self) -> i64 { self.raw } + /// Floors value toward negative infinity and returns integer part. pub const fn floor_to_int(self) -> i64 { self.raw >> FRACTION_BITS } + /// Converts to `f64`. pub fn to_f64(self) -> f64 { self.raw as f64 / ((1u64 << FRACTION_BITS) as f64) } @@ -215,59 +244,71 @@ impl Div for Fixed { } } +/// `f32` wrapper with deterministic bitwise equality/hash semantics. #[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] pub struct StrictF32 { bits: u32, } impl StrictF32 { + /// Creates from raw IEEE-754 bits. pub const fn from_bits(bits: u32) -> Self { Self { bits } } + /// Creates from floating value by preserving raw bits. pub fn new(value: f32) -> Self { Self { bits: value.to_bits(), } } + /// Returns raw IEEE-754 bits. pub const fn to_bits(self) -> u32 { self.bits } + /// Converts to `f32`. pub fn to_f32(self) -> f32 { f32::from_bits(self.bits) } } +/// `f64` wrapper with deterministic total ordering and bitwise equality. #[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)] pub struct StrictF64 { bits: u64, } impl StrictF64 { + /// Creates from raw IEEE-754 bits. pub const fn from_bits(bits: u64) -> Self { Self { bits } } + /// Creates from floating value by preserving raw bits. pub fn new(value: f64) -> Self { Self { bits: value.to_bits(), } } + /// Returns raw IEEE-754 bits. pub const fn to_bits(self) -> u64 { self.bits } + /// Converts to `f64`. pub fn to_f64(self) -> f64 { f64::from_bits(self.bits) } + /// Returns whether value is finite. pub fn is_finite(self) -> bool { self.to_f64().is_finite() } + /// Clamps this value to `[min, max]`. 
pub fn clamp(self, min: Self, max: Self) -> Self { let value = self.to_f64().clamp(min.to_f64(), max.to_f64()); Self::new(value) diff --git a/src/parallel.rs b/src/parallel.rs index f3b34d0..f07e0ec 100644 --- a/src/parallel.rs +++ b/src/parallel.rs @@ -1,15 +1,19 @@ +//! Parallel deterministic replay helpers. + use rayon::prelude::*; use crate::game::Game; -use crate::session::Session; -use crate::types::{PlayerAction, ReplayTrace, Seed}; +use crate::session::InteractiveSession; +use crate::types::{DynamicReplayTrace, PlayerAction, Seed}; +/// Sequence of staged joint actions used for one replay execution. pub type JointActionTrace = Vec>>; +/// Replays many deterministic traces in parallel and returns resulting replay traces. pub fn replay_many( game: &G, traces: &[(Seed, JointActionTrace)], -) -> Vec> +) -> Vec> where G: Game + Copy + Send + Sync, G::Action: Send + Sync, @@ -19,7 +23,7 @@ where traces .par_iter() .map(|(seed, steps)| { - let mut session = Session::new(*game, *seed); + let mut session = InteractiveSession::new(*game, *seed); for step in steps { if session.is_terminal() { break; diff --git a/src/physics.rs b/src/physics.rs index e587d8b..9c52de4 100644 --- a/src/physics.rs +++ b/src/physics.rs @@ -1,25 +1,38 @@ +//! Fixed-capacity deterministic 2D AABB physics world primitives. + use crate::buffer::FixedVec; use crate::math::{Aabb2, StrictF64, Vec2}; use crate::types::Tick; +/// Physics body behavior mode. #[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)] pub enum BodyKind { + /// Non-moving collidable body. #[default] Static, + /// Externally controlled moving body. Kinematic, + /// Contact-only body that does not block movement. Trigger, } +/// One body in the 2D physics world. #[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)] pub struct PhysicsBody2d { + /// Stable body identifier. pub id: u16, + /// Body behavior kind. pub kind: BodyKind, + /// Body center position. pub position: Vec2, + /// Half extents of the AABB shape. 
pub half_extents: Vec2, + /// Whether the body participates in contacts. pub active: bool, } impl PhysicsBody2d { + /// Returns body axis-aligned bounding box. pub fn aabb(&self) -> Aabb2 { Aabb2::new( self.position - self.half_extents, @@ -27,6 +40,7 @@ impl PhysicsBody2d { ) } + /// Returns whether body geometry is finite and non-negative sized. pub fn invariant(&self) -> bool { self.position.x.is_finite() && self.position.y.is_finite() @@ -37,28 +51,42 @@ impl PhysicsBody2d { } } +/// Contact pair represented by sorted body ids. #[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)] pub struct Contact2d { + /// Lower body id. pub a: u16, + /// Higher body id. pub b: u16, } +/// Read-only physics world oracle view. pub trait PhysicsOracleView2d { + /// Returns world bounds. fn bounds(&self) -> Aabb2; + /// Returns current world tick. fn tick(&self) -> Tick; + /// Returns active body storage slice. fn bodies(&self) -> &[PhysicsBody2d]; + /// Returns cached contact pairs. fn contacts(&self) -> &[Contact2d]; } +/// Deterministic 2D AABB world with fixed-capacity storage. #[derive(Clone, Debug, Eq, Hash, PartialEq)] pub struct PhysicsWorld2d { + /// World bounds used for clamping bodies. pub bounds: Aabb2, + /// Bodies sorted by id. pub bodies: FixedVec, + /// Cached sorted contact pairs. pub contacts: FixedVec, + /// Simulation tick. pub tick: Tick, } impl PhysicsWorld2d { + /// Creates an empty world with specified bounds. pub fn new(bounds: Aabb2) -> Self { Self { bounds, @@ -68,6 +96,7 @@ impl PhysicsWorld2d bool { if !self.bounds.min.x.is_finite() || !self.bounds.min.y.is_finite() @@ -112,6 +141,7 @@ impl PhysicsWorld2d PhysicsWorld2d Option<&PhysicsBody2d> { let bodies = self.bodies.as_slice(); let mut index = 0usize; @@ -144,10 +175,12 @@ impl PhysicsWorld2d &PhysicsBody2d { self.body(id).expect("missing physics body") } + /// Returns mutable body by id. 
pub fn body_mut(&mut self, id: u16) -> Option<&mut PhysicsBody2d> { let bodies = self.bodies.as_mut_slice(); let mut index = 0usize; @@ -160,6 +193,7 @@ impl PhysicsWorld2d PhysicsWorld2d) { self.set_body_position_deferred(id, position); self.refresh_contacts(); @@ -183,6 +218,7 @@ impl PhysicsWorld2d) { self.translate_body_deferred(id, delta); self.refresh_contacts(); @@ -195,11 +231,13 @@ impl PhysicsWorld2d bool { let (left, right) = if a <= b { (a, b) } else { (b, a) }; let contacts = self.contacts.as_slice(); @@ -229,21 +267,111 @@ impl PhysicsWorld2d= body_min_x { + active[write] = active_index; + write += 1; } - if intersects(bodies[left].aabb(), bodies[right].aabb()) { + read += 1; + } + active_len = write; + + let mut active_index = 0usize; + while active_index < active_len { + let other_index = active[active_index]; + if intersects(aabbs[body_index], aabbs[other_index]) { + let (a, b) = if bodies[other_index].id <= bodies[body_index].id { + (bodies[other_index].id, bodies[body_index].id) + } else { + (bodies[body_index].id, bodies[other_index].id) + }; self.contacts - .push(Contact2d { - a: bodies[left].id, - b: bodies[right].id, - }) + .push(Contact2d { a, b }) .expect("physics contact capacity exceeded"); } + active_index += 1; } + + active[active_len] = body_index; + active_len += 1; + sorted_index += 1; } + + self.contacts + .as_mut_slice() + .sort_by_key(|contact| (contact.a, contact.b)); } } @@ -267,6 +395,52 @@ impl PhysicsOracleView2d } } +/// Synchronize a contiguous trigger-id range to `active_mask` bits without refreshing contacts. 
+pub fn set_trigger_mask_deferred( + world: &mut PhysicsWorld2d, + first_trigger_id: u16, + trigger_count: usize, + active_mask: u64, +) { + assert!( + trigger_count <= u64::BITS as usize, + "trigger_count {trigger_count} exceeds 64-bit trigger mask capacity" + ); + let mut index = 0usize; + while index < trigger_count { + let active = (active_mask & (1u64 << index)) != 0; + world.set_body_active_deferred(first_trigger_id + index as u16, active); + index += 1; + } +} + +/// Collect active trigger bits contacted by `actor_id`, deactivating collected trigger bodies. +pub fn collect_actor_trigger_contacts( + world: &mut PhysicsWorld2d, + actor_id: u16, + first_trigger_id: u16, + trigger_count: usize, + remaining_mask: &mut u64, +) -> u8 { + assert!( + trigger_count <= u64::BITS as usize, + "trigger_count {trigger_count} exceeds 64-bit trigger mask capacity" + ); + let mut collected = 0u8; + let mut index = 0usize; + while index < trigger_count { + let bit = 1u64 << index; + let trigger_id = first_trigger_id + index as u16; + if (*remaining_mask & bit) != 0 && world.has_contact(actor_id, trigger_id) { + *remaining_mask &= !bit; + world.set_body_active(trigger_id, false); + collected += 1; + } + index += 1; + } + collected +} + fn intersects(left: Aabb2, right: Aabb2) -> bool { left.min.x <= right.max.x && left.max.x >= right.min.x diff --git a/src/policy.rs b/src/policy.rs index 80c66ba..4b189be 100644 --- a/src/policy.rs +++ b/src/policy.rs @@ -1,21 +1,26 @@ +//! Policy interfaces and builtin policy strategies. + use std::marker::PhantomData; use crate::game::Game; use crate::rng::DeterministicRng; use crate::types::PlayerId; +/// Policy interface for selecting actions for active players. pub trait Policy { + /// Chooses one legal action for `player`. 
fn choose_action( &mut self, game: &G, state: &G::State, player: PlayerId, - observation: &G::PlayerObservation, + observation: &G::Obs, legal_actions: &[G::Action], rng: &mut DeterministicRng, ) -> G::Action; } +/// Deterministic policy that always selects the first legal action. #[derive(Clone, Copy, Debug, Default)] pub struct FirstLegalPolicy; @@ -25,7 +30,7 @@ impl Policy for FirstLegalPolicy { _game: &G, _state: &G::State, _player: PlayerId, - _observation: &G::PlayerObservation, + _observation: &G::Obs, legal_actions: &[G::Action], _rng: &mut DeterministicRng, ) -> G::Action { @@ -36,6 +41,7 @@ impl Policy for FirstLegalPolicy { } } +/// Uniform-random policy over legal actions. #[derive(Clone, Copy, Debug, Default)] pub struct RandomPolicy; @@ -45,7 +51,7 @@ impl Policy for RandomPolicy { _game: &G, _state: &G::State, _player: PlayerId, - _observation: &G::PlayerObservation, + _observation: &G::Obs, legal_actions: &[G::Action], rng: &mut DeterministicRng, ) -> G::Action { @@ -54,17 +60,30 @@ impl Policy for RandomPolicy { } } +/// Deterministic scripted policy with fallback to first legal action. #[derive(Clone, Debug)] pub struct ScriptedPolicy { script: Vec, position: usize, + strict: bool, } impl ScriptedPolicy { + /// Creates a scripted policy from a full action script. pub fn new(script: Vec) -> Self { Self { script, position: 0, + strict: false, + } + } + + /// Creates a strict scripted policy that fails fast on illegal or exhausted scripts. 
+ pub fn new_strict(script: Vec) -> Self { + Self { + script, + position: 0, + strict: true, } } } @@ -78,7 +97,7 @@ where _game: &G, _state: &G::State, _player: PlayerId, - _observation: &G::PlayerObservation, + _observation: &G::Obs, legal_actions: &[G::Action], _rng: &mut DeterministicRng, ) -> G::Action { @@ -87,6 +106,17 @@ where if legal_actions.contains(action) { return *action; } + if self.strict { + panic!( + "strict scripted policy action at index {} is illegal for current state", + self.position - 1 + ); + } + } else if self.strict { + panic!( + "strict scripted policy exhausted at index {}", + self.position + ); } legal_actions .first() @@ -95,12 +125,14 @@ where } } +/// Policy adapter built from a closure. pub struct FnPolicy { f: F, _marker: PhantomData, } impl FnPolicy { + /// Creates a closure-backed policy. pub fn new(f: F) -> Self { Self { f, @@ -112,21 +144,14 @@ impl FnPolicy { impl Policy for FnPolicy where G: Game, - F: FnMut( - &G, - &G::State, - PlayerId, - &G::PlayerObservation, - &[G::Action], - &mut DeterministicRng, - ) -> G::Action, + F: FnMut(&G, &G::State, PlayerId, &G::Obs, &[G::Action], &mut DeterministicRng) -> G::Action, { fn choose_action( &mut self, game: &G, state: &G::State, player: PlayerId, - observation: &G::PlayerObservation, + observation: &G::Obs, legal_actions: &[G::Action], rng: &mut DeterministicRng, ) -> G::Action { diff --git a/src/proof/liveness.rs b/src/proof/liveness.rs new file mode 100644 index 0000000..2c81c66 --- /dev/null +++ b/src/proof/liveness.rs @@ -0,0 +1,100 @@ +//! Liveness-oriented proof scaffolding layered on top of executable model semantics. + +use core::fmt::Debug; + +use crate::buffer::Buffer; +use crate::proof::model::ModelGame; +use crate::rng::DeterministicRng; +use crate::types::{StepOutcome, Termination}; + +/// Ranking-function based termination witness over the executable model. 
+pub trait TerminationWitness: ModelGame { + /// Returns a natural-number rank that must decrease on non-terminal progress steps. + fn model_rank(&self, state: &Self::ModelState) -> u64; + + /// Returns whether terminal states are exactly the rank-zero states. + fn terminal_rank_is_exact(&self, state: &Self::ModelState) -> bool { + self.model_is_terminal(state) == (self.model_rank(state) == 0) + } +} + +/// Checks the ranking-function progress obligation for one model transition. +pub fn assert_ranked_progress( + game: &G, + pre: &G::ModelState, + actions: &G::JointActionBuf, + seed: u64, +) { + let mut post = pre.clone(); + let mut rng = DeterministicRng::from_seed_and_stream(seed, 777); + let mut outcome = StepOutcome::::default(); + let pre_rank = game.model_rank(pre); + game.model_step_in_place(&mut post, actions, &mut rng, &mut outcome); + + assert!(game.terminal_rank_is_exact(pre)); + assert!(game.terminal_rank_is_exact(&post)); + + if !game.model_is_terminal(pre) { + assert!(game.model_is_terminal(&post) || game.model_rank(&post) < pre_rank); + } else { + assert_eq!(game.model_rank(&post), 0); + assert!(outcome.termination.is_terminal()); + } +} + +/// Declarative fairness witness scaffold for future game-specific obligations. +pub trait FairnessWitness: ModelGame { + /// Returns the fairness assumptions required by the game's liveness claims. + fn fairness_assumptions(&self) -> &'static [&'static str] { + &[] + } +} + +/// One weighted model outcome in a finite-support stochastic step. +#[derive(Clone, Debug, Default, Eq, Hash, PartialEq)] +pub struct FiniteSupportOutcome { + /// Successor state for this support point. + pub state: S, + /// Reward buffer emitted for this support point. + pub rewards: R, + /// Termination status emitted for this support point. + pub termination: Termination, + /// Relative support weight for this outcome. + pub weight: u64, +} + +/// Finite-support stochastic witness scaffold for probabilistic liveness proofs. 
+pub trait ProbabilisticWitness: ModelGame { + /// Buffer type that stores all finite-support outcomes for one model step. + type SupportBuf: Buffer> + + Clone + + Debug + + Default + + Eq + + PartialEq; + + /// Enumerates the finite support of one model step for the given state and actions. + fn model_step_support( + &self, + state: &Self::ModelState, + actions: &Self::JointActionBuf, + out: &mut Self::SupportBuf, + ); +} + +/// Checks that a probabilistic witness exposes a non-empty, positive-weight finite support. +pub fn assert_finite_support_is_valid( + game: &G, + state: &G::ModelState, + actions: &G::JointActionBuf, +) { + let mut support = G::SupportBuf::default(); + game.model_step_support(state, actions, &mut support); + assert!(!support.as_slice().is_empty()); + let mut total_weight = 0u64; + for outcome in support.as_slice() { + assert!(outcome.weight > 0); + total_weight = total_weight.saturating_add(outcome.weight); + } + assert!(total_weight > 0); +} diff --git a/src/proof/macros.rs b/src/proof/macros.rs new file mode 100644 index 0000000..b3a54f9 --- /dev/null +++ b/src/proof/macros.rs @@ -0,0 +1,63 @@ +/// Declares the standard Kani refinement harness triplet for a verified game. +#[macro_export] +macro_rules! declare_refinement_harnesses { + ( + game = $game:expr, + params = $params:expr, + seed = $seed:expr, + actions = $actions:expr, + trace = $trace:expr, + init = $init_name:ident, + step = $step_name:ident, + replay = $replay_name:ident $(,)? 
+ ) => { + #[kani::proof] + fn $init_name() { + let game = $game; + let params = $params; + $crate::proof::assert_model_init_refinement(&game, $seed, &params); + let state = game.init_with_params($seed, &params); + $crate::proof::assert_model_observation_refinement(&game, &state); + } + + #[kani::proof] + fn $step_name() { + let game = $game; + let params = $params; + let state = game.init_with_params($seed, &params); + let actions = $actions; + $crate::proof::assert_model_step_refinement(&game, &state, &actions, $seed); + } + + #[kani::proof] + fn $replay_name() { + let game = $game; + let params = $params; + let trace = $trace; + $crate::proof::assert_model_replay_refinement(game, $seed, params, &trace); + } + }; + ( + game = $game:expr, + params = $params:expr, + seed = $seed:expr, + actions = $actions:expr, + init = $init_name:ident, + step = $step_name:ident, + replay = $replay_name:ident $(,)? + ) => { + $crate::declare_refinement_harnesses!( + game = $game, + params = $params, + seed = $seed, + actions = $actions, + trace = { + let actions = $actions; + [actions] + }, + init = $init_name, + step = $step_name, + replay = $replay_name, + ); + }; +} diff --git a/src/proof/manifest.rs b/src/proof/manifest.rs new file mode 100644 index 0000000..572114d --- /dev/null +++ b/src/proof/manifest.rs @@ -0,0 +1,388 @@ +//! Parsed proof manifest and claim-status helpers. + +use std::sync::OnceLock; + +/// Classification for how strongly a component is covered by the proof system. +#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] +pub enum ProofStatus { + /// Backed by bounded checks over the Rust implementation. + Checked, + /// Backed by an abstract Verus model only. + Model, + /// Backed by both Verus model lemmas and Kani implementation/refinement proofs. + Refined, + /// Backed by runtime tests and checks, not formal proofs. + Runtime, + /// Explicitly outside the formal proof boundary. + OutOfScope, +} + +impl ProofStatus { + /// Parses a manifest status token. 
+ fn parse(raw: &str) -> Option { + match raw { + "checked" => Some(Self::Checked), + "model" => Some(Self::Model), + "refined" => Some(Self::Refined), + "runtime" => Some(Self::Runtime), + "out_of_scope" => Some(Self::OutOfScope), + _ => None, + } + } + + /// Returns the markdown heading used for this status in the claim matrix. + pub fn heading(self) -> &'static str { + match self { + Self::Checked => "Implementation-Checked Claims", + Self::Model => "Model-Only Claims", + Self::Refined => "Refined Claims", + Self::Runtime => "Runtime-Tested Claims", + Self::OutOfScope => "Out Of Scope", + } + } +} + +/// Kind of verification harness referenced by the proof manifest. +#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] +pub enum HarnessKind { + /// A Kani harness over compiled Rust code. + Kani, + /// A Verus proof file or model-checking target. + Verus, +} + +/// One proof harness entry declared in the manifest. +#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] +pub struct ManifestHarness { + /// Verification technology used by the harness. + pub kind: HarnessKind, + /// Stable manifest identifier for the harness. + pub id: &'static str, + /// Logical scope or component group the harness belongs to. + pub scope: &'static str, + /// Concrete target invoked by tooling for this harness. + pub target: &'static str, +} + +/// One claim about a component inside the verified boundary. +#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] +pub struct ManifestClaim { + /// Strength of the claim. + pub status: ProofStatus, + /// Stable component identifier used in reports. + pub component: &'static str, + /// Human-readable statement of what is claimed. + pub text: &'static str, + /// Proof harness identifiers that justify the claim. + pub links: &'static [&'static str], +} + +/// One explicit assumption required by a proof claim. +#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] +pub struct ManifestAssumption { + /// Component the assumption applies to. 
+ pub component: &'static str, + /// Human-readable statement of the assumption. + pub text: &'static str, +} + +/// Parsed proof manifest used by reporting and verification tooling. +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct VerificationManifest { + boundary: &'static str, + harnesses: Vec, + claims: Vec, + assumptions: Vec, +} + +impl VerificationManifest { + /// Returns the crate's statically embedded proof manifest. + pub fn current() -> &'static Self { + static MANIFEST: OnceLock = OnceLock::new(); + MANIFEST.get_or_init(|| { + let manifest = Self::parse(include_str!("../../proofs/manifest.txt")); + manifest.validate().expect("proof manifest is invalid"); + manifest + }) + } + + /// Parses a manifest file into a structured representation. + pub fn parse(raw: &'static str) -> Self { + let mut boundary = "kernel+builtins"; + let mut harnesses = Vec::new(); + let mut claims = Vec::new(); + let mut assumptions = Vec::new(); + + for line in raw.lines() { + let line = line.trim(); + if line.is_empty() || line.starts_with('#') { + continue; + } + + let parts: Vec<&'static str> = line.split('|').collect(); + match parts.as_slice() { + ["boundary", value] => boundary = value, + ["kani", id, scope, target] => harnesses.push(ManifestHarness { + kind: HarnessKind::Kani, + id, + scope, + target, + }), + ["verus", id, target] => harnesses.push(ManifestHarness { + kind: HarnessKind::Verus, + id, + scope: "global", + target, + }), + ["claim", status, component, text, links] => { + let status = + ProofStatus::parse(status).expect("proof manifest claim status is invalid"); + let links = parse_links(links); + claims.push(ManifestClaim { + status, + component, + text, + links, + }); + } + ["assumption", component, text] => { + assumptions.push(ManifestAssumption { component, text }) + } + _ => panic!("invalid proof manifest line: {line}"), + } + } + + Self { + boundary, + harnesses, + claims, + assumptions, + } + } + + /// Returns the declared proof boundary label. 
+ pub fn boundary(&self) -> &'static str { + self.boundary + } + + /// Returns every declared proof harness. + pub fn harnesses(&self) -> &[ManifestHarness] { + &self.harnesses + } + + /// Returns every declared proof claim. + pub fn claims(&self) -> &[ManifestClaim] { + &self.claims + } + + /// Returns every explicit assumption listed in the manifest. + pub fn assumptions(&self) -> &[ManifestAssumption] { + &self.assumptions + } + + /// Returns the Kani harnesses belonging to one manifest scope. + pub fn kani_harnesses_for_scope(&self, scope: &str) -> impl Iterator { + self.harnesses + .iter() + .filter(move |harness| harness.kind == HarnessKind::Kani && harness.scope == scope) + } + + /// Returns all Verus entries in the manifest. + pub fn verus_models(&self) -> impl Iterator { + self.harnesses + .iter() + .filter(|harness| harness.kind == HarnessKind::Verus) + } + + /// Renders the manifest into the public proof-claim markdown summary. + pub fn render_claim_markdown(&self) -> String { + let mut output = String::new(); + output.push_str("# Proof Claim Matrix\n\n"); + output.push_str( + "This document is derived from `proofs/manifest.txt` and states the current proof boundary.\n\n", + ); + output.push_str("## Verified Boundary\n\n"); + output.push_str("- "); + output.push_str(self.boundary); + output.push('\n'); + + for status in [ + ProofStatus::Refined, + ProofStatus::Checked, + ProofStatus::Model, + ProofStatus::Runtime, + ProofStatus::OutOfScope, + ] { + let mut first = true; + for claim in self.claims.iter().filter(|claim| claim.status == status) { + if first { + output.push_str("\n## "); + output.push_str(status.heading()); + output.push_str("\n\n"); + first = false; + } + output.push_str("- `"); + output.push_str(claim.component); + output.push_str("`: "); + output.push_str(claim.text); + if !claim.links.is_empty() { + output.push_str(" (proof ids: "); + let mut first_link = true; + for link in claim.links { + if !first_link { + output.push_str(", "); + } + 
output.push('`'); + output.push_str(link); + output.push('`'); + first_link = false; + } + output.push(')'); + } + output.push('\n'); + } + } + + if !self.assumptions.is_empty() { + output.push_str("\n## Assumptions\n\n"); + for assumption in &self.assumptions { + output.push_str("- `"); + output.push_str(assumption.component); + output.push_str("`: "); + output.push_str(assumption.text); + output.push('\n'); + } + } + + output + } + + /// Validates manifest consistency, proof links, and claim/status coherence. + pub fn validate(&self) -> Result<(), String> { + let mut harness_ids = Vec::new(); + for harness in &self.harnesses { + if harness_ids.contains(&harness.id) { + return Err(format!( + "duplicate harness id `{}` in proof manifest", + harness.id + )); + } + harness_ids.push(harness.id); + } + + let mut claim_components = Vec::new(); + for claim in &self.claims { + if claim_components.contains(&claim.component) { + return Err(format!( + "duplicate claim component `{}` in proof manifest", + claim.component + )); + } + claim_components.push(claim.component); + + for link in claim.links { + if !harness_ids.contains(link) { + return Err(format!( + "claim `{}` references unknown proof id `{link}`", + claim.component + )); + } + } + + let has_kani = claim.links.iter().any(|link| { + self.harnesses + .iter() + .any(|harness| harness.id == *link && harness.kind == HarnessKind::Kani) + }); + let has_verus = claim.links.iter().any(|link| { + self.harnesses + .iter() + .any(|harness| harness.id == *link && harness.kind == HarnessKind::Verus) + }); + + match claim.status { + ProofStatus::Refined => { + if !has_kani || !has_verus { + return Err(format!( + "refined claim `{}` must link both Kani and Verus proofs", + claim.component + )); + } + } + ProofStatus::Checked => { + if !has_kani || has_verus { + return Err(format!( + "checked claim `{}` must link Kani proofs only", + claim.component + )); + } + } + ProofStatus::Model => { + if !has_verus || has_kani { + return 
Err(format!( + "model claim `{}` must link Verus proofs only", + claim.component + )); + } + } + ProofStatus::Runtime | ProofStatus::OutOfScope => { + if has_kani || has_verus { + return Err(format!( + "{} claim `{}` must not link formal proof ids", + match claim.status { + ProofStatus::Runtime => "runtime", + ProofStatus::OutOfScope => "out_of_scope", + _ => unreachable!(), + }, + claim.component + )); + } + } + } + } + + Ok(()) + } +} + +fn parse_links(raw: &'static str) -> &'static [&'static str] { + let links: Vec<&'static str> = raw + .split(',') + .map(str::trim) + .filter(|link| !link.is_empty()) + .collect(); + Box::leak(links.into_boxed_slice()) +} + +#[cfg(test)] +mod tests { + use super::{ProofStatus, VerificationManifest}; + + #[test] + fn manifest_is_valid() { + VerificationManifest::current().validate().unwrap(); + } + + #[test] + fn rendered_claims_include_refined_section() { + let rendered = VerificationManifest::current().render_claim_markdown(); + assert!(rendered.contains(ProofStatus::Refined.heading())); + } + + #[test] + fn checked_claims_require_kani_only_links() { + let manifest = VerificationManifest::parse( + "kani|k|default|k\nverus|v|proofs/verus/core_model.rs\nclaim|checked|engine.bad|bad claim|v\n", + ); + let error = manifest.validate().unwrap_err(); + assert!(error.contains("must link Kani proofs only")); + } + + #[test] + fn runtime_claims_reject_formal_links() { + let manifest = + VerificationManifest::parse("kani|k|default|k\nclaim|runtime|engine.bad|bad claim|k\n"); + let error = manifest.validate().unwrap_err(); + assert!(error.contains("must not link formal proof ids")); + } +} diff --git a/src/proof/mod.rs b/src/proof/mod.rs new file mode 100644 index 0000000..a0cf6de --- /dev/null +++ b/src/proof/mod.rs @@ -0,0 +1,112 @@ +//! Proof-facing manifests, model/refinement traits, and reusable harness helpers. 
+ +#[macro_use] +mod macros; + +pub mod liveness; +pub mod manifest; +pub mod model; +pub mod refinement; + +use crate::buffer::Buffer; +use crate::game::Game; +use crate::types::{ReplayStep, Seed}; + +/// Rendered proof claim matrix generated from the current manifest. +pub const PROOF_CLAIM: &str = include_str!("../../proofs/claim.md"); +/// Raw proof manifest used to drive Kani, Verus, and claim reporting. +pub const PROOF_MANIFEST_RAW: &str = include_str!("../../proofs/manifest.txt"); + +pub use crate::verification::{ + assert_compact_roundtrip, assert_observation_contracts, assert_transition_contracts, +}; +pub use liveness::{ + FairnessWitness, FiniteSupportOutcome, ProbabilisticWitness, TerminationWitness, + assert_finite_support_is_valid, assert_ranked_progress, +}; +pub use manifest::{ + ManifestAssumption, ManifestClaim, ManifestHarness, ProofStatus, VerificationManifest, +}; +pub use model::{ModelGame, RefinementWitness, SafetyWitness, VerifiedGame}; +pub use refinement::{ + assert_model_init_refinement, assert_model_observation_refinement, + assert_model_replay_refinement, assert_model_step_refinement, +}; + +/// Returns the parsed proof manifest for this crate. +pub fn verification_manifest() -> &'static VerificationManifest { + VerificationManifest::current() +} + +/// Runs the historical generated-game safety surface checks. +pub fn assert_generated_game_surface( + game: &G, + state: &G::State, + actions: &G::JointActionBuf, + seed: Seed, +) { + assert_transition_contracts(game, state, actions, seed); + assert_observation_contracts(game, state); + if game.compact_spec().action_count > 0 + && let Some(first) = actions.as_slice().first() + { + assert_compact_roundtrip(game, &first.action); + } +} + +/// Runs the strengthened safety/init/step proof surface for an explicitly verified game. 
+pub fn assert_verified_game_safety_surface( + game: &G, + state: &G::State, + actions: &G::JointActionBuf, + params: &G::Params, + seed: Seed, +) { + assert_generated_game_surface(game, state, actions, seed); + assert_model_init_refinement(game, seed, params); + assert_model_observation_refinement(game, state); + assert_model_step_refinement(game, state, actions, seed); +} + +/// Runs the replay/rewind refinement surface for an explicitly verified game. +pub fn assert_verified_game_replay_surface( + game: G, + params: G::Params, + seed: Seed, + trace: &[G::JointActionBuf], +) where + G: VerifiedGame + Clone, + ReplayStep: Default, +{ + assert_model_replay_refinement(game, seed, params, trace); +} + +/// Runs the ranking-based liveness surface for a verified game. +pub fn assert_verified_termination_surface( + game: &G, + state: &G::ModelState, + actions: &G::JointActionBuf, + seed: Seed, +) { + assert_ranked_progress(game, state, actions, seed); +} + +/// Runs the finite-support stochastic surface for a verified game. +pub fn assert_verified_probabilistic_surface( + game: &G, + state: &G::ModelState, + actions: &G::JointActionBuf, +) { + assert_finite_support_is_valid(game, state, actions); +} + +/// Backwards-compatible alias for the safety/init/step surface. +pub fn assert_verified_game_surface( + game: &G, + state: &G::State, + actions: &G::JointActionBuf, + params: &G::Params, + seed: Seed, +) { + assert_verified_game_safety_surface(game, state, actions, params, seed); +} diff --git a/src/proof/model.rs b/src/proof/model.rs new file mode 100644 index 0000000..1fae85f --- /dev/null +++ b/src/proof/model.rs @@ -0,0 +1,141 @@ +//! Proof traits that separate runtime semantics from executable reference models. + +use core::fmt::Debug; + +use crate::compact::CompactSpec; +use crate::game::Game; +use crate::rng::DeterministicRng; +use crate::types::{PlayerId, Seed, StepOutcome}; + +/// Safety contracts lifted out of the runtime trait surface. 
+pub trait SafetyWitness: Game { + /// Returns whether the runtime state satisfies the game's safety invariant. + fn safety_state_invariant(&self, state: &Self::State) -> bool { + self.state_invariant(state) + } + + /// Returns whether an action value is valid for safety-oriented proofs. + fn safety_action_invariant(&self, action: &Self::Action) -> bool { + self.action_invariant(action) + } + + /// Returns whether a player-facing observation satisfies the declared invariant. + fn safety_player_observation_invariant( + &self, + state: &Self::State, + player: PlayerId, + observation: &Self::Obs, + ) -> bool { + self.player_observation_invariant(state, player, observation) + } + + /// Returns whether a spectator observation satisfies the declared invariant. + fn safety_spectator_observation_invariant( + &self, + state: &Self::State, + observation: &Self::Obs, + ) -> bool { + self.spectator_observation_invariant(state, observation) + } + + /// Returns whether the world view satisfies the declared invariant. + fn safety_world_view_invariant(&self, state: &Self::State, world: &Self::WorldView) -> bool { + self.world_view_invariant(state, world) + } + + /// Returns whether the step satisfied the declared transition postcondition. + fn safety_transition_postcondition( + &self, + pre: &Self::State, + actions: &Self::JointActionBuf, + post: &Self::State, + outcome: &StepOutcome, + ) -> bool { + self.transition_postcondition(pre, actions, post, outcome) + } +} + +impl SafetyWitness for T {} + +/// Executable reference semantics for a runtime `Game` implementation. +pub trait ModelGame: Game { + /// Model state used by refinement and liveness proofs. + type ModelState: Clone + Debug + Eq + PartialEq; + /// Model observation used by refinement and liveness proofs. + type ModelObs: Clone + Debug + Eq + PartialEq; + /// Model world view used by refinement and liveness proofs. 
+ type ModelWorldView: Clone + Debug + Eq + PartialEq; + + /// Initializes the model state for a seed and parameter set. + fn model_init_with_params(&self, seed: Seed, params: &Self::Params) -> Self::ModelState; + /// Returns whether the model state is terminal. + fn model_is_terminal(&self, state: &Self::ModelState) -> bool; + /// Collects the model players that must act from the given state. + fn model_players_to_act(&self, state: &Self::ModelState, out: &mut Self::PlayerBuf); + /// Collects the legal actions for one player in the given model state. + fn model_legal_actions( + &self, + state: &Self::ModelState, + player: PlayerId, + out: &mut Self::ActionBuf, + ); + /// Returns the player-facing observation for the model state. + fn model_observe_player(&self, state: &Self::ModelState, player: PlayerId) -> Self::ModelObs; + /// Returns the spectator observation for the model state. + fn model_observe_spectator(&self, state: &Self::ModelState) -> Self::ModelObs; + /// Returns the world view for the model state. + fn model_world_view(&self, state: &Self::ModelState) -> Self::ModelWorldView; + /// Applies one model transition in place using the same action/rng surface as runtime. + fn model_step_in_place( + &self, + state: &mut Self::ModelState, + actions: &Self::JointActionBuf, + rng: &mut DeterministicRng, + out: &mut StepOutcome, + ); + + /// Returns the compact encoding contract for the given parameters. + fn model_compact_spec_for_params(&self, params: &Self::Params) -> CompactSpec { + self.compact_spec_for_params(params) + } +} + +/// Refinement witness between runtime values and executable model values. +pub trait RefinementWitness: ModelGame + SafetyWitness { + /// Projects a runtime state into the proof model. + fn runtime_state_to_model(&self, state: &Self::State) -> Self::ModelState; + /// Projects a runtime observation into the proof model. 
+ fn runtime_observation_to_model(&self, observation: &Self::Obs) -> Self::ModelObs; + /// Projects a runtime world view into the proof model. + fn runtime_world_view_to_model(&self, world: &Self::WorldView) -> Self::ModelWorldView; + + /// Returns whether the runtime state matches the provided model state. + fn state_refines_model(&self, state: &Self::State, model: &Self::ModelState) -> bool { + self.runtime_state_to_model(state) == *model + } + + /// Returns whether the runtime observation matches the provided model observation. + fn observation_refines_model(&self, observation: &Self::Obs, model: &Self::ModelObs) -> bool { + self.runtime_observation_to_model(observation) == *model + } + + /// Returns whether the runtime world view matches the provided model world view. + fn world_view_refines_model( + &self, + world: &Self::WorldView, + model: &Self::ModelWorldView, + ) -> bool { + self.runtime_world_view_to_model(world) == *model + } + + /// Returns whether the runtime compact schema matches the model compact schema. + fn compact_spec_refines_model(&self, params: &Self::Params) -> bool { + self.compact_spec_for_params(params) == self.model_compact_spec_for_params(params) + } +} + +/// Explicit marker for games that opt into the stronger proof/refinement surface. +/// +/// This is intentionally not blanket-implemented: a game should opt in only after +/// its verification surface and manifest claim are deliberate. +pub trait VerifiedGame: RefinementWitness {} diff --git a/src/proof/refinement.rs b/src/proof/refinement.rs new file mode 100644 index 0000000..423f793 --- /dev/null +++ b/src/proof/refinement.rs @@ -0,0 +1,173 @@ +//! Helpers that compare runtime game behavior against executable model semantics. 
+ +use crate::buffer::Buffer; +use crate::proof::model::RefinementWitness; +use crate::rng::DeterministicRng; +use crate::session::{FixedHistory, SessionKernel}; +use crate::types::{ReplayStep, Seed, StepOutcome}; + +/// Checks that runtime initialization agrees with the executable proof model. +pub fn assert_model_init_refinement( + game: &G, + seed: Seed, + params: &G::Params, +) { + let state = game.init_with_params(seed, params); + let model = game.model_init_with_params(seed, params); + assert!(game.safety_state_invariant(&state)); + assert!(game.state_refines_model(&state, &model)); + assert_eq!(game.is_terminal(&state), game.model_is_terminal(&model)); + assert!(game.compact_spec_refines_model(params)); + + let mut runtime_players = G::PlayerBuf::default(); + let mut model_players = G::PlayerBuf::default(); + game.players_to_act(&state, &mut runtime_players); + game.model_players_to_act(&model, &mut model_players); + assert_eq!(runtime_players, model_players); + + for player in 0..game.player_count() { + let mut runtime_actions = G::ActionBuf::default(); + let mut model_actions = G::ActionBuf::default(); + game.legal_actions(&state, player, &mut runtime_actions); + game.model_legal_actions(&model, player, &mut model_actions); + assert_eq!(runtime_actions, model_actions); + } +} + +/// Checks that runtime observations and world views agree with the proof model. 
+pub fn assert_model_observation_refinement(game: &G, state: &G::State) { + let model = game.runtime_state_to_model(state); + for player in 0..game.player_count() { + let observation = game.observe_player(state, player); + let model_observation = game.model_observe_player(&model, player); + assert!(game.safety_player_observation_invariant(state, player, &observation)); + assert!(game.observation_refines_model(&observation, &model_observation)); + } + + let spectator = game.observe_spectator(state); + let model_spectator = game.model_observe_spectator(&model); + assert!(game.safety_spectator_observation_invariant(state, &spectator)); + assert!(game.observation_refines_model(&spectator, &model_spectator)); + + let world = game.world_view(state); + let model_world = game.model_world_view(&model); + assert!(game.safety_world_view_invariant(state, &world)); + assert!(game.world_view_refines_model(&world, &model_world)); +} + +/// Checks that one runtime transition agrees with the executable proof model. 
+pub fn assert_model_step_refinement( + game: &G, + pre: &G::State, + actions: &G::JointActionBuf, + seed: Seed, +) { + assert!(game.safety_state_invariant(pre)); + for action in actions.as_slice() { + assert!(game.safety_action_invariant(&action.action)); + } + + let mut runtime_state = pre.clone(); + let mut model_state = game.runtime_state_to_model(pre); + let mut runtime_rng = DeterministicRng::from_seed_and_stream(seed, 99); + let mut model_rng = runtime_rng; + let mut runtime_outcome = StepOutcome::::default(); + let mut model_outcome = StepOutcome::::default(); + + game.step_in_place( + &mut runtime_state, + actions, + &mut runtime_rng, + &mut runtime_outcome, + ); + game.model_step_in_place( + &mut model_state, + actions, + &mut model_rng, + &mut model_outcome, + ); + + assert_eq!(runtime_rng, model_rng); + assert_eq!(runtime_outcome, model_outcome); + assert!(game.safety_state_invariant(&runtime_state)); + assert!(game.state_refines_model(&runtime_state, &model_state)); + assert_model_observation_refinement(game, &runtime_state); + assert!(game.safety_transition_postcondition(pre, actions, &runtime_state, &runtime_outcome,)); +} + +/// Checks that session replay/rewind semantics agree with repeated model execution. 
+pub fn assert_model_replay_refinement( + game: G, + seed: Seed, + params: G::Params, + trace: &[G::JointActionBuf], +) where + G: RefinementWitness + Clone, + ReplayStep: Default, +{ + type ProofHistory = FixedHistory; + + let mut session = + SessionKernel::>::new_with_params(game.clone(), seed, params.clone()); + let mut model_state = game.model_init_with_params(seed, &params); + let mut model_rng = DeterministicRng::from_seed_and_stream(seed, 1); + + for actions in trace { + if session.is_terminal() { + break; + } + let outcome = session.step_with_joint_actions(actions).clone(); + let mut model_outcome = StepOutcome::::default(); + game.model_step_in_place( + &mut model_state, + actions, + &mut model_rng, + &mut model_outcome, + ); + model_outcome.tick = session.current_tick(); + assert_eq!(outcome, model_outcome); + assert_eq!(session.rng(), model_rng); + assert!(game.state_refines_model(session.state(), &model_state)); + assert_model_observation_refinement(&game, session.state()); + + let recorded = &session.trace().steps[(session.current_tick() - 1) as usize]; + assert_eq!(recorded.tick, outcome.tick); + assert_eq!(&recorded.actions, actions); + assert_eq!(&recorded.rewards, &outcome.rewards); + assert_eq!(recorded.termination, outcome.termination); + } + + let executed_ticks = session.trace().len() as u64; + let mut target_tick = 0u64; + while target_tick <= executed_ticks { + let restored_state = session + .state_at(target_tick) + .expect("recorded tick must be restorable"); + let fork = session + .fork_at(target_tick) + .expect("recorded tick must produce a rewound fork"); + let mut replay_state = game.model_init_with_params(seed, &params); + let mut replay_rng = DeterministicRng::from_seed_and_stream(seed, 1); + let mut replay_tick = 0usize; + while replay_tick < target_tick as usize { + let mut replay_outcome = StepOutcome::::default(); + game.model_step_in_place( + &mut replay_state, + &trace[replay_tick], + &mut replay_rng, + &mut replay_outcome, + ); + 
replay_tick += 1; + } + + assert!(game.safety_state_invariant(&restored_state)); + assert!(game.state_refines_model(&restored_state, &replay_state)); + assert_model_observation_refinement(&game, &restored_state); + assert_eq!(fork.current_tick(), target_tick); + assert_eq!(*fork.state(), restored_state); + assert_eq!(fork.rng(), replay_rng); + assert_model_observation_refinement(&game, fork.state()); + + target_tick += 1; + } +} diff --git a/src/registry/mod.rs b/src/registry/mod.rs new file mode 100644 index 0000000..eacbe5c --- /dev/null +++ b/src/registry/mod.rs @@ -0,0 +1,134 @@ +//! Static registry describing builtin games and policy metadata. + +/// Policy metadata surfaced by CLI and UI. +#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] +pub struct PolicyDescriptor { + /// Stable policy identifier. + pub name: &'static str, + /// Human-facing policy description. + pub description: &'static str, +} + +/// Control prompt metadata for interactive play. +#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] +pub struct ControlMap { + /// Human input prompt shown by the CLI. + pub prompt: &'static str, +} + +/// Full static descriptor for one builtin game. +#[derive(Clone, Copy, Debug)] +pub struct GameDescriptor { + /// Stable external game name. + pub name: &'static str, + /// CLI runner callback used by descriptor-driven dispatch. + #[cfg(feature = "cli")] + pub(crate) runner: fn(crate::cli::CliConfig, crate::cli::RunMode) -> Result<(), String>, + /// Optional controls metadata for interactive frontends. + pub controls: Option<&'static ControlMap>, + /// True when the default renderer supports this game. + pub default_renderer: bool, + /// True when the physics renderer supports this game. + pub physics_renderer: bool, + /// Supported policy descriptors. 
+ pub policies: &'static [PolicyDescriptor], +} + +const STANDARD_POLICIES: [PolicyDescriptor; 4] = [ + PolicyDescriptor { + name: "human", + description: "Interactive stdin policy", + }, + PolicyDescriptor { + name: "random", + description: "Uniform random legal actions", + }, + PolicyDescriptor { + name: "first", + description: "Always pick the first legal action", + }, + PolicyDescriptor { + name: "script:", + description: "Comma-separated deterministic action script", + }, +]; + +const TICTACTOE_CONTROLS: ControlMap = ControlMap { + prompt: "choose move [0-8]", +}; +const BLACKJACK_CONTROLS: ControlMap = ControlMap { + prompt: "choose action [hit/stand]", +}; +#[cfg(feature = "physics")] +const PLATFORMER_CONTROLS: ControlMap = ControlMap { + prompt: "choose action [stay/left/right/jump]", +}; + +/// Returns all builtin game descriptors enabled for the current feature set. +pub fn all_games() -> &'static [GameDescriptor] { + #[cfg(feature = "physics")] + { + static GAMES: [GameDescriptor; 3] = [ + GameDescriptor { + name: "tictactoe", + #[cfg(feature = "cli")] + runner: crate::cli::run_tictactoe, + controls: Some(&TICTACTOE_CONTROLS), + default_renderer: cfg!(feature = "render"), + physics_renderer: false, + policies: &STANDARD_POLICIES, + }, + GameDescriptor { + name: "blackjack", + #[cfg(feature = "cli")] + runner: crate::cli::run_blackjack, + controls: Some(&BLACKJACK_CONTROLS), + default_renderer: cfg!(feature = "render"), + physics_renderer: false, + policies: &STANDARD_POLICIES, + }, + GameDescriptor { + name: "platformer", + #[cfg(feature = "cli")] + runner: crate::cli::run_platformer, + controls: Some(&PLATFORMER_CONTROLS), + default_renderer: cfg!(feature = "render"), + physics_renderer: cfg!(feature = "render"), + policies: &STANDARD_POLICIES, + }, + ]; + &GAMES + } + + #[cfg(not(feature = "physics"))] + { + static GAMES: [GameDescriptor; 2] = [ + GameDescriptor { + name: "tictactoe", + #[cfg(feature = "cli")] + runner: crate::cli::run_tictactoe, + 
controls: Some(&TICTACTOE_CONTROLS), + default_renderer: cfg!(feature = "render"), + physics_renderer: false, + policies: &STANDARD_POLICIES, + }, + GameDescriptor { + name: "blackjack", + #[cfg(feature = "cli")] + runner: crate::cli::run_blackjack, + controls: Some(&BLACKJACK_CONTROLS), + default_renderer: cfg!(feature = "render"), + physics_renderer: false, + policies: &STANDARD_POLICIES, + }, + ]; + &GAMES + } +} + +/// Finds a builtin game descriptor by stable name. +pub fn find_game(name: &str) -> Option<&'static GameDescriptor> { + all_games() + .iter() + .find(|descriptor| descriptor.name == name) +} diff --git a/src/render/builtin.rs b/src/render/builtin.rs index 9abe59d..1e440cf 100644 --- a/src/render/builtin.rs +++ b/src/render/builtin.rs @@ -1,12 +1,14 @@ +//! Builtin presenters for builtin environments. + use winit::event::{ElementState, MouseButton, WindowEvent}; use winit::keyboard::{KeyCode, PhysicalKey}; -use crate::games::{ +use crate::builtin::{ Blackjack, BlackjackAction, BlackjackObservation, BlackjackPhase, TicTacToe, TicTacToeAction, TicTacToeCell, TicTacToeObservation, }; #[cfg(feature = "physics")] -use crate::games::{Platformer, PlatformerAction, PlatformerConfig, PlatformerObservation}; +use crate::builtin::{Platformer, PlatformerAction, PlatformerConfig, PlatformerObservation}; #[cfg(feature = "physics")] use crate::physics::PhysicsOracleView2d; @@ -29,6 +31,7 @@ const DANGER: Color = Color::from_rgb8(248, 113, 113); const TEXT: Color = Color::from_rgb8(241, 245, 249); const MUTED: Color = Color::from_rgb8(148, 163, 184); +/// Observation presenter for tic-tac-toe. #[derive(Clone, Copy, Debug, Default)] pub struct TicTacToePresenter { cursor: Point2, @@ -224,6 +227,7 @@ impl Presenter for TicTacToePresenter { impl ObservationPresenter for TicTacToePresenter {} +/// Observation presenter for blackjack. 
#[derive(Clone, Copy, Debug, Default)] pub struct BlackjackPresenter { cursor: Point2, @@ -393,8 +397,10 @@ impl Presenter for BlackjackPresenter { impl ObservationPresenter for BlackjackPresenter {} #[cfg(feature = "physics")] +/// Observation presenter for platformer. #[derive(Clone, Copy, Debug)] pub struct PlatformerPresenter { + /// Platformer configuration used for scene scaling. pub config: PlatformerConfig, cursor: Point2, left_held: bool, @@ -556,6 +562,7 @@ impl Presenter for PlatformerPresenter { impl ObservationPresenter for PlatformerPresenter {} #[cfg(feature = "physics")] +/// Oracle/world presenter for platformer physics debugging. #[derive(Clone, Copy, Debug, Default)] pub struct PlatformerPhysicsPresenter { inner: PlatformerPresenter, @@ -563,6 +570,7 @@ pub struct PlatformerPhysicsPresenter { #[cfg(feature = "physics")] impl PlatformerPhysicsPresenter { + /// Creates a physics presenter with explicit platformer config. pub fn new(config: PlatformerConfig) -> Self { Self { inner: PlatformerPresenter { @@ -793,7 +801,7 @@ mod tests { use super::{ BlackjackPresenter, PlatformerPhysicsPresenter, PlatformerPresenter, TicTacToePresenter, }; - use crate::games::{Blackjack, Platformer, TicTacToe}; + use crate::builtin::{Blackjack, Platformer, TicTacToe}; use crate::render::{ FrameMetrics, Presenter, RealtimeDriver, RenderGameView, Scene2d, TickDriver, TurnBasedDriver, @@ -801,7 +809,7 @@ mod tests { use crate::session::Session; type TicTacToeDriver = - TurnBasedDriver>; + TurnBasedDriver>; fn tictactoe_view() -> (TicTacToeDriver, FrameMetrics) { ( @@ -846,7 +854,7 @@ mod tests { #[test] fn platformer_presenters_emit_geometry() { let session = Session::new(Platformer::default(), 1); - let driver = RealtimeDriver::new(session, crate::games::PlatformerAction::Stay); + let driver = RealtimeDriver::new(session, crate::builtin::PlatformerAction::Stay); let metrics = FrameMetrics { width: 1180, height: 620, @@ -856,7 +864,7 @@ mod tests { let view = 
RenderGameView::from_cache(driver.session().game(), &cache); let mut observation_presenter = PlatformerPresenter::default(); let mut oracle_presenter = - PlatformerPhysicsPresenter::new(crate::games::PlatformerConfig::default()); + PlatformerPhysicsPresenter::new(crate::builtin::PlatformerConfig::default()); let mut observation_scene = Scene2d::default(); let mut oracle_scene = Scene2d::default(); observation_presenter.populate_scene(&mut observation_scene, metrics, &view); diff --git a/src/render/mod.rs b/src/render/mod.rs index 0588a48..f8a6d04 100644 --- a/src/render/mod.rs +++ b/src/render/mod.rs @@ -1,8 +1,10 @@ +//! Rendering subsystem exposing scene commands, presenters, and runtime loop. + mod pacer; mod runtime; mod scene; -#[cfg(feature = "builtin-games")] +#[cfg(feature = "builtin")] pub mod builtin; pub use pacer::TickPacer; diff --git a/src/render/pacer.rs b/src/render/pacer.rs index 3e3ae02..6cd263b 100644 --- a/src/render/pacer.rs +++ b/src/render/pacer.rs @@ -1,5 +1,8 @@ +//! Wall-clock to simulation-tick pacing helper. + use std::time::Instant; +/// Converts wall-clock frame deltas into bounded simulation tick counts. #[derive(Clone, Debug)] pub struct TickPacer { tick_period_seconds: f64, diff --git a/src/render/runtime.rs b/src/render/runtime.rs index 47d7fd6..68a0437 100644 --- a/src/render/runtime.rs +++ b/src/render/runtime.rs @@ -1,3 +1,5 @@ +//! Runtime renderer abstractions, drivers, and native window integration. + use std::fmt; #[cfg(not(target_arch = "wasm32"))] use std::mem; @@ -71,20 +73,31 @@ fn fs_main(input: VertexOutput) -> @location(0) vec4 { } "#; +/// Presentation mode used by render presenters. #[derive(Clone, Copy, Debug, Eq, PartialEq)] pub enum RenderMode { + /// Player-observation-oriented presentation. Observation, + /// Full oracle/world-view presentation. OracleWorld, } +/// Renderer timing and window configuration. #[derive(Clone, Copy, Debug)] pub struct RenderConfig { + /// Target simulation tick rate. 
pub tick_rate_hz: f64, + /// Maximum simulation ticks processed per frame. pub max_catch_up_ticks: usize, + /// Enables display vsync when true. pub vsync: bool, + /// Enables debug overlay panel. pub show_debug_overlay: bool, + /// Presenter mode selector. pub mode: RenderMode, + /// Initial window width in pixels. pub window_width: u32, + /// Initial window height in pixels. pub window_height: u32, } @@ -102,39 +115,58 @@ impl Default for RenderConfig { } } +/// Per-frame viewport metrics supplied to presenters. #[derive(Clone, Copy, Debug, Default)] pub struct FrameMetrics { + /// Drawable width in pixels. pub width: u32, + /// Drawable height in pixels. pub height: u32, + /// Platform scale factor. pub scale_factor: f64, } +/// Action stream command consumed by runtime drivers. #[derive(Clone, Copy, Debug, Eq, PartialEq)] pub enum ActionCommand { + /// Submit a one-shot action for the next tick. Pulse(A), + /// Set a continuous action held across ticks. SetContinuous(A), + /// Clear the current continuous action. ClearContinuous, } +/// Sink for presenter-generated input commands. pub trait ActionSink { + /// Submits an input command to the driver. fn submit_command(&mut self, command: ActionCommand); } +/// Simulation driver interface consumed by the renderer. pub trait TickDriver { + /// History backend used by the underlying session. type History: HistoryStore; + /// Returns immutable access to the current session. fn session(&self) -> &SessionKernel; + /// Returns most recent transition outcome, if any. fn last_outcome(&self) -> Option<&StepOutcome>; + /// Advances simulation by up to `due_ticks`. fn advance_ticks(&mut self, due_ticks: usize); } +/// Presenter contract for translating game state into scene commands. pub trait Presenter { + /// Returns window title text. fn title(&self, game: &G) -> String; + /// Preferred initial window size. fn preferred_window_size(&self) -> (u32, u32) { (960, 640) } + /// Handles one window/input event. 
fn on_window_event( &mut self, event: &WindowEvent, @@ -143,6 +175,7 @@ pub trait Presenter { actions: &mut dyn ActionSink, ); + /// Populates scene commands for the current frame. fn populate_scene( &mut self, scene: &mut Scene2d, @@ -151,15 +184,17 @@ pub trait Presenter { ); } +/// Marker trait for observation-mode presenters. pub trait ObservationPresenter: Presenter {} +/// Marker trait for oracle/world-mode presenters. pub trait OraclePresenter: Presenter {} #[derive(Debug)] pub(crate) struct ViewCache { tick: Tick, - player_observation: G::PlayerObservation, - spectator_observation: G::SpectatorObservation, + player_observation: G::Obs, + spectator_observation: G::Obs, world_view: G::WorldView, previous_world_view: Option, last_outcome: Option>, @@ -197,6 +232,7 @@ impl ViewCache { } } +/// Read-only frame view combining game descriptor and cached session-derived data. pub struct RenderGameView<'a, G: Game> { game: &'a G, cache: &'a ViewCache, @@ -207,43 +243,53 @@ impl<'a, G: Game> RenderGameView<'a, G> { Self { game, cache } } + /// Returns game descriptor. pub fn game(&self) -> &'a G { self.game } + /// Returns current simulation tick. pub fn tick(&self) -> Tick { self.cache.tick } - pub fn player_observation(&self) -> &G::PlayerObservation { + /// Returns player-local observation. + pub fn player_observation(&self) -> &G::Obs { &self.cache.player_observation } - pub fn spectator_observation(&self) -> &G::SpectatorObservation { + /// Returns spectator observation. + pub fn spectator_observation(&self) -> &G::Obs { &self.cache.spectator_observation } + /// Returns world/oracle view. pub fn world_view(&self) -> &G::WorldView { &self.cache.world_view } + /// Returns previous world view when interpolation is active. pub fn previous_world_view(&self) -> Option<&G::WorldView> { self.cache.previous_world_view.as_ref() } + /// Returns most recent transition outcome. 
pub fn last_outcome(&self) -> Option<&StepOutcome> { self.cache.last_outcome.as_ref() } + /// Returns reward for `player` in the most recent outcome. pub fn reward_for(&self, player: usize) -> Reward { self.last_outcome() .map_or(0, |outcome| outcome.reward_for(player)) } + /// Returns whether current state is terminal. pub fn is_terminal(&self) -> bool { self.cache.is_terminal } + /// Returns interpolation alpha in `[0, 1]`. pub fn interpolation_alpha(&self) -> f32 { self.cache.interpolation_alpha } @@ -295,6 +341,7 @@ where scene } +/// Driver that advances only when explicit actions are provided. #[derive(Debug)] pub struct TurnBasedDriver> { session: SessionKernel, @@ -303,6 +350,7 @@ pub struct TurnBasedDriver> { } impl> TurnBasedDriver { + /// Creates a turn-based driver from a session. pub fn new(session: SessionKernel) -> Self { Self { session, @@ -348,6 +396,7 @@ impl> TickDriver for TurnBasedDriver { } } +/// Driver for realtime input with neutral and continuous actions. #[derive(Debug)] pub struct RealtimeDriver> { session: SessionKernel, @@ -358,6 +407,7 @@ pub struct RealtimeDriver> { } impl> RealtimeDriver { + /// Creates a realtime driver with a neutral fallback action. pub fn new(session: SessionKernel, neutral_action: G::Action) -> Self { Self { session, @@ -412,6 +462,7 @@ impl> TickDriver for RealtimeDriver { } } +/// Driver that advances using an internal policy, ignoring user input. #[derive(Debug)] pub struct PassivePolicyDriver, P: Policy> { session: SessionKernel, @@ -420,6 +471,7 @@ pub struct PassivePolicyDriver, P: Policy> { } impl, P: Policy> PassivePolicyDriver { + /// Creates a passive-policy driver. pub fn new(session: SessionKernel, policy: P) -> Self { Self { session, @@ -455,6 +507,7 @@ impl, P: Policy> TickDriver for PassivePolicyD } } +/// Error returned by native renderer setup or frame execution. 
#[derive(Debug)] pub struct RenderError { message: String, @@ -476,6 +529,7 @@ impl fmt::Display for RenderError { impl std::error::Error for RenderError {} +/// Top-level renderer application wrapper. pub struct RendererApp + ActionSink, P: Presenter> { config: RenderConfig, driver: D, @@ -484,6 +538,7 @@ pub struct RendererApp + ActionSink, P: Presenter + ActionSink, P: Presenter> RendererApp { + /// Creates a renderer application from config, driver, and presenter. pub fn new(config: RenderConfig, driver: D, presenter: P) -> Self { Self { config, @@ -498,6 +553,7 @@ impl + ActionSink, P: Presenter> RendererApp + ActionSink + 'static, P: Presenter + 'static> RendererApp { + /// Runs the native window event loop. pub fn run_native(self) -> Result<(), RenderError> { let event_loop = EventLoop::new().map_err(|error| RenderError::new(error.to_string()))?; let mut app = NativeApp::new(self.config, self.driver, self.presenter); @@ -509,6 +565,7 @@ impl + ActionSink + 'static, P: Presenter #[cfg(target_arch = "wasm32")] impl + ActionSink, P: Presenter> RendererApp { + /// Returns an error because native window rendering is unavailable on `wasm32`. 
pub fn run_native(self) -> Result<(), RenderError> { let RendererApp { config, @@ -619,6 +676,19 @@ struct WindowState + ActionSink, P: Presenter> { gpu: GpuState, } +#[cfg(not(target_arch = "wasm32"))] +#[derive(Default)] +struct QueuedActions { + commands: Vec>, +} + +#[cfg(not(target_arch = "wasm32"))] +impl ActionSink for QueuedActions { + fn submit_command(&mut self, command: ActionCommand) { + self.commands.push(command); + } +} + #[cfg(not(target_arch = "wasm32"))] impl + ActionSink, P: Presenter> WindowState { async fn new( @@ -661,28 +731,33 @@ impl + ActionSink, P: Presenter> WindowState::default(); + { + let view = RenderGameView::from_cache(self.driver.session().game(), &self.cache); + self.presenter + .on_window_event(event, metrics, &view, &mut queued); + } + for command in queued.commands { + self.driver.submit_command(command); + } self.request_redraw(); } @@ -699,11 +774,7 @@ impl + ActionSink, P: Presenter> WindowState, + text_order: Vec, + geometry_order: Vec, window: Arc, } @@ -891,6 +964,8 @@ impl GpuState { let text_renderer = TextRenderer::new(&mut atlas, &device, MultisampleState::default(), None); let text_buffers = Vec::with_capacity(16); + let text_order = Vec::with_capacity(16); + let geometry_order = Vec::with_capacity(128); surface_config.width = surface_config.width.max(1); surface_config.height = surface_config.height.max(1); @@ -912,6 +987,8 @@ impl GpuState { atlas, text_renderer, text_buffers, + text_order, + geometry_order, window, }) } @@ -1020,9 +1097,13 @@ impl GpuState { )); } - let mut texts = scene.texts.clone(); - texts.sort_by_key(|text| text.layer); - for (index, text) in texts.iter().enumerate() { + self.text_order.clear(); + self.text_order.extend(0..scene.texts.len()); + self.text_order + .sort_by_key(|&index| scene.texts[index].layer); + + for (index, text_index) in self.text_order.iter().copied().enumerate() { + let text = &scene.texts[text_index]; let buffer = &mut self.text_buffers[index]; *buffer = 
GlyphBuffer::new( &mut self.font_system, @@ -1044,12 +1125,11 @@ impl GpuState { } let mut text_areas = Vec::with_capacity(scene.texts.len()); - for (index, text) in texts.iter().enumerate() { - // SAFETY: - // Each loop iteration accesses a distinct buffer slot by index, so the returned mutable - // references do not alias each other while `text_areas` is alive for the immediate - // `prepare` call below. - let buffer = unsafe { &mut *self.text_buffers.as_mut_ptr().add(index) }; + for (buffer, text_index) in self.text_buffers[..scene.texts.len()] + .iter_mut() + .zip(self.text_order.iter().copied()) + { + let text = &scene.texts[text_index]; text_areas.push(TextArea { buffer, left: text.position.x, @@ -1081,29 +1161,48 @@ impl GpuState { fn prepare_geometry(&mut self, scene: &Scene2d) { self.staging_vertices.clear(); - let mut geometry = Vec::with_capacity( + self.geometry_order.clear(); + self.geometry_order.reserve( scene.panels.len() + scene.lines.len() + scene.circles.len() + scene.textured_quads.len(), ); - for panel in &scene.panels { - geometry.push(GeometryPrimitive::Panel(panel)); + + for (index, panel) in scene.panels.iter().enumerate() { + self.geometry_order.push(GeometryOrderEntry { + layer: panel.layer, + kind: GeometryKind::Panel, + index, + }); } - for textured in &scene.textured_quads { - geometry.push(GeometryPrimitive::TexturedQuad(textured)); + for (index, textured) in scene.textured_quads.iter().enumerate() { + self.geometry_order.push(GeometryOrderEntry { + layer: textured.layer, + kind: GeometryKind::TexturedQuad, + index, + }); } - for line in &scene.lines { - geometry.push(GeometryPrimitive::Line(line)); + for (index, line) in scene.lines.iter().enumerate() { + self.geometry_order.push(GeometryOrderEntry { + layer: line.layer, + kind: GeometryKind::Line, + index, + }); } - for circle in &scene.circles { - geometry.push(GeometryPrimitive::Circle(circle)); + for (index, circle) in scene.circles.iter().enumerate() { + 
self.geometry_order.push(GeometryOrderEntry { + layer: circle.layer, + kind: GeometryKind::Circle, + index, + }); } - geometry.sort_by_key(GeometryPrimitive::layer); + self.geometry_order.sort_by_key(|entry| entry.layer); - for primitive in geometry { - match primitive { - GeometryPrimitive::Panel(panel) => { + for entry in &self.geometry_order { + match entry.kind { + GeometryKind::Panel => { + let panel = &scene.panels[entry.index]; push_rect( &mut self.staging_vertices, panel.rect, @@ -1122,7 +1221,8 @@ impl GpuState { ); } } - GeometryPrimitive::TexturedQuad(quad) => { + GeometryKind::TexturedQuad => { + let quad = &scene.textured_quads[entry.index]; // The render layer keeps the textured-quad command available for future sprite // pipelines. Until a texture atlas is bound, it degrades to a tinted panel. push_rect( @@ -1133,15 +1233,15 @@ impl GpuState { self.surface_config.height, ); } - GeometryPrimitive::Line(line) => push_line( + GeometryKind::Line => push_line( &mut self.staging_vertices, - *line, + scene.lines[entry.index], self.surface_config.width, self.surface_config.height, ), - GeometryPrimitive::Circle(circle) => push_circle( + GeometryKind::Circle => push_circle( &mut self.staging_vertices, - *circle, + scene.circles[entry.index], self.surface_config.width, self.surface_config.height, ), @@ -1166,23 +1266,20 @@ impl GpuState { } #[cfg(not(target_arch = "wasm32"))] -enum GeometryPrimitive<'a> { - Panel(&'a super::scene::PanelRegion), - TexturedQuad(&'a super::scene::TexturedQuad), - Line(&'a LineCommand), - Circle(&'a CircleCommand), +#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] +enum GeometryKind { + Panel, + TexturedQuad, + Line, + Circle, } #[cfg(not(target_arch = "wasm32"))] -impl GeometryPrimitive<'_> { - fn layer(&self) -> i32 { - match self { - Self::Panel(panel) => panel.layer, - Self::TexturedQuad(quad) => quad.layer, - Self::Line(line) => line.layer, - Self::Circle(circle) => circle.layer, - } - } +#[derive(Clone, Copy, Debug, Eq, 
Hash, PartialEq)] +struct GeometryOrderEntry { + layer: i32, + kind: GeometryKind, + index: usize, } #[cfg(not(target_arch = "wasm32"))] @@ -1388,10 +1485,10 @@ mod tests { } impl Game for CounterGame { + type Params = (); type State = CounterState; type Action = u8; - type PlayerObservation = CounterState; - type SpectatorObservation = CounterState; + type Obs = CounterState; type WorldView = CounterState; type PlayerBuf = FixedVec; type ActionBuf = FixedVec; @@ -1407,7 +1504,7 @@ mod tests { 1 } - fn init(&self, _seed: Seed) -> Self::State { + fn init_with_params(&self, _seed: Seed, _params: &Self::Params) -> Self::State { CounterState::default() } @@ -1433,15 +1530,11 @@ mod tests { out.push(1).unwrap(); } - fn observe_player( - &self, - state: &Self::State, - _player: PlayerId, - ) -> Self::PlayerObservation { + fn observe_player(&self, state: &Self::State, _player: PlayerId) -> Self::Obs { *state } - fn observe_spectator(&self, state: &Self::State) -> Self::SpectatorObservation { + fn observe_spectator(&self, state: &Self::State) -> Self::Obs { *state } diff --git a/src/render/scene.rs b/src/render/scene.rs index 2d480e8..57f228c 100644 --- a/src/render/scene.rs +++ b/src/render/scene.rs @@ -1,27 +1,40 @@ +//! Immediate-mode 2D scene command structures used by the renderer. + +/// RGBA color in normalized `[0, 1]` channels. #[derive(Clone, Copy, Debug, Default, PartialEq)] pub struct Color { + /// Red channel. pub r: f32, + /// Green channel. pub g: f32, + /// Blue channel. pub b: f32, + /// Alpha channel. pub a: f32, } impl Color { + /// Opaque white color. pub const WHITE: Self = Self::rgba(1.0, 1.0, 1.0, 1.0); + /// Opaque black color. pub const BLACK: Self = Self::rgba(0.0, 0.0, 0.0, 1.0); + /// Creates an opaque RGB color. pub const fn rgb(r: f32, g: f32, b: f32) -> Self { Self::rgba(r, g, b, 1.0) } + /// Creates an RGBA color. 
pub const fn rgba(r: f32, g: f32, b: f32, a: f32) -> Self { Self { r, g, b, a } } + /// Creates an opaque color from 8-bit channels. pub const fn from_rgb8(r: u8, g: u8, b: u8) -> Self { Self::from_rgba8(r, g, b, 255) } + /// Creates a color from 8-bit channels. pub const fn from_rgba8(r: u8, g: u8, b: u8, a: u8) -> Self { Self { r: r as f32 / 255.0, @@ -32,27 +45,37 @@ impl Color { } } +/// 2D point in screen space. #[derive(Clone, Copy, Debug, Default, PartialEq)] pub struct Point2 { + /// X coordinate. pub x: f32, + /// Y coordinate. pub y: f32, } impl Point2 { + /// Creates a point. pub const fn new(x: f32, y: f32) -> Self { Self { x, y } } } +/// Axis-aligned rectangle in screen space. #[derive(Clone, Copy, Debug, Default, PartialEq)] pub struct Rect { + /// Left coordinate. pub x: f32, + /// Top coordinate. pub y: f32, + /// Rectangle width. pub width: f32, + /// Rectangle height. pub height: f32, } impl Rect { + /// Creates a rectangle. pub const fn new(x: f32, y: f32, width: f32, height: f32) -> Self { Self { x, @@ -62,26 +85,32 @@ impl Rect { } } + /// Returns left edge. pub fn left(self) -> f32 { self.x } + /// Returns right edge. pub fn right(self) -> f32 { self.x + self.width } + /// Returns top edge. pub fn top(self) -> f32 { self.y } + /// Returns bottom edge. pub fn bottom(self) -> f32 { self.y + self.height } + /// Returns rectangle center. pub fn center(self) -> Point2 { Point2::new(self.x + self.width * 0.5, self.y + self.height * 0.5) } + /// Returns whether `point` lies inside the rectangle bounds. pub fn contains(self, point: Point2) -> bool { point.x >= self.left() && point.x <= self.right() @@ -90,68 +119,110 @@ impl Rect { } } +/// Handle to a texture resource. #[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)] pub struct TextureHandle(pub u32); +/// Filled panel draw command. #[derive(Clone, Copy, Debug, PartialEq)] pub struct PanelRegion { + /// Panel rectangle. pub rect: Rect, + /// Fill color. 
pub fill: Color, + /// Optional stroke `(color, thickness)`. pub stroke: Option<(Color, f32)>, + /// Layer ordering key. pub layer: i32, } +/// Thick line draw command. #[derive(Clone, Copy, Debug, PartialEq)] pub struct LineCommand { + /// Start point. pub start: Point2, + /// End point. pub end: Point2, + /// Line thickness in pixels. pub thickness: f32, + /// Line color. pub color: Color, + /// Layer ordering key. pub layer: i32, } +/// Filled circle draw command. #[derive(Clone, Copy, Debug, PartialEq)] pub struct CircleCommand { + /// Circle center. pub center: Point2, + /// Circle radius in pixels. pub radius: f32, + /// Fill color. pub color: Color, + /// Layer ordering key. pub layer: i32, } +/// Text draw command. #[derive(Clone, Debug, PartialEq)] pub struct FrameText { + /// Anchor position for text layout. pub position: Point2, + /// Text clipping/layout bounds. pub bounds: Rect, + /// UTF-8 content. pub content: String, + /// Font size in pixels. pub size: f32, + /// Text color. pub color: Color, + /// Layer ordering key. pub layer: i32, } +/// Textured rectangle command. #[derive(Clone, Copy, Debug, PartialEq)] pub struct TexturedQuad { + /// Destination rectangle. pub rect: Rect, + /// Source UV rectangle. pub uv_rect: Rect, + /// Texture handle. pub texture: TextureHandle, + /// Multiplicative tint color. pub tint: Color, + /// Layer ordering key. pub layer: i32, } +/// Input hit-test region metadata. #[derive(Clone, Copy, Debug, PartialEq)] pub struct HitRegion { + /// Stable region id. pub id: u64, + /// Hit-test bounds. pub rect: Rect, + /// Debug label. pub label: &'static str, } +/// Full frame scene command buffer. #[derive(Clone, Debug, PartialEq)] pub struct Scene2d { + /// Clear color for the frame. pub clear_color: Color, + /// Panel commands. pub panels: Vec, + /// Line commands. pub lines: Vec, + /// Circle commands. pub circles: Vec, + /// Text commands. pub texts: Vec, + /// Textured quad commands. 
pub textured_quads: Vec, + /// Hit regions for interaction logic. pub hit_regions: Vec, } @@ -162,6 +233,7 @@ impl Default for Scene2d { } impl Scene2d { + /// Creates a scene with explicit command-buffer capacities. pub fn with_capacities( panels: usize, lines: usize, @@ -181,6 +253,7 @@ impl Scene2d { } } + /// Clears all commands while preserving allocated capacities. pub fn clear(&mut self) { self.clear_color = Color::BLACK; self.panels.clear(); @@ -191,10 +264,12 @@ impl Scene2d { self.hit_regions.clear(); } + /// Sets frame clear color. pub fn set_clear_color(&mut self, color: Color) { self.clear_color = color; } + /// Enqueues a filled panel command. pub fn panel(&mut self, rect: Rect, fill: Color, stroke: Option<(Color, f32)>, layer: i32) { self.panels.push(PanelRegion { rect, @@ -204,6 +279,7 @@ impl Scene2d { }); } + /// Enqueues a thick line command. pub fn line(&mut self, start: Point2, end: Point2, thickness: f32, color: Color, layer: i32) { self.lines.push(LineCommand { start, @@ -214,6 +290,7 @@ impl Scene2d { }); } + /// Enqueues a filled circle command. pub fn circle(&mut self, center: Point2, radius: f32, color: Color, layer: i32) { self.circles.push(CircleCommand { center, @@ -223,6 +300,7 @@ impl Scene2d { }); } + /// Enqueues a text command. pub fn text( &mut self, position: Point2, @@ -242,6 +320,7 @@ impl Scene2d { }); } + /// Enqueues a textured-quad command. pub fn textured_quad( &mut self, rect: Rect, @@ -259,6 +338,7 @@ impl Scene2d { }); } + /// Registers a hit-test region. pub fn hit_region(&mut self, id: u64, rect: Rect, label: &'static str) { self.hit_regions.push(HitRegion { id, rect, label }); } diff --git a/src/rng.rs b/src/rng.rs index 8f5bc37..6376272 100644 --- a/src/rng.rs +++ b/src/rng.rs @@ -1,18 +1,23 @@ +//! Deterministic RNG primitives used across simulation and policy execution. 
+ use crate::types::Seed; const ZERO_STATE_REPLACEMENT: u64 = 0xCAFEBABEDEADBEEF; const STREAM_XOR: u64 = 0x9E3779B97F4A7C15; +/// SplitMix64 mixer used to derive per-stream RNG states. #[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] pub struct SplitMix64 { state: u64, } impl SplitMix64 { + /// Creates a mixer from `seed`. pub const fn new(seed: Seed) -> Self { Self { state: seed } } + /// Advances the mixer and returns one 64-bit value. pub fn next_u64(&mut self) -> u64 { self.state = self.state.wrapping_add(STREAM_XOR); let mut z = self.state; @@ -22,6 +27,7 @@ impl SplitMix64 { } } +/// Deterministic xorshift-style RNG with stable stream forking. #[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] pub struct DeterministicRng { root_seed: Seed, @@ -35,10 +41,12 @@ impl Default for DeterministicRng { } impl DeterministicRng { + /// Creates an RNG from a root seed using stream id `0`. pub fn from_seed(seed: Seed) -> Self { Self::from_seed_and_stream(seed, 0) } + /// Creates an RNG from `seed` and stable `stream_id`. pub fn from_seed_and_stream(seed: Seed, stream_id: u64) -> Self { let mut mixer = SplitMix64::new(seed ^ stream_id.wrapping_mul(STREAM_XOR)); let state = sanitize_state(mixer.next_u64()); @@ -48,18 +56,22 @@ impl DeterministicRng { } } + /// Returns the root seed used to derive this RNG stream. pub const fn root_seed(self) -> Seed { self.root_seed } + /// Returns internal RNG state for reproducibility/testing. pub const fn raw_state(self) -> u64 { self.state } + /// Derives a sibling stream from the same root seed. pub fn fork(&self, stream_id: u64) -> Self { Self::from_seed_and_stream(self.root_seed, stream_id) } + /// Generates the next 64-bit random value. pub fn next_u64(&mut self) -> u64 { let mut x = self.state; x ^= x >> 12; @@ -69,6 +81,7 @@ impl DeterministicRng { x.wrapping_mul(0x2545F4914F6CDD1D) } + /// Samples uniformly in `[0, end)`. 
pub fn gen_range(&mut self, end: usize) -> usize { if end <= 1 { return 0; @@ -83,6 +96,7 @@ impl DeterministicRng { } } + /// Samples a Bernoulli outcome with probability `numerator / denominator`. pub fn gen_bool_ratio(&mut self, numerator: u64, denominator: u64) -> bool { debug_assert!(denominator > 0); if numerator == 0 { @@ -94,11 +108,13 @@ impl DeterministicRng { (self.next_u64() % denominator) < numerator } + /// Samples a floating-point number in `[0, 1)`. pub fn gen_unit_f64(&mut self) -> f64 { let value = self.next_u64() >> 11; (value as f64) * (1.0 / 9007199254740992.0) } + /// In-place Fisher-Yates shuffle using deterministic randomness. pub fn shuffle(&mut self, slice: &mut [T]) { for index in (1..slice.len()).rev() { let swap_index = self.gen_range(index + 1); diff --git a/src/session.rs b/src/session.rs index 985d082..12b4096 100644 --- a/src/session.rs +++ b/src/session.rs @@ -1,4 +1,7 @@ +//! Session kernel, history stores, and replay/rewind utilities. + use core::fmt::Debug; +use std::collections::VecDeque; use crate::buffer::{Buffer, default_array}; use crate::game::Game; @@ -6,18 +9,27 @@ use crate::policy::Policy; use crate::rng::DeterministicRng; use crate::types::{DynamicReplayTrace, PlayerAction, ReplayTrace, Seed, StepOutcome, Tick}; +/// Saved checkpoint used by history implementations for rewind. #[derive(Clone, Debug, Default, Eq, Hash, PartialEq)] pub struct HistorySnapshot { + /// Tick represented by this snapshot. pub tick: Tick, + /// Cloned game state. pub state: S, + /// RNG state associated with `state`. pub rng: DeterministicRng, } +/// Storage backend for session traces and rewind snapshots. pub trait HistoryStore: Clone { + /// Trace representation emitted by this history backend. type Trace: Clone + Debug + Eq + PartialEq; + /// Creates a history store from initial session state. fn from_seed(seed: Seed, initial_state: &G::State, initial_rng: DeterministicRng) -> Self; + /// Resets history to initial session state. 
fn reset(&mut self, seed: Seed, initial_state: &G::State, initial_rng: DeterministicRng); + /// Records one transition and optional snapshot. fn record( &mut self, tick: Tick, @@ -26,20 +38,26 @@ pub trait HistoryStore: Clone { actions: &G::JointActionBuf, outcome: &StepOutcome, ); + /// Returns recorded transition count. fn len(&self) -> usize; + /// Returns whether no transitions are recorded. fn is_empty(&self) -> bool; + /// Returns immutable trace view. fn trace(&self) -> &Self::Trace; + /// Consumes history and returns owned trace. fn into_trace(self) -> Self::Trace; + /// Restores state/RNG at `target_tick` when available. fn restore(&self, game: &G, target_tick: Tick) -> Option<(G::State, DeterministicRng)>; } +/// Dynamically-sized history with bounded checkpoint deque. #[derive(Debug, Eq, PartialEq)] pub struct DynamicHistory { seed: Seed, initial_state: G::State, initial_rng: DeterministicRng, trace: DynamicReplayTrace, - snapshots: Vec>, + snapshots: VecDeque>, } impl Clone @@ -67,9 +85,9 @@ impl return; } if self.snapshots.len() == SNAPSHOTS { - self.snapshots.remove(0); + let _ = self.snapshots.pop_front(); } - self.snapshots.push(HistorySnapshot { + self.snapshots.push_back(HistorySnapshot { tick, state: state.clone(), rng, @@ -90,9 +108,9 @@ impl HistoryStore type Trace = DynamicReplayTrace; fn from_seed(seed: Seed, initial_state: &G::State, initial_rng: DeterministicRng) -> Self { - let mut snapshots = Vec::with_capacity(SNAPSHOTS); + let mut snapshots = VecDeque::with_capacity(SNAPSHOTS); if SNAPSHOTS > 0 { - snapshots.push(HistorySnapshot { + snapshots.push_back(HistorySnapshot { tick: 0, state: initial_state.clone(), rng: initial_rng, @@ -114,7 +132,7 @@ impl HistoryStore self.trace.clear(seed); self.snapshots.clear(); if SNAPSHOTS > 0 { - self.snapshots.push(HistorySnapshot { + self.snapshots.push_back(HistorySnapshot { tick: 0, state: initial_state.clone(), rng: initial_rng, @@ -179,6 +197,7 @@ impl HistoryStore } } +/// Fixed-capacity history 
with ring-buffer checkpoints. #[derive(Debug, Eq, Hash, PartialEq)] pub struct FixedHistory where @@ -333,9 +352,11 @@ where } } +/// Deterministic session kernel for stepping, tracing, and rewinding games. #[derive(Clone, Debug)] pub struct SessionKernel> { game: G, + params: G::Params, state: G::State, rng: DeterministicRng, tick: Tick, @@ -346,17 +367,32 @@ pub struct SessionKernel> { outcome: StepOutcome, } -pub type Session = SessionKernel>; +/// Default dynamic-history session alias. +pub type Session = SessionKernel>; +/// Interactive dynamic-history session alias. pub type InteractiveSession = SessionKernel>; impl> SessionKernel { + /// Creates a new session initialized from `seed`. pub fn new(game: G, seed: Seed) -> Self { - let state = game.init(seed); + let params = game.default_params(); + Self::new_with_params(game, seed, params) + } + + /// Creates a new session initialized from `seed` and explicit params. + pub fn new_with_params(game: G, seed: Seed, params: G::Params) -> Self { + assert!( + game.params_invariant(&params), + "invalid params for game `{}`", + game.name() + ); + let state = game.init_with_params(seed, &params); assert!(game.state_invariant(&state)); let rng = DeterministicRng::from_seed_and_stream(seed, 1); let history = H::from_seed(seed, &state, rng); Self { game, + params, state, rng, tick: 0, @@ -368,8 +404,21 @@ impl> SessionKernel { } } + /// Resets session state and history to `seed`. pub fn reset(&mut self, seed: Seed) { - self.state = self.game.init(seed); + let params = self.params.clone(); + self.reset_with_params(seed, params); + } + + /// Resets session state/history to `seed` and updates active params. 
+ pub fn reset_with_params(&mut self, seed: Seed, params: G::Params) { + assert!( + self.game.params_invariant(&params), + "invalid params for game `{}`", + self.game.name() + ); + self.params = params; + self.state = self.game.init_with_params(seed, &self.params); self.rng = DeterministicRng::from_seed_and_stream(seed, 1); self.tick = 0; self.history.reset(seed, &self.state, self.rng); @@ -379,78 +428,163 @@ impl> SessionKernel { self.outcome.clear(); } + /// Returns the game instance. pub fn game(&self) -> &G { &self.game } + /// Returns active parameter bundle used by resets and initial state creation. + pub fn params(&self) -> &G::Params { + &self.params + } + + /// Returns current game state. pub fn state(&self) -> &G::State { &self.state } + /// Returns current tick. pub fn current_tick(&self) -> Tick { self.tick } + /// Returns current RNG snapshot. pub fn rng(&self) -> DeterministicRng { self.rng } + /// Returns the active compact codec descriptor for current params. + pub fn compact_spec(&self) -> crate::compact::CompactSpec { + self.game.compact_spec_for_params(&self.params) + } + + /// Returns immutable trace view. pub fn trace(&self) -> &H::Trace { self.history.trace() } + /// Consumes session and returns owned trace. pub fn into_trace(self) -> H::Trace { self.history.into_trace() } + /// Returns whether current state is terminal. pub fn is_terminal(&self) -> bool { self.game.is_terminal(&self.state) } - pub fn player_observation(&self, player: usize) -> G::PlayerObservation { + /// Returns player-local observation. + pub fn player_observation(&self, player: usize) -> G::Obs { self.game.observe_player(&self.state, player) } - pub fn spectator_observation(&self) -> G::SpectatorObservation { + /// Returns spectator observation. + pub fn spectator_observation(&self) -> G::Obs { self.game.observe_spectator(&self.state) } + /// Returns world/debug view. 
pub fn world_view(&self) -> G::WorldView { self.game.world_view(&self.state) } + /// Returns legal actions for `player` in current state. pub fn legal_actions_for(&mut self, player: usize) -> &[G::Action] { self.game .legal_actions(&self.state, player, &mut self.legal_actions); self.legal_actions.as_slice() } - pub fn step(&mut self, actions: &[PlayerAction]) -> &StepOutcome { + #[inline(always)] + fn step_core(&mut self, actions: &G::JointActionBuf) { + assert!( + !self.game.is_terminal(&self.state), + "cannot step a terminal session", + ); + self.outcome.clear(); + self.game + .step_in_place(&mut self.state, actions, &mut self.rng, &mut self.outcome); + self.tick += 1; + self.outcome.tick = self.tick; + } + + #[inline(always)] + fn record_step(&mut self, actions: &G::JointActionBuf) { + self.history + .record(self.tick, &self.state, self.rng, actions, &self.outcome); + } + + fn collect_policy_actions(&mut self, policies: &mut [&mut dyn Policy]) { + self.players_to_act.clear(); + self.game + .players_to_act(&self.state, &mut self.players_to_act); + self.joint_actions.clear(); - self.joint_actions - .extend_from_slice(actions) - .expect("joint action buffer capacity exceeded"); - let joint_actions = self.joint_actions.clone(); - self.step_with_joint_actions(&joint_actions) + for &player in self.players_to_act.as_slice() { + self.game + .legal_actions(&self.state, player, &mut self.legal_actions); + let observation = self.game.observe_player(&self.state, player); + let policy = policies + .get_mut(player) + .expect("missing policy for active player"); + let action = policy.choose_action( + &self.game, + &self.state, + player, + &observation, + self.legal_actions.as_slice(), + &mut self.rng, + ); + self.joint_actions + .push(PlayerAction { player, action }) + .expect("joint action buffer capacity exceeded"); + } } - pub fn step_with_joint_actions( - &mut self, - actions: &G::JointActionBuf, - ) -> &StepOutcome { + #[inline(always)] + fn step_staged_joint_actions(&mut 
self) -> &StepOutcome { + assert!( + !self.game.is_terminal(&self.state), + "cannot step a terminal session", + ); + self.outcome.clear(); + self.game.step_in_place( + &mut self.state, + &self.joint_actions, + &mut self.rng, + &mut self.outcome, + ); + self.tick += 1; + self.outcome.tick = self.tick; + self.history.record( + self.tick, + &self.state, + self.rng, + &self.joint_actions, + &self.outcome, + ); + &self.outcome + } + + #[inline(always)] + fn step_staged_joint_actions_checked(&mut self) -> &StepOutcome { assert!( !self.game.is_terminal(&self.state), "cannot step a terminal session", ); assert!(self.game.state_invariant(&self.state)); - for action in actions.as_slice() { + for action in self.joint_actions.as_slice() { assert!(self.game.action_invariant(&action.action)); } let pre_state = self.state.clone(); self.outcome.clear(); - self.game - .step_in_place(&mut self.state, actions, &mut self.rng, &mut self.outcome); + self.game.step_in_place( + &mut self.state, + &self.joint_actions, + &mut self.rng, + &mut self.outcome, + ); self.tick += 1; self.outcome.tick = self.tick; @@ -471,49 +605,111 @@ impl> SessionKernel { } assert!(self.game.transition_postcondition( &pre_state, - actions, + &self.joint_actions, &self.state, &self.outcome )); - self.history - .record(self.tick, &self.state, self.rng, actions, &self.outcome); + self.history.record( + self.tick, + &self.state, + self.rng, + &self.joint_actions, + &self.outcome, + ); &self.outcome } - pub fn step_with_policies( + /// Steps using externally supplied action slice. + pub fn step(&mut self, actions: &[PlayerAction]) -> &StepOutcome { + self.joint_actions.clear(); + self.joint_actions + .extend_from_slice(actions) + .expect("joint action buffer capacity exceeded"); + self.step_staged_joint_actions() + } + + /// Steps using externally supplied action slice with contract checks. 
+ pub fn step_checked( &mut self, - policies: &mut [&mut dyn Policy], + actions: &[PlayerAction], ) -> &StepOutcome { - self.players_to_act.clear(); - self.game - .players_to_act(&self.state, &mut self.players_to_act); self.joint_actions.clear(); + self.joint_actions + .extend_from_slice(actions) + .expect("joint action buffer capacity exceeded"); + self.step_staged_joint_actions_checked() + } - for &player in self.players_to_act.as_slice() { + /// Steps with prebuilt joint-action buffer. + #[inline(always)] + pub fn step_with_joint_actions( + &mut self, + actions: &G::JointActionBuf, + ) -> &StepOutcome { + self.step_core(actions); + self.record_step(actions); + &self.outcome + } + + /// Steps with contract checks enabled. + pub fn step_with_joint_actions_checked( + &mut self, + actions: &G::JointActionBuf, + ) -> &StepOutcome { + assert!(self.game.state_invariant(&self.state)); + for action in actions.as_slice() { + assert!(self.game.action_invariant(&action.action)); + } + + let pre_state = self.state.clone(); + self.step_core(actions); + + assert!(self.game.state_invariant(&self.state)); + let spectator = self.game.observe_spectator(&self.state); + assert!( self.game - .legal_actions(&self.state, player, &mut self.legal_actions); + .spectator_observation_invariant(&self.state, &spectator) + ); + let world = self.game.world_view(&self.state); + assert!(self.game.world_view_invariant(&self.state, &world)); + for player in 0..self.game.player_count() { let observation = self.game.observe_player(&self.state, player); - let policy = policies - .get_mut(player) - .expect("missing policy for active player"); - let action = policy.choose_action( - &self.game, - &self.state, - player, - &observation, - self.legal_actions.as_slice(), - &mut self.rng, + assert!( + self.game + .player_observation_invariant(&self.state, player, &observation) ); - self.joint_actions - .push(PlayerAction { player, action }) - .expect("joint action buffer capacity exceeded"); } + 
assert!(self.game.transition_postcondition( + &pre_state, + actions, + &self.state, + &self.outcome + )); + + self.record_step(actions); + &self.outcome + } + + /// Collects actions from policies and steps once. + pub fn step_with_policies( + &mut self, + policies: &mut [&mut dyn Policy], + ) -> &StepOutcome { + self.collect_policy_actions(policies); + self.step_staged_joint_actions() + } - let actions = self.joint_actions.clone(); - self.step_with_joint_actions(&actions) + /// Collects actions from policies and steps once with checks. + pub fn step_with_policies_checked( + &mut self, + policies: &mut [&mut dyn Policy], + ) -> &StepOutcome { + self.collect_policy_actions(policies); + self.step_staged_joint_actions_checked() } + /// Runs until terminal state or `max_ticks` is reached. pub fn run_until_terminal( &mut self, policies: &mut [&mut dyn Policy], @@ -525,6 +721,19 @@ impl> SessionKernel { self.trace() } + /// Runs checked stepping until terminal state or `max_ticks`. + pub fn run_until_terminal_checked( + &mut self, + policies: &mut [&mut dyn Policy], + max_ticks: usize, + ) -> &H::Trace { + while !self.is_terminal() && (self.tick as usize) < max_ticks { + self.step_with_policies_checked(policies); + } + self.trace() + } + + /// Rewinds session state to `target_tick` when restorable. pub fn rewind_to(&mut self, target_tick: Tick) -> bool { let Some((state, rng)) = self.history.restore(&self.game, target_tick) else { return false; @@ -536,16 +745,19 @@ impl> SessionKernel { true } + /// Alias of `rewind_to` for replay-oriented call sites. pub fn replay_to(&mut self, target_tick: Tick) -> bool { self.rewind_to(target_tick) } + /// Returns reconstructed state at `target_tick`. pub fn state_at(&self, target_tick: Tick) -> Option { self.history .restore(&self.game, target_tick) .map(|(state, _)| state) } + /// Returns a cloned session fork rewound to `target_tick`. 
pub fn fork_at(&self, target_tick: Tick) -> Option where G: Clone, @@ -573,10 +785,10 @@ mod tests { } impl Game for SpinnerGame { + type Params = (); type State = SpinnerState; type Action = u8; - type PlayerObservation = SpinnerState; - type SpectatorObservation = SpinnerState; + type Obs = SpinnerState; type WorldView = SpinnerState; type PlayerBuf = FixedVec; type ActionBuf = FixedVec; @@ -592,7 +804,7 @@ mod tests { 1 } - fn init(&self, _seed: Seed) -> Self::State { + fn init_with_params(&self, _seed: Seed, _params: &Self::Params) -> Self::State { SpinnerState { tick: 0 } } @@ -615,15 +827,11 @@ mod tests { out.push(0).unwrap(); } - fn observe_player( - &self, - state: &Self::State, - _player: PlayerId, - ) -> Self::PlayerObservation { + fn observe_player(&self, state: &Self::State, _player: PlayerId) -> Self::Obs { *state } - fn observe_spectator(&self, state: &Self::State) -> Self::SpectatorObservation { + fn observe_spectator(&self, state: &Self::State) -> Self::Obs { *state } @@ -689,10 +897,10 @@ mod proofs { } impl Game for CounterGame { + type Params = (); type State = CounterState; type Action = u8; - type PlayerObservation = CounterState; - type SpectatorObservation = CounterState; + type Obs = CounterState; type WorldView = CounterState; type PlayerBuf = FixedVec; type ActionBuf = FixedVec; @@ -708,7 +916,7 @@ mod proofs { 1 } - fn init(&self, _seed: Seed) -> Self::State { + fn init_with_params(&self, _seed: Seed, _params: &Self::Params) -> Self::State { CounterState { value: 0, terminal: false, @@ -737,15 +945,11 @@ mod proofs { out.push(1).unwrap(); } - fn observe_player( - &self, - state: &Self::State, - _player: PlayerId, - ) -> Self::PlayerObservation { + fn observe_player(&self, state: &Self::State, _player: PlayerId) -> Self::Obs { *state } - fn observe_spectator(&self, state: &Self::State) -> Self::SpectatorObservation { + fn observe_spectator(&self, state: &Self::State) -> Self::Obs { *state } diff --git a/src/types.rs b/src/types.rs index 
2261059..4f5c271 100644 --- a/src/types.rs +++ b/src/types.rs @@ -1,43 +1,64 @@ +//! Core scalar types and replay data structures used across the engine. + use core::hash::{Hash, Hasher}; use crate::buffer::{Buffer, FixedVec}; +/// Scalar reward type used by games. pub type Reward = i64; +/// Monotonic simulation tick counter. pub type Tick = u64; +/// Stable player identifier within one game. pub type PlayerId = usize; +/// Deterministic seed type. pub type Seed = u64; +/// Reward assigned to one player for a single transition. #[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)] pub struct PlayerReward { + /// Recipient player id. pub player: PlayerId, + /// Reward value for that player. pub reward: Reward, } +/// Action submitted by a specific player. #[derive(Clone, Debug, Default, Eq, Hash, PartialEq)] pub struct PlayerAction { + /// Acting player id. pub player: PlayerId, + /// Concrete chosen action. pub action: A, } +/// Episode termination state after a step. #[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)] pub enum Termination { + /// Episode continues. #[default] Ongoing, + /// Episode reached a terminal state. Terminal { + /// Winner id for terminal outcomes, when applicable. winner: Option, }, } impl Termination { + /// Returns `true` when the outcome is terminal. pub const fn is_terminal(self) -> bool { matches!(self, Self::Terminal { .. }) } } +/// Output bundle from one transition. #[derive(Clone, Debug, Eq, Hash, PartialEq)] pub struct StepOutcome { + /// Tick at which this outcome was produced. pub tick: Tick, + /// Per-player rewards. pub rewards: R, + /// Termination state. pub termination: Termination, } @@ -58,12 +79,14 @@ impl StepOutcome where R: Buffer, { + /// Resets outcome to default ongoing state. pub fn clear(&mut self) { self.tick = 0; self.rewards.clear(); self.termination = Termination::Ongoing; } + /// Returns reward for `player`, or `0` when no entry exists. 
pub fn reward_for(&self, player: PlayerId) -> Reward { let rewards = self.rewards.as_slice(); let mut index = 0usize; @@ -77,25 +100,34 @@ where 0 } + /// Returns whether this outcome is terminal. pub fn is_terminal(&self) -> bool { self.termination.is_terminal() } } +/// One recorded replay step. #[derive(Clone, Debug, Default, Eq, Hash, PartialEq)] pub struct ReplayStep { + /// Tick at which step was recorded. pub tick: Tick, + /// Joint action applied at `tick`. pub actions: JA, + /// Reward bundle emitted by the transition. pub rewards: R, + /// Termination state after the transition. pub termination: Termination, } +/// Fixed-capacity replay trace. #[derive(Clone, Debug, Eq, Hash, PartialEq)] pub struct ReplayTrace where ReplayStep: Default, { + /// Seed used to initialize the session. pub seed: Seed, + /// Recorded transition log. pub steps: FixedVec, LOG>, } @@ -103,6 +135,7 @@ impl ReplayTrace where ReplayStep: Default, { + /// Creates an empty trace initialized with `seed`. pub fn new(seed: Seed) -> Self { Self { seed, @@ -110,27 +143,34 @@ where } } + /// Clears the trace and updates seed metadata. pub fn clear(&mut self, seed: Seed) { self.seed = seed; self.steps.clear(); } + /// Returns number of recorded steps. pub fn len(&self) -> usize { self.steps.len() } + /// Returns whether no steps are recorded. pub fn is_empty(&self) -> bool { self.steps.is_empty() } } +/// Dynamically-sized replay trace. #[derive(Clone, Debug, Default, Eq, Hash, PartialEq)] pub struct DynamicReplayTrace { + /// Seed used to initialize the session. pub seed: Seed, + /// Recorded transition log. pub steps: Vec>, } impl DynamicReplayTrace { + /// Creates an empty dynamic trace. pub fn new(seed: Seed) -> Self { Self { seed, @@ -138,15 +178,18 @@ impl DynamicReplayTrace { } } + /// Clears the trace and updates seed metadata. pub fn clear(&mut self, seed: Seed) { self.seed = seed; self.steps.clear(); } + /// Returns number of recorded steps. 
pub fn len(&self) -> usize { self.steps.len() } + /// Returns whether no steps are recorded. pub fn is_empty(&self) -> bool { self.steps.is_empty() } @@ -157,6 +200,7 @@ where JA: Clone, R: Clone, { + /// Appends one replay step cloned from the given references. pub fn record(&mut self, tick: Tick, actions: &JA, rewards: &R, termination: Termination) { self.steps.push(ReplayStep { tick, @@ -172,6 +216,7 @@ where JA: Clone + Default, R: Clone + Default, { + /// Appends one replay step to the fixed-capacity log. pub fn record(&mut self, tick: Tick, actions: &JA, rewards: &R, termination: Termination) { self.steps .push(ReplayStep { @@ -213,6 +258,7 @@ impl Hasher for StableHasher { } } +/// Computes a stable 64-bit hash using an internal FNV-1a variant. pub fn stable_hash(value: &T) -> u64 { let mut hasher = StableHasher::new(); value.hash(&mut hasher); diff --git a/src/verification.rs b/src/verification.rs index b9ed0df..36441bc 100644 --- a/src/verification.rs +++ b/src/verification.rs @@ -1,9 +1,22 @@ +//! Runtime contract-check helpers for transitions, observations, and compact codecs. + use crate::buffer::Buffer; -use crate::compact::CompactGame; use crate::game::Game; use crate::rng::DeterministicRng; -use crate::types::{Seed, StepOutcome}; +use crate::types::{Reward, Seed, StepOutcome}; + +/// Returns true when a reward stays in range and terminal flags remain consistent. +pub fn reward_and_terminal_postcondition( + reward: Reward, + min_reward: Reward, + max_reward: Reward, + post_terminal: bool, + outcome_terminal: bool, +) -> bool { + (min_reward..=max_reward).contains(&reward) && (post_terminal == outcome_terminal) +} +/// Asserts deterministic transition and postcondition contracts for one step. 
pub fn assert_transition_contracts( game: &G, pre: &G::State, @@ -37,6 +50,7 @@ pub fn assert_transition_contracts( assert!(game.transition_postcondition(pre, actions, &left_state, &left_outcome,)); } +/// Asserts player, spectator, and world-view observation contracts. pub fn assert_observation_contracts(game: &G, state: &G::State) { assert!(game.state_invariant(state)); for player in 0..game.player_count() { @@ -49,7 +63,125 @@ pub fn assert_observation_contracts(game: &G, state: &G::State) { assert!(game.world_view_invariant(state, &world)); } -pub fn assert_compact_roundtrip(game: &G, action: &G::Action) { +/// Asserts compact action encoding roundtrips through decode. +pub fn assert_compact_roundtrip(game: &G, action: &G::Action) { + if game.compact_spec().action_count == 0 { + return; + } let encoded = game.encode_action(action); assert_eq!(game.decode_action(encoded), Some(*action)); } + +#[cfg(test)] +mod tests { + use super::assert_compact_roundtrip; + use crate::buffer::FixedVec; + use crate::compact::CompactSpec; + use crate::game::Game; + use crate::rng::DeterministicRng; + use crate::types::{PlayerAction, PlayerId, PlayerReward, Seed, StepOutcome}; + + #[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)] + struct MinimalGame { + compact_actions: u64, + } + + #[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)] + struct MinimalState; + + impl Game for MinimalGame { + type Params = (); + type State = MinimalState; + type Action = u8; + type Obs = u8; + type WorldView = u8; + type PlayerBuf = FixedVec; + type ActionBuf = FixedVec; + type JointActionBuf = FixedVec, 1>; + type RewardBuf = FixedVec; + type WordBuf = FixedVec; + + fn name(&self) -> &'static str { + "minimal" + } + + fn player_count(&self) -> usize { + 1 + } + + fn init_with_params(&self, _seed: Seed, _params: &Self::Params) -> Self::State { + MinimalState + } + + fn is_terminal(&self, _state: &Self::State) -> bool { + false + } + + fn players_to_act(&self, _state: &Self::State, 
out: &mut Self::PlayerBuf) { + out.clear(); + out.push(0).unwrap(); + } + + fn legal_actions( + &self, + _state: &Self::State, + _player: PlayerId, + out: &mut Self::ActionBuf, + ) { + out.clear(); + out.push(0).unwrap(); + } + + fn observe_player(&self, _state: &Self::State, _player: PlayerId) -> Self::Obs { + 0 + } + + fn observe_spectator(&self, _state: &Self::State) -> Self::Obs { + 0 + } + + fn world_view(&self, _state: &Self::State) -> Self::WorldView { + 0 + } + + fn step_in_place( + &self, + _state: &mut Self::State, + _joint_actions: &Self::JointActionBuf, + _rng: &mut DeterministicRng, + out: &mut StepOutcome, + ) { + out.rewards + .push(PlayerReward { + player: 0, + reward: 0, + }) + .unwrap(); + } + + fn compact_spec(&self) -> CompactSpec { + CompactSpec { + action_count: self.compact_actions, + observation_bits: 0, + observation_stream_len: 0, + reward_bits: 1, + min_reward: 0, + max_reward: 0, + reward_offset: 0, + } + } + } + + #[test] + fn compact_roundtrip_is_skipped_when_action_codec_is_absent() { + let game = MinimalGame { compact_actions: 0 }; + assert_compact_roundtrip(&game, &0); + } + + #[test] + #[should_panic] + fn compact_roundtrip_still_checks_declared_codec_surface() { + let game = MinimalGame { compact_actions: 1 }; + assert_compact_roundtrip(&game, &0); + } +} diff --git a/tests/validation.rs b/tests/validation.rs index 92f965c..9764848 100644 --- a/tests/validation.rs +++ b/tests/validation.rs @@ -1,17 +1,19 @@ -#![cfg(feature = "builtin-games")] +#![cfg(feature = "builtin")] use std::alloc::{GlobalAlloc, Layout, System}; use std::cell::Cell; use std::sync::Mutex; use std::sync::atomic::{AtomicUsize, Ordering}; +#[cfg(feature = "parallel")] +use gameengine::InteractiveSession; use gameengine::buffer::Buffer; -use gameengine::games::{Blackjack, BlackjackAction, TicTacToe, TicTacToeAction}; +use gameengine::builtin::{Blackjack, BlackjackAction, TicTacToe, TicTacToeAction}; #[cfg(feature = "physics")] -use gameengine::games::{Platformer, 
PlatformerAction}; +use gameengine::builtin::{Platformer, PlatformerAction}; use gameengine::{ - CompactGame, CompactSpec, DeterministicRng, FixedVec, Game, PlayerAction, PlayerReward, - Session, StepOutcome, stable_hash, + CompactSpec, DeterministicRng, FixedVec, Game, PlayerAction, PlayerReward, Session, + StepOutcome, stable_hash, }; struct CountingAllocator; @@ -79,7 +81,7 @@ fn capture_compact_trace( actions: &[Vec>], ) -> (Vec>, u64, u64) where - G: Game + CompactGame + Copy, + G: Game + Copy, { let mut session = Session::new(game, seed); let mut compact_trace = Vec::new(); @@ -278,7 +280,7 @@ fn golden_compact_traces_match_expected_values() { compact, vec![vec![8193], vec![139521], vec![141573], vec![141589]] ); - assert_eq!(trace_hash, 0xfcb1_5a37_9487_30e3); + assert_eq!(trace_hash, 0x5b96_1efc_b075_3027); let blackjack_actions = vec![vec![PlayerAction { player: 0, @@ -286,7 +288,7 @@ fn golden_compact_traces_match_expected_values() { }]]; let (compact, trace_hash, _) = capture_compact_trace(Blackjack, 11, &blackjack_actions); assert_eq!(compact, vec![vec![140693832466, 1449, 132, 0]]); - assert_eq!(trace_hash, 0xd6d3_8ce4_845f_4206); + assert_eq!(trace_hash, 0xfb29_3f00_ff61_bdc7); #[cfg(feature = "physics")] let platformer_actions = vec![ @@ -366,26 +368,26 @@ fn golden_compact_traces_match_expected_values() { assert_eq!( compact, vec![ - vec![2017], - vec![2001], - vec![1986], - vec![1987], - vec![1939], - vec![1924], - vec![1925], - vec![1813], - vec![1798], - vec![1799], - vec![1559], - vec![1544], - vec![1545], - vec![1049], - vec![1034], - vec![1035], - vec![2075], + vec![4128769], + vec![4063489], + vec![4063234], + vec![4063235], + vec![3932419], + vec![3932164], + vec![3932165], + vec![3670277], + vec![3670022], + vec![3670023], + vec![3145991], + vec![3145736], + vec![3145737], + vec![2097417], + vec![2097162], + vec![2097163], + vec![4194571], ] ); - assert_eq!(trace_hash, 0x1788_afb3_0dcd_0d2e); + assert_eq!(trace_hash, 0x1ee7_fb2e_3689_eabf); 
} } @@ -481,12 +483,23 @@ fn parallel_replay_matches_serial() { action: TicTacToeAction(0), }]], ), + ( + 13, + (0..320) + .map(|_| { + vec![PlayerAction { + player: 0, + action: TicTacToeAction(9), + }] + }) + .collect(), + ), ]; let parallel = replay_many(&TicTacToe, &traces); let serial: Vec<_> = traces .iter() .map(|(seed, steps)| { - let mut session = Session::new(TicTacToe, *seed); + let mut session = InteractiveSession::new(TicTacToe, *seed); for step in steps { if session.is_terminal() { break;