diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 6734da8..d302a3a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -49,5 +49,8 @@ jobs: - name: Example (C → Wasm → Rust) run: ./examples/c-to-wasm-to-rust/run.sh + - name: Example FFT (C → Wasm → Rust) + run: ./examples/c-fft/run.sh + - name: Example (Inter-Module Lending) run: ./examples/inter-module-lending/run.sh diff --git a/CHANGELOG.md b/CHANGELOG.md index 2f59423..e3362f9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,19 +5,45 @@ All notable changes to the herkos project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## [Unreleased] +## [0.2.0] ### Added -- Pre-open-source code review and cleanup -- Apache-2.0 license -- Community files (CONTRIBUTING.md, CHANGELOG.md) -- Cargo.toml metadata for all crates -- GitHub issue and PR templates +- Bulk memory operations: `memory.fill`, `memory.init`, `data.drop` +- Version info in generated code and module metadata +- Inter-module lending tests and examples with automation scripts +- Memory-intensive benchmarks (sorting, Fibonacci implementations) +- New benchmarks for control flow and arithmetic operations +- Optimization control via `HERKOS_OPTIMIZE` environment variable + +### Changed +- Memory operations now use `usize` for better type safety +- Refactored host import handling with uniform `Env` API pattern +- Enhanced SSA IR with improved phi-node lowering and branch resolution +- Improved dead code handling in IR builder with live-check methods for terminators +- Restructured `ControlFrame` enum for better control flow handling +- Simplified data segment parsing using zip for segment indexing ### Fixed -- i32 shift operations now correctly mask shift amounts to 5 bits (& 31) per WebAssembly spec -- Replaced panic-inducing `unwrap()` calls in IR builder with 
proper error handling -- Changed constructor panics to `Result` types for proper no_std compliance +- Host parameter now properly handled in `call_indirect` dispatch (issue #19) +- Host parameter now transitively propagated through direct calls (issue #19) +- IR now enforces strict SSA form at compile time with `UseVar`/`DefVar` typing +- Removed panic for unoptimizations in transpile function +- Removed unnecessary crate-type configurations from Cargo.toml + +### Removed +- Example C usage and header files from repository +- Herkos-bootstrap example implementation + +## [0.1.1] - 2026-03-09 + +### Fixed +- Improved diagram formatting in README.md +- Updated .gitignore to include Cargo.lock +- Removed unused CLI options +- Updated repository and homepage URLs + +### Added +- C to WebAssembly example with Rust transpilation ## [0.1.0] - 2026-02-16 @@ -72,7 +98,9 @@ See [docs/FUTURE.md](docs/FUTURE.md) for planned features. ## Version History +- **0.1.1** (2026-03-09) — C integration example and URL updates - **0.1.0** (2026-02-16) — Initial release with safe backend, basic transpilation, and import/export support -[Unreleased]: https://github.com/YOUR_ORG/herkos/compare/v0.1.0...HEAD +[Unreleased]: https://github.com/YOUR_ORG/herkos/compare/v0.1.1...HEAD +[0.1.1]: https://github.com/YOUR_ORG/herkos/compare/v0.1.0...v0.1.1 [0.1.0]: https://github.com/YOUR_ORG/herkos/releases/tag/v0.1.0 diff --git a/Cargo.lock b/Cargo.lock index ac1ca02..e540cae 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -354,7 +354,7 @@ checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" [[package]] name = "herkos" -version = "0.1.1" +version = "0.2.0" dependencies = [ "anyhow", "clap", @@ -364,7 +364,7 @@ dependencies = [ [[package]] name = "herkos-core" -version = "0.1.1" +version = "0.2.0" dependencies = [ "anyhow", "heck", @@ -374,14 +374,14 @@ dependencies = [ [[package]] name = "herkos-runtime" -version = "0.1.1" +version = "0.2.0" dependencies = [ "kani-verifier", ] 
[[package]] name = "herkos-tests" -version = "0.1.0" +version = "0.2.0" dependencies = [ "anyhow", "criterion", diff --git a/Cargo.toml b/Cargo.toml index 40baed4..1b67e0c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -3,6 +3,7 @@ resolver = "2" members = ["crates/herkos-runtime", "crates/herkos-core", "crates/herkos", "crates/herkos-tests"] exclude = [ + "examples/c-fft", "examples/c-to-wasm-to-rust", "examples/inter-module-lending", "examples/herkos-bootstrap", diff --git a/crates/herkos-core/Cargo.toml b/crates/herkos-core/Cargo.toml index 2a71d3e..f191b1c 100644 --- a/crates/herkos-core/Cargo.toml +++ b/crates/herkos-core/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "herkos-core" -version = "0.1.1" +version = "0.2.0" edition = "2021" description = "Compile-Time Memory Isolation via WebAssembly and Rust Transpilation — core library" license = "Apache-2.0" diff --git a/crates/herkos-runtime/Cargo.toml b/crates/herkos-runtime/Cargo.toml index 67a2912..3a04460 100644 --- a/crates/herkos-runtime/Cargo.toml +++ b/crates/herkos-runtime/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "herkos-runtime" -version = "0.1.1" +version = "0.2.0" edition = "2021" description = "Runtime library for herkos transpiled output — IsolatedMemory, WasmTrap, capability traits" license = "Apache-2.0" diff --git a/crates/herkos-tests/Cargo.toml b/crates/herkos-tests/Cargo.toml index 6728e31..c4caa5b 100644 --- a/crates/herkos-tests/Cargo.toml +++ b/crates/herkos-tests/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "herkos-tests" -version = "0.1.0" +version = "0.2.0" edition = "2021" publish = false description = "End-to-end compilation tests for herkos transpiler" @@ -13,7 +13,7 @@ herkos-runtime = { path = "../herkos-runtime" } [build-dependencies] anyhow = { workspace = true } wat = { workspace = true } -herkos-core = { path = "../herkos-core" } +herkos-core = { version = "0.2.0", path = "../herkos-core" } [dev-dependencies] criterion = "0.8.2" diff --git a/crates/herkos/Cargo.toml 
b/crates/herkos/Cargo.toml index 6af2e93..54b2ed5 100644 --- a/crates/herkos/Cargo.toml +++ b/crates/herkos/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "herkos" -version = "0.1.1" +version = "0.2.0" edition = "2021" description = "Compile-Time Memory Isolation via WebAssembly and Rust Transpilation" license = "Apache-2.0" @@ -12,7 +12,7 @@ categories = ["wasm", "development-tools::build-utils"] readme = "../../README.md" [dependencies] -herkos-core = { path = "../herkos-core" } +herkos-core = { version = "0.2.0", path = "../herkos-core" } anyhow = { workspace = true } clap = { workspace = true } diff --git a/docs/REQUIREMENTS.md b/docs/REQUIREMENTS.md index a519987..2be4664 100644 --- a/docs/REQUIREMENTS.md +++ b/docs/REQUIREMENTS.md @@ -88,6 +88,23 @@ memory.grow shall not perform heap allocation. New pages shall be zero-initializ within pre-allocated storage. Returns previous page count on success, -1 on failure. ``` +```{req} Bulk Memory Operations +:id: REQ_MEM_BULK_OPS +:status: open +:tags: memory, bulk-operations, wasm-spec +The transpiler shall support WebAssembly bulk memory operations: memory.fill, +memory.init, and data.drop. All operations shall be bounds-checked. Out-of-bounds +operations shall trap with WasmTrap::OutOfBounds, never panic. +``` + +```{req} Data Segment Support +:id: REQ_MEM_DATA_SEGMENTS +:status: open +:tags: memory, data-segments +Passive data segments shall be stored as compile-time constants in the generated +output. memory.init shall copy from these constants into the module's linear memory. +``` + ### 4.2 Module Representation ```{req} Two Module Types @@ -199,6 +216,15 @@ shall be formatted (rustfmt), readable, and auditable. No panics, no unwinding only Result for error handling. 
``` +```{req} Version Information in Generated Code +:id: REQ_TRANS_VERSION_INFO +:status: open +:tags: transpilation, output, metadata +Generated code shall include version information: the herkos transpiler version +and the WebAssembly binary format version. This enables traceability and debugging +of transpiled modules. +``` + ```{req} Deterministic Code Generation :id: REQ_TRANS_DETERMINISTIC :status: open diff --git a/docs/SPECIFICATION.md b/docs/SPECIFICATION.md index fc13747..6698e7b 100644 --- a/docs/SPECIFICATION.md +++ b/docs/SPECIFICATION.md @@ -6,7 +6,7 @@ Where the requirements say *what* the system must do, this specification says *h For features that are planned but not yet implemented (verified/hybrid backends, temporal isolation, etc.), see [FUTURE.md](FUTURE.md). -**Document Status**: Draft — Version 0.2 — 2026-02-25 +**Document Status**: Draft — Version 0.2 — 2026-03-16 --- @@ -49,6 +49,12 @@ herkos input.wasm --mode safe --output output.rs | `--output` | Output Rust file path | No | | `--max-pages` | Maximum memory pages when module declares no maximum | No | +**Environment variables:** + +| Variable | Values | Default | Effect | +|----------|--------|---------|--------| +| `HERKOS_OPTIMIZE` | `1` or any other value | Unset (disabled) | When `HERKOS_OPTIMIZE=1`, enables IR optimization passes (currently dead block elimination). Set during transpilation, affects generated code size and performance. | + > **Current limitations**: Only the `safe` backend is implemented. The `--mode` flag accepts `safe`, `hybrid`, and `verified` but all behave identically. `--max-pages` has no effect. See [FUTURE.md](FUTURE.md) for the verified and hybrid backend plans. 
### 1.3 Understanding the Output @@ -500,27 +506,27 @@ let result = lib.call_export_transform(&mut app.memory, ptr, len)?; ### 3.1 Component Overview ``` -┌──────────────────────────────────────────────────────────────────┐ -│ herkos workspace │ -│ │ +┌─────────────────────────────────────────────────────────────────┐ +│ herkos workspace │ +│ │ │ ┌─────────────────┐ ┌──────────────────┐ ┌────────────────┐ │ │ │ herkos (CLI) │ │ herkos-runtime │ │ herkos-tests │ │ │ │ ┌───────────┐ │ │ #![no_std] │ │ │ │ │ │ │ Parser │ │ │ │ │ WAT/C/Rust │ │ -│ │ │(wasmparser)│ │ │ IsolatedMemory │ │ sources │ │ +│ │ │(wasmparser)│ │ │ IsolatedMemory │ │ sources │ │ │ │ ├───────────┤ │ │ Table, FuncRef │ │ → .wasm │ │ │ │ │ IR Builder│ │ │ Module types │ │ → transpile │ │ -│ │ │ (SSA-form)│ │ │ WasmTrap │ │ → test │ │ -│ │ ├───────────┤ │ │ Wasm ops │ │ │ │ +│ │ │ (SSA-form)│ │ │ WasmTrap │ │ → test │ │ +│ │ ├───────────┤ │ │ Wasm ops │ │ │ │ │ │ │ Optimizer │ │ │ │ │ benches/ │ │ │ │ ├───────────┤ │ └──────────────────┘ └────────────────┘ │ -│ │ │ Backend │ │ ▲ ▲ │ -│ │ │ (safe) │ │ │ depends on │ depends │ +│ │ │ Backend │ │ │ ▲ │ +│ │ │ (safe) │ │ │ depends on │ depends │ │ │ ├───────────┤ │ │ │ on both │ -│ │ │ Codegen │ │ └─────────────────────┘ │ -│ │ └───────────┘ │ │ -│ └─────────────────┘ │ -└──────────────────────────────────────────────────────────────────┘ +│ │ │ Codegen │ │ └─────────────────────┘ │ +│ │ └───────────┘ │ │ +│ └─────────────────┘ │ +└─────────────────────────────────────────────────────────────────┘ ``` ### 3.2 Runtime (`herkos-runtime`) @@ -809,6 +815,61 @@ Type 2: (i32) → i32 → canonical = 2 (new signature) The transpiler builds a canonical type index mapping at transpile time. Both `FuncRef.type_index` and the type check use canonical indices. At runtime, the check is a simple integer comparison. 
+### 4.6 Bulk Memory Operations + +> Implementation: [crates/herkos-runtime/src/memory.rs](../crates/herkos-runtime/src/memory.rs) lines 149–174 + +The WebAssembly bulk memory operations allow efficient copying and initialization of memory regions without scalar load/store loops. + +#### 4.6.1 `memory.fill` + +Fills a region of memory with a byte value. Per Wasm spec, only the low 8 bits of the value are used. + +```rust +impl IsolatedMemory { + pub fn fill(&mut self, dst: usize, val: u8, len: usize) -> WasmResult<()>; +} +``` + +Generated code: +```rust +// Wasm: memory.fill $dst $val $len +memory.fill(dst as usize, val as u8, len as usize)?; +``` + +Traps `OutOfBounds` if `[dst, dst + len)` exceeds active memory. Length zero is a no-op. + +#### 4.6.2 `memory.init` + +Copies data from a passive data segment into memory at runtime. Each data segment is stored as a constant `&'static [u8]` in the generated code. + +```rust +impl IsolatedMemory { + pub fn init_data_partial(&mut self, dst: usize, data: &[u8], src_offset: usize, len: usize) -> WasmResult<()>; +} +``` + +Generated code: +```rust +// Wasm: memory.init $data_segment $dst $src_offset $len +memory.init_data_partial(dst as usize, &DATA_SEGMENT_0, src_offset as usize, len as usize)?; +``` + +Traps `OutOfBounds` if either region (source or destination) exceeds bounds: +- Source: `[src_offset, src_offset + len)` must be within the data segment +- Destination: `[dst, dst + len)` must be within active memory + +#### 4.6.3 `data.drop` + +Marks a data segment as dropped (per Wasm spec). In the safe backend this is a no-op because data segments are stored as constant references and cannot actually be deallocated. + +```rust +// Wasm: data.drop $segment +// (no-op in safe backend — const slices persist) +``` + +In future verified and hybrid backends, `data.drop` may enable optimizations: proving that dropped segments are never accessed again could allow proving certain addresses as never-in-bounds. + --- ## 5. 
Integration @@ -829,7 +890,53 @@ let result = module.process_data(&mut host, ptr, len)?; Full type safety, zero `unsafe`, zero-cost dispatch via monomorphization. -### 5.2 C-Compatible ABI (Optional) +### 5.2 The Env Context Pattern + +> Implementation: [crates/herkos-core/src/codegen/env.rs](../crates/herkos-core/src/codegen/env.rs) + +Generated modules use a unified **Env** context struct that bundles the host (generic parameter `H`) and mutable globals, simplifying parameter threading throughout function calls. + +```rust +// Generated by transpiler +pub struct Env<'a, H: ModuleHostTrait + ?Sized> { + pub host: &'a mut H, + pub globals: &'a mut Globals, +} + +// Every function that needs imports or mutable state receives Env +fn process( + memory: &mut IsolatedMemory, + env: &mut Env, + input: i32, +) -> WasmResult { + // Call imported function via trait + let result = env.host.some_import(input)?; + // Read/write mutable global + env.globals.my_global += 1; + Ok(result) +} +``` + +**Design rationale:** +- **Unified state**: Avoids threading `host`, `globals`, and other mutable state as separate parameters +- **Type safety**: All imports must be present in the host's trait implementation — checked at compile time +- **Zero overhead**: The Env struct is a thin wrapper; LLVM inlines and optimizes away the indirection +- **Extensibility**: Adding new imports or globals requires only modifying the trait, not all function signatures + +**Generated trait:** + +```rust +pub trait ModuleHostTrait { + // One method per function import + fn imported_function(&mut self, arg: i32) -> WasmResult; + + // Getter/setter methods for each imported global + fn get_imported_global(&self) -> i32; + fn set_imported_global(&mut self, value: i32); +} +``` + +### 5.3 C-Compatible ABI (Optional) For integration with non-Rust systems, an optional `extern "C"` wrapper erases generics: @@ -849,7 +956,7 @@ pub extern "C" fn module_call( The C ABI wrapper uses `unsafe` and raw pointers. 
Capability enforcement still applies inside — the wrapper calls through trait-bounded functions. This is an escape hatch, not the default. -### 5.3 Native Rust Integration +### 5.4 Native Rust Integration Native Rust code integrates by implementing import traits directly: diff --git a/examples/c-fft/.gitignore b/examples/c-fft/.gitignore new file mode 100644 index 0000000..6d911d6 --- /dev/null +++ b/examples/c-fft/.gitignore @@ -0,0 +1,2 @@ +src/fft_wasm.rs +fft.wasm \ No newline at end of file diff --git a/examples/c-fft/Cargo.lock b/examples/c-fft/Cargo.lock new file mode 100644 index 0000000..1583a7f --- /dev/null +++ b/examples/c-fft/Cargo.lock @@ -0,0 +1,14 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 4 + +[[package]] +name = "c-fft" +version = "0.1.0" +dependencies = [ + "herkos-runtime", +] + +[[package]] +name = "herkos-runtime" +version = "0.2.0" diff --git a/examples/c-fft/Cargo.toml b/examples/c-fft/Cargo.toml new file mode 100644 index 0000000..490b177 --- /dev/null +++ b/examples/c-fft/Cargo.toml @@ -0,0 +1,8 @@ +[package] +name = "c-fft" +version = "0.1.0" +edition = "2021" +publish = false + +[dependencies] +herkos-runtime = { path = "../../crates/herkos-runtime" } diff --git a/examples/c-fft/README.md b/examples/c-fft/README.md new file mode 100644 index 0000000..df219fb --- /dev/null +++ b/examples/c-fft/README.md @@ -0,0 +1,114 @@ +# C-FFT → WebAssembly → Rust Example + +A 4096-point radix-2 Cooley-Tukey FFT in C, compiled to WebAssembly, and transpiled to memory-safe Rust by herkos. The Rust host writes audio signals into the module's isolated memory, calls the FFT, and reads back the spectrum. + +``` +fft.c ──clang──▶ fft.wasm ──herkos──▶ src/fft_wasm.rs + │ + src/main.rs drives it + │ + cargo run +``` + +The generated Rust module contains **no unsafe code**. Memory isolation is enforced through the type system at compile time. 
+ +## Prerequisites + +- **clang** with wasm32 target support (`apt-get install clang lld`) +- **Rust** toolchain (`cargo`) +- **herkos** CLI (already available in the repo) + +## Usage + +```bash +./run.sh # compile C → Wasm → Rust, then build and run +./run.sh --clean # remove generated artifacts +``` + +Or directly from this directory: +```bash +cargo run --release +``` + +## Key design points + +- **No libm**: twiddle factors are computed via Taylor series at the base angle (δ = 2π/4096) then propagated by complex recurrence. `sqrtf` compiles to the Wasm `f32.sqrt` instruction — no library import. + +- **Static buffers only**: all arrays are `static float[...]` globals (BSS, zero-initialized), no malloc, no stack VLAs. + +- **Memory budget**: signal (32 KB) + twiddle real (8 KB) + twiddle imag (8 KB) + magnitude (8 KB) = 56 KB BSS, fits comfortably inside the 64 KB non-stack region of 2 Wasm pages (128 KB). + +- **Host↔module interface**: the host uses `fft_get_input_ptr()` and `fft_get_output_ptr()` to discover buffer addresses rather than hardcoding linker offsets. This demonstrates capability-based access to isolated memory. + +- **Clang flags**: `-Wl,--initial-memory=131072 -Wl,--max-memory=131072` (2 pages), `-Wl,-zstack-size=65536` (64KB stack). + +## Performance + +Each 4096-point FFT computes in ~420 microseconds on a modern CPU. This includes: +- Bit-reversal permutation +- 12 stages of butterfly operations (stage = 2¹ to 2¹²) +- Magnitude computation (via `__builtin_sqrtf`) + +The overhead vs. native Wasm execution is negligible (monomorphization and inlining eliminate the `IsolatedMemory` abstraction cost). + +## Algorithm details + +**Cooley-Tukey radix-2 DIT (Decimation In Time):** + +1. **Bit-reversal**: permute input to separate even/odd indices +2. **Twiddle table init**: compute W_N^k = e^(-2πik/N) using Taylor series + recurrence +3. **Butterfly passes**: 12 stages, each stage has 2^s butterflies with stride 2^(12-s) +4. 
**Magnitude**: |X_k| = √(re² + im²) + +**Why no `sin()`/`cos()` from libm?** + +For tiny angle δ = 2π/4096 ≈ 0.00153 rad, Taylor series (3 terms each) are accurate to ~1e-9: +``` +sin(δ) ≈ δ − δ³/6 + δ⁵/120 +cos(δ) ≈ 1 − δ²/2 + δ⁴/24 +``` + +Then all 2048 twiddle factors are generated via complex rotation recurrence: +``` +W[k+1] = W[k] × W[1] (complex multiplication) +``` + +This avoids any libm import — the whole FFT is freestanding C with only `__builtin_sqrtf()` (which compiles to Wasm's native `f32.sqrt` instruction). + +## Integration with herkos + +1. **C source**: fft.c (self-contained, no external dependencies) +2. **Wasm binary**: fft.wasm (1.1 KB, extremely compact) +3. **Generated Rust**: src/fft_wasm.rs (bounds-checked memory API, no unsafe) +4. **Host integration**: src/main.rs accesses module memory via `module.0.memory.store_f32()`/`load_f32()` + +The generated `WasmModule` wraps `Module` and exposes: +- Constructor: `new() -> WasmResult<WasmModule>` +- Exported functions: `fft_init()`, `fft_compute()`, `fft_get_input_ptr()`, `fft_get_output_ptr()` +- Memory access: `module.0.memory.store_f32(offset, value)`, `module.0.memory.load_f32(offset)` + +All memory operations return `WasmResult` — traps (out-of-bounds, overflow) propagate as errors, never panics. + +## Example output + +``` +=== herkos C-FFT Example === + 4096-point radix-2 DIT FFT, C → Wasm → memory-safe Rust + Input buffer: Wasm byte offset 0x00400 + Output buffer: Wasm byte offset 0x08400 + +--- Test 1: Single tone at 1000 Hz --- + Spectrum (1000 Hz tone) — 419.50µs + 991 Hz (bin 92): #### 272.3 + 1001 Hz (bin 93): ############################## 2000.3 + 1012 Hz (bin 94): ### 215.3 + Peak bin: 93 → 1001.3 Hz + +--- Test 2: Two tones (440 Hz + 2000 Hz) --- + ... + Top bins: + bin 41 → 441.4 Hz mag=1591.6 + bin 186 → 2002.6 Hz mag=930.7 +``` + +Reported peaks are bin-center frequencies, so each lands within half a bin (about ±5.4 Hz) of the true tone; the bin spacing is 44100/4096 ≈ 10.77 Hz. 
diff --git a/examples/c-fft/fft.c b/examples/c-fft/fft.c new file mode 100644 index 0000000..e688a04 --- /dev/null +++ b/examples/c-fft/fft.c @@ -0,0 +1,158 @@ +// fft.c — 4096-point radix-2 Cooley-Tukey DIT FFT +// +// Freestanding C for wasm32-unknown-unknown (no libc, no libm). +// +// Twiddle factors are computed at runtime using: +// - Taylor series for sin/cos at the tiny base angle δ = 2π/N +// - A two-term recurrence to fill all N/2 entries from that base +// +// Magnitudes are computed via __builtin_sqrtf(), which compiles to the +// Wasm f32.sqrt instruction — no libm import needed. + +#define N 4096 +#define N_HALF 2048 +#define LOG2_N 12 +#define M_PI 3.14159265358979323846f + +// ── Static global buffers (BSS) ──────────────────────────────────────────── +// Combined size: 32768 + 8192 + 8192 + 8192 = 57344 bytes — fits in 64KB + +static float g_signal[N * 2]; // interleaved complex input/output +static float g_twiddle_re[N_HALF]; // W_N^k real parts: cos(-2πk/N) +static float g_twiddle_im[N_HALF]; // W_N^k imag parts: -sin(2πk/N) +static float g_magnitude[N_HALF]; // power spectrum output + +// ── Bit-reversal ──────────────────────────────────────────────────────────── + +static int bit_rev(int x) { + // Reverse LOG2_N=12 bits + int r = 0; + for (int i = 0; i < LOG2_N; i++) { + r = (r << 1) | (x & 1); + x >>= 1; + } + return r; +} + +static void apply_bit_reversal(void) { + for (int i = 0; i < N; i++) { + int j = bit_rev(i); + if (j > i) { + float tmp_re = g_signal[2*i]; + float tmp_im = g_signal[2*i+1]; + g_signal[2*i] = g_signal[2*j]; + g_signal[2*i+1] = g_signal[2*j+1]; + g_signal[2*j] = tmp_re; + g_signal[2*j+1] = tmp_im; + } + } +} + +// ── Twiddle computation ───────────────────────────────────────────────────── + +// Taylor series sin/cos for small angle x (|x| < 0.002 for N=4096) +// sin(x) ≈ x - x³/6 + x⁵/120 +// cos(x) ≈ 1 - x²/2 + x⁴/24 + +static float taylor_sin(float x) { + float x2 = x * x; + float x3 = x2 * x; + float x5 = x3 * x2; + 
return x - x3 * 0.16666667f + x5 * 0.00833333f; +} + +static float taylor_cos(float x) { + float x2 = x * x; + float x4 = x2 * x2; + return 1.0f - x2 * 0.5f + x4 * 0.04166667f; +} + +// Fill twiddle table using complex recurrence: +// (cos((k+1)δ), -sin((k+1)δ)) = (cos(kδ), -sin(kδ)) × (cos(δ), -sin(δ)) +// which is a rotation by -δ each step. + +static void compute_twiddles(void) { + float delta = 2.0f * M_PI / (float)N; // δ = 2π/4096 ≈ 0.001534 + + float cd = taylor_cos(delta); // cos(δ) + float sd = taylor_sin(delta); // sin(δ) + + // Seed: W_N^0 = (1, 0) + float cr = 1.0f; + float ci = 0.0f; // ci tracks -sin(kδ), starts at 0 + + g_twiddle_re[0] = 1.0f; + g_twiddle_im[0] = 0.0f; + + for (int k = 1; k < N_HALF; k++) { + // Rotation: new_cr = cr*cd - ci*(-sd) = cr*cd + ci*sd + // new_ci = ci*cd - cr*sd + float new_cr = cr*cd + ci*sd; + float new_ci = ci*cd - cr*sd; + cr = new_cr; + ci = new_ci; + g_twiddle_re[k] = cr; + g_twiddle_im[k] = ci; + } +} + +// ── DIT butterfly pass ────────────────────────────────────────────────────── + +static void fft_dit(void) { + for (int len = 2; len <= N; len <<= 1) { + int half = len >> 1; + int tw_step = N_HALF / half; // stride into twiddle table + for (int i = 0; i < N; i += len) { + for (int j = 0; j < half; j++) { + int tw_idx = j * tw_step; + float wr = g_twiddle_re[tw_idx]; + float wi = g_twiddle_im[tw_idx]; + int u = i + j; + int v = u + half; + float ur = g_signal[2*u]; + float ui_val = g_signal[2*u+1]; + float vr = g_signal[2*v]; + float vi_val = g_signal[2*v+1]; + float tr = wr*vr - wi*vi_val; + float ti = wr*vi_val + wi*vr; + g_signal[2*u] = ur + tr; + g_signal[2*u+1] = ui_val + ti; + g_signal[2*v] = ur - tr; + g_signal[2*v+1] = ui_val - ti; + } + } + } +} + +// ── Magnitude ─────────────────────────────────────────────────────────────── + +static void compute_magnitude(void) { + for (int k = 0; k < N_HALF; k++) { + float re = g_signal[2*k]; + float im = g_signal[2*k+1]; + float power = re*re + im*im; + 
g_magnitude[k] = __builtin_sqrtf(power); + } +} + +// ── Exported API ───────────────────────────────────────────────────────────── + +void fft_init(int n) { + (void)n; // reserved for future variable-N support + compute_twiddles(); +} + +int fft_get_input_ptr(void) { + return (int)(long)g_signal; +} + +void fft_compute(int n) { + (void)n; + apply_bit_reversal(); + fft_dit(); + compute_magnitude(); +} + +int fft_get_output_ptr(void) { + return (int)(long)g_magnitude; +} diff --git a/examples/c-fft/run.sh b/examples/c-fft/run.sh new file mode 100755 index 0000000..e86f16d --- /dev/null +++ b/examples/c-fft/run.sh @@ -0,0 +1,69 @@ +#!/usr/bin/env bash +# +# C → WebAssembly → Rust FFT example pipeline +# +# Prerequisites: +# - clang with wasm32 target support (apt-get install clang lld) +# - Rust toolchain (cargo) +# - herkos CLI (cargo install --path ../../crates/herkos) +# +# Usage: +# ./run.sh # build and run +# ./run.sh --clean # remove generated artifacts + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" +cd "$SCRIPT_DIR" + +WASM_FILE="fft.wasm" +GENERATED_RS="src/fft_wasm.rs" + +if [[ "${1:-}" == "--clean" ]]; then + rm -f "$WASM_FILE" "$GENERATED_RS" + cargo clean 2>/dev/null || true + echo "Cleaned generated artifacts." + exit 0 +fi + +# Step 1: Compile C to WebAssembly +echo "==> Compiling fft.c to WebAssembly..." + +CLANG="" +if command -v clang-19 &>/dev/null; then + CLANG="clang-19" +elif command -v clang &>/dev/null; then + CLANG="clang" +else + echo "Error: clang not found. Install with: apt-get install clang lld" >&2 + exit 1 +fi + +$CLANG --target=wasm32-unknown-unknown -nostdlib -Oz \ + -Wl,--no-entry \ + -Wl,--export-all \ + -Wl,-zstack-size=65536 \ + -Wl,--initial-memory=131072 \ + -Wl,--max-memory=131072 \ + fft.c -o "$WASM_FILE" + +echo " Created $WASM_FILE ($(wc -c < "$WASM_FILE") bytes)" + +# Step 2: Transpile WebAssembly to Rust using herkos +echo "==> Transpiling WebAssembly to Rust..." + +REPO_ROOT="$(cd "$SCRIPT_DIR/../.." 
&& pwd)" + +if command -v herkos &>/dev/null; then + herkos "$WASM_FILE" --output "$GENERATED_RS" +else + cargo run --manifest-path "$REPO_ROOT/Cargo.toml" -p herkos -- \ + "$SCRIPT_DIR/$WASM_FILE" --output "$SCRIPT_DIR/$GENERATED_RS" +fi + +echo " Created $GENERATED_RS" + +# Step 3: Build and run the Rust project +echo "==> Building and running Rust project..." +echo "" +cargo run --release diff --git a/examples/c-fft/src/main.rs b/examples/c-fft/src/main.rs new file mode 100644 index 0000000..462daa4 --- /dev/null +++ b/examples/c-fft/src/main.rs @@ -0,0 +1,174 @@ +// C → WebAssembly → Rust FFT example +// +// This program drives a 4096-point radix-2 FFT that was originally written in C, +// compiled to WebAssembly, and then transpiled to memory-safe Rust by herkos. +// +// The generated module (src/fft_wasm.rs) contains no unsafe code. +// Memory access is bounds-checked; isolation is enforced by the Rust type system. +// +// Run `./run.sh` to regenerate fft_wasm.rs and execute this program. 
+ +#[allow(dead_code)] +mod fft_wasm; + +use std::time::Instant; + +const N: usize = 4096; +const N_HALF: usize = N / 2; +const SAMPLE_RATE: f32 = 44100.0; + +fn main() { + let mut module = fft_wasm::new().expect("FFT module instantiation failed"); + + // Initialize twiddle table (called once) + module.fft_init(N as i32).expect("fft_init trapped"); + + // Get the pointer (Wasm byte offset) to the input buffer + let input_ptr = module.fft_get_input_ptr().expect("fft_get_input_ptr trapped") as usize; + let output_ptr = module.fft_get_output_ptr().expect("fft_get_output_ptr trapped") as usize; + + println!("=== herkos C-FFT Example ==="); + println!(" 4096-point radix-2 DIT FFT, C → Wasm → memory-safe Rust"); + println!(" Input buffer: Wasm byte offset 0x{:05X}", input_ptr); + println!(" Output buffer: Wasm byte offset 0x{:05X}", output_ptr); + println!(); + + // ── Test 1: Single tone at 1 kHz ───────────────────────────────────────── + println!("--- Test 1: Single tone at 1000 Hz ---"); + write_tone(&mut module, input_ptr, &[(1000.0, 1.0)]); + let elapsed = run_fft(&mut module); + let magnitudes = read_magnitudes(&module, output_ptr); + print_spectrum(&magnitudes, "1000 Hz tone", elapsed); + let peak = find_peak_bin(&magnitudes); + println!( + " Peak bin: {} → {:.1} Hz", + peak, + bin_to_hz(peak) + ); + println!(); + + // ── Test 2: Two tones at 440 Hz and 2000 Hz ─────────────────────────────── + println!("--- Test 2: Two tones (440 Hz + 2000 Hz) ---"); + write_tone(&mut module, input_ptr, &[(440.0, 0.8), (2000.0, 0.5)]); + let elapsed = run_fft(&mut module); + let magnitudes = read_magnitudes(&module, output_ptr); + print_spectrum(&magnitudes, "440 Hz + 2000 Hz", elapsed); + let peaks = find_top_bins(&magnitudes, 3); + println!(" Top bins:"); + for (bin, mag) in &peaks { + println!(" bin {:4} → {:7.1} Hz mag={:.1}", bin, bin_to_hz(*bin), mag); + } + println!(); + + // ── Test 3: Tone + harmonic ────────────────────────────────────────────── + println!("--- Test 3: 
3520 Hz tone + 880 Hz harmonic ---"); + write_tone(&mut module, input_ptr, &[(3520.0, 1.0), (880.0, 0.3)]); + let elapsed = run_fft(&mut module); + let magnitudes = read_magnitudes(&module, output_ptr); + print_spectrum(&magnitudes, "3520 Hz + 880 Hz", elapsed); + println!(); +} + +/// Write a sum of sinusoids into the FFT input buffer. +/// `freqs`: slice of (frequency_hz, amplitude) pairs. +/// Input is interleaved complex: [re_0, im_0, re_1, im_1, ...], imaginary parts = 0. +fn write_tone(module: &mut fft_wasm::WasmModule, input_ptr: usize, freqs: &[(f32, f32)]) { + use std::f32::consts::PI; + for i in 0..N { + let t = i as f32 / SAMPLE_RATE; + let mut sample = 0.0f32; + for &(freq, amp) in freqs { + sample += amp * (2.0 * PI * freq * t).sin(); + } + let re_offset = input_ptr + i * 8; // 8 bytes per complex (re+im f32) + let im_offset = input_ptr + i * 8 + 4; + module.0.memory.store_f32(re_offset, sample).expect("store real"); + module.0.memory.store_f32(im_offset, 0.0f32).expect("store imag"); + } +} + +/// Run the FFT and return elapsed wall-clock time. +fn run_fft(module: &mut fft_wasm::WasmModule) -> std::time::Duration { + let start = Instant::now(); + module.fft_compute(N as i32).expect("fft_compute trapped"); + start.elapsed() +} + +/// Read N/2 magnitude values from the output buffer. +fn read_magnitudes(module: &fft_wasm::WasmModule, output_ptr: usize) -> Vec { + (0..N_HALF) + .map(|k| { + module + .0 + .memory + .load_f32(output_ptr + k * 4) + .expect("load magnitude") + }) + .collect() +} + +/// Find the bin with the highest magnitude. +fn find_peak_bin(magnitudes: &[f32]) -> usize { + magnitudes + .iter() + .enumerate() + .skip(1) // skip DC + .max_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap()) + .map(|(i, _)| i) + .unwrap_or(0) +} + +/// Find the top `n` bins by magnitude, sorted descending. 
+fn find_top_bins(magnitudes: &[f32], n: usize) -> Vec<(usize, f32)> { + let mut indexed: Vec<(usize, f32)> = magnitudes + .iter() + .enumerate() + .skip(1) + .map(|(i, &m)| (i, m)) + .collect(); + indexed.sort_by(|(_, a), (_, b)| b.partial_cmp(a).unwrap()); + indexed.truncate(n); + indexed +} + +/// Convert bin index to frequency in Hz. +fn bin_to_hz(bin: usize) -> f32 { + bin as f32 * SAMPLE_RATE / N as f32 +} + +/// Print a compact ASCII spectrum showing the top bins. +fn print_spectrum(magnitudes: &[f32], label: &str, elapsed: std::time::Duration) { + println!(" Spectrum ({label}) — {:.2?}", elapsed); + + // Find global max for normalization (skip DC bin 0) + let max_mag = magnitudes[1..].iter().cloned().fold(0.0f32, f32::max); + if max_mag <= 0.0 { + println!(" (empty spectrum)"); + return; + } + + // Collect top 12 bins + let mut top: Vec<(usize, f32)> = magnitudes + .iter() + .enumerate() + .skip(1) + .map(|(i, &m)| (i, m)) + .filter(|(_, m)| *m > max_mag * 0.05) // threshold at 5% of peak + .collect(); + top.sort_by(|(_, a), (_, b)| b.partial_cmp(a).unwrap()); + top.truncate(12); + top.sort_by_key(|(bin, _)| *bin); // re-sort by frequency for display + + const BAR_WIDTH: usize = 30; + for (bin, mag) in &top { + let bar_len = ((mag / max_mag) * BAR_WIDTH as f32) as usize; + let bar: String = "#".repeat(bar_len); + println!( + " {:5.0} Hz (bin {:4}): {:30} {:.1}", + bin_to_hz(*bin), + bin, + bar, + mag + ); + } +}