287 changes: 248 additions & 39 deletions packages/core/README.md
@@ -22,21 +22,21 @@

---

0xBitNet runs BitNet b1.58 ternary LLMs on WebGPU. Custom WGSL compute kernels handle the ternary matrix operations, with bindings for TypeScript, Rust, and Python. Works in browsers, Node.js, and native apps.

## Highlights

- **Pure WebGPU** — Custom WGSL kernels for ternary matrix operations (no WASM, no server)
- **Multi-language** — TypeScript (`0xbitnet`), Rust (`oxbitnet`), Python (`oxbitnet`), Swift (`OxBitNet`), Java/Android (`oxbitnet-java`), C (`oxbitnet-ffi`)
- **Cross-platform** — Browsers, Node.js, Deno, native apps via wgpu
- **Chat templates** — Built-in LLaMA 3 chat message formatting
- **Automatic caching** — IndexedDB (browser) / disk cache (native)
- **Streaming** — Token-by-token output via async generators / streams / callbacks
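The core trick behind these kernels can be sketched on the CPU: BitNet b1.58 weights take only the values −1, 0, and +1, so every dot product reduces to additions and subtractions — no multiplications. A simplified TypeScript illustration of what the WGSL kernels do in parallel on the GPU (not the actual kernel code):

```typescript
// Ternary matrix-vector product: weights are restricted to {-1, 0, +1},
// so each dot product reduces to adds and subtracts.
type Ternary = -1 | 0 | 1;

function ternaryMatVec(weights: Ternary[][], x: number[]): number[] {
  return weights.map((row) =>
    row.reduce((sum, w, i) => {
      if (w === 1) return sum + x[i]; // add
      if (w === -1) return sum - x[i]; // subtract
      return sum; // zero weight: skip entirely
    }, 0)
  );
}

// [1,-1,0]·[2,3,4] = 2 - 3 = -1;  [0,1,1]·[2,3,4] = 3 + 4 = 7
console.log(ternaryMatVec([[1, -1, 0], [0, 1, 1]], [2, 3, 4])); // → [-1, 7]
```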

## Quick Start

### TypeScript / JavaScript

```bash
npm install 0xbitnet
```
@@ -56,7 +56,102 @@ for await (const token of model.generate("The meaning of life is")) {
model.dispose();
```

### Rust

```bash
cargo add oxbitnet
```

```rust
use oxbitnet::BitNet;
use futures::StreamExt;

let mut model = BitNet::load("model.gguf", Default::default()).await?;

let mut stream = model.generate("Hello!", Default::default());
while let Some(token) = stream.next().await {
print!("{token}");
}

model.dispose();
```

### Python

```bash
pip install oxbitnet
```

```python
from oxbitnet import BitNet

model = BitNet.load_sync("model.gguf")

model.chat(
[("system", "You are a helpful assistant."), ("user", "Hello!")],
on_token=lambda t: print(t, end="", flush=True),
temperature=0.7,
)

model.dispose()
```

### Swift

```swift
import OxBitNet

let model = try await BitNet.load("model.gguf")

for try await token in model.chat([.user("Hello!")], options: .init(temperature: 0.7)) {
print(token, terminator: "")
}

model.dispose()
```

### Java

```java
import io.github.m96chan.oxbitnet.*;
import java.util.List;

try (BitNet model = BitNet.loadSync("model.gguf")) {
model.chat(
List.of(new ChatMessage("user", "Hello!")),
token -> {
System.out.print(token);
return true;
},
new GenerateOptions().temperature(0.7f)
);
}
```

### C / FFI

```c
#include "oxbitnet.h"

static int32_t on_token(const char *token, uintptr_t len, void *userdata) {
fwrite(token, 1, len, stdout);
return 0; /* 0 = continue, non-zero = stop */
}

int main(void) {
OxBitNet *model = oxbitnet_load("model.gguf", NULL);

OxBitNetChatMessage messages[] = {
{ .role = "user", .content = "Hello!" },
};
OxBitNetGenerateOptions opts = oxbitnet_default_generate_options();

oxbitnet_chat(model, messages, 1, &opts, on_token, NULL);
oxbitnet_free(model);
}
```

### Chat Messages (TypeScript)

```typescript
const messages = [
@@ -69,19 +164,6 @@ for await (const token of model.generate(messages, { maxTokens: 128, temperature
}
```
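Under the hood, chat-message input is run through the built-in LLaMA 3 chat template before tokenization. Roughly, the formatting looks like this (an illustrative sketch — the library's templating does this for you):

```typescript
interface ChatMessage {
  role: "system" | "user" | "assistant";
  content: string;
}

// Sketch of LLaMA 3-style chat formatting: each message is wrapped in
// header tokens, and the prompt ends with an open assistant header so
// the model continues speaking as the assistant.
function formatLlama3(messages: ChatMessage[]): string {
  let prompt = "<|begin_of_text|>";
  for (const m of messages) {
    prompt += `<|start_header_id|>${m.role}<|end_header_id|>\n\n${m.content}<|eot_id|>`;
  }
  return prompt + "<|start_header_id|>assistant<|end_header_id|>\n\n";
}
```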

## Supported Models

| Model | GGUF | Parameters | VRAM |
@@ -90,9 +172,20 @@ await deleteCachedModel("https://example.com/model.gguf");

More models are planned — see [#1](https://github.com/m96-chan/0xBitNet/issues/1) and [Model Compatibility](docs/model-compatibility.md) for GGUF requirements.

## Install

| Language | Package | Install |
|----------|---------|---------|
| TypeScript / JS | [`0xbitnet`](https://www.npmjs.com/package/0xbitnet) | `npm install 0xbitnet` |
| Rust | [`oxbitnet`](https://crates.io/crates/oxbitnet) | `cargo add oxbitnet` |
| Python | [`oxbitnet`](https://pypi.org/project/oxbitnet/) | `pip install oxbitnet` |
| Swift / iOS | `OxBitNet` | Swift Package Manager (see [oxbitnet-swift](packages/rust/crates/oxbitnet-swift/)) |
| Java / Android | `oxbitnet-java` | `cargo build -p oxbitnet-java --release` |
| C / FFI | `oxbitnet-ffi` | `cargo build -p oxbitnet-ffi --release` |

## API Overview

### TypeScript

| Method | Description |
|--------|-------------|
@@ -101,9 +194,53 @@ The main entry point is the `BitNet` class:
| `bitnet.diagnose(prompt?)` | Run GPU diagnostics on a forward pass |
| `bitnet.dispose()` | Release all GPU resources |

### Rust

| Method | Description |
|--------|-------------|
| `BitNet::load(source, options).await` | Load a GGUF model |
| `bitnet.generate(prompt, options)` | Stream tokens as `impl Stream<Item = String>` |
| `bitnet.generate_chat(messages, options)` | Chat with template formatting |
| `bitnet.dispose()` | Release all GPU resources |

### Python

| Method | Description |
|--------|-------------|
| `BitNet.load_sync(source)` | Load a GGUF model |
| `model.chat(messages, on_token)` | Chat with streaming callback |
| `model.generate(prompt, on_token)` | Generate with streaming callback |
| `model.generate_sync(prompt)` | Generate, return full string |
| `model.dispose()` | Release all GPU resources |

### Swift

| Method | Description |
|--------|-------------|
| `BitNet.load(source, options:)` | Load a GGUF model (async) |
| `BitNet.loadSync(source, options:)` | Load a GGUF model (blocking) |
| `model.generate(prompt, options:)` | Stream tokens as `AsyncThrowingStream<String, Error>` |
| `model.chat(messages, options:)` | Chat with streaming via `AsyncThrowingStream` |
| `model.dispose()` | Release all GPU resources (also called by `deinit`) |

### Java

| Method | Description |
|--------|-------------|
| `BitNet.loadSync(source, options?)` | Load a GGUF model |
| `model.chat(messages, callback, options?)` | Chat with streaming callback |
| `model.generate(prompt, callback, options?)` | Generate with streaming callback |
| `model.dispose()` / `model.close()` | Release all GPU resources (AutoCloseable) |

### C / FFI

| Function | Description |
|----------|-------------|
| `oxbitnet_load(source, options)` | Load a GGUF model, returns opaque handle |
| `oxbitnet_chat(model, messages, n, opts, cb, ud)` | Chat with streaming callback |
| `oxbitnet_generate(model, prompt, opts, cb, ud)` | Generate with streaming callback |
| `oxbitnet_free(model)` | Release all GPU resources |
| `oxbitnet_error_message()` | Get last error (thread-local) |

## Platform Support

@@ -114,9 +251,13 @@ Full details in the [API Reference](docs/api-reference.md).
- Firefox Nightly (behind flag)
- Safari 18+

**Native (Rust / Python):**
- Uses [wgpu](https://wgpu.rs/) — Vulkan, Metal, DX12 backends automatically
- No browser or WebGPU runtime needed

**Native (Node.js / Deno):**
- Deno (built-in WebGPU)
- Node.js with [`webgpu`](https://www.npmjs.com/package/webgpu) npm package (Dawn bindings) — see [Node.js CLI example](examples/node-cli/)
- Any runtime exposing the WebGPU API (e.g., wgpu-native, Electron)

A dedicated GPU with sufficient VRAM is required (see [Supported Models](#supported-models) for estimates).
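As a rough rule of thumb (an illustrative estimate, not the library's exact memory accounting): ternary weights can be packed at 2 bits each, so weight storage alone is about `params / 4` bytes — activations, embeddings, and the KV cache come on top of that.

```typescript
// Rough lower bound on weight memory for a ternary model:
// 2 bits per weight → 4 weights per byte (illustrative only).
function ternaryWeightGiB(paramCount: number): number {
  const bytes = paramCount / 4;
  return bytes / 2 ** 30;
}

console.log(ternaryWeightGiB(2e9).toFixed(2)); // ≈ 0.47 GiB of weights for a 2B model
```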
@@ -131,34 +272,102 @@ A WebGPU-powered chat application. Downloads the model on first visit, then runs

An offline-ready summarization widget. Provides LLM-powered TL;DR without any network dependency.

### [Node.js CLI](examples/node-cli/)

Run BitNet from the command line using Node.js and the [`webgpu`](https://www.npmjs.com/package/webgpu) npm package (Dawn bindings). Interactive chat with streaming output and tok/s metrics.

```bash
cd examples/node-cli
npm install && npm start
```

### Rust CLI

Interactive chat using native wgpu.

```bash
cd packages/rust
cargo run --example chat --release
```

### Python CLI

Interactive chat via Python bindings.

```bash
pip install oxbitnet
python packages/rust/crates/oxbitnet-python/examples/chat.py
```

### Swift CLI

Minimal Swift chat example wrapping the C FFI layer.

```bash
cd packages/rust
cargo build -p oxbitnet-ffi --release
cd crates/oxbitnet-swift
swift run -Xlinker -L../../../../target/release Chat model.gguf "Hello!"
```

### Java CLI

Minimal Java chat example using JNI bindings.

```bash
cd packages/rust
cargo build -p oxbitnet-java --release
cd crates/oxbitnet-java/examples
javac -cp ../java/src/main/java:. Chat.java
java -Djava.library.path=../../../../target/release -cp ../java/src/main/java:. Chat model.gguf "Hello!"
```

### C CLI

Minimal C example using the FFI bindings.

```bash
cd packages/rust
cargo build -p oxbitnet-ffi --release
gcc crates/oxbitnet-ffi/examples/chat.c -Icrates/oxbitnet-ffi -Ltarget/release -loxbitnet_ffi -o chat
LD_LIBRARY_PATH=target/release ./chat model.gguf "Hello!"
```

## Architecture

```
0xbitnet/
├── packages/
│ ├── core/ # WGSL kernels + TypeScript API (npm: 0xbitnet)
│ │ └── src/
│ │ ├── gpu/ # WebGPU device init, buffer pool
│ │ ├── model/ # GGUF parser, weight loader, config
│ │ ├── nn/ # Transformer layers, attention, BitLinear
│ │ ├── shaders/ # 12 WGSL compute shaders (shared with Rust)
│ │ └── tokenizer/ # BPE tokenizer, chat templates
│ └── rust/ # Rust + Python bindings
│ └── crates/
│ ├── oxbitnet/ # Rust library (crates.io: oxbitnet)
│ ├── oxbitnet-python/ # Python bindings via PyO3 (PyPI: oxbitnet)
│ ├── oxbitnet-swift/ # Swift bindings via C FFI (SPM package)
│ ├── oxbitnet-java/ # Java/JNI bindings (Android-ready)
│ └── oxbitnet-ffi/ # C FFI bindings (cdylib + staticlib)
├── examples/
│ ├── web-chat/ # Chat app demo (Vite)
│ ├── tl-dr-widget/ # Offline TL;DR widget demo (Vite)
│ └── node-cli/ # Node.js CLI using Dawn WebGPU bindings
└── docs/
```

See [Architecture](docs/architecture.md) for data flow and internals.

## Prerequisites

- **TypeScript/JS**: Node.js 18+, a WebGPU-capable environment
- **Rust**: Rust 1.75+, a Vulkan/Metal/DX12-capable GPU
- **Swift**: Swift 5.9+, a Vulkan/Metal/DX12-capable GPU
- **Java**: JDK 17+, a Vulkan/Metal/DX12-capable GPU
- **Python**: Python 3.9+, `pip install oxbitnet`

## Contributing

11 changes: 6 additions & 5 deletions packages/core/src/model/loader.ts
@@ -318,12 +318,13 @@ function configFromGGUFMetadata(
(metadata["general.architecture"] as string) ?? "bitnet";

// Try architecture-prefixed keys, then common fallbacks
const isBitNet = arch.startsWith("bitnet");
function get(suffix: string): unknown {
return metadata[`${arch}.${suffix}`]
?? metadata[`llama.${suffix}`]
?? metadata[`bitnet.${suffix}`]
?? metadata[`bitnet-25.${suffix}`];
?? metadata[`bitnet-25.${suffix}`]
?? metadata[`bitnet-b1.58.${suffix}`];
}

const hiddenSize = (get("embedding_length") as number) ?? 2560;
@@ -337,9 +338,9 @@
128256;
const intermediateSize = (get("feed_forward_length") as number) ?? 6912;

// All BitNet variants (arch="bitnet-b1.58", "bitnet-25", etc.) use relu²
const activation = isBitNet ? "relu2" : "silu";
const ropeTheta = (get("rope.freq_base") as number) ?? (isBitNet ? 500000.0 : 10000.0);

return {
modelType: "bitnet",