Merged
31 changes: 18 additions & 13 deletions .github/workflows/ci.yml
@@ -18,13 +18,13 @@ jobs:
       - uses: actions/checkout@v4

       - name: Install Rust
-        uses: dtolnay/rust-action@stable
+        uses: dtolnay/rust-toolchain@stable

       - name: Cache cargo
         uses: Swatinem/rust-cache@v2

       - name: Check
-        run: cargo check --all-features
+        run: cargo check --features nlp

   test:
     name: Test
@@ -38,13 +38,18 @@ jobs:
       - uses: actions/checkout@v4

       - name: Install Rust
-        uses: dtolnay/rust-action@${{ matrix.rust }}
+        uses: dtolnay/rust-toolchain@${{ matrix.rust }}

       - name: Cache cargo
         uses: Swatinem/rust-cache@v2

-      - name: Run tests
-        run: cargo test --all-features
+      - name: Run tests (Linux)
+        if: runner.os == 'Linux'
+        run: cargo test --features nlp
+
+      - name: Run tests (macOS)
+        if: runner.os == 'macOS'
+        run: cargo test --features coreml,nlp

   fmt:
     name: Format
@@ -53,7 +58,7 @@ jobs:
       - uses: actions/checkout@v4

       - name: Install Rust
-        uses: dtolnay/rust-action@stable
+        uses: dtolnay/rust-toolchain@stable
         with:
           components: rustfmt

@@ -67,15 +72,15 @@ jobs:
       - uses: actions/checkout@v4

       - name: Install Rust
-        uses: dtolnay/rust-action@stable
+        uses: dtolnay/rust-toolchain@stable
         with:
           components: clippy

       - name: Cache cargo
         uses: Swatinem/rust-cache@v2

       - name: Clippy
-        run: cargo clippy --all-features -- -D warnings
+        run: cargo clippy --features nlp -- -D warnings

   build-macos:
     name: Build macOS
@@ -84,18 +89,18 @@ jobs:
       - uses: actions/checkout@v4

       - name: Install Rust
-        uses: dtolnay/rust-action@stable
+        uses: dtolnay/rust-toolchain@stable
         with:
           targets: aarch64-apple-darwin

       - name: Cache cargo
         uses: Swatinem/rust-cache@v2

       - name: Build (x86_64)
-        run: cargo build --release
+        run: cargo build --release --features coreml,nlp

       - name: Build (aarch64)
-        run: cargo build --release --target aarch64-apple-darwin
+        run: cargo build --release --target aarch64-apple-darwin --features coreml,nlp
         if: runner.arch == 'ARM64'

       - name: Check binary size
@@ -110,13 +115,13 @@ jobs:
       - uses: actions/checkout@v4

       - name: Install Rust
-        uses: dtolnay/rust-action@stable
+        uses: dtolnay/rust-toolchain@stable

       - name: Cache cargo
         uses: Swatinem/rust-cache@v2

       - name: Build
-        run: cargo build --release
+        run: cargo build --release --features nlp

       - name: Check binary size
         run: |
192 changes: 140 additions & 52 deletions README.md
@@ -6,9 +6,10 @@ A lightweight ML runtime that runs ONNX models without Python. Fast, portable, a

 - **Single Binary**: Deploy ML models with a single ~50MB binary
 - **Fast Cold Start**: 0.01-0.05s startup time (100x faster than Python)
-- **Apple Silicon Acceleration**: Native Metal/CoreML support for M-series chips
+- **Apple Silicon Acceleration**: Native CoreML/Metal/Neural Engine support
 - **ONNX Support**: Run models exported from PyTorch, TensorFlow, and more
 - **Zero Dependencies**: No Python, no virtual environments, no package managers
+- **NLP Support**: Text tokenization and embedding generation

 ## Installation

@@ -19,9 +20,12 @@ A lightweight ML runtime that runs ONNX models without Python. Fast, portable, a
 git clone https://github.com/airml/airml.git
 cd airml

-# Build release binary
+# Build release binary (CPU only)
 cargo build --release

+# Build with all features (macOS)
+cargo build --release --features coreml,nlp
+
 # Optional: Install to PATH
 cargo install --path .
 ```
@@ -32,49 +36,100 @@ Download from [Releases](https://github.com/airml/airml/releases).

 ## Quick Start

+### Image Classification
+
 ```bash
-# Run image classification
-airml run --model resnet50.onnx --input cat.jpg --labels imagenet_labels.txt
+# Run classification on an image
+airml run -m resnet50.onnx -i cat.jpg -l imagenet_labels.txt

+# Output:
+# Top 5 predictions:
+# --------------------------------------------------
+#   281  95.23%  ========================================  tabby
+#   282   3.12%  ===  tiger cat
+#   285   0.89%  =    Egyptian cat
+```
+
+### Text Embeddings
+
+```bash
+# Generate text embeddings
+airml embed -m sentence-transformer.onnx -t tokenizer.json --text "Hello world"
+
+# Output:
+# {
+#   "text": "Hello world",
+#   "dimension": 384,
+#   "embedding": [0.123456, 0.234567, ...]
+# }
+```

-# Display model information
-airml info --model resnet50.onnx
+### Benchmarking

+```bash
 # Benchmark inference performance
-airml bench --model resnet50.onnx -n 100
+airml bench -m model.onnx -n 100 -p neural-engine

+# Output:
+# Mean latency: 12.34 ms
+# Throughput: 81.00 inferences/sec
+```
+
+### System Info

-# Check system capabilities
+```bash
+# Check available providers
 airml system
+
+# Output:
+# OS: macos
+# Architecture: aarch64
+# Apple Silicon: true
+# Available providers: cpu, coreml
 ```

 ## CLI Reference

 ### `airml run`

-Run inference on an input.
+Run inference on an input image.

 ```bash
 airml run --model <MODEL> --input <INPUT> [OPTIONS]

 Options:
   -m, --model <MODEL>        Path to ONNX model file
   -i, --input <INPUT>        Path to input file (image)
-  -l, --labels <LABELS>      Path to labels file (one label per line)
-  -k, --top-k <N>            Number of top predictions to show [default: 5]
-  -p, --provider <PROVIDER>  Execution provider (auto, cpu, coreml) [default: auto]
-      --preprocess <PRESET>  Preprocessing preset (imagenet, clip, yolo, none) [default: imagenet]
+  -l, --labels <LABELS>      Path to labels file
+  -k, --top-k <N>            Top predictions to show [default: 5]
+  -p, --provider <PROVIDER>  Execution provider (auto, cpu, coreml, neural-engine)
+      --preprocess <PRESET>  Preprocessing (imagenet, clip, yolo, none)
       --raw                  Output raw tensor values
 ```

-### `airml info`
+### `airml embed`

-Display model information.
+Generate text embeddings (requires `nlp` feature).

 ```bash
-airml info --model <MODEL> [OPTIONS]
+airml embed --model <MODEL> --tokenizer <TOKENIZER> --text <TEXT> [OPTIONS]

 Options:
-  -m, --model <MODEL>        Path to ONNX model file
-  -v, --verbose              Show detailed information
+  -m, --model <MODEL>          ONNX embedding model
+  -t, --tokenizer <TOKENIZER>  tokenizer.json file
+      --text <TEXT>            Text to embed
+      --max-length <N>         Max sequence length [default: 512]
+  -p, --provider <PROVIDER>    Execution provider
+      --output <FORMAT>        Output format (json, raw)
+      --normalize              L2 normalize embeddings
 ```

+### `airml info`
+
+Display model information.
+
+```bash
+airml info --model <MODEL> [-v]
+```
+
 ### `airml bench`
@@ -85,78 +140,111 @@ Benchmark inference performance.

 airml bench --model <MODEL> [OPTIONS]

 Options:
   -m, --model <MODEL>        Path to ONNX model file
-  -n, --iterations <N>       Number of iterations [default: 100]
+  -n, --iterations <N>       Iterations [default: 100]
   -w, --warmup <N>           Warmup iterations [default: 10]
-  -p, --provider <PROVIDER>  Execution provider [default: auto]
+  -p, --provider <PROVIDER>  Execution provider
       --shape <SHAPE>        Input shape (e.g., "1,3,224,224")
 ```

 ### `airml system`

-Display system information and available providers.
+Display system capabilities.

+## Execution Providers
+
+| Provider | Platform | Hardware | Flag |
+|----------|----------|----------|------|
+| CPU | All | Any CPU | (default) |
+| CoreML | macOS | Apple Silicon | `--features coreml` |
+| Neural Engine | macOS | M1/M2/M3 ANE | `--features coreml` |
+
 ```bash
-airml system
+# Build with specific providers
+cargo build --release                        # CPU only
+cargo build --release --features coreml      # + CoreML
+cargo build --release --features nlp         # + NLP
+cargo build --release --features coreml,nlp  # All features
 ```

-## Execution Providers
+## Performance

-| Provider | Platform | Hardware |
-|----------|----------|----------|
-| CPU | All | Any CPU |
-| CoreML | macOS | Apple Silicon (M1/M2/M3) |
+Benchmarked on Apple M2 with ResNet50:

-Enable providers with feature flags:
+| Provider | Latency | Throughput |
+|----------|---------|------------|
+| CPU | ~50ms | ~20 inf/s |
+| CoreML (All) | ~15ms | ~65 inf/s |
+| Neural Engine | ~8ms | ~125 inf/s |

-```bash
-# CPU only (default)
-cargo build --release
+| Metric | airML | Python (PyTorch) |
+|--------|-------|------------------|
+| Binary Size | ~50MB | ~2GB |
+| Cold Start | 0.01-0.05s | 2-5s |
+| Memory Usage | ~100MB | ~500MB+ |

-# With CoreML support
-cargo build --release --features coreml
-```
+## Using as a Library

+```rust
+use airml_core::{InferenceEngine, SessionConfig};
+use airml_preprocess::ImagePreprocessor;
+use airml_providers::CoreMLProvider;
+
+fn main() -> anyhow::Result<()> {
+    // Configure with CoreML
+    let providers = vec![CoreMLProvider::default().neural_engine_only().into_dispatch()];
+    let config = SessionConfig::new().with_providers(providers);
+
+    // Load model
+    let mut engine = InferenceEngine::from_file_with_config("model.onnx", config)?;
+
+    // Preprocess and run
+    let input = ImagePreprocessor::imagenet().load_and_process("image.jpg")?;
+    let outputs = engine.run(input.into_dyn())?;
+
-## Embedding Models
+    Ok(())
+}
+```

-Embed ONNX models directly in your binary:
+## Embedding Models in Binary

 ```rust
 use airml_embed::EmbeddedModel;

-// Embed at compile time
-static MODEL_BYTES: &[u8] = include_bytes!("../models/resnet50.onnx");
+static MODEL: &[u8] = include_bytes!("model.onnx");

 fn main() -> anyhow::Result<()> {
-    let model = EmbeddedModel::new(MODEL_BYTES);
-    let engine = model.into_engine()?;
-
-    // Run inference...
+    let engine = EmbeddedModel::new(MODEL).into_engine()?;
+    // Use engine...
     Ok(())
 }
 ```

-## Performance
-
-| Metric | airML | Python (PyTorch) |
-|--------|-------|------------------|
-| Binary Size | ~50MB | ~2GB |
-| Cold Start | 0.01-0.05s | 2-5s |
-| Memory Usage | ~100MB | ~500MB+ |
-
 ## Project Structure

 ```
 airML/
 ├── crates/
-│   ├── airml-core/        # Inference engine
+│   ├── airml-core/        # Inference engine (ONNX Runtime wrapper)
 │   ├── airml-preprocess/  # Image/text preprocessing
 │   ├── airml-providers/   # Execution providers (CPU, CoreML)
 │   └── airml-embed/       # Model embedding utilities
 ├── src/                   # CLI binary
+│   ├── main.rs
+│   ├── cli.rs             # Argument parsing
+│   └── commands/          # Command implementations
+├── docs/                  # Documentation
+│   ├── ARCHITECTURE.md    # Internal architecture
+│   ├── TUTORIAL.md        # Step-by-step tutorials
+│   └── API.md             # API reference
 └── models/                # Test models (gitignored)
 ```

+## Documentation
+
+- [Architecture](docs/ARCHITECTURE.md) - Internal design and data flow
+- [Tutorial](docs/TUTORIAL.md) - Step-by-step guides
+- [API Reference](docs/API.md) - Complete API documentation
+
 ## License

 MIT License - see [LICENSE](LICENSE) for details.