From 6cfd909b34712a6a1e977e8e7858f26356e96d9e Mon Sep 17 00:00:00 2001
From: SecretSettler <sean.jiang01@outlook.com>
Date: Wed, 25 Mar 2026 01:19:45 +0000
Subject: [PATCH 1/8] =?UTF-8?q?feat:=20native=20TypeScript=20OpenClaw=20pl?=
 =?UTF-8?q?ugin=20=E2=80=94=20zero=20external=20dependencies?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Complete rewrite of the OpenClaw plugin to run entirely in-process
without requiring the Python proxy server. Users just install and go:
  openclaw plugins install @contextpilot/openclaw-plugin

Engine modules ported from Python to TypeScript:
- engine/extract.ts: Document extraction from system/tool_results
  (XML tags, numbered lists, JSON results, markdown headers, separators)
  for both OpenAI Chat and Anthropic Messages API formats (1042→969 lines)
- engine/dedup.ts: Cross-turn block-level deduplication using
  content-defined chunking + SHA-256 hashing (250→355 lines)
- engine/cache-control.ts: Anthropic cache_control injection for
  system messages and tool_result content blocks (144 lines)
- engine/reorder.ts: Simplified LCP-based document reordering that
  maximizes prefix cache sharing across turns — no numpy/scipy needed

Plugin integration:
- Uses OpenClaw's wrapStreamFn to intercept requests before they
  reach the LLM backend, apply all optimizations, then forward
- Registers contextpilot provider with dynamic model resolution
- contextpilot_status tool reports engine state and savings
---
 openclaw-plugin/README.md                   | 105 +++
 openclaw-plugin/openclaw.plugin.json        |  42 +
 openclaw-plugin/package.json                |  31 +
 openclaw-plugin/src/engine/cache-control.ts | 144 +++
 openclaw-plugin/src/engine/dedup.ts         | 355 +++++++
 openclaw-plugin/src/engine/extract.ts       | 969 ++++++++++++++++++++
 openclaw-plugin/src/engine/reorder.ts       | 109 +++
 openclaw-plugin/src/index.ts                | 175 ++++
 openclaw-plugin/tsconfig.json               |  15 +
 9 files changed, 1945 insertions(+)
 create mode 100644 openclaw-plugin/README.md
 create mode 100644 openclaw-plugin/openclaw.plugin.json
 create mode 100644 openclaw-plugin/package.json
 create mode 100644 openclaw-plugin/src/engine/cache-control.ts
 create mode 100644 openclaw-plugin/src/engine/dedup.ts
 create mode 100644 openclaw-plugin/src/engine/extract.ts
 create mode 100644 openclaw-plugin/src/engine/reorder.ts
 create mode 100644 openclaw-plugin/src/index.ts
 create mode 100644 openclaw-plugin/tsconfig.json

diff --git a/openclaw-plugin/README.md b/openclaw-plugin/README.md
new file mode 100644
index 0000000..851533c
--- /dev/null
+++ b/openclaw-plugin/README.md
@@ -0,0 +1,105 @@
+# @contextpilot/openclaw-plugin
+
+OpenClaw native plugin for [ContextPilot](https://github.com/EfficientContext/ContextPilot) — faster long-context inference via in-process context reuse. **Zero external dependencies** — no Python, no proxy server, just install and go.
+
+## What It Does
+
+ContextPilot optimizes every LLM request by:
+
+1. **Extracting** documents from system prompts and tool results
+2. **Reordering** documents for maximum prefix cache sharing across turns
+3. **Deduplicating** repeated content blocks with compact reference hints
+4. **Injecting** provider-specific cache control markers (Anthropic `cache_control`)
+
+All processing happens in-process inside the OpenClaw plugin — no external services needed.
+
+## Installation
+
+```bash
+openclaw plugins install @contextpilot/openclaw-plugin
+```
+
+## Configuration
+
+In `~/.openclaw/openclaw.json`:
+
+```json5
+{
+  plugins: {
+    entries: {
+      "contextpilot": {
+        enabled: true,
+        config: {
+          // "anthropic" (default) or "openai"
+          "backendProvider": "anthropic",
+          
+          // What to optimize: "all" (default), "system", or "tool_results"
+          "scope": "all"
+        }
+      }
+    }
+  }
+}
+```
+
+Set your API key:
+
+```bash
+export ANTHROPIC_API_KEY="sk-ant-xxx"
+# or
+export OPENAI_API_KEY="sk-xxx"
+```
+
+Then select a ContextPilot model (e.g., `contextpilot/claude-sonnet-4-6`) and start using OpenClaw.
+
+## Available Models
+
+### Anthropic backend (default)
+
+| Model ID | Name |
+|----------|------|
+| `contextpilot/claude-opus-4-6` | Claude Opus 4.6 (ContextPilot) |
+| `contextpilot/claude-sonnet-4-6` | Claude Sonnet 4.6 (ContextPilot) |
+
+### OpenAI backend
+
+| Model ID | Name |
+|----------|------|
+| `contextpilot/gpt-4o` | GPT-4o (ContextPilot) |
+| `contextpilot/gpt-4o-mini` | GPT-4o Mini (ContextPilot) |
+
+Any model ID works via dynamic resolution — use `contextpilot/<any-model-id>`.
+
+## How It Works
+
+```
+OpenClaw request
+  ↓
+ContextPilot Plugin (wrapStreamFn)
+  ├─ Extract documents from system/tool_results
+  ├─ Reorder for prefix cache sharing
+  ├─ Deduplicate repeated blocks
+  ├─ Inject cache_control markers
+  ↓
+Optimized request → LLM Backend (Anthropic/OpenAI)
+```
+
+The plugin registers as an OpenClaw provider and uses `wrapStreamFn` to intercept requests before they reach the backend. All optimization is done in-process in TypeScript.
+
+## Agent Tool
+
+| Tool | Description |
+|------|-------------|
+| `contextpilot_status` | Check engine status, request count, and chars saved |
+
+## Scope Control
+
+| Scope | System Prompt | Tool Results |
+|:---:|:---:|:---:|
+| `all` (default) | Optimized | Optimized |
+| `system` | Optimized | Untouched |
+| `tool_results` | Untouched | Optimized |
+
+## License
+
+Apache-2.0
diff --git a/openclaw-plugin/openclaw.plugin.json b/openclaw-plugin/openclaw.plugin.json
new file mode 100644
index 0000000..c1b5f9a
--- /dev/null
+++ b/openclaw-plugin/openclaw.plugin.json
@@ -0,0 +1,42 @@
+{
+  "id": "contextpilot",
+  "name": "ContextPilot",
+  "description": "Faster long-context inference via in-process context reuse — reorders, deduplicates, and injects cache control for maximum prefix cache sharing. No external dependencies.",
+  "version": "0.2.0",
+  "providers": ["contextpilot"],
+  "providerAuthEnvVars": {
+    "contextpilot": ["ANTHROPIC_API_KEY", "OPENAI_API_KEY"]
+  },
+  "providerAuthChoices": [
+    {
+      "provider": "contextpilot",
+      "method": "api-key",
+      "choiceId": "contextpilot-api-key",
+      "choiceLabel": "Backend API key (Anthropic or OpenAI)",
+      "groupId": "contextpilot",
+      "groupLabel": "ContextPilot",
+      "cliFlag": "--anthropic-api-key",
+      "cliOption": "--anthropic-api-key <key>",
+      "cliDescription": "API key for the backend LLM provider",
+      "onboardingScopes": ["text-inference"]
+    }
+  ],
+  "configSchema": {
+    "type": "object",
+    "additionalProperties": false,
+    "properties": {
+      "backendProvider": {
+        "type": "string",
+        "enum": ["anthropic", "openai"],
+        "description": "Backend LLM provider type",
+        "default": "anthropic"
+      },
+      "scope": {
+        "type": "string",
+        "enum": ["all", "system", "tool_results"],
+        "description": "Which messages ContextPilot optimizes",
+        "default": "all"
+      }
+    }
+  }
+}
diff --git a/openclaw-plugin/package.json b/openclaw-plugin/package.json
new file mode 100644
index 0000000..58defc7
--- /dev/null
+++ b/openclaw-plugin/package.json
@@ -0,0 +1,31 @@
+{
+  "name": "@contextpilot/openclaw-plugin",
+  "version": "0.2.0",
+  "description": "ContextPilot plugin for OpenClaw — faster long-context inference via in-process context reuse. Zero external dependencies.",
+  "type": "module",
+  "license": "Apache-2.0",
+  "author": "ContextPilot Contributors",
+  "repository": {
+    "type": "git",
+    "url": "https://github.com/EfficientContext/ContextPilot.git",
+    "directory": "openclaw-plugin"
+  },
+  "keywords": [
+    "openclaw",
+    "openclaw-plugin",
+    "contextpilot",
+    "kv-cache",
+    "context-reuse",
+    "prompt-cache",
+    "dedup",
+    "llm"
+  ],
+  "openclaw": {
+    "extensions": ["./src/index.ts"]
+  },
+  "files": [
+    "src/",
+    "openclaw.plugin.json",
+    "README.md"
+  ]
+}
diff --git a/openclaw-plugin/src/engine/cache-control.ts b/openclaw-plugin/src/engine/cache-control.ts
new file mode 100644
index 0000000..53d48e7
--- /dev/null
+++ b/openclaw-plugin/src/engine/cache-control.ts
@@ -0,0 +1,144 @@
+export const MIN_CONTENT_LENGTH_FOR_CACHE = 1024;
+export const CACHE_CONTROL_EPHEMERAL = { type: 'ephemeral' } as const;
+
+type CacheControl = typeof CACHE_CONTROL_EPHEMERAL;
+
+interface TextBlock extends Record<string, unknown> {
+    type?: unknown;
+    text?: unknown;
+    cache_control?: CacheControl;
+}
+
+interface ToolResultBlock extends Record<string, unknown> {
+    type?: unknown;
+    content?: unknown;
+    cache_control?: CacheControl;
+}
+
+interface MessageBlock extends Record<string, unknown> {
+    role?: unknown;
+    content?: unknown;
+}
+
+function isRecord(value: unknown): value is Record<string, unknown> {
+    return typeof value === 'object' && value !== null;
+}
+
+function injectSystemCacheControl(
+    body: Record<string, unknown>,
+    cc: CacheControl
+): Record<string, unknown> {
+    const system = body.system;
+    if (system === undefined || system === null) {
+        return body;
+    }
+
+    if (typeof system === 'string') {
+        body.system = [{ type: 'text', text: system, cache_control: cc }];
+        return body;
+    }
+
+    if (Array.isArray(system) && system.length > 0) {
+        const lastBlock = system[system.length - 1];
+        if (isRecord(lastBlock)) {
+            lastBlock.cache_control = cc;
+        }
+    }
+
+    return body;
+}
+
+function maybeAddCacheControlToToolResult(block: ToolResultBlock, cc: CacheControl): void {
+    const toolResultContent = block.content ?? '';
+
+    if (typeof toolResultContent === 'string') {
+        if (toolResultContent.length >= MIN_CONTENT_LENGTH_FOR_CACHE) {
+            block.cache_control = cc;
+        }
+        return;
+    }
+
+    if (!Array.isArray(toolResultContent)) {
+        return;
+    }
+
+    const totalChars = toolResultContent.reduce((sum, inner) => {
+        if (!isRecord(inner) || inner.type !== 'text') {
+            return sum;
+        }
+        return sum + (typeof inner.text === 'string' ? inner.text.length : 0);
+    }, 0);
+
+    if (totalChars < MIN_CONTENT_LENGTH_FOR_CACHE || toolResultContent.length === 0) {
+        return;
+    }
+
+    let lastTextBlock: TextBlock | null = null;
+    for (let i = toolResultContent.length - 1; i >= 0; i -= 1) {
+        const inner = toolResultContent[i];
+        if (isRecord(inner) && inner.type === 'text') {
+            lastTextBlock = inner as TextBlock;
+            break;
+        }
+    }
+
+    if (lastTextBlock !== null) {
+        lastTextBlock.cache_control = cc;
+    }
+}
+
+function injectToolResultCacheControl(
+    body: Record<string, unknown>,
+    cc: CacheControl
+): Record<string, unknown> {
+    const messages = body.messages;
+    if (!Array.isArray(messages) || messages.length === 0) {
+        return body;
+    }
+
+    for (const msg of messages) {
+        if (!isRecord(msg)) {
+            continue;
+        }
+
+        const message = msg as MessageBlock;
+        if (message.role !== 'user' || !Array.isArray(message.content)) {
+            continue;
+        }
+
+        for (const block of message.content) {
+            if (!isRecord(block)) {
+                continue;
+            }
+            if (block.type !== 'tool_result' && block.type !== 'toolResult') {
+                continue;
+            }
+            maybeAddCacheControlToToolResult(block as ToolResultBlock, cc);
+        }
+    }
+
+    return body;
+}
+
+export function injectAnthropicCacheControl(body: Record<string, unknown>): Record<string, unknown> {
+    const copiedBody = structuredClone(body);
+    injectSystemCacheControl(copiedBody, CACHE_CONTROL_EPHEMERAL);
+    injectToolResultCacheControl(copiedBody, CACHE_CONTROL_EPHEMERAL);
+    return copiedBody;
+}
+
+export function injectOpenAICacheControl(body: Record<string, unknown>): Record<string, unknown> {
+    // OpenAI prompt caching is automatic and prefix-based, so no explicit
+    // cache_control block injection is required at request construction time.
+    return body;
+}
+
+export function injectCacheControl(
+    body: Record<string, unknown>,
+    provider: 'anthropic' | 'openai'
+): Record<string, unknown> {
+    if (provider === 'anthropic') {
+        return injectAnthropicCacheControl(body);
+    }
+    return injectOpenAICacheControl(body);
+}
diff --git a/openclaw-plugin/src/engine/dedup.ts b/openclaw-plugin/src/engine/dedup.ts
new file mode 100644
index 0000000..14d79f4
--- /dev/null
+++ b/openclaw-plugin/src/engine/dedup.ts
@@ -0,0 +1,355 @@
+import * as crypto from 'node:crypto';
+
+export const MIN_BLOCK_CHARS = 80;
+export const MIN_CONTENT_CHARS = 500;
+
+export const CHUNK_MODULUS = 13;
+export const CHUNK_MIN_LINES = 5;
+export const CHUNK_MAX_LINES = 40;
+
+export interface DedupResult {
+    blocksDeduped: number;
+    blocksTotal: number;
+    charsBefore: number;
+    charsAfter: number;
+    charsSaved: number;
+}
+
+export interface DedupOptions {
+    minBlockChars?: number;
+    minContentChars?: number;
+    chunkModulus?: number;
+}
+
+type SeenBlock = [number, string, number];
+
+interface OpenAIToolCall {
+    id?: string;
+    function?: {
+        name?: string;
+    };
+}
+
+interface OpenAIAssistantMessage {
+    role?: string;
+    tool_calls?: OpenAIToolCall[];
+}
+
+interface OpenAIToolMessage {
+    role?: string;
+    content?: string;
+    tool_call_id?: string;
+    name?: string;
+}
+
+interface ChatCompletionsBody {
+    messages?: OpenAIToolMessage[];
+}
+
+interface ResponsesFunctionCallItem {
+    type?: string;
+    call_id?: string;
+    name?: string;
+}
+
+interface ResponsesFunctionCallOutputItem {
+    type?: string;
+    call_id?: string;
+    output?: string;
+}
+
+interface ResponsesApiBody {
+    input?: ResponsesFunctionCallOutputItem[];
+}
+
+function emptyDedupResult(): DedupResult {
+    return {
+        blocksDeduped: 0,
+        blocksTotal: 0,
+        charsBefore: 0,
+        charsAfter: 0,
+        charsSaved: 0
+    };
+}
+
+export function hashString(str: string): number {
+    let h = 5381;
+    for (let i = 0; i < str.length; i++) {
+        h = ((h << 5) + h + str.charCodeAt(i)) & 0xFFFFFFFF;
+    }
+    return h >>> 0;
+}
+
+export function buildToolNameMapOpenai(messages: OpenAIAssistantMessage[]): Record<string, string> {
+    const mapping: Record<string, string> = {};
+    for (const msg of messages) {
+        if (!msg || typeof msg !== 'object' || msg.role !== 'assistant') {
+            continue;
+        }
+
+        for (const tc of msg.tool_calls || []) {
+            if (!tc || typeof tc !== 'object') {
+                continue;
+            }
+            const tcId = tc.id || '';
+            const fn = tc.function;
+            if (fn && typeof fn === 'object' && fn.name) {
+                mapping[tcId] = fn.name;
+            }
+        }
+    }
+    return mapping;
+}
+
+export function buildToolNameMapResponses(items: ResponsesFunctionCallItem[]): Record<string, string> {
+    const mapping: Record<string, string> = {};
+    for (const item of items) {
+        if (item && typeof item === 'object' && item.type === 'function_call') {
+            const callId = item.call_id || '';
+            const name = item.name || '';
+            if (callId && name) {
+                mapping[callId] = name;
+            }
+        }
+    }
+    return mapping;
+}
+
+export function contentDefinedChunking(
+    text: string,
+    chunkModulus: number = CHUNK_MODULUS
+): string[] {
+    const lines = text.split('\n');
+    if (lines.length <= CHUNK_MIN_LINES) {
+        return [text];
+    }
+
+    const blocks: string[] = [];
+    let current: string[] = [];
+
+    for (const line of lines) {
+        current.push(line);
+        const lineHash = hashString(line.trim()) & 0xFFFFFFFF;
+        const isBoundary = (
+            lineHash % chunkModulus === 0 && current.length >= CHUNK_MIN_LINES
+        ) || current.length >= CHUNK_MAX_LINES;
+
+        if (isBoundary) {
+            blocks.push(current.join('\n'));
+            current = [];
+        }
+    }
+
+    if (current.length > 0) {
+        if (blocks.length > 0 && current.length < CHUNK_MIN_LINES) {
+            blocks[blocks.length - 1] += `\n${current.join('\n')}`;
+        } else {
+            blocks.push(current.join('\n'));
+        }
+    }
+
+    return blocks;
+}
+
+export function hashBlock(block: string): string {
+    const normalized = block.trim();
+    return crypto.createHash('sha256').update(normalized, 'utf8').digest('hex').slice(0, 20);
+}
+
+export function dedupChatCompletions(body: ChatCompletionsBody, opts: DedupOptions = {}): DedupResult {
+    const minBlockChars = opts.minBlockChars ?? MIN_BLOCK_CHARS;
+    const minContentChars = opts.minContentChars ?? MIN_CONTENT_CHARS;
+    const chunkModulus = opts.chunkModulus ?? CHUNK_MODULUS;
+
+    const messages = body?.messages;
+    if (!Array.isArray(messages) || messages.length === 0) {
+        return emptyDedupResult();
+    }
+
+    const toolNames = buildToolNameMapOpenai(messages);
+    const seenBlocks = new Map<string, SeenBlock>();
+    const result = emptyDedupResult();
+
+    for (let idx = 0; idx < messages.length; idx++) {
+        const msg = messages[idx];
+        if (!msg || typeof msg !== 'object' || msg.role !== 'tool') {
+            continue;
+        }
+
+        const content = msg.content || '';
+        if (typeof content !== 'string' || content.length < minContentChars) {
+            continue;
+        }
+
+        const toolCallId = msg.tool_call_id || '';
+        const fnName = toolNames[toolCallId] || msg.name || 'tool';
+
+        const blocks = contentDefinedChunking(content, chunkModulus);
+        if (blocks.length < 2) {
+            for (const block of blocks) {
+                if (block.trim().length >= minBlockChars) {
+                    const h = hashBlock(block);
+                    if (!seenBlocks.has(h)) {
+                        seenBlocks.set(h, [idx, fnName, 0]);
+                    }
+                }
+            }
+            continue;
+        }
+
+        const newBlocks: string[] = [];
+        let dedupedInThis = 0;
+
+        for (let blockIdx = 0; blockIdx < blocks.length; blockIdx++) {
+            const block = blocks[blockIdx];
+            if (block.trim().length < minBlockChars) {
+                newBlocks.push(block);
+                continue;
+            }
+
+            const h = hashBlock(block);
+            result.blocksTotal += 1;
+
+            const seen = seenBlocks.get(h);
+            if (seen && seen[0] !== idx) {
+                const origFn = seen[1];
+                const firstLine = block.trim().split('\n')[0].slice(0, 80);
+                const ref = `[... "${firstLine}" — identical to earlier ${origFn} result, see above ...]`;
+                const charsSaved = block.length - ref.length;
+                if (charsSaved > 0) {
+                    newBlocks.push(ref);
+                    dedupedInThis += 1;
+                    result.blocksDeduped += 1;
+                } else {
+                    newBlocks.push(block);
+                }
+            } else {
+                if (!seen) {
+                    seenBlocks.set(h, [idx, fnName, blockIdx]);
+                }
+                newBlocks.push(block);
+            }
+        }
+
+        if (dedupedInThis > 0) {
+            const originalLen = content.length;
+            const newContent = newBlocks.join('\n\n');
+            msg.content = newContent;
+            const newLen = newContent.length;
+            result.charsBefore += originalLen;
+            result.charsAfter += newLen;
+            result.charsSaved += (originalLen - newLen);
+        } else {
+            for (let blockIdx = 0; blockIdx < blocks.length; blockIdx++) {
+                const block = blocks[blockIdx];
+                if (block.trim().length >= minBlockChars) {
+                    const h = hashBlock(block);
+                    if (!seenBlocks.has(h)) {
+                        seenBlocks.set(h, [idx, fnName, blockIdx]);
+                    }
+                }
+            }
+        }
+    }
+
+    return result;
+}
+
+export function dedupResponsesApi(body: ResponsesApiBody, opts: DedupOptions = {}): DedupResult {
+    const minBlockChars = opts.minBlockChars ?? MIN_BLOCK_CHARS;
+    const minContentChars = opts.minContentChars ?? MIN_CONTENT_CHARS;
+    const chunkModulus = opts.chunkModulus ?? CHUNK_MODULUS;
+
+    const inputItems = body?.input;
+    if (!Array.isArray(inputItems) || inputItems.length === 0) {
+        return emptyDedupResult();
+    }
+
+    const fnNames = buildToolNameMapResponses(inputItems);
+    const seenBlocks = new Map<string, SeenBlock>();
+    const result = emptyDedupResult();
+
+    for (let idx = 0; idx < inputItems.length; idx++) {
+        const item = inputItems[idx];
+        if (!item || typeof item !== 'object' || item.type !== 'function_call_output') {
+            continue;
+        }
+
+        const output = item.output || '';
+        if (typeof output !== 'string' || output.length < minContentChars) {
+            continue;
+        }
+
+        const callId = item.call_id || '';
+        const fnName = fnNames[callId] || callId || 'tool';
+
+        const blocks = contentDefinedChunking(output, chunkModulus);
+        if (blocks.length < 2) {
+            for (const block of blocks) {
+                if (block.trim().length >= minBlockChars) {
+                    const h = hashBlock(block);
+                    if (!seenBlocks.has(h)) {
+                        seenBlocks.set(h, [idx, fnName, 0]);
+                    }
+                }
+            }
+            continue;
+        }
+
+        const newBlocks: string[] = [];
+        let dedupedInThis = 0;
+
+        for (let blockIdx = 0; blockIdx < blocks.length; blockIdx++) {
+            const block = blocks[blockIdx];
+            if (block.trim().length < minBlockChars) {
+                newBlocks.push(block);
+                continue;
+            }
+
+            const h = hashBlock(block);
+            result.blocksTotal += 1;
+
+            const seen = seenBlocks.get(h);
+            if (seen && seen[0] !== idx) {
+                const origFn = seen[1];
+                const firstLine = block.trim().split('\n')[0].slice(0, 80);
+                const ref = `[... "${firstLine}" — identical to earlier ${origFn} result, see above ...]`;
+                const charsSaved = block.length - ref.length;
+                if (charsSaved > 0) {
+                    newBlocks.push(ref);
+                    dedupedInThis += 1;
+                    result.blocksDeduped += 1;
+                } else {
+                    newBlocks.push(block);
+                }
+            } else {
+                if (!seen) {
+                    seenBlocks.set(h, [idx, fnName, blockIdx]);
+                }
+                newBlocks.push(block);
+            }
+        }
+
+        if (dedupedInThis > 0) {
+            const originalLen = output.length;
+            const newOutput = newBlocks.join('\n\n');
+            item.output = newOutput;
+            const newLen = newOutput.length;
+            result.charsBefore += originalLen;
+            result.charsAfter += newLen;
+            result.charsSaved += (originalLen - newLen);
+        } else {
+            for (let blockIdx = 0; blockIdx < blocks.length; blockIdx++) {
+                const block = blocks[blockIdx];
+                if (block.trim().length >= minBlockChars) {
+                    const h = hashBlock(block);
+                    if (!seenBlocks.has(h)) {
+                        seenBlocks.set(h, [idx, fnName, blockIdx]);
+                    }
+                }
+            }
+        }
+    }
+
+    return result;
+}
diff --git a/openclaw-plugin/src/engine/extract.ts b/openclaw-plugin/src/engine/extract.ts
new file mode 100644
index 0000000..10b8c67
--- /dev/null
+++ b/openclaw-plugin/src/engine/extract.ts
@@ -0,0 +1,969 @@
+import * as crypto from 'crypto';
+
+/**
+ * HTTP Intercept Parser for ContextPilot
+ * 
+ * Pure parsing/extraction/reconstruction logic for intercepting LLM API requests.
+ * Extracts documents from system messages, supports reordering, and reconstructs
+ * the request body with reordered documents.
+ * 
+ * No server dependencies — independently testable.
+ */
+
+const _KNOWN_WRAPPER_TAGS = new Set(["documents", "contexts", "docs", "passages", "references", "files"]);
+const _KNOWN_ITEM_TAGS = new Set(["document", "context", "doc", "passage", "reference", "file"]);
+
+const _NUMBERED_RE = /\[(\d+)\]\s*/;
+const _SEPARATOR_PATTERNS = ["---", "==="];
+const _SINGLE_DOC_MIN_CHARS = 200;
+
+export interface InterceptConfig {
+    enabled: boolean;
+    mode: string;
+    tag: string;
+    separator: string;
+    alpha: number;
+    linkageMethod: string;
+    scope: string;
+}
+
+export interface ExtractionResult {
+    documents: string[];
+    prefix: string;
+    suffix: string;
+    mode: string;
+    wrapperTag: string;
+    itemTag: string;
+    separatorChar: string;
+    originalContent: string;
+    jsonItems: any[] | null;
+}
+
+export interface ToolResultLocation {
+    msgIndex: number;
+    blockIndex: number;      // -1 = content is string
+    innerBlockIndex: number; // For Anthropic nested content blocks
+}
+
+export interface SingleDocExtraction {
+    content: string;
+    contentHash: string;
+    toolCallId: string;
+}
+
+export class MultiExtractionResult {
+    systemExtraction: [ExtractionResult, number] | null = null;
+    toolExtractions: [ExtractionResult, ToolResultLocation][] = [];
+    singleDocExtractions: [SingleDocExtraction, ToolResultLocation][] = [];
+
+    get hasExtractions(): boolean {
+        return (
+            this.systemExtraction !== null ||
+            this.toolExtractions.length > 0 ||
+            this.singleDocExtractions.length > 0
+        );
+    }
+
+    get totalDocuments(): number {
+        let total = this.singleDocExtractions.length;
+        if (this.systemExtraction) {
+            total += this.systemExtraction[0].documents.length;
+        }
+        for (const [ext, _] of this.toolExtractions) {
+            total += ext.documents.length;
+        }
+        return total;
+    }
+}
+
+/**
+ * Parse X-ContextPilot-* headers into an InterceptConfig.
+ */
+export function parseInterceptHeaders(headers: Record<string, string>): InterceptConfig {
+    const get = (name: string, def: string = ""): string => {
+        const key = `x-contextpilot-${name}`;
+        for (const [k, v] of Object.entries(headers)) {
+            if (k.toLowerCase() === key) {
+                return v;
+            }
+        }
+        return def;
+    };
+
+    const enabledStr = get("enabled", "true").toLowerCase();
+    const enabled = !["false", "0", "no"].includes(enabledStr);
+
+    let scope = get("scope", "all").toLowerCase();
+    if (!["system", "tool_results", "all"].includes(scope)) {
+        scope = "all";
+    }
+
+    return {
+        enabled,
+        mode: get("mode", "auto").toLowerCase(),
+        tag: get("tag", "document").toLowerCase(),
+        separator: get("separator", "---"),
+        alpha: parseFloat(get("alpha", "0.001")) || 0.001,
+        linkageMethod: get("linkage", "average"),
+        scope
+    };
+}
+
+// ── Document extraction ─────────────────────────────────────────────────────
+
+function _escapeRegExp(string: string): string {
+    return string.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); // $& means the whole matched string
+}
+
+export function extractXmlTags(text: string, config: InterceptConfig): ExtractionResult | null {
+    let itemTagsToTry: string[] = [];
+    let wrapperTagsToTry: string[] = [];
+
+    if (config.mode === "xml_tag") {
+        itemTagsToTry.push(config.tag);
+        wrapperTagsToTry.push(config.tag + "s");
+        for (const t of _KNOWN_ITEM_TAGS) {
+            if (t !== config.tag) itemTagsToTry.push(t);
+        }
+        for (const t of _KNOWN_WRAPPER_TAGS) {
+            if (t !== config.tag + "s") wrapperTagsToTry.push(t);
+        }
+    } else {
+        itemTagsToTry = Array.from(_KNOWN_ITEM_TAGS);
+        wrapperTagsToTry = Array.from(_KNOWN_WRAPPER_TAGS);
+    }
+
+    for (const wrapperTag of wrapperTagsToTry) {
+        const wrapperPattern = new RegExp(`(<${wrapperTag}(?:\\s[^>]*)?>)(.*?)(</${wrapperTag}>)`, "s");
+        const wrapperMatch = wrapperPattern.exec(text);
+        if (!wrapperMatch) continue;
+
+        const innerText = wrapperMatch[2];
+        const prefix = text.substring(0, wrapperMatch.index);
+        const suffix = text.substring(wrapperMatch.index + wrapperMatch[0].length);
+
+        for (const itemTag of itemTagsToTry) {
+            const itemPattern = new RegExp(`(<${itemTag}(?:\\s[^>]*)?>)(.*?)(</${itemTag}>)`, "gs");
+            let items: string[] = [];
+            while (true) {
+                const itemMatch = itemPattern.exec(innerText);
+                if (itemMatch === null) break;
+                items.push(itemMatch[2].trim());
+            }
+            if (items.length > 0) {
+                return {
+                    documents: items,
+                    prefix,
+                    suffix,
+                    mode: "xml_tag",
+                    wrapperTag,
+                    itemTag,
+                    separatorChar: "",
+                    originalContent: text,
+                    jsonItems: null
+                };
+            }
+        }
+    }
+
+    for (const itemTag of itemTagsToTry) {
+        const itemPattern = new RegExp(`(<${itemTag}(?:\\s[^>]*)?>)(.*?)(</${itemTag}>)`, "gs");
+        const items: RegExpExecArray[] = [];
+        while (true) {
+            const match = itemPattern.exec(text);
+            if (match === null) break;
+            items.push(match);
+        }
+        
+        if (items.length >= 2) {
+            const firstStart = items[0].index;
+            const lastEnd = items[items.length - 1].index + items[items.length - 1][0].length;
+            return {
+                documents: items.map(m => m[2].trim()),
+                prefix: text.substring(0, firstStart),
+                suffix: text.substring(lastEnd),
+                mode: "xml_tag",
+                wrapperTag: "",
+                itemTag,
+                separatorChar: "",
+                originalContent: text,
+                jsonItems: null
+            };
+        }
+    }
+
+    return null;
+}
+
+export function extractNumbered(text: string, config: InterceptConfig): ExtractionResult | null {
+    const splits = text.split(_NUMBERED_RE);
+    if (splits.length < 4) {
+        return null;
+    }
+
+    const prefix = splits[0];
+    const documents: string[] = [];
+    let i = 1;
+    while (i + 1 < splits.length) {
+        const docText = splits[i + 1].trim();
+        if (docText) {
+            documents.push(docText);
+        }
+        i += 2;
+    }
+
+    if (documents.length < 2) return null;
+
+    return {
+        documents,
+        prefix,
+        suffix: "",
+        mode: "numbered",
+        wrapperTag: "",
+        itemTag: "",
+        separatorChar: "",
+        originalContent: text,
+        jsonItems: null
+    };
+}
+
+export function extractSeparator(text: string, config: InterceptConfig): ExtractionResult | null {
+    let sep = config.separator;
+    let parts: string[] = [];
+    let documents: string[] = [];
+    
+    if (config.mode === "auto") {
+        let found = false;
+        for (const candidate of _SEPARATOR_PATTERNS) {
+            const regex = new RegExp(`\\n${_escapeRegExp(candidate)}\\n`);
+            parts = text.split(regex);
+            if (parts.length >= 3) {
+                sep = candidate;
+                found = true;
+                break;
+            }
+        }
+        if (!found) return null;
+        documents = parts.map(p => p.trim()).filter(p => p);
+    } else {
+        const regex = new RegExp(`\\n${_escapeRegExp(sep)}\\n`);
+        parts = text.split(regex);
+        documents = parts.map(p => p.trim()).filter(p => p);
+    }
+
+    if (documents.length < 2) return null;
+
+    return {
+        documents,
+        prefix: "",
+        suffix: "",
+        mode: "separator",
+        wrapperTag: "",
+        itemTag: "",
+        separatorChar: sep,
+        originalContent: text,
+        jsonItems: null
+    };
+}
+
+export function extractMarkdownHeaders(text: string, config: InterceptConfig): ExtractionResult | null {
+    const parts = text.split(/(?=^#{1,2}\s)/m);
+    if (!parts || parts.length === 0) return null;
+
+    let prefix = "";
+    const sections: string[] = [];
+    
+    for (const part of parts) {
+        const stripped = part.trim();
+        if (!stripped) continue;
+        
+        if (/^#{1,2}\s/.test(stripped)) {
+            sections.push(stripped);
+        } else {
+            prefix = part;
+        }
+    }
+
+    if (sections.length < 2) return null;
+
+    return {
+        documents: sections,
+        prefix,
+        suffix: "",
+        mode: "markdown_header",
+        wrapperTag: "",
+        itemTag: "",
+        separatorChar: "",
+        originalContent: text,
+        jsonItems: null
+    };
+}
+
+const _JSON_ID_KEYS = ["url", "path", "file", "filename", "uri", "href"];
+
+function _extractJsonId(item: any): string | null {
+    for (const key of _JSON_ID_KEYS) {
+        if (item && typeof item === "object" && key in item) {
+            const val = item[key];
+            if (typeof val === "string" && val.trim()) {
+                return val.trim();
+            }
+        }
+    }
+    return null;
+}
+
+export function extractJsonResults(text: string, config: InterceptConfig): ExtractionResult | null {
+    const stripped = text.trim();
+    if (!stripped.startsWith("{")) return null;
+    
+    let obj: any;
+    try {
+        obj = JSON.parse(stripped);
+    } catch (e) {
+        return null;
+    }
+
+    if (typeof obj !== "object" || obj === null) return null;
+    
+    const results = obj.results;
+    if (!Array.isArray(results) || results.length < 2) return null;
+
+    const documents: string[] = [];
+    for (const item of results) {
+        if (typeof item === "object" && item !== null) {
+            const docId = _extractJsonId(item);
+            if (docId !== null) {
+                documents.push(docId);
+            } else {
+                documents.push(JSON.stringify(item));
+            }
+        } else {
+            documents.push(JSON.stringify(item));
+        }
+    }
+
+    if (documents.length < 2) return null;
+
+    return {
+        documents,
+        prefix: "",
+        suffix: "",
+        mode: "json_results",
+        wrapperTag: "",
+        itemTag: "",
+        separatorChar: "",
+        originalContent: text,
+        jsonItems: results
+    };
+}
+
+export function extractDocuments(text: string, config: InterceptConfig): ExtractionResult | null {
+    if (config.mode === "xml_tag") {
+        return extractXmlTags(text, config);
+    } else if (config.mode === "numbered") {
+        return extractNumbered(text, config);
+    } else if (config.mode === "json_results") {
+        return extractJsonResults(text, config);
+    } else if (config.mode === "separator") {
+        return extractSeparator(text, config);
+    } else if (config.mode === "markdown_header") {
+        return extractMarkdownHeaders(text, config);
+    } else {
+        let result = extractXmlTags(text, config);
+        if (result) return result;
+        result = extractNumbered(text, config);
+        if (result) return result;
+        result = extractJsonResults(text, config);
+        if (result) return result;
+        return null;
+    }
+}
+
+// ── Reconstruction ───────────────────────────────────────────────────────────
+
+export function reconstructContent(extraction: ExtractionResult, reorderedDocs: string[]): string {
+    if (extraction.mode === "xml_tag") {
+        return reconstructXml(extraction, reorderedDocs);
+    } else if (extraction.mode === "numbered") {
+        return reconstructNumbered(extraction, reorderedDocs);
+    } else if (extraction.mode === "json_results") {
+        return reconstructJsonResults(extraction, reorderedDocs);
+    } else if (extraction.mode === "separator") {
+        return reconstructSeparator(extraction, reorderedDocs);
+    } else if (extraction.mode === "markdown_header") {
+        return reconstructMarkdownHeaders(extraction, reorderedDocs);
+    } else {
+        return extraction.originalContent;
+    }
+}
+
+export function reconstructXml(extraction: ExtractionResult, reorderedDocs: string[]): string {
+    const itemTag = extraction.itemTag;
+    const items = reorderedDocs.map(doc => `<${itemTag}>${doc}</${itemTag}>`).join("\n");
+
+    let block: string;
+    if (extraction.wrapperTag) {
+        const wrapper = extraction.wrapperTag;
+        block = `<${wrapper}>\n${items}\n</${wrapper}>`;
+    } else {
+        block = items;
+    }
+
+    return extraction.prefix + block + extraction.suffix;
+}
+
+export function reconstructNumbered(extraction: ExtractionResult, reorderedDocs: string[]): string {
+    const parts = extraction.prefix ? [extraction.prefix] : [];
+    for (let i = 0; i < reorderedDocs.length; i++) {
+        parts.push(`[${i + 1}] ${reorderedDocs[i]}`);
+    }
+    let result = parts.length > 0 ? parts.join("\n") : "";
+    if (extraction.suffix) {
+        result += extraction.suffix;
+    }
+    return result;
+}
+
+export function reconstructJsonResults(extraction: ExtractionResult, reorderedDocs: string[]): string {
+    const obj = JSON.parse(extraction.originalContent);
+    if (extraction.jsonItems !== null) {
+        const origDocs = extraction.documents;
+        const docToIndices: Record<string, number[]> = {};
+        for (let i = 0; i < origDocs.length; i++) {
+            if (!docToIndices[origDocs[i]]) {
+                docToIndices[origDocs[i]] = [];
+            }
+            docToIndices[origDocs[i]].push(i);
+        }
+        
+        const used = new Set<number>();
+        const reorderedItems: any[] = [];
+        for (const doc of reorderedDocs) {
+            const indices = docToIndices[doc] || [];
+            for (const idx of indices) {
+                if (!used.has(idx)) {
+                    reorderedItems.push(extraction.jsonItems[idx]);
+                    used.add(idx);
+                    break;
+                }
+            }
+        }
+        obj.results = reorderedItems;
+    } else {
+        obj.results = reorderedDocs.map(doc => JSON.parse(doc));
+    }
+    return JSON.stringify(obj, null, 2);
+}
+
+export function reconstructSeparator(extraction: ExtractionResult, reorderedDocs: string[]): string {
+    const sep = extraction.separatorChar || "---";
+    return reorderedDocs.join(`\n${sep}\n`);
+}
+
+export function reconstructMarkdownHeaders(extraction: ExtractionResult, reorderedDocs: string[]): string {
+    const parts: string[] = [];
+    if (extraction.prefix.trim()) {
+        parts.push(extraction.prefix.trimEnd());
+    }
+    parts.push(...reorderedDocs);
+    return parts.join("\n\n");
+}
+
+// ── OpenAI Chat format ──────────────────────────────────────────────────────
+
+export function extractFromOpenaiChat(body: any, config: InterceptConfig): [ExtractionResult, number] | null {
+    const messages = body?.messages;
+    if (!messages || !Array.isArray(messages)) return null;
+
+    for (let i = 0; i < messages.length; i++) {
+        const msg = messages[i];
+        if (msg?.role !== "system") continue;
+        
+        const content = msg.content || "";
+        if (typeof content === "string") {
+            const result = extractDocuments(content, config);
+            if (result) return [result, i];
+        } else if (Array.isArray(content)) {
+            for (const block of content) {
+                if (block && typeof block === "object" && block.type === "text") {
+                    const result = extractDocuments(block.text || "", config);
+                    if (result) return [result, i];
+                }
+            }
+        }
+    }
+    return null;
+}
+
+export function reconstructOpenaiChat(
+    body: any,
+    extraction: ExtractionResult,
+    reorderedDocs: string[],
+    systemMsgIndex: number
+): any {
+    const newBody = structuredClone(body);
+    const newContent = reconstructContent(extraction, reorderedDocs);
+    const msg = newBody.messages[systemMsgIndex];
+
+    if (typeof msg.content === "string") {
+        msg.content = newContent;
+    } else if (Array.isArray(msg.content)) {
+        for (const block of msg.content) {
+            if (block && typeof block === "object" && block.type === "text") {
+                // Using dummy config since we just check if it was the block with documents
+                if (extractDocuments(block.text || "", parseInterceptHeaders({}))) {
+                    block.text = newContent;
+                    break;
+                }
+            }
+        }
+    }
+    return newBody;
+}
+
+// ── Anthropic Messages format ───────────────────────────────────────────────
+
+export function extractFromAnthropicMessages(body: any, config: InterceptConfig): ExtractionResult | null {
+    const system = body?.system;
+    if (system === undefined || system === null) return null;
+
+    if (typeof system === "string") {
+        return extractDocuments(system, config);
+    } else if (Array.isArray(system)) {
+        for (const block of system) {
+            if (block && typeof block === "object" && block.type === "text") {
+                const result = extractDocuments(block.text || "", config);
+                if (result) return result;
+            }
+        }
+    }
+    return null;
+}
+
+export function reconstructAnthropicMessages(
+    body: any,
+    extraction: ExtractionResult,
+    reorderedDocs: string[]
+): any {
+    const newBody = structuredClone(body);
+    const newContent = reconstructContent(extraction, reorderedDocs);
+
+    if (typeof newBody.system === "string") {
+        newBody.system = newContent;
+    } else if (Array.isArray(newBody.system)) {
+        for (const block of newBody.system) {
+            if (block && typeof block === "object" && block.type === "text") {
+                if (extractDocuments(block.text || "", parseInterceptHeaders({}))) {
+                    block.text = newContent;
+                    break;
+                }
+            }
+        }
+    }
+    return newBody;
+}
+
+// ── Tool result extraction ─────────────────────────────────────────────────
+
+export function extractFromOpenaiToolResults(body: any, config: InterceptConfig): [ExtractionResult, ToolResultLocation][] {
+    const messages = body?.messages;
+    if (!messages || !Array.isArray(messages)) return [];
+
+    const results: [ExtractionResult, ToolResultLocation][] = [];
+    for (let i = 0; i < messages.length; i++) {
+        const msg = messages[i];
+        if (msg?.role !== "tool" && msg?.role !== "toolResult") continue;
+        
+        const content = msg.content || "";
+        if (typeof content === "string") {
+            const extraction = extractDocuments(content, config);
+            if (extraction && extraction.documents.length >= 2) {
+                results.push([extraction, { msgIndex: i, blockIndex: -1, innerBlockIndex: -1 }]);
+            }
+        } else if (Array.isArray(content)) {
+            for (let j = 0; j < content.length; j++) {
+                const block = content[j];
+                if (block && typeof block === "object" && block.type === "text") {
+                    const extraction = extractDocuments(block.text || "", config);
+                    if (extraction && extraction.documents.length >= 2) {
+                        results.push([extraction, { msgIndex: i, blockIndex: j, innerBlockIndex: -1 }]);
+                    }
+                }
+            }
+        }
+    }
+    return results;
+}
+
+export function extractFromAnthropicToolResults(body: any, config: InterceptConfig): [ExtractionResult, ToolResultLocation][] {
+    const messages = body?.messages;
+    if (!messages || !Array.isArray(messages)) return [];
+
+    const results: [ExtractionResult, ToolResultLocation][] = [];
+    for (let i = 0; i < messages.length; i++) {
+        const msg = messages[i];
+        if (msg?.role !== "user") continue;
+        
+        const content = msg.content;
+        if (!Array.isArray(content)) continue;
+        
+        for (let j = 0; j < content.length; j++) {
+            const block = content[j];
+            if (!block || typeof block !== "object" || (block.type !== "tool_result" && block.type !== "toolResult")) continue;
+            
+            const trContent = block.content || "";
+            if (typeof trContent === "string") {
+                const extraction = extractDocuments(trContent, config);
+                if (extraction && extraction.documents.length >= 2) {
+                    results.push([extraction, { msgIndex: i, blockIndex: j, innerBlockIndex: -1 }]);
+                }
+            } else if (Array.isArray(trContent)) {
+                for (let k = 0; k < trContent.length; k++) {
+                    const inner = trContent[k];
+                    if (inner && typeof inner === "object" && inner.type === "text") {
+                        const extraction = extractDocuments(inner.text || "", config);
+                        if (extraction && extraction.documents.length >= 2) {
+                            results.push([extraction, { msgIndex: i, blockIndex: j, innerBlockIndex: k }]);
+                        }
+                    }
+                }
+            }
+        }
+    }
+    return results;
+}
+
+// ── Tool result reconstruction ─────────────────────────────────────────────
+
+export function reconstructOpenaiToolResult(
+    body: any,
+    extraction: ExtractionResult,
+    reorderedDocs: string[],
+    location: ToolResultLocation
+): void {
+    const newContent = reconstructContent(extraction, reorderedDocs);
+    const msg = body.messages[location.msgIndex];
+    if (location.blockIndex === -1) {
+        msg.content = newContent;
+    } else {
+        msg.content[location.blockIndex].text = newContent;
+    }
+}
+
+export function reconstructAnthropicToolResult(
+    body: any,
+    extraction: ExtractionResult,
+    reorderedDocs: string[],
+    location: ToolResultLocation
+): void {
+    const newContent = reconstructContent(extraction, reorderedDocs);
+    const msg = body.messages[location.msgIndex];
+    const block = msg.content[location.blockIndex];
+    if (location.innerBlockIndex === -1) {
+        block.content = newContent;
+    } else {
+        block.content[location.innerBlockIndex].text = newContent;
+    }
+}
+
+// ── Aggregate extraction ───────────────────────────────────────────────────
+
+export function extractAllOpenai(body: any, config: InterceptConfig): MultiExtractionResult {
+    const result = new MultiExtractionResult();
+    if (["system", "all"].includes(config.scope)) {
+        const sysResult = extractFromOpenaiChat(body, config);
+        if (sysResult) {
+            result.systemExtraction = sysResult;
+        }
+    }
+    if (["tool_results", "all"].includes(config.scope)) {
+        result.toolExtractions = extractFromOpenaiToolResults(body, config);
+        result.singleDocExtractions = extractSingleDocsFromOpenaiToolResults(body, config);
+    }
+    return result;
+}
+
+export function extractAllAnthropic(body: any, config: InterceptConfig): MultiExtractionResult {
+    const result = new MultiExtractionResult();
+    if (["system", "all"].includes(config.scope)) {
+        const sysExtraction = extractFromAnthropicMessages(body, config);
+        if (sysExtraction && sysExtraction.documents.length >= 2) {
+            result.systemExtraction = [sysExtraction, -1];
+        }
+    }
+    if (["tool_results", "all"].includes(config.scope)) {
+        result.toolExtractions = extractFromAnthropicToolResults(body, config);
+        result.singleDocExtractions = extractSingleDocsFromAnthropicToolResults(body, config);
+    }
+    return result;
+}
+
+// ── Single-document extraction (for cross-turn dedup) ─────────────────────
+
+function _makeSingleDoc(content: string, toolCallId: string = ""): SingleDocExtraction {
+    const stripped = content.trim();
+    const contentHash = crypto.createHash("sha256").update(stripped).digest("hex");
+    return {
+        content: stripped,
+        contentHash,
+        toolCallId
+    };
+}
+
+export function extractSingleDocsFromOpenaiToolResults(
+    body: any, config: InterceptConfig
+): [SingleDocExtraction, ToolResultLocation][] {
+    const messages = body?.messages;
+    if (!messages || !Array.isArray(messages)) return [];
+
+    const results: [SingleDocExtraction, ToolResultLocation][] = [];
+    for (let i = 0; i < messages.length; i++) {
+        const msg = messages[i];
+        if (msg?.role !== "tool" && msg?.role !== "toolResult") continue;
+        
+        const toolCallId = msg.tool_call_id || "";
+        const content = msg.content || "";
+
+        if (typeof content === "string") {
+            const extraction = extractDocuments(content, config);
+            if (extraction && extraction.documents.length >= 2) continue;
+            
+            if (content.trim().length >= _SINGLE_DOC_MIN_CHARS) {
+                results.push([
+                    _makeSingleDoc(content, toolCallId),
+                    { msgIndex: i, blockIndex: -1, innerBlockIndex: -1 }
+                ]);
+            }
+        } else if (Array.isArray(content)) {
+            for (let j = 0; j < content.length; j++) {
+                const block = content[j];
+                if (!block || typeof block !== "object" || block.type !== "text") continue;
+                
+                const text = block.text || "";
+                const extraction = extractDocuments(text, config);
+                if (extraction && extraction.documents.length >= 2) continue;
+                
+                if (text.trim().length >= _SINGLE_DOC_MIN_CHARS) {
+                    results.push([
+                        _makeSingleDoc(text, toolCallId),
+                        { msgIndex: i, blockIndex: j, innerBlockIndex: -1 }
+                    ]);
+                }
+            }
+        }
+    }
+    return results;
+}
+
+export function extractSingleDocsFromAnthropicToolResults(
+    body: any, config: InterceptConfig
+): [SingleDocExtraction, ToolResultLocation][] {
+    const messages = body?.messages;
+    if (!messages || !Array.isArray(messages)) return [];
+
+    const results: [SingleDocExtraction, ToolResultLocation][] = [];
+    for (let i = 0; i < messages.length; i++) {
+        const msg = messages[i];
+        if (msg?.role !== "user") continue;
+        
+        const content = msg.content;
+        if (!Array.isArray(content)) continue;
+        
+        for (let j = 0; j < content.length; j++) {
+            const block = content[j];
+            if (!block || typeof block !== "object") continue;
+            if (block.type !== "tool_result" && block.type !== "toolResult") continue;
+            
+            const toolUseId = block.tool_use_id || "";
+            const trContent = block.content || "";
+
+            if (typeof trContent === "string") {
+                const extraction = extractDocuments(trContent, config);
+                if (extraction && extraction.documents.length >= 2) continue;
+                
+                if (trContent.trim().length >= _SINGLE_DOC_MIN_CHARS) {
+                    results.push([
+                        _makeSingleDoc(trContent, toolUseId),
+                        { msgIndex: i, blockIndex: j, innerBlockIndex: -1 }
+                    ]);
+                }
+            } else if (Array.isArray(trContent)) {
+                for (let k = 0; k < trContent.length; k++) {
+                    const inner = trContent[k];
+                    if (!inner || typeof inner !== "object" || inner.type !== "text") continue;
+                    
+                    const text = inner.text || "";
+                    const extraction = extractDocuments(text, config);
+                    if (extraction && extraction.documents.length >= 2) continue;
+                    
+                    if (text.trim().length >= _SINGLE_DOC_MIN_CHARS) {
+                        results.push([
+                            _makeSingleDoc(text, toolUseId),
+                            { msgIndex: i, blockIndex: j, innerBlockIndex: k }
+                        ]);
+                    }
+                }
+            }
+        }
+    }
+    return results;
+}
+
+// ── Single-document hint replacement ──────────────────────────────────────
+
+export function replaceSingleDocOpenai(
+    body: any, location: ToolResultLocation, hint: string
+): void {
+    const msg = body.messages[location.msgIndex];
+    if (location.blockIndex === -1) {
+        msg.content = hint;
+    } else {
+        msg.content[location.blockIndex].text = hint;
+    }
+}
+
+export function replaceSingleDocAnthropic(
+    body: any, location: ToolResultLocation, hint: string
+): void {
+    const msg = body.messages[location.msgIndex];
+    const block = msg.content[location.blockIndex];
+    if (location.innerBlockIndex === -1) {
+        block.content = hint;
+    } else {
+        block.content[location.innerBlockIndex].text = hint;
+    }
+}
+
+// ── Format handler abstraction ─────────────────────────────────────────────
+
+export interface FormatHandler {
+    extractAll(body: any, config: InterceptConfig): MultiExtractionResult;
+    reconstructSystem(body: any, extraction: ExtractionResult, docs: string[], sysIdx: number): void;
+    reconstructToolResult(body: any, extraction: ExtractionResult, docs: string[], location: ToolResultLocation): void;
+    replaceSingleDoc(body: any, location: ToolResultLocation, hint: string): void;
+    toolCallPresent(body: any, toolCallId: string): boolean;
+    targetPath(): string;
+    cacheSystem(body: any): any;
+    restoreSystem(body: any, cached: any): void;
+}
+
+export class OpenAIChatHandler implements FormatHandler {
+    extractAll(body: any, config: InterceptConfig): MultiExtractionResult {
+        return extractAllOpenai(body, config);
+    }
+
+    reconstructSystem(body: any, extraction: ExtractionResult, docs: string[], sysIdx: number): void {
+        const newContent = reconstructContent(extraction, docs);
+        const msg = body.messages[sysIdx];
+        if (typeof msg.content === "string") {
+            msg.content = newContent;
+        } else if (Array.isArray(msg.content)) {
+            for (const block of msg.content) {
+                if (block && typeof block === "object" && block.type === "text") {
+                    if (extractDocuments(block.text || "", parseInterceptHeaders({}))) {
+                        block.text = newContent;
+                        break;
+                    }
+                }
+            }
+        }
+    }
+
+    reconstructToolResult(body: any, extraction: ExtractionResult, docs: string[], location: ToolResultLocation): void {
+        reconstructOpenaiToolResult(body, extraction, docs, location);
+    }
+
+    replaceSingleDoc(body: any, location: ToolResultLocation, hint: string): void {
+        replaceSingleDocOpenai(body, location, hint);
+    }
+
+    toolCallPresent(body: any, toolCallId: string): boolean {
+        for (const msg of (body.messages || [])) {
+            if (msg.role === "tool" || msg.role === "toolResult") {
+                if (msg.tool_call_id === toolCallId) return true;
+            }
+        }
+        return false;
+    }
+
+    targetPath(): string {
+        return "/v1/chat/completions";
+    }
+
+    cacheSystem(body: any): any {
+        return null; // System prompt is inside messages array
+    }
+
+    restoreSystem(body: any, cached: any): void {
+        // No-op
+    }
+}
+
+export class AnthropicMessagesHandler implements FormatHandler {
+    extractAll(body: any, config: InterceptConfig): MultiExtractionResult {
+        return extractAllAnthropic(body, config);
+    }
+
+    reconstructSystem(body: any, extraction: ExtractionResult, docs: string[], sysIdx: number): void {
+        const newContent = reconstructContent(extraction, docs);
+        if (typeof body.system === "string") {
+            body.system = newContent;
+        } else if (Array.isArray(body.system)) {
+            for (const block of body.system) {
+                if (block && typeof block === "object" && block.type === "text") {
+                    if (extractDocuments(block.text || "", parseInterceptHeaders({}))) {
+                        block.text = newContent;
+                        break;
+                    }
+                }
+            }
+        }
+    }
+
+    reconstructToolResult(body: any, extraction: ExtractionResult, docs: string[], location: ToolResultLocation): void {
+        reconstructAnthropicToolResult(body, extraction, docs, location);
+    }
+
+    replaceSingleDoc(body: any, location: ToolResultLocation, hint: string): void {
+        replaceSingleDocAnthropic(body, location, hint);
+    }
+
+    toolCallPresent(body: any, toolCallId: string): boolean {
+        for (const msg of (body.messages || [])) {
+            if (msg.role === "user" && Array.isArray(msg.content)) {
+                for (const block of msg.content) {
+                    if (block && typeof block === "object" && 
+                        (block.type === "tool_result" || block.type === "toolResult") && 
+                        block.tool_use_id === toolCallId) {
+                        return true;
+                    }
+                }
+            }
+        }
+        return false;
+    }
+
+    targetPath(): string {
+        return "/v1/messages";
+    }
+
+    cacheSystem(body: any): any {
+        return structuredClone(body.system);
+    }
+
+    restoreSystem(body: any, cached: any): void {
+        if (cached !== null && cached !== undefined) {
+            body.system = structuredClone(cached);
+        }
+    }
+}
+
+const _FORMAT_HANDLERS: Record<string, FormatHandler> = {
+    "openai_chat": new OpenAIChatHandler(),
+    "anthropic_messages": new AnthropicMessagesHandler()
+};
+
+export function getFormatHandler(apiFormat: string): FormatHandler {
+    return _FORMAT_HANDLERS[apiFormat] || _FORMAT_HANDLERS["openai_chat"];
+}
diff --git a/openclaw-plugin/src/engine/reorder.ts b/openclaw-plugin/src/engine/reorder.ts
new file mode 100644
index 0000000..9b4d90f
--- /dev/null
+++ b/openclaw-plugin/src/engine/reorder.ts
@@ -0,0 +1,109 @@
+import * as crypto from 'node:crypto';
+
+interface IndexedDoc {
+    doc: string;
+    hash: string;
+    originalIndex: number;
+    previousPosition: number;
+}
+
+function hashDoc(doc: string): string {
+    return crypto.createHash('sha256').update(doc.trim()).digest('hex').slice(0, 16);
+}
+
+function buildIndexMappings(entries: IndexedDoc[], total: number): [number[], number[]] {
+    const originalOrder = entries.map((entry) => entry.originalIndex);
+
+    const newOrder = new Array<number>(total);
+    for (let newIndex = 0; newIndex < entries.length; newIndex += 1) {
+        newOrder[entries[newIndex].originalIndex] = newIndex;
+    }
+
+    return [originalOrder, newOrder];
+}
+
+function indexDocuments(docs: string[]): IndexedDoc[] {
+    return docs.map((doc, originalIndex) => ({
+        doc,
+        hash: hashDoc(doc),
+        originalIndex,
+        previousPosition: Number.POSITIVE_INFINITY
+    }));
+}
+
+export function reorderDocuments(docs: string[]): [string[], number[], number[]] {
+    const indexed = indexDocuments(docs);
+    indexed.sort((a, b) => {
+        const byHash = a.hash.localeCompare(b.hash);
+        if (byHash !== 0) {
+            return byHash;
+        }
+        return a.originalIndex - b.originalIndex;
+    });
+
+    const reorderedDocs = indexed.map((entry) => entry.doc);
+    const [originalOrder, newOrder] = buildIndexMappings(indexed, docs.length);
+    return [reorderedDocs, originalOrder, newOrder];
+}
+
+export class ReorderState {
+    private previousOrder: string[] = [];
+
+    private hashToDoc: Map<string, string> = new Map();
+
+    reorder(docs: string[]): [string[], number[], number[]] {
+        const indexed = indexDocuments(docs);
+        const previousPositions = new Map<string, number>();
+
+        for (let i = 0; i < this.previousOrder.length; i += 1) {
+            const hash = this.previousOrder[i];
+            if (!previousPositions.has(hash)) {
+                previousPositions.set(hash, i);
+            }
+        }
+
+        const known: IndexedDoc[] = [];
+        const unknown: IndexedDoc[] = [];
+
+        for (const entry of indexed) {
+            const previousPosition = previousPositions.get(entry.hash);
+            if (previousPosition === undefined) {
+                unknown.push(entry);
+                continue;
+            }
+
+            known.push({ ...entry, previousPosition });
+        }
+
+        known.sort((a, b) => {
+            if (a.previousPosition !== b.previousPosition) {
+                return a.previousPosition - b.previousPosition;
+            }
+            return a.originalIndex - b.originalIndex;
+        });
+
+        unknown.sort((a, b) => {
+            const byHash = a.hash.localeCompare(b.hash);
+            if (byHash !== 0) {
+                return byHash;
+            }
+            return a.originalIndex - b.originalIndex;
+        });
+
+        const reordered = [...known, ...unknown];
+
+        this.previousOrder = reordered.map((entry) => entry.hash);
+        for (const entry of reordered) {
+            this.hashToDoc.set(entry.hash, entry.doc);
+        }
+
+        const reorderedDocs = reordered.map((entry) => entry.doc);
+        const [originalOrder, newOrder] = buildIndexMappings(reordered, docs.length);
+        return [reorderedDocs, originalOrder, newOrder];
+    }
+
+    reset(): void {
+        this.previousOrder = [];
+        this.hashToDoc.clear();
+    }
+}
diff --git a/openclaw-plugin/src/index.ts b/openclaw-plugin/src/index.ts
new file mode 100644
index 0000000..58fd015
--- /dev/null
+++ b/openclaw-plugin/src/index.ts
@@ -0,0 +1,175 @@
+import { Type } from "@sinclair/typebox";
+import {
+  definePluginEntry,
+  type ProviderResolveDynamicModelContext,
+  type ProviderWrapStreamFnContext,
+} from "openclaw/plugin-sdk/plugin-entry";
+import { createProviderApiKeyAuthMethod } from "openclaw/plugin-sdk/provider-auth";
+
+import { injectCacheControl } from "./engine/cache-control.js";
+import { dedupChatCompletions, dedupResponsesApi } from "./engine/dedup.js";
+import { getFormatHandler, type InterceptConfig } from "./engine/extract.js";
+import { ReorderState } from "./engine/reorder.js";
+
+const PROVIDER_ID = "contextpilot";
+
+export default definePluginEntry({
+  id: "contextpilot",
+  name: "ContextPilot",
+  description: "Optimizes LLM requests in-process via extraction, dedup, caching, and reordering.",
+  register: (api) => {
+    const config = {
+      backendProvider: api.pluginConfig?.backendProvider === "openai" ? "openai" : "anthropic",
+      scope: ["system", "tool_results", "all"].includes(String(api.pluginConfig?.scope))
+        ? String(api.pluginConfig?.scope)
+        : "all",
+    };
+
+    const reorderState = new ReorderState();
+    let requestCount = 0;
+    let totalCharsSaved = 0;
+
+    api.registerProvider({
+      id: PROVIDER_ID,
+      label: "ContextPilot",
+      docsPath: "/providers/contextpilot",
+      envVars: [config.backendProvider === "anthropic" ? "ANTHROPIC_API_KEY" : "OPENAI_API_KEY"],
+      auth: [
+        createProviderApiKeyAuthMethod({
+          providerId: PROVIDER_ID,
+          methodId: "api-key",
+          label: config.backendProvider === "anthropic" ? "Anthropic API key" : "OpenAI API key",
+          hint: "API key for the backend LLM provider",
+          optionKey: config.backendProvider === "anthropic" ? "anthropicApiKey" : "openaiApiKey",
+          flagName: config.backendProvider === "anthropic" ? "--anthropic-api-key" : "--openai-api-key",
+          envVar: config.backendProvider === "anthropic" ? "ANTHROPIC_API_KEY" : "OPENAI_API_KEY",
+          promptMessage: "Enter your API key",
+          defaultModel:
+            config.backendProvider === "anthropic"
+              ? "contextpilot/claude-sonnet-4-6"
+              : "contextpilot/gpt-4o",
+        }),
+      ],
+      resolveDynamicModel: (ctx: ProviderResolveDynamicModelContext) => {
+        const isAnthropic = config.backendProvider === "anthropic";
+        return {
+          id: ctx.modelId,
+          name: ctx.modelId,
+          provider: PROVIDER_ID,
+          baseUrl: isAnthropic ? "https://api.anthropic.com/v1" : "https://api.openai.com/v1",
+          api: isAnthropic ? "anthropic-messages" : "openai-completions",
+          reasoning: false,
+          input: ["text", "image"] as Array<"text" | "image">,
+          cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
+          contextWindow: 200000,
+          maxTokens: 16384,
+        };
+      },
+      wrapStreamFn: (ctx: ProviderWrapStreamFnContext) => {
+        const originalStreamFn = ctx.streamFn;
+        if (!originalStreamFn) return undefined;
+
+        return async (params) => {
+          const request = params as { body?: unknown };
+          if (!request.body) {
+            return originalStreamFn(params);
+          }
+
+          const body = structuredClone(request.body) as Record<string, unknown>;
+          const apiFormat = config.backendProvider === "anthropic"
+            ? "anthropic_messages"
+            : "openai_chat";
+
+          const interceptConfig: InterceptConfig = {
+            enabled: true,
+            mode: "auto",
+            tag: "document",
+            separator: "---",
+            alpha: 0.001,
+            linkageMethod: "average",
+            scope: config.scope,
+          };
+
+          const handler = getFormatHandler(apiFormat);
+          const multi = handler.extractAll(body, interceptConfig);
+
+          if (multi.systemExtraction) {
+            const [extraction, sysIdx] = multi.systemExtraction;
+            if (extraction.documents.length >= 2) {
+              const [reordered] = reorderState.reorder(extraction.documents);
+              handler.reconstructSystem(body, extraction, reordered, sysIdx);
+            }
+          }
+
+          for (const [extraction, location] of multi.toolExtractions) {
+            if (extraction.documents.length >= 2) {
+              const [reordered] = reorderState.reorder(extraction.documents);
+              handler.reconstructToolResult(body, extraction, reordered, location);
+            }
+          }
+
+          if (apiFormat === "openai_chat") {
+            const dedupResult = dedupChatCompletions(body);
+            totalCharsSaved += dedupResult.charsSaved;
+          }
+          if (body.input && Array.isArray(body.input)) {
+            const dedupResult = dedupResponsesApi(body);
+            totalCharsSaved += dedupResult.charsSaved;
+          }
+
+          const optimizedBody = injectCacheControl(
+            body,
+            config.backendProvider === "anthropic" ? "anthropic" : "openai",
+          );
+
+          requestCount++;
+
+          return originalStreamFn({
+            ...params,
+            body: optimizedBody,
+          });
+        };
+      },
+      augmentModelCatalog: () => {
+        const isAnthropic = config.backendProvider === "anthropic";
+        if (isAnthropic) {
+          return [
+            { id: "claude-opus-4-6", name: "Claude Opus 4.6 (ContextPilot)", provider: PROVIDER_ID },
+            {
+              id: "claude-sonnet-4-6",
+              name: "Claude Sonnet 4.6 (ContextPilot)",
+              provider: PROVIDER_ID,
+            },
+          ];
+        }
+        return [
+          { id: "gpt-4o", name: "GPT-4o (ContextPilot)", provider: PROVIDER_ID },
+          { id: "gpt-4o-mini", name: "GPT-4o Mini (ContextPilot)", provider: PROVIDER_ID },
+        ];
+      },
+    });
+
+    api.registerTool({
+      name: "contextpilot_status",
+      description: "Report ContextPilot engine state",
+      parameters: Type.Object({}),
+      async execute(_toolCallId: string, _params: unknown) {
+        return {
+          content: [
+            {
+              type: "text" as const,
+              text: [
+                "ContextPilot Engine Status:",
+                "  Mode: in-process (native TypeScript)",
+                `  Requests optimized: ${requestCount}`,
+                `  Total chars saved: ${totalCharsSaved.toLocaleString()}`,
+                `  Backend: ${config.backendProvider}`,
+                `  Scope: ${config.scope}`,
+              ].join("\n"),
+            },
+          ],
+        };
+      },
+    });
+  },
+});
diff --git a/openclaw-plugin/tsconfig.json b/openclaw-plugin/tsconfig.json
new file mode 100644
index 0000000..017a5f9
--- /dev/null
+++ b/openclaw-plugin/tsconfig.json
@@ -0,0 +1,15 @@
+{
+  "compilerOptions": {
+    "target": "ES2022",
+    "module": "ES2022",
+    "moduleResolution": "bundler",
+    "strict": true,
+    "esModuleInterop": true,
+    "skipLibCheck": true,
+    "outDir": "dist",
+    "declaration": true,
+    "resolveJsonModule": true
+  },
+  "include": ["src/**/*.ts"],
+  "exclude": ["node_modules", "dist"]
+}

From b96032478ede50e11840a795a07317c2c3bf8d6b Mon Sep 17 00:00:00 2001
From: SecretSettler <sean.jiang01@outlook.com>
Date: Wed, 25 Mar 2026 01:35:28 +0000
Subject: [PATCH 2/8] test: add comprehensive test suite for ContextPilot
 engine (38 tests)

Tests cover all four engine modules:
- extract (18 tests): XML/numbered/JSON extraction, OpenAI/Anthropic
  format handlers, system+tool_result extraction, reconstruction
- dedup (7 tests): content-defined chunking, block hashing,
  cross-message deduplication for chat+responses API formats
- cache-control (6 tests): Anthropic cache_control injection for
  system/tool_results, immutability, OpenAI no-op, dispatcher
- reorder (7 tests): deterministic hash sort, cross-turn prefix
  stability, reset behavior, index mapping correctness

All 38 tests pass in 18ms.
---
 openclaw-plugin/src/engine/engine.test.ts | 696 ++++++++++++++++++++++
 1 file changed, 696 insertions(+)
 create mode 100644 openclaw-plugin/src/engine/engine.test.ts

diff --git a/openclaw-plugin/src/engine/engine.test.ts b/openclaw-plugin/src/engine/engine.test.ts
new file mode 100644
index 0000000..dcf0cab
--- /dev/null
+++ b/openclaw-plugin/src/engine/engine.test.ts
@@ -0,0 +1,696 @@
+import { describe, expect, it } from "vitest";
+import {
+  injectAnthropicCacheControl,
+  injectCacheControl,
+  injectOpenAICacheControl,
+} from "./cache-control.js";
+import {
+  buildToolNameMapOpenai,
+  contentDefinedChunking,
+  dedupChatCompletions,
+  dedupResponsesApi,
+  hashBlock,
+} from "./dedup.js";
+import {
+  extractAllOpenai,
+  extractDocuments,
+  extractFromAnthropicMessages,
+  extractFromAnthropicToolResults,
+  extractFromOpenaiChat,
+  extractFromOpenaiToolResults,
+  extractSingleDocsFromOpenaiToolResults,
+  getFormatHandler,
+  parseInterceptHeaders,
+  reconstructAnthropicToolResult,
+  reconstructContent,
+  reconstructOpenaiToolResult,
+} from "./extract.js";
+import { ReorderState, reorderDocuments } from "./reorder.js";
+
+const DEFAULT_CONFIG = parseInterceptHeaders({});
+
+const OPENAI_CHAT_BODY = {
+  model: "claude-sonnet-4-6",
+  messages: [
+    {
+      role: "system",
+      content:
+        "<documents><document>Doc A content here</document><document>Doc B content here</document><document>Doc C content here</document></documents>",
+    },
+    { role: "user", content: "What do these docs say?" },
+  ],
+};
+
+const ANTHROPIC_MESSAGES_BODY = {
+  model: "claude-sonnet-4-6",
+  system:
+    "<documents><document>Doc A content here</document><document>Doc B content here</document></documents>",
+  messages: [{ role: "user", content: "Summarize the documents." }],
+};
+
+const LARGE_CONTENT = "x".repeat(600) + "\n".repeat(20) + "y".repeat(600);
+
+const DEDUP_BODY = {
+  messages: [
+    {
+      role: "assistant",
+      content: "",
+      tool_calls: [
+        { id: "call_1", function: { name: "read_file", arguments: "{}" } },
+        { id: "call_2", function: { name: "read_file", arguments: "{}" } },
+      ],
+    },
+    { role: "tool", tool_call_id: "call_1", content: LARGE_CONTENT },
+    { role: "tool", tool_call_id: "call_2", content: LARGE_CONTENT },
+  ],
+};
+
+function makeLargeContent(prefix: string): string {
+  return Array.from(
+    { length: 20 },
+    (_, i) => `${prefix} line ${i} ${"z".repeat(60)}`,
+  ).join("\n");
+}
+
+describe("extract", () => {
+  it("parseInterceptHeaders parses X-ContextPilot-* headers and defaults", () => {
+    const parsed = parseInterceptHeaders({
+      "X-ContextPilot-Enabled": "0",
+      "x-contextpilot-mode": "xml_tag",
+      "x-contextpilot-tag": "context",
+      "x-contextpilot-separator": "===",
+      "x-contextpilot-alpha": "0.05",
+      "x-contextpilot-linkage": "single",
+      "x-contextpilot-scope": "invalid",
+    });
+
+    expect(parsed).toEqual({
+      enabled: false,
+      mode: "xml_tag",
+      tag: "context",
+      separator: "===",
+      alpha: 0.05,
+      linkageMethod: "single",
+      scope: "all",
+    });
+
+    const defaults = parseInterceptHeaders({});
+    expect(defaults.enabled).toBe(true);
+    expect(defaults.mode).toBe("auto");
+    expect(defaults.tag).toBe("document");
+    expect(defaults.separator).toBe("---");
+    expect(defaults.alpha).toBe(0.001);
+    expect(defaults.linkageMethod).toBe("average");
+    expect(defaults.scope).toBe("all");
+  });
+
+  it("extractDocuments extracts XML-tagged documents", () => {
+    const text =
+      "<documents><document>A</document><document>B</document></documents>";
+    const extraction = extractDocuments(text, DEFAULT_CONFIG);
+    expect(extraction).not.toBeNull();
+    expect(extraction?.mode).toBe("xml_tag");
+    expect(extraction?.documents).toEqual(["A", "B"]);
+    expect(extraction?.wrapperTag).toBe("documents");
+    expect(extraction?.itemTag).toBe("document");
+  });
+
+  it("extractDocuments extracts numbered documents", () => {
+    const extraction = extractDocuments(
+      "[1] First doc\n[2] Second doc",
+      parseInterceptHeaders({ "x-contextpilot-mode": "numbered" }),
+    );
+    expect(extraction).not.toBeNull();
+    expect(extraction?.mode).toBe("numbered");
+    expect(extraction?.documents).toEqual(["First doc", "Second doc"]);
+  });
+
+  it("extractDocuments extracts JSON results documents", () => {
+    const extraction = extractDocuments(
+      JSON.stringify({ results: [{ url: "a.com" }, { url: "b.com" }] }),
+      parseInterceptHeaders({ "x-contextpilot-mode": "json_results" }),
+    );
+    expect(extraction).not.toBeNull();
+    expect(extraction?.mode).toBe("json_results");
+    expect(extraction?.documents).toEqual(["a.com", "b.com"]);
+  });
+
+  it("extractDocuments auto mode resolves XML > numbered > JSON", () => {
+    const xml = extractDocuments(
+      "<documents><document>[1] one</document><document>[2] two</document></documents>",
+      DEFAULT_CONFIG,
+    );
+    expect(xml?.mode).toBe("xml_tag");
+
+    const numbered = extractDocuments("[1] one\n[2] two", DEFAULT_CONFIG);
+    expect(numbered?.mode).toBe("numbered");
+
+    const json = extractDocuments(
+      JSON.stringify({ results: [{ url: "one" }, { url: "two" }] }),
+      DEFAULT_CONFIG,
+    );
+    expect(json?.mode).toBe("json_results");
+  });
+
+  it("extractDocuments returns null for fewer than two docs", () => {
+    const numberedSingle = extractDocuments(
+      "[1] Only one",
+      parseInterceptHeaders({ "x-contextpilot-mode": "numbered" }),
+    );
+    expect(numberedSingle).toBeNull();
+
+    const jsonSingle = extractDocuments(
+      JSON.stringify({ results: [{ url: "only-one" }] }),
+      parseInterceptHeaders({ "x-contextpilot-mode": "json_results" }),
+    );
+    expect(jsonSingle).toBeNull();
+  });
+
+  it("reconstructContent rebuilds XML while preserving tags", () => {
+    const extraction = extractDocuments(
+      "prefix<documents><document>A</document><document>B</document></documents>suffix",
+      DEFAULT_CONFIG,
+    );
+    expect(extraction).not.toBeNull();
+    if (!extraction) {
+      throw new Error("expected extraction");
+    }
+
+    const rebuilt = reconstructContent(extraction, ["B", "A"]);
+    expect(rebuilt).toContain("prefix");
+    expect(rebuilt).toContain("suffix");
+    expect(rebuilt).toContain("<documents>");
+    expect(rebuilt).toContain("<document>B</document>");
+    expect(rebuilt).toContain("<document>A</document>");
+  });
+
+  it("reconstructContent rebuilds numbered format", () => {
+    const extraction = extractDocuments(
+      "Lead\n[1] First\n[2] Second",
+      parseInterceptHeaders({ "x-contextpilot-mode": "numbered" }),
+    );
+    expect(extraction).not.toBeNull();
+    if (!extraction) {
+      throw new Error("expected extraction");
+    }
+
+    const rebuilt = reconstructContent(extraction, ["Second", "First"]);
+    expect(rebuilt).toContain("Lead");
+    expect(rebuilt).toContain("[1] Second");
+    expect(rebuilt).toContain("[2] First");
+  });
+
+  it("extractFromOpenaiChat extracts from system message", () => {
+    const extraction = extractFromOpenaiChat(OPENAI_CHAT_BODY, DEFAULT_CONFIG);
+    expect(extraction).not.toBeNull();
+    expect(extraction?.[1]).toBe(0);
+    expect(extraction?.[0].documents).toEqual([
+      "Doc A content here",
+      "Doc B content here",
+      "Doc C content here",
+    ]);
+  });
+
+  it("extractFromAnthropicMessages extracts from system string", () => {
+    const extraction = extractFromAnthropicMessages(
+      ANTHROPIC_MESSAGES_BODY,
+      DEFAULT_CONFIG,
+    );
+    expect(extraction).not.toBeNull();
+    expect(extraction?.documents).toEqual([
+      "Doc A content here",
+      "Doc B content here",
+    ]);
+  });
+
+  it("extractFromOpenaiToolResults extracts tool-result documents", () => {
+    const body = {
+      messages: [
+        { role: "tool", content: "<documents><document>A</document><document>B</document></documents>" },
+      ],
+    };
+    const extractions = extractFromOpenaiToolResults(body, DEFAULT_CONFIG);
+    expect(extractions).toHaveLength(1);
+    expect(extractions[0]?.[0].documents).toEqual(["A", "B"]);
+    expect(extractions[0]?.[1]).toEqual({
+      msgIndex: 0,
+      blockIndex: -1,
+      innerBlockIndex: -1,
+    });
+  });
+
+  it("extractFromAnthropicToolResults extracts tool_result blocks", () => {
+    const body = {
+      messages: [
+        {
+          role: "user",
+          content: [
+            {
+              type: "tool_result",
+              content:
+                "<documents><document>A</document><document>B</document></documents>",
+            },
+          ],
+        },
+      ],
+    };
+    const extractions = extractFromAnthropicToolResults(body, DEFAULT_CONFIG);
+    expect(extractions).toHaveLength(1);
+    expect(extractions[0]?.[0].documents).toEqual(["A", "B"]);
+    expect(extractions[0]?.[1]).toEqual({
+      msgIndex: 0,
+      blockIndex: 0,
+      innerBlockIndex: -1,
+    });
+  });
+
+  it("FormatHandler OpenAI returns a working handler", () => {
+    const handler = getFormatHandler("openai_chat");
+    expect(handler.targetPath()).toBe("/v1/chat/completions");
+
+    const body = structuredClone(OPENAI_CHAT_BODY);
+    const all = handler.extractAll(body, DEFAULT_CONFIG);
+    expect(all.systemExtraction).not.toBeNull();
+    expect(all.hasExtractions).toBe(true);
+
+    if (!all.systemExtraction) {
+      throw new Error("expected system extraction");
+    }
+
+    handler.reconstructSystem(
+      body,
+      all.systemExtraction[0],
+      ["Doc C content here", "Doc B content here", "Doc A content here"],
+      all.systemExtraction[1],
+    );
+    expect(body.messages[0]?.content).toContain("Doc C content here");
+  });
+
+  it("FormatHandler Anthropic returns a working handler", () => {
+    const handler = getFormatHandler("anthropic_messages");
+    expect(handler.targetPath()).toBe("/v1/messages");
+
+    const body = structuredClone(ANTHROPIC_MESSAGES_BODY);
+    const all = handler.extractAll(body, DEFAULT_CONFIG);
+    expect(all.systemExtraction).not.toBeNull();
+    expect(all.hasExtractions).toBe(true);
+
+    if (!all.systemExtraction) {
+      throw new Error("expected system extraction");
+    }
+
+    handler.reconstructSystem(
+      body,
+      all.systemExtraction[0],
+      ["Doc B content here", "Doc A content here"],
+      all.systemExtraction[1],
+    );
+    expect(body.system).toContain("Doc B content here");
+  });
+
+  it("extractAllOpenai extracts from both system and tool results", () => {
+    const body = {
+      messages: [
+        {
+          role: "system",
+          content:
+            "<documents><document>Sys A</document><document>Sys B</document></documents>",
+        },
+        {
+          role: "tool",
+          content:
+            "<documents><document>Tool A</document><document>Tool B</document></documents>",
+        },
+      ],
+    };
+
+    const all = extractAllOpenai(body, DEFAULT_CONFIG);
+    expect(all.systemExtraction).not.toBeNull();
+    expect(all.toolExtractions).toHaveLength(1);
+    expect(all.totalDocuments).toBe(4);
+  });
+
+  it("extractSingleDocsFromOpenaiToolResults extracts single long docs", () => {
+    const body = {
+      messages: [
+        {
+          role: "tool",
+          tool_call_id: "call_99",
+          content: `Result:\n${"r".repeat(240)}`,
+        },
+      ],
+    };
+
+    const extracted = extractSingleDocsFromOpenaiToolResults(body, DEFAULT_CONFIG);
+    expect(extracted).toHaveLength(1);
+    expect(extracted[0]?.[0].toolCallId).toBe("call_99");
+    expect(extracted[0]?.[0].content.length).toBeGreaterThanOrEqual(200);
+    expect(extracted[0]?.[0].contentHash).toMatch(/^[0-9a-f]{64}$/);
+  });
+
+  it("reconstructOpenaiToolResult reconstructs a tool result in-place", () => {
+    const body = {
+      messages: [
+        {
+          role: "tool",
+          content:
+            "<documents><document>A</document><document>B</document></documents>",
+        },
+      ],
+    };
+
+    const extractions = extractFromOpenaiToolResults(body, DEFAULT_CONFIG);
+    expect(extractions).toHaveLength(1);
+    const first = extractions[0];
+    if (!first) {
+      throw new Error("expected extraction");
+    }
+
+    reconstructOpenaiToolResult(body, first[0], ["B", "A"], first[1]);
+    expect(body.messages[0]?.content).toContain("<document>B</document>");
+    expect(body.messages[0]?.content).toContain("<document>A</document>");
+  });
+
+  it("reconstructAnthropicToolResult reconstructs a tool result in-place", () => {
+    const body = {
+      messages: [
+        {
+          role: "user",
+          content: [
+            {
+              type: "tool_result",
+              content:
+                "<documents><document>A</document><document>B</document></documents>",
+            },
+          ],
+        },
+      ],
+    };
+
+    const extractions = extractFromAnthropicToolResults(body, DEFAULT_CONFIG);
+    expect(extractions).toHaveLength(1);
+    const first = extractions[0];
+    if (!first) {
+      throw new Error("expected extraction");
+    }
+
+    reconstructAnthropicToolResult(body, first[0], ["B", "A"], first[1]);
+    expect(body.messages[0]?.content[0]?.content).toContain("<document>B</document>");
+    expect(body.messages[0]?.content[0]?.content).toContain("<document>A</document>");
+  });
+});
+
+describe("dedup", () => {
+  it("contentDefinedChunking splits text into multiple blocks at boundaries", () => {
+    const text = Array.from({ length: 12 }, (_, i) => `line-${i}`).join("\n");
+    const blocks = contentDefinedChunking(text, 1);
+    expect(blocks).toHaveLength(2);
+    expect(blocks[0]?.split("\n")).toHaveLength(5);
+    expect(blocks[1]?.split("\n")).toHaveLength(7);
+  });
+
+  it("contentDefinedChunking returns one block for short text", () => {
+    const short = "a\nb\nc\nd\ne";
+    const blocks = contentDefinedChunking(short);
+    expect(blocks).toEqual([short]);
+  });
+
+  it("hashBlock is consistent and returns 20-char hex", () => {
+    const h1 = hashBlock("  abc\n");
+    const h2 = hashBlock("abc");
+    expect(h1).toBe(h2);
+    expect(h1).toMatch(/^[0-9a-f]{20}$/);
+  });
+
+  it("dedupChatCompletions returns zero savings with no duplicates", () => {
+    const body = {
+      messages: [
+        {
+          role: "assistant",
+          tool_calls: [
+            { id: "a", function: { name: "read_file" } },
+            { id: "b", function: { name: "read_file" } },
+          ],
+        },
+        { role: "tool", tool_call_id: "a", content: makeLargeContent("first") },
+        { role: "tool", tool_call_id: "b", content: makeLargeContent("second") },
+      ],
+    };
+
+    const before = body.messages[2]?.content;
+    const result = dedupChatCompletions(body, { chunkModulus: 1 });
+    expect(result.blocksDeduped).toBe(0);
+    expect(result.charsSaved).toBe(0);
+    expect(body.messages[2]?.content).toBe(before);
+  });
+
+  it("dedupChatCompletions dedups duplicate blocks and inserts references", () => {
+    const body = structuredClone(DEDUP_BODY);
+    const result = dedupChatCompletions(body, { chunkModulus: 1 });
+    expect(result.blocksDeduped).toBeGreaterThan(0);
+    expect(result.charsSaved).toBeGreaterThan(0);
+    expect(body.messages[2]?.content).toContain(
+      "identical to earlier read_file result",
+    );
+  });
+
+  it("dedupChatCompletions skips short content", () => {
+    const short = "s".repeat(300);
+    const body = {
+      messages: [
+        {
+          role: "assistant",
+          tool_calls: [
+            { id: "a", function: { name: "search" } },
+            { id: "b", function: { name: "search" } },
+          ],
+        },
+        { role: "tool", tool_call_id: "a", content: short },
+        { role: "tool", tool_call_id: "b", content: short },
+      ],
+    };
+
+    const result = dedupChatCompletions(body);
+    expect(result.blocksTotal).toBe(0);
+    expect(result.blocksDeduped).toBe(0);
+    expect(result.charsSaved).toBe(0);
+    expect(body.messages[2]?.content).toBe(short);
+  });
+
+  it("dedupResponsesApi dedups duplicate function_call_output content", () => {
+    const body = {
+      input: [
+        { type: "function_call", call_id: "r1", name: "search" },
+        { type: "function_call", call_id: "r2", name: "search" },
+        { type: "function_call_output", call_id: "r1", output: LARGE_CONTENT },
+        { type: "function_call_output", call_id: "r2", output: LARGE_CONTENT },
+      ],
+    };
+
+    const result = dedupResponsesApi(body, { chunkModulus: 1 });
+    expect(result.blocksDeduped).toBeGreaterThan(0);
+    expect(result.charsSaved).toBeGreaterThan(0);
+    expect(body.input[3]?.output).toContain("identical to earlier search result");
+  });
+
+  it("buildToolNameMapOpenai maps tool_call_id to function name", () => {
+    const mapping = buildToolNameMapOpenai([
+      {
+        role: "assistant",
+        tool_calls: [
+          { id: "id_1", function: { name: "read_file" } },
+          { id: "id_2", function: { name: "search" } },
+        ],
+      },
+      { role: "user" },
+    ]);
+
+    expect(mapping).toEqual({ id_1: "read_file", id_2: "search" });
+  });
+});
+
+describe("cache-control", () => {
+  it("injectAnthropicCacheControl converts string system into array with cache_control", () => {
+    const body: Record<string, unknown> = { system: "system text", messages: [] };
+    const result = injectAnthropicCacheControl(body);
+
+    const system = result.system as Array<{
+      type?: string;
+      text?: string;
+      cache_control?: { type: string };
+    }>;
+    expect(Array.isArray(system)).toBe(true);
+    expect(system[0]).toEqual({
+      type: "text",
+      text: "system text",
+      cache_control: { type: "ephemeral" },
+    });
+  });
+
+  it("injectAnthropicCacheControl adds cache_control to last system block", () => {
+    const body: Record<string, unknown> = {
+      system: [
+        { type: "text", text: "first" },
+        { type: "text", text: "last" },
+      ],
+      messages: [],
+    };
+    const result = injectAnthropicCacheControl(body);
+    const system = result.system as Array<{
+      type?: string;
+      text?: string;
+      cache_control?: { type: string };
+    }>;
+
+    expect(system[0]?.cache_control).toBeUndefined();
+    expect(system[1]?.cache_control).toEqual({ type: "ephemeral" });
+  });
+
+  it("injectAnthropicCacheControl adds cache_control to large tool_result blocks", () => {
+    const body: Record<string, unknown> = {
+      messages: [
+        {
+          role: "user",
+          content: [
+            { type: "tool_result", content: "x".repeat(1200) },
+            {
+              type: "tool_result",
+              content: [
+                { type: "text", text: "a".repeat(800) },
+                { type: "text", text: "b".repeat(300) },
+              ],
+            },
+          ],
+        },
+      ],
+    };
+
+    const result = injectAnthropicCacheControl(body);
+    const messages = result.messages as Array<{
+      role?: string;
+      content?: Array<{
+        type?: string;
+        content?: string | Array<{ type?: string; text?: string; cache_control?: { type: string } }>;
+        cache_control?: { type: string };
+      }>;
+    }>;
+
+    const firstToolResult = messages[0]?.content?.[0];
+    const secondToolResult = messages[0]?.content?.[1];
+    const secondInner = secondToolResult?.content as Array<{
+      type?: string;
+      text?: string;
+      cache_control?: { type: string };
+    }>;
+
+    expect(firstToolResult?.cache_control).toEqual({ type: "ephemeral" });
+    expect(secondInner[0]?.cache_control).toBeUndefined();
+    expect(secondInner[1]?.cache_control).toEqual({ type: "ephemeral" });
+  });
+
+  it("injectAnthropicCacheControl does not mutate original body", () => {
+    const body: Record<string, unknown> = {
+      system: "immutable",
+      messages: [{ role: "user", content: [] }],
+    };
+    const snapshot = structuredClone(body);
+    const result = injectAnthropicCacheControl(body);
+
+    expect(body).toEqual(snapshot);
+    expect(result).not.toBe(body);
+  });
+
+  it("injectOpenAICacheControl is a no-op", () => {
+    const body: Record<string, unknown> = {
+      messages: [{ role: "system", content: "keep" }],
+    };
+    const result = injectOpenAICacheControl(body);
+    expect(result).toBe(body);
+  });
+
+  it("injectCacheControl dispatches by provider", () => {
+    const anthropicBody: Record<string, unknown> = { system: "hello", messages: [] };
+    const openaiBody: Record<string, unknown> = { messages: [] };
+
+    const anthropicResult = injectCacheControl(anthropicBody, "anthropic");
+    const openaiResult = injectCacheControl(openaiBody, "openai");
+
+    expect(anthropicResult).not.toBe(anthropicBody);
+    expect(Array.isArray(anthropicResult.system)).toBe(true);
+    expect(openaiResult).toBe(openaiBody);
+  });
+});
+
+describe("reorder", () => {
+  it("ReorderState first call matches deterministic hash sort", () => {
+    const docs = ["Doc C", "Doc A", "Doc B"];
+    const state = new ReorderState();
+    const [stateOrder] = state.reorder(docs);
+    const [statelessOrder] = reorderDocuments(docs);
+    expect(stateOrder).toEqual(statelessOrder);
+  });
+
+  it("ReorderState second call keeps known order and appends new docs", () => {
+    const state = new ReorderState();
+    const [first] = state.reorder(["alpha", "beta", "gamma"]);
+    const [second] = state.reorder(["gamma", "alpha", "delta"]);
+
+    const knownOrder = first.filter((doc) => doc === "gamma" || doc === "alpha");
+    expect(second.slice(0, knownOrder.length)).toEqual(knownOrder);
+    expect(second[second.length - 1]).toBe("delta");
+  });
+
+  it("ReorderState reset restores first-call behavior", () => {
+    const docs = ["alpha", "beta", "gamma"];
+    const state = new ReorderState();
+
+    state.reorder(docs);
+    state.reorder(["gamma", "alpha", "delta"]);
+    state.reset();
+
+    const [afterReset] = state.reorder(docs);
+    const [expected] = reorderDocuments(docs);
+    expect(afterReset).toEqual(expected);
+  });
+
+  it("reorderDocuments is deterministic and stateless", () => {
+    const docs = ["one", "two", "three", "four"];
+    const first = reorderDocuments(docs);
+    const second = reorderDocuments(docs);
+    expect(first).toEqual(second);
+  });
+
+  it("reorderDocuments returns correct originalOrder and newOrder mappings", () => {
+    const docs = ["one", "two", "three", "four"];
+    const [reordered, originalOrder, newOrder] = reorderDocuments(docs);
+
+    expect(originalOrder).toHaveLength(docs.length);
+    expect(newOrder).toHaveLength(docs.length);
+
+    for (let newIndex = 0; newIndex < reordered.length; newIndex += 1) {
+      const originalIndex = originalOrder[newIndex];
+      expect(reordered[newIndex]).toBe(docs[originalIndex]);
+    }
+
+    for (let originalIndex = 0; originalIndex < docs.length; originalIndex += 1) {
+      const mappedNewIndex = newOrder[originalIndex];
+      expect(reordered[mappedNewIndex]).toBe(docs[originalIndex]);
+    }
+  });
+
+  it("ReorderState preserves known-doc prefix stability across calls", () => {
+    const state = new ReorderState();
+    const knownDocs = ["alpha", "beta", "gamma"];
+
+    const [first] = state.reorder(knownDocs);
+    const [second] = state.reorder(["gamma", "beta", "alpha", "delta"]);
+    const [third] = state.reorder(["alpha", "epsilon", "gamma", "beta", "zeta"]);
+
+    const knownPrefix = first.filter((doc) =>
+      knownDocs.includes(doc),
+    );
+
+    expect(second.slice(0, knownPrefix.length)).toEqual(knownPrefix);
+    expect(third.slice(0, knownPrefix.length)).toEqual(knownPrefix);
+  });
+});

From 08e5d3badad7393222c16283067865186de79324 Mon Sep 17 00:00:00 2001
From: SecretSettler <sean.jiang01@outlook.com>
Date: Wed, 25 Mar 2026 15:26:10 +0000
Subject: [PATCH 3/8] test: add E2E integration tests for full optimization
 pipeline (16 tests)

Tests simulate the exact wrapStreamFn pipeline end-to-end:
- Anthropic: system XML extraction+reorder+cache_control, tool_result
  cache injection, scope=system/tool_results filtering
- OpenAI: system reorder, duplicate tool result dedup with reference
  hints, responses API dedup, passthrough for plain messages
- Multi-turn: ReorderState preserves doc order across turns,
  reset clears history
- Edge cases: empty body, no messages, single doc (no reorder),
  short content (no dedup), null messages, system as block array

Total test suite: 54 tests (38 unit + 16 integration), all pass in 165ms.
---
 .../src/engine/integration.test.ts            | 362 ++++++++++++++++++
 1 file changed, 362 insertions(+)
 create mode 100644 openclaw-plugin/src/engine/integration.test.ts

diff --git a/openclaw-plugin/src/engine/integration.test.ts b/openclaw-plugin/src/engine/integration.test.ts
new file mode 100644
index 0000000..74328a6
--- /dev/null
+++ b/openclaw-plugin/src/engine/integration.test.ts
@@ -0,0 +1,362 @@
+import { describe, it, expect } from "vitest";
+import { getFormatHandler, type InterceptConfig } from "./extract.js";
+import { dedupChatCompletions, dedupResponsesApi } from "./dedup.js";
+import { injectCacheControl } from "./cache-control.js";
+import { ReorderState } from "./reorder.js";
+
+function runPipeline(
+  body: Record<string, unknown>,
+  opts: {
+    provider?: "anthropic" | "openai";
+    scope?: string;
+    reorderState?: ReorderState;
+  } = {}
+): Record<string, unknown> {
+  const provider = opts.provider ?? "anthropic";
+  const scope = opts.scope ?? "all";
+  const reorderState = opts.reorderState ?? new ReorderState();
+
+  const clonedBody = structuredClone(body);
+  const apiFormat = provider === "anthropic" ? "anthropic_messages" : "openai_chat";
+
+  const interceptConfig: InterceptConfig = {
+    enabled: true,
+    mode: "auto",
+    tag: "document",
+    separator: "---",
+    alpha: 0.001,
+    linkageMethod: "average",
+    scope,
+  };
+
+  const handler = getFormatHandler(apiFormat);
+  const multi = handler.extractAll(clonedBody, interceptConfig);
+
+  if (multi.systemExtraction) {
+    const [extraction, sysIdx] = multi.systemExtraction;
+    if (extraction.documents.length >= 2) {
+      const [reordered] = reorderState.reorder(extraction.documents);
+      handler.reconstructSystem(clonedBody, extraction, reordered, sysIdx);
+    }
+  }
+
+  for (const [extraction, location] of multi.toolExtractions) {
+    if (extraction.documents.length >= 2) {
+      const [reordered] = reorderState.reorder(extraction.documents);
+      handler.reconstructToolResult(clonedBody, extraction, reordered, location);
+    }
+  }
+
+  if (apiFormat === "openai_chat") {
+    dedupChatCompletions(clonedBody as any);
+  }
+  if (clonedBody.input && Array.isArray(clonedBody.input)) {
+    dedupResponsesApi(clonedBody as any);
+  }
+
+  return injectCacheControl(clonedBody, provider);
+}
+
+describe("full pipeline — Anthropic", () => {
+  it("system prompt with XML documents gets reordered and cache-controlled", () => {
+    const body = {
+      model: "claude-sonnet-4-6",
+      system: `You are a helpful assistant.\n<documents>\n<document index="1">\nFirst document about TypeScript.\nIt has multiple lines.\n</document>\n<document index="2">\nSecond document about Python.\nAlso multi-line.\n</document>\n<document index="3">\nThird document about Rust.\nYet another multi-line doc.\n</document>\n</documents>\nPlease answer based on the above.`,
+      messages: [{ role: "user", content: "Summarize the documents." }],
+    };
+
+    const reorderState = new ReorderState();
+    const result = runPipeline(body, { provider: "anthropic", reorderState });
+
+    expect(Array.isArray(result.system)).toBe(true);
+    const systemArray = result.system as any[];
+    
+    const lastBlock = systemArray[systemArray.length - 1];
+    expect(lastBlock.cache_control).toEqual({ type: "ephemeral" });
+
+    const textContent = systemArray.map(b => b.text).join("");
+    expect(textContent).toContain("You are a helpful assistant.");
+    expect(textContent).toContain("Please answer based on the above.");
+    
+    expect(textContent).toContain("First document about TypeScript.");
+    expect(textContent).toContain("Second document about Python.");
+    expect(textContent).toContain("Third document about Rust.");
+  });
+
+  it("Anthropic tool_result with large content gets cache_control", () => {
+    const body = {
+      model: "claude-sonnet-4-6",
+      system: "You are helpful.",
+      messages: [
+        {
+          role: "user",
+          content: [
+            { type: "tool_result", tool_use_id: "tu_1", content: "A".repeat(2000) },
+          ],
+        },
+      ],
+    };
+
+    const result = runPipeline(body, { provider: "anthropic" });
+    const messages = result.messages as any[];
+    const content = messages[0].content as any[];
+    expect(content[0].cache_control).toEqual({ type: "ephemeral" });
+  });
+
+  it("Anthropic scope=\"system\" only processes system, not tool results", () => {
+    const docText = `<documents><document index="1">\nFirst document about TypeScript.\nIt has multiple lines.\n</document><document index="2">\nSecond document about Python.\nAlso multi-line.\n</document></documents>`;
+    const body = {
+      model: "claude-sonnet-4-6",
+      system: `You are helpful.\n${docText}`,
+      messages: [
+        {
+          role: "user",
+          content: [
+            { type: "tool_result", tool_use_id: "tu_1", content: docText },
+          ],
+        },
+      ],
+    };
+
+    const reorderState = new ReorderState();
+    // Reorder stability means it will process it
+    const result = runPipeline(body, { provider: "anthropic", scope: "system", reorderState });
+    
+    // System should have its format modified to array due to reconstruction/cache control
+    expect(Array.isArray(result.system)).toBe(true);
+
+    const messages = result.messages as any[];
+    const content = messages[0].content as any[];
+    // Tool result shouldn't have been reconstructed into blocks of its internal documents
+    expect(content[0].content).toBe(docText);
+  });
+
+  it("Anthropic scope=\"tool_results\" only processes tools, not system", () => {
+    const docText = `<documents><document index="1">\nFirst document about TypeScript.\nIt has multiple lines.\n</document><document index="2">\nSecond document about Python.\nAlso multi-line.\n</document></documents>`;
+    const body = {
+      model: "claude-sonnet-4-6",
+      system: `You are helpful.\n${docText}`,
+      messages: [
+        {
+          role: "user",
+          content: [
+            { type: "tool_result", tool_use_id: "tu_1", content: docText },
+          ],
+        },
+      ],
+    };
+
+    const reorderState = new ReorderState();
+    const result = runPipeline(body, { provider: "anthropic", scope: "tool_results", reorderState });
+    
+    // System should not be processed for documents (though it may be arrayified for cache control)
+    // Cache control injects string to array conversion for Anthropic system if needed
+    if (Array.isArray(result.system)) {
+        const textContent = (result.system as any[]).map(b => b.text).join("");
+        expect(textContent).toBe(`You are helpful.\n${docText}`);
+    } else {
+        expect(result.system).toBe(`You are helpful.\n${docText}`);
+    }
+
+    // Tool results should be reconstructed/reordered
+    const messages = result.messages as any[];
+    const content = messages[0].content as any[];
+    expect(typeof content[0].content).toBe("string");
+    expect(content[0].content).toContain("First document about TypeScript.");
+    expect(content[0].content).toContain("Second document about Python.");
+  });
+});
+
+describe("full pipeline — OpenAI", () => {
+  it("OpenAI chat system message with XML documents gets reordered", () => {
+    const body = {
+      model: "gpt-4o",
+      messages: [
+        { role: "system", content: "<documents><document>Doc A content</document><document>Doc B content</document><document>Doc C content</document></documents>" },
+        { role: "user", content: "Hello" }
+      ]
+    };
+
+    const result = runPipeline(body, { provider: "openai" });
+    const msgs = result.messages as any[];
+    const sysMsg = msgs[0].content;
+    expect(sysMsg).toContain("Doc A content");
+    expect(sysMsg).toContain("Doc B content");
+    expect(sysMsg).toContain("Doc C content");
+  });
+
+  it("OpenAI chat with duplicate tool results gets deduped", () => {
+    const sharedContent = Array.from({length: 30}, (_, i) => `Line ${i}: ${"x".repeat(50)}`).join("\n");
+    const body = {
+      model: "gpt-4o",
+      messages: [
+        { role: "assistant", content: null, tool_calls: [
+          { id: "call_1", type: "function", function: { name: "read_file", arguments: "{}" } },
+          { id: "call_2", type: "function", function: { name: "read_file", arguments: "{}" } }
+        ]},
+        { role: "tool", tool_call_id: "call_1", content: sharedContent },
+        { role: "tool", tool_call_id: "call_2", content: sharedContent }
+      ]
+    };
+
+    const result = runPipeline(body, { provider: "openai" });
+    const msgs = result.messages as any[];
+    
+    expect(msgs[1].content).toBe(sharedContent);
+    expect(msgs[2].content).not.toBe(sharedContent);
+    expect(msgs[2].content).toContain("identical to earlier read_file result");
+  });
+
+  it("OpenAI body with no extractable docs passes through unchanged", () => {
+    const body = {
+      model: "gpt-4o",
+      messages: [
+        { role: "system", content: "You are helpful." },
+        { role: "user", content: "Hi" }
+      ]
+    };
+
+    const result = runPipeline(body, { provider: "openai" });
+    expect(result).toEqual(body);
+  });
+
+  it("OpenAI responses API format gets deduped", () => {
+    const sharedContent = Array.from({length: 30}, (_, i) => `Line ${i}: ${"x".repeat(50)}`).join("\n");
+    const body = {
+      input: [
+        { type: "function_call_output", call_id: "c1", output: sharedContent },
+        { type: "function_call_output", call_id: "c2", output: sharedContent }
+      ]
+    };
+
+    const result = runPipeline(body, { provider: "openai" });
+    const input = result.input as any[];
+    
+    expect(input[0].output).toBe(sharedContent);
+    expect(input[1].output).not.toBe(sharedContent);
+    expect(input[1].output).toContain("identical");
+  });
+});
+
+describe("multi-turn state — reorder stability", () => {
+  it("reorder state preserves doc order across turns", () => {
+    const reorderState = new ReorderState();
+    
+    const bodyTurn1 = {
+      model: "gpt-4o",
+      messages: [
+        { role: "system", content: "<documents><document>Doc A content</document><document>Doc B content</document><document>Doc C content</document></documents>" }
+      ]
+    };
+    
+    runPipeline(bodyTurn1, { provider: "openai", reorderState });
+    
+    const bodyTurn2 = {
+      model: "gpt-4o",
+      messages: [
+        { role: "system", content: "<documents><document>Doc A content</document><document>Doc B content</document><document>Doc C content</document><document>Doc D content</document></documents>" }
+      ]
+    };
+
+    const res2 = runPipeline(bodyTurn2, { provider: "openai", reorderState });
+    const sysMsg2 = (res2.messages as any[])[0].content;
+    
+    // In multi-turn, ReorderState should put the new item (D) at top, and preserve relative ordering of A, B, C.
+    // We just verify all are present and stable.
+    expect(sysMsg2).toContain("Doc A content");
+    expect(sysMsg2).toContain("Doc B content");
+    expect(sysMsg2).toContain("Doc C content");
+    expect(sysMsg2).toContain("Doc D content");
+  });
+
+  it("reorder state reset clears history", () => {
+    const reorderState = new ReorderState();
+    const body = {
+      model: "gpt-4o",
+      messages: [
+        { role: "system", content: "<documents><document>Doc A content</document><document>Doc B content</document></documents>" }
+      ]
+    };
+    
+    runPipeline(body, { provider: "openai", reorderState });
+    
+    reorderState.reset();
+    
+    const res2 = runPipeline(body, { provider: "openai", reorderState });
+    const sysMsg2 = (res2.messages as any[])[0].content;
+    
+    expect(sysMsg2).toContain("Doc A content");
+    expect(sysMsg2).toContain("Doc B content");
+  });
+});
+
+describe("edge cases", () => {
+  it("empty body passes through", () => {
+    const result = runPipeline({}, { provider: "anthropic" });
+    expect(result).toEqual({});
+  });
+
+  it("body with no messages passes through", () => {
+    const body = { model: "gpt-4o" };
+    const result = runPipeline(body, { provider: "openai" });
+    expect(result).toEqual(body);
+  });
+
+  it("body with single document doesn't get reordered", () => {
+    const body = {
+      model: "gpt-4o",
+      messages: [
+        { role: "system", content: "<documents><document>Only Doc</document></documents>" }
+      ]
+    };
+    const result = runPipeline(body, { provider: "openai" });
+    // It should be unchanged
+    expect(result).toEqual(body);
+  });
+
+  it("very short tool result content not deduped", () => {
+    const shortContent = "Too short for dedup.";
+    const body = {
+      model: "gpt-4o",
+      messages: [
+        { role: "assistant", content: null, tool_calls: [
+          { id: "call_1", type: "function", function: { name: "read_file", arguments: "{}" } },
+          { id: "call_2", type: "function", function: { name: "read_file", arguments: "{}" } }
+        ]},
+        { role: "tool", tool_call_id: "call_1", content: shortContent },
+        { role: "tool", tool_call_id: "call_2", content: shortContent }
+      ]
+    };
+
+    const result = runPipeline(body, { provider: "openai" });
+    const msgs = result.messages as any[];
+    expect(msgs[1].content).toBe(shortContent);
+    expect(msgs[2].content).toBe(shortContent);
+  });
+
+  it("null/undefined messages gracefully handled", () => {
+    const body = { model: "gpt-4o", messages: null };
+    const result = runPipeline(body, { provider: "openai" });
+    expect(result).toEqual(body);
+  });
+
+  it("Anthropic body with system as content block array", () => {
+    const body = {
+      model: "claude-sonnet-4-6",
+      system: [
+        { type: "text", text: "<documents><document>A</document><document>B</document></documents>" }
+      ],
+      messages: [{ role: "user", content: "hi" }]
+    };
+
+    const result = runPipeline(body, { provider: "anthropic" });
+    const sys = result.system as any[];
+    expect(Array.isArray(sys)).toBe(true);
+    // Last block should have cache_control
+    expect(sys[sys.length - 1].cache_control).toEqual({ type: "ephemeral" });
+    
+    const fullText = sys.map(b => b.text).join("");
+    expect(fullText).toContain("A");
+    expect(fullText).toContain("B");
+  });
+});

From 64e9d9053edd2b0e370444b91797e3334b69faa7 Mon Sep 17 00:00:00 2001
From: SecretSettler <sean.jiang01@outlook.com>
Date: Thu, 26 Mar 2026 13:37:19 +0000
Subject: [PATCH 4/8] feat: complete ContextPilot engine port to TypeScript
 (6145 lines)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Full port of all Python ContextPilot modules to zero-dependency TypeScript:

Core index (replaces numpy+scipy):
- tree-nodes.ts: ClusterNode + NodeManager (334 lines)
- compute-distance.ts: O(n) merge-based distance computation (224 lines)
- index-construction.ts: hierarchical clustering with pure-TS linkage() (348 lines)
- intra-ordering.ts: within-context reordering via tree prefix (349 lines)
- inter-scheduler.ts: cross-context scheduling via path grouping (116 lines)

Live engine:
- live-index.ts: ContextPilot class — search/insert/evict/reorder (1232 lines)
- metadata.ts: NodeMetadata for per-node runtime tracking (82 lines)
- eviction-heap.ts: LRU min-heap for SGLang cache eviction sync (317 lines)
- conversation-tracker.ts: multi-turn document deduplication (241 lines)
- http-client.ts: native fetch() client for index server comms (267 lines)

Previously ported:
- extract.ts: document extraction from system/tool_results (969 lines)
- dedup.ts: cross-turn block-level content dedup (355 lines)
- cache-control.ts: Anthropic cache_control injection (144 lines)
- reorder.ts: simplified LCP reorder fallback (109 lines)
---
 .../src/engine/compute-distance.ts            |  224 +++
 .../src/engine/conversation-tracker.ts        |  241 ++++
 openclaw-plugin/src/engine/eviction-heap.ts   |  317 +++++
 openclaw-plugin/src/engine/http-client.ts     |  267 ++++
 .../src/engine/index-construction.ts          |  348 +++++
 openclaw-plugin/src/engine/inter-scheduler.ts |  116 ++
 openclaw-plugin/src/engine/intra-ordering.ts  |  349 +++++
 openclaw-plugin/src/engine/live-index.ts      | 1232 +++++++++++++++++
 openclaw-plugin/src/engine/metadata.ts        |   82 ++
 openclaw-plugin/src/engine/tree-nodes.ts      |  334 +++++
 10 files changed, 3510 insertions(+)
 create mode 100644 openclaw-plugin/src/engine/compute-distance.ts
 create mode 100644 openclaw-plugin/src/engine/conversation-tracker.ts
 create mode 100644 openclaw-plugin/src/engine/eviction-heap.ts
 create mode 100644 openclaw-plugin/src/engine/http-client.ts
 create mode 100644 openclaw-plugin/src/engine/index-construction.ts
 create mode 100644 openclaw-plugin/src/engine/inter-scheduler.ts
 create mode 100644 openclaw-plugin/src/engine/intra-ordering.ts
 create mode 100644 openclaw-plugin/src/engine/live-index.ts
 create mode 100644 openclaw-plugin/src/engine/metadata.ts
 create mode 100644 openclaw-plugin/src/engine/tree-nodes.ts

diff --git a/openclaw-plugin/src/engine/compute-distance.ts b/openclaw-plugin/src/engine/compute-distance.ts
new file mode 100644
index 0000000..8aad2d5
--- /dev/null
+++ b/openclaw-plugin/src/engine/compute-distance.ts
@@ -0,0 +1,224 @@
+export interface PreparedContextsCpu {
+    chunkIds: number[];
+    originalPositions: number[];
+    lengths: number[];
+    offsets: number[];
+}
+
+export function computeDistanceSingle(
+    contextA: number[],
+    contextB: number[],
+    alpha: number = 0.001
+): number {
+    if (contextA.length === 0 || contextB.length === 0) {
+        return 1.0;
+    }
+
+    const posA = new Map<number, number>();
+    const posB = new Map<number, number>();
+
+    for (let pos = 0; pos < contextA.length; pos += 1) {
+        posA.set(contextA[pos], pos);
+    }
+    for (let pos = 0; pos < contextB.length; pos += 1) {
+        posB.set(contextB[pos], pos);
+    }
+
+    let intersectionSize = 0;
+    let positionDiffSum = 0;
+
+    for (const [docId, aPos] of posA) {
+        const bPos = posB.get(docId);
+        if (bPos === undefined) {
+            continue;
+        }
+
+        intersectionSize += 1;
+        positionDiffSum += Math.abs(aPos - bPos);
+    }
+
+    if (intersectionSize === 0) {
+        return 1.0;
+    }
+
+    const maxSize = Math.max(contextA.length, contextB.length);
+    const overlapTerm = 1.0 - intersectionSize / maxSize;
+    const positionTerm = alpha * (positionDiffSum / intersectionSize);
+
+    return overlapTerm + positionTerm;
+}
+
+export function computeDistancesBatch(
+    queries: number[][],
+    targets: number[][],
+    alpha: number = 0.001
+): number[][] {
+    const nQueries = queries.length;
+    const nTargets = targets.length;
+
+    if (nQueries === 0 || nTargets === 0) {
+        return Array.from({ length: nQueries }, () => new Array<number>(nTargets).fill(0));
+    }
+
+    const totalPairs = nQueries * nTargets;
+    const distances: number[][] = Array.from(
+        { length: nQueries },
+        () => new Array<number>(nTargets).fill(1.0)
+    );
+
+    if (totalPairs < 1000) {
+        for (let i = 0; i < nQueries; i += 1) {
+            for (let j = 0; j < nTargets; j += 1) {
+                distances[i][j] = computeDistanceSingle(queries[i], targets[j], alpha);
+            }
+        }
+        return distances;
+    }
+
+    for (let i = 0; i < nQueries; i += 1) {
+        for (let j = 0; j < nTargets; j += 1) {
+            distances[i][j] = computeDistanceSingle(queries[i], targets[j], alpha);
+        }
+    }
+
+    return distances;
+}
+
+export function prepareContextsForCpu(contexts: number[][]): PreparedContextsCpu {
+    const n = contexts.length;
+    const sortedData: Array<Array<[number, number]>> = new Array(n);
+    const lengths: number[] = new Array(n).fill(0);
+
+    for (let idx = 0; idx < n; idx += 1) {
+        const ctx = contexts[idx];
+        if (ctx.length === 0) {
+            sortedData[idx] = [];
+            lengths[idx] = 0;
+            continue;
+        }
+
+        const pairs: Array<[number, number]> = new Array(ctx.length);
+        for (let origPos = 0; origPos < ctx.length; origPos += 1) {
+            pairs[origPos] = [ctx[origPos], origPos];
+        }
+        pairs.sort((a, b) => a[0] - b[0]);
+
+        sortedData[idx] = pairs;
+        lengths[idx] = pairs.length;
+    }
+
+    const offsets: number[] = new Array(n + 1).fill(0);
+    for (let i = 0; i < n; i += 1) {
+        offsets[i + 1] = offsets[i] + lengths[i];
+    }
+
+    const totalElements = offsets[n];
+    const chunkIds: number[] = new Array(totalElements).fill(0);
+    const originalPositions: number[] = new Array(totalElements).fill(0);
+
+    for (let i = 0; i < n; i += 1) {
+        const pairs = sortedData[i];
+        const start = offsets[i];
+        for (let j = 0; j < pairs.length; j += 1) {
+            const [chunkId, origPos] = pairs[j];
+            chunkIds[start + j] = chunkId;
+            originalPositions[start + j] = origPos;
+        }
+    }
+
+    return {
+        chunkIds,
+        originalPositions,
+        lengths,
+        offsets
+    };
+}
+
+export function computeDistanceOptimized(
+    chunkIds: number[],
+    originalPositions: number[],
+    lengths: number[],
+    offsets: number[],
+    i: number,
+    j: number,
+    alpha: number
+): number {
+    const lenI = lengths[i];
+    const lenJ = lengths[j];
+
+    if (lenI === 0 || lenJ === 0) {
+        return 1.0;
+    }
+
+    const offsetI = offsets[i];
+    const offsetJ = offsets[j];
+    const endI = offsetI + lenI;
+    const endJ = offsetJ + lenJ;
+
+    let intersectionSize = 0;
+    let positionDiffSum = 0;
+
+    let pi = offsetI;
+    let pj = offsetJ;
+
+    while (pi < endI && pj < endJ) {
+        const chunkI = chunkIds[pi];
+        const chunkJ = chunkIds[pj];
+
+        if (chunkI === chunkJ) {
+            intersectionSize += 1;
+            positionDiffSum += Math.abs(originalPositions[pi] - originalPositions[pj]);
+            pi += 1;
+            pj += 1;
+        } else if (chunkI < chunkJ) {
+            pi += 1;
+        } else {
+            pj += 1;
+        }
+    }
+
+    const maxSize = Math.max(lenI, lenJ);
+    const overlapTerm = 1.0 - intersectionSize / maxSize;
+
+    let positionTerm = 0.0;
+    if (intersectionSize !== 0) {
+        const avgPosDiff = positionDiffSum / intersectionSize;
+        positionTerm = alpha * avgPosDiff;
+    }
+
+    return overlapTerm + positionTerm;
+}
+
+export function computeDistanceMatrixCpu(
+    contexts: number[][],
+    alpha: number = 0.001
+): Float64Array {
+    const n = contexts.length;
+    const numPairs = (n * (n - 1)) / 2;
+
+    if (numPairs === 0) {
+        return new Float64Array(0);
+    }
+
+    const { chunkIds, originalPositions, lengths, offsets } = prepareContextsForCpu(contexts);
+    const condensedDistances = new Float64Array(numPairs);
+
+    for (let i = 0; i < n; i += 1) {
+        for (let j = i + 1; j < n; j += 1) {
+            const dist = computeDistanceOptimized(
+                chunkIds,
+                originalPositions,
+                lengths,
+                offsets,
+                i,
+                j,
+                alpha
+            );
+
+            const condensedIdx = n * i - (i * (i + 1)) / 2 + j - i - 1;
+            condensedDistances[condensedIdx] = dist;
+        }
+    }
+
+    return condensedDistances;
+}
diff --git a/openclaw-plugin/src/engine/conversation-tracker.ts b/openclaw-plugin/src/engine/conversation-tracker.ts
new file mode 100644
index 0000000..845ee68
--- /dev/null
+++ b/openclaw-plugin/src/engine/conversation-tracker.ts
@@ -0,0 +1,241 @@
+export interface DeduplicationResult {
+    originalDocs: number[];
+    overlappingDocs: number[];
+    newDocs: number[];
+    referenceHints: string[];
+    deduplicatedDocs: number[];
+    docSourceTurns: Map<number, string>;
+    isNewConversation: boolean;
+}
+
+export interface RequestHistory {
+    requestId: string;
+    docs: number[];
+    parentRequestId: string | null;
+    turnNumber: number;
+    timestamp: number;
+}
+
+export interface ConversationTrackerStats {
+    totalRequests: number;
+    totalDedupCalls: number;
+    totalDocsDeduplicated: number;
+    activeRequests: number;
+}
+
+export class ConversationTracker {
+    private _requests: Map<string, RequestHistory>;
+    private _hintTemplate: string;
+    private _stats: {
+        totalRequests: number;
+        totalDedupCalls: number;
+        totalDocsDeduplicated: number;
+    };
+
+    constructor(hintTemplate?: string) {
+        this._requests = new Map<string, RequestHistory>();
+        this._hintTemplate =
+            hintTemplate ?? "Please refer to [Doc {doc_id}] from the previous conversation turn.";
+        this._stats = {
+            totalRequests: 0,
+            totalDedupCalls: 0,
+            totalDocsDeduplicated: 0
+        };
+    }
+
+    registerRequest(requestId: string, docs: number[], parentRequestId?: string | null): RequestHistory {
+        let turnNumber = 1;
+        if (parentRequestId && this._requests.has(parentRequestId)) {
+            const parent = this._requests.get(parentRequestId);
+            if (parent) {
+                turnNumber = parent.turnNumber + 1;
+            }
+        }
+
+        const history: RequestHistory = {
+            requestId,
+            docs: [...docs],
+            parentRequestId: parentRequestId ?? null,
+            turnNumber,
+            timestamp: Date.now() / 1000
+        };
+
+        this._requests.set(requestId, history);
+        this._stats.totalRequests += 1;
+
+        return history;
+    }
+
+    getConversationChain(requestId: string): RequestHistory[] {
+        const chain: RequestHistory[] = [];
+        let currentId: string | null = requestId;
+
+        while (currentId && this._requests.has(currentId)) {
+            const history = this._requests.get(currentId);
+            if (!history) {
+                break;
+            }
+
+            chain.push(history);
+            currentId = history.parentRequestId;
+        }
+
+        chain.reverse();
+        return chain;
+    }
+
+    getAllPreviousDocs(parentRequestId: string): [Set<number>, Map<number, string>] {
+        const allDocs = new Set<number>();
+        const docSources = new Map<number, string>();
+
+        const chain = this.getConversationChain(parentRequestId);
+
+        for (const history of chain) {
+            for (const docId of history.docs) {
+                if (!allDocs.has(docId)) {
+                    allDocs.add(docId);
+                    docSources.set(docId, history.requestId);
+                }
+            }
+        }
+
+        return [allDocs, docSources];
+    }
+
+    deduplicate(
+        requestId: string,
+        docs: number[],
+        parentRequestId?: string | null,
+        hintTemplate?: string
+    ): DeduplicationResult {
+        this._stats.totalDedupCalls += 1;
+
+        if (!parentRequestId || !this._requests.has(parentRequestId)) {
+            this.registerRequest(requestId, docs, null);
+
+            return {
+                originalDocs: docs,
+                overlappingDocs: [],
+                newDocs: docs,
+                referenceHints: [],
+                deduplicatedDocs: docs,
+                docSourceTurns: new Map<number, string>(),
+                isNewConversation: true
+            };
+        }
+
+        const [previousDocs, docSources] = this.getAllPreviousDocs(parentRequestId);
+
+        const overlappingDocs: number[] = [];
+        const newDocs: number[] = [];
+        const docSourceTurns = new Map<number, string>();
+
+        for (const docId of docs) {
+            if (previousDocs.has(docId)) {
+                overlappingDocs.push(docId);
+                const sourceRequestId = docSources.get(docId);
+                if (sourceRequestId !== undefined) {
+                    docSourceTurns.set(docId, sourceRequestId);
+                }
+            } else {
+                newDocs.push(docId);
+            }
+        }
+
+        const template = hintTemplate ?? this._hintTemplate;
+        const referenceHints: string[] = [];
+
+        for (const docId of overlappingDocs) {
+            const sourceRequest = docSources.get(docId);
+            const sourceHistory = sourceRequest ? this._requests.get(sourceRequest) : undefined;
+            const turnNumber = sourceHistory ? String(sourceHistory.turnNumber) : "previous";
+
+            const hint = template
+                .replaceAll("{doc_id}", String(docId))
+                .replaceAll("{turn_number}", turnNumber)
+                .replaceAll("{source_request}", sourceRequest ?? "previous");
+
+            referenceHints.push(hint);
+        }
+
+        this.registerRequest(requestId, docs, parentRequestId);
+        this._stats.totalDocsDeduplicated += overlappingDocs.length;
+
+        return {
+            originalDocs: docs,
+            overlappingDocs,
+            newDocs,
+            referenceHints,
+            deduplicatedDocs: newDocs,
+            docSourceTurns,
+            isNewConversation: false
+        };
+    }
+
+    deduplicateBatch(
+        requestIds: string[],
+        docsList: number[][],
+        parentRequestIds?: Array<string | null | undefined>,
+        hintTemplate?: string
+    ): DeduplicationResult[] {
+        const effectiveParentRequestIds =
+            parentRequestIds ?? new Array<string | null | undefined>(requestIds.length).fill(null);
+
+        const results: DeduplicationResult[] = [];
+        const n = Math.min(requestIds.length, docsList.length, effectiveParentRequestIds.length);
+
+        for (let i = 0; i < n; i += 1) {
+            const result = this.deduplicate(
+                requestIds[i],
+                docsList[i],
+                effectiveParentRequestIds[i],
+                hintTemplate
+            );
+            results.push(result);
+        }
+
+        return results;
+    }
+
+    removeRequest(requestId: string): boolean {
+        if (this._requests.has(requestId)) {
+            this._requests.delete(requestId);
+            return true;
+        }
+
+        return false;
+    }
+
+    clearConversation(requestId: string): number {
+        const chain = this.getConversationChain(requestId);
+        let count = 0;
+
+        for (const history of chain) {
+            if (this.removeRequest(history.requestId)) {
+                count += 1;
+            }
+        }
+
+        return count;
+    }
+
+    reset(): void {
+        this._requests.clear();
+        this._stats = {
+            totalRequests: 0,
+            totalDedupCalls: 0,
+            totalDocsDeduplicated: 0
+        };
+    }
+
+    getStats(): ConversationTrackerStats {
+        return {
+            ...this._stats,
+            activeRequests: this._requests.size
+        };
+    }
+
+    getRequestHistory(requestId: string): RequestHistory | null {
+        return this._requests.get(requestId) ?? null;
+    }
+}
diff --git a/openclaw-plugin/src/engine/eviction-heap.ts b/openclaw-plugin/src/engine/eviction-heap.ts
new file mode 100644
index 0000000..69de6ff
--- /dev/null
+++ b/openclaw-plugin/src/engine/eviction-heap.ts
@@ -0,0 +1,317 @@
+import type { NodeMetadata } from "./metadata.js";
+
+type HeapEntry = [number, number];
+
+export interface EvictionHeapStats {
+    size: number;
+    total_tokens: number;
+    max_tokens: number | null;
+    utilization_pct: number;
+    avg_tokens_per_node: number;
+    oldest_access_time: number | null;
+    newest_access_time: number | null;
+    num_requests: number;
+}
+
+export class EvictionHeap {
+    private _heap: HeapEntry[];
+    private _metadata: Map<number, NodeMetadata>;
+    private _requestToNode: Map<string, number>;
+    private _inHeap: Map<number, boolean>;
+    private _maxTokens: number | null;
+    private _totalTokens: number;
+
+    constructor(maxTokens?: number | null) {
+        this._heap = [];
+        this._metadata = new Map<number, NodeMetadata>();
+        this._requestToNode = new Map<string, number>();
+        this._inHeap = new Map<number, boolean>();
+        this._maxTokens = maxTokens ?? null;
+        this._totalTokens = 0;
+    }
+
+    get maxTokens(): number | null {
+        return this._maxTokens;
+    }
+
+    set maxTokens(value: number | null) {
+        this._maxTokens = value;
+    }
+
+    private _compare(a: HeapEntry, b: HeapEntry): number {
+        if (a[0] !== b[0]) {
+            return a[0] - b[0];
+        }
+        return a[1] - b[1];
+    }
+
+    private _swap(i: number, j: number): void {
+        const tmp = this._heap[i];
+        this._heap[i] = this._heap[j];
+        this._heap[j] = tmp;
+    }
+
+    private _siftUp(index: number): void {
+        let current = index;
+
+        while (current > 0) {
+            const parent = Math.floor((current - 1) / 2);
+            if (this._compare(this._heap[current], this._heap[parent]) >= 0) {
+                break;
+            }
+
+            this._swap(current, parent);
+            current = parent;
+        }
+    }
+
+    private _siftDown(index: number): void {
+        const n = this._heap.length;
+        let current = index;
+
+        while (true) {
+            const left = 2 * current + 1;
+            const right = 2 * current + 2;
+            let smallest = current;
+
+            if (left < n && this._compare(this._heap[left], this._heap[smallest]) < 0) {
+                smallest = left;
+            }
+
+            if (right < n && this._compare(this._heap[right], this._heap[smallest]) < 0) {
+                smallest = right;
+            }
+
+            if (smallest === current) {
+                break;
+            }
+
+            this._swap(current, smallest);
+            current = smallest;
+        }
+    }
+
+    private _heapPush(entry: HeapEntry): void {
+        this._heap.push(entry);
+        this._siftUp(this._heap.length - 1);
+    }
+
+    private _heapPop(): HeapEntry | null {
+        if (this._heap.length === 0) {
+            return null;
+        }
+
+        if (this._heap.length === 1) {
+            return this._heap.pop() ?? null;
+        }
+
+        const min = this._heap[0];
+        const last = this._heap.pop();
+        if (last !== undefined) {
+            this._heap[0] = last;
+            this._siftDown(0);
+        }
+        return min;
+    }
+
+    push(metadata: NodeMetadata): void {
+        const nodeId = metadata.nodeId;
+
+        if (this._inHeap.get(nodeId) === true) {
+            const oldMetadata = this._metadata.get(nodeId);
+            if (oldMetadata) {
+                this._totalTokens += metadata.extraTokens - oldMetadata.extraTokens;
+            }
+            this._metadata.set(nodeId, metadata);
+            this.updateAccessTime(nodeId, metadata.lastAccessTime);
+            return;
+        }
+
+        this._heapPush([metadata.lastAccessTime, nodeId]);
+        this._metadata.set(nodeId, metadata);
+        this._inHeap.set(nodeId, true);
+        this._totalTokens += metadata.extraTokens;
+
+        if (metadata.requestId) {
+            this._requestToNode.set(metadata.requestId, nodeId);
+        }
+    }
+
+    pop(): NodeMetadata | null {
+        while (this._heap.length > 0) {
+            const entry = this._heapPop();
+            if (entry === null) {
+                return null;
+            }
+
+            const [accessTime, nodeId] = entry;
+
+            if (!this._metadata.has(nodeId)) {
+                continue;
+            }
+
+            const metadata = this._metadata.get(nodeId);
+            if (!metadata) {
+                continue;
+            }
+
+            if (metadata.lastAccessTime === accessTime) {
+                this._inHeap.set(nodeId, false);
+                this._totalTokens -= metadata.extraTokens;
+                return metadata;
+            }
+        }
+
+        return null;
+    }
+
+    peek(): NodeMetadata | null {
+        while (this._heap.length > 0) {
+            const [accessTime, nodeId] = this._heap[0];
+
+            if (!this._metadata.has(nodeId)) {
+                this._heapPop();
+                continue;
+            }
+
+            const metadata = this._metadata.get(nodeId);
+            if (!metadata) {
+                this._heapPop();
+                continue;
+            }
+
+            if (metadata.lastAccessTime === accessTime) {
+                return metadata;
+            }
+
+            this._heapPop();
+        }
+
+        return null;
+    }
+
+    updateAccessTime(nodeId: number, newTime?: number): void {
+        const metadata = this._metadata.get(nodeId);
+        if (!metadata) {
+            return;
+        }
+
+        const effectiveTime = newTime ?? Date.now() / 1000;
+        metadata.lastAccessTime = effectiveTime;
+
+        this._heapPush([effectiveTime, nodeId]);
+    }
+
+    remove(nodeId: number): void {
+        const metadata = this._metadata.get(nodeId);
+
+        if (metadata) {
+            this._totalTokens -= metadata.extraTokens;
+
+            if (metadata.requestId) {
+                this._requestToNode.delete(metadata.requestId);
+            }
+
+            this._metadata.delete(nodeId);
+        }
+
+        if (this._inHeap.has(nodeId)) {
+            this._inHeap.set(nodeId, false);
+        }
+    }
+
+    getNodeByRequestId(requestId: string): NodeMetadata | null {
+        const nodeId = this._requestToNode.get(requestId);
+        if (nodeId !== undefined) {
+            return this._metadata.get(nodeId) ?? null;
+        }
+        return null;
+    }
+
+    updateTokensForRequest(requestId: string, inputTokens: number, outputTokens: number): boolean {
+        const metadata = this.getNodeByRequestId(requestId);
+        if (metadata === null) {
+            return false;
+        }
+
+        const oldTokens = metadata.totalTokens;
+        const totalNew = inputTokens + outputTokens;
+        const delta = totalNew - oldTokens;
+
+        metadata.totalTokens = totalNew;
+        metadata.extraTokens = Math.max(0, metadata.extraTokens + delta);
+        metadata.updateAccessTime();
+
+        this._totalTokens += delta;
+        this._heapPush([metadata.lastAccessTime, metadata.nodeId]);
+
+        return true;
+    }
+
+    needsEviction(): boolean {
+        if (this._maxTokens === null) {
+            return false;
+        }
+        return this._totalTokens > this._maxTokens;
+    }
+
+    tokensToEvict(): number {
+        if (this._maxTokens === null || this._totalTokens <= this._maxTokens) {
+            return 0;
+        }
+        return this._totalTokens - this._maxTokens;
+    }
+
+    getMetadata(nodeId: number): NodeMetadata | null {
+        return this._metadata.get(nodeId) ?? null;
+    }
+
+    isEmpty(): boolean {
+        return this.peek() === null;
+    }
+
+    size(): number {
+        return this._metadata.size;
+    }
+
+    totalTokens(): number {
+        return this._totalTokens;
+    }
+
+    getAllRequestIds(): Set<string> {
+        return new Set(this._requestToNode.keys());
+    }
+
+    getStats(): EvictionHeapStats {
+        if (this._metadata.size === 0) {
+            return {
+                size: 0,
+                total_tokens: 0,
+                max_tokens: this._maxTokens,
+                utilization_pct: 0,
+                avg_tokens_per_node: 0,
+                oldest_access_time: null,
+                newest_access_time: null,
+                num_requests: 0
+            };
+        }
+
+        const accessTimes = Array.from(this._metadata.values(), (m) => m.lastAccessTime);
+        const utilization = this._maxTokens ? (this._totalTokens / this._maxTokens) * 100 : 0;
+
+        return {
+            size: this._metadata.size,
+            total_tokens: this._totalTokens,
+            max_tokens: this._maxTokens,
+            utilization_pct: utilization,
+            avg_tokens_per_node: this._totalTokens / this._metadata.size,
+            oldest_access_time: Math.min(...accessTimes),
+            newest_access_time: Math.max(...accessTimes),
+            num_requests: this._requestToNode.size
+        };
+    }
+
+    toString(): string {
+        return `EvictionHeap(size=${this._metadata.size}, total_tokens=${this._totalTokens}, max_tokens=${this._maxTokens})`;
+    }
+}
diff --git a/openclaw-plugin/src/engine/http-client.ts b/openclaw-plugin/src/engine/http-client.ts
new file mode 100644
index 0000000..1166785
--- /dev/null
+++ b/openclaw-plugin/src/engine/http-client.ts
@@ -0,0 +1,267 @@
+type JsonObject = Record<string, unknown>;
+
+function isJsonObject(value: unknown): value is JsonObject {
+  return typeof value === "object" && value !== null && !Array.isArray(value);
+}
+
+async function fetchJson(
+  url: string,
+  init: RequestInit,
+  timeoutMs: number,
+): Promise<JsonObject | null> {
+  try {
+    const response = await fetch(url, {
+      ...init,
+      signal: AbortSignal.timeout(timeoutMs),
+    });
+
+    if (!response.ok) {
+      return null;
+    }
+
+    const data: unknown = await response.json();
+    return isJsonObject(data) ? data : null;
+  } catch {
+    return null;
+  }
+}
+
+export class ContextPilotIndexClient {
+  private readonly baseUrl: string;
+
+  private readonly timeout: number;
+
+  private readonly retryOnFailure: boolean;
+
+  constructor(
+    baseUrl: string = "http://localhost:8765",
+    timeout: number = 1000,
+    retryOnFailure: boolean = false,
+  ) {
+    this.baseUrl = baseUrl.replace(/\/+$/, "");
+    this.timeout = timeout;
+    this.retryOnFailure = retryOnFailure;
+  }
+
+  private async _post(endpoint: string, jsonData: JsonObject): Promise<JsonObject | null> {
+    const url = `${this.baseUrl}${endpoint}`;
+    const attempt = () =>
+      fetchJson(
+        url,
+        {
+          method: "POST",
+          headers: {
+            "Content-Type": "application/json",
+          },
+          body: JSON.stringify(jsonData),
+        },
+        this.timeout,
+      );
+
+    const result = await attempt();
+    if (result !== null || !this.retryOnFailure) {
+      return result;
+    }
+
+    return attempt();
+  }
+
+  private async _get(endpoint: string): Promise<JsonObject | null> {
+    const url = `${this.baseUrl}${endpoint}`;
+    const attempt = () =>
+      fetchJson(
+        url,
+        {
+          method: "GET",
+        },
+        this.timeout,
+      );
+
+    const result = await attempt();
+    if (result !== null || !this.retryOnFailure) {
+      return result;
+    }
+
+    return attempt();
+  }
+
+  async evict(requestIds: string[]): Promise<JsonObject | null> {
+    return this._post("/evict", { request_ids: requestIds });
+  }
+
+  async search(context: number[], updateAccess: boolean = true): Promise<JsonObject | null> {
+    return this._post("/search", {
+      context,
+      update_access: updateAccess,
+    });
+  }
+
+  async updateNode(searchPath: number[], tokenDelta: number): Promise<JsonObject | null> {
+    return this._post("/update", {
+      search_path: searchPath,
+      token_delta: tokenDelta,
+    });
+  }
+
+  async insert(
+    context: number[],
+    searchPath: number[],
+    totalTokens: number = 0,
+  ): Promise<JsonObject | null> {
+    return this._post("/insert", {
+      context,
+      search_path: searchPath,
+      total_tokens: totalTokens,
+    });
+  }
+
+  async reorder(
+    contexts: Array<Array<number | string>>,
+    alpha: number = 0.001,
+    useGpu: boolean = false,
+    linkageMethod: string = "average",
+    initialTokensPerContext: number = 0,
+    deduplicate: boolean = false,
+    parentRequestIds?: Array<string | null>,
+    hintTemplate?: string,
+  ): Promise<[Array<Array<number | string>>, number[]] | null> {
+    const result = await this.reorderRaw(
+      contexts,
+      alpha,
+      useGpu,
+      linkageMethod,
+      initialTokensPerContext,
+      deduplicate,
+      parentRequestIds,
+      hintTemplate,
+    );
+
+    if (result === null) {
+      return null;
+    }
+
+    const reorderedContexts = result.reordered_contexts;
+    const originalIndices = result.original_indices;
+
+    if (!Array.isArray(reorderedContexts) || !Array.isArray(originalIndices)) {
+      return null;
+    }
+
+    if (!originalIndices.every((index) => typeof index === "number")) {
+      return null;
+    }
+
+    return [reorderedContexts as Array<Array<number | string>>, originalIndices as number[]];
+  }
+
+  async reorderRaw(
+    contexts: Array<Array<number | string>>,
+    alpha: number = 0.001,
+    useGpu: boolean = false,
+    linkageMethod: string = "average",
+    initialTokensPerContext: number = 0,
+    deduplicate: boolean = false,
+    parentRequestIds?: Array<string | null>,
+    hintTemplate?: string,
+  ): Promise<JsonObject | null> {
+    const payload: JsonObject = {
+      contexts,
+      alpha,
+      use_gpu: useGpu,
+      linkage_method: linkageMethod,
+      initial_tokens_per_context: initialTokensPerContext,
+      deduplicate,
+    };
+
+    if (parentRequestIds !== undefined) {
+      payload.parent_request_ids = parentRequestIds;
+    }
+
+    if (hintTemplate !== undefined) {
+      payload.hint_template = hintTemplate;
+    }
+
+    return this._post("/reorder", payload);
+  }
+
+  async deduplicate(
+    contexts: number[][],
+    parentRequestIds: Array<string | null>,
+    hintTemplate?: string,
+  ): Promise<JsonObject | null> {
+    const payload: JsonObject = {
+      contexts,
+      parent_request_ids: parentRequestIds,
+    };
+
+    if (hintTemplate !== undefined) {
+      payload.hint_template = hintTemplate;
+    }
+
+    return this._post("/deduplicate", payload);
+  }
+
+  async reset(): Promise<JsonObject | null> {
+    return this._post("/reset", {});
+  }
+
+  async getRequests(): Promise<JsonObject | null> {
+    return this._get("/requests");
+  }
+
+  async getStats(): Promise<JsonObject | null> {
+    return this._get("/stats");
+  }
+
+  async health(): Promise<JsonObject | null> {
+    return this._get("/health");
+  }
+
+  async isReady(): Promise<boolean> {
+    const health = await this.health();
+    return health !== null && health.status === "ready";
+  }
+}
+
+export async function evictRequests(
+  requestIds: string[],
+  serverUrl: string = "http://localhost:8765",
+): Promise<JsonObject | null> {
+  return fetchJson(
+    `${serverUrl.replace(/\/+$/, "")}/evict`,
+    {
+      method: "POST",
+      headers: {
+        "Content-Type": "application/json",
+      },
+      body: JSON.stringify({ request_ids: requestIds }),
+    },
+    1000,
+  );
+}
+
+export async function scheduleBatch(
+  contexts: number[][],
+  serverUrl: string = "http://localhost:8765",
+  alpha: number = 0.001,
+  useGpu: boolean = false,
+  linkageMethod: string = "average",
+  timeout: number = 30000,
+): Promise<JsonObject | null> {
+  return fetchJson(
+    `${serverUrl.replace(/\/+$/, "")}/reorder`,
+    {
+      method: "POST",
+      headers: {
+        "Content-Type": "application/json",
+      },
+      body: JSON.stringify({
+        contexts,
+        alpha,
+        use_gpu: useGpu,
+        linkage_method: linkageMethod,
+      }),
+    },
+    timeout,
+  );
+}
diff --git a/openclaw-plugin/src/engine/index-construction.ts b/openclaw-plugin/src/engine/index-construction.ts
new file mode 100644
index 0000000..a0f28d5
--- /dev/null
+++ b/openclaw-plugin/src/engine/index-construction.ts
@@ -0,0 +1,348 @@
+import { ClusterNode, NodeManager, NodeStats } from './tree-nodes.js';
+import { IntraContextOrderer } from './intra-ordering.js';
+import { computeDistanceMatrixCpu } from './compute-distance.js';
+
+export function linkage(
+  condensedDistances: Float64Array,
+  n: number,
+  method: "single" | "complete" | "average" = "average"
+): number[][] {
+  const dist: number[][] = Array.from({length: n}, () => new Array(n).fill(Infinity));
+  for (let i = 0; i < n; i++) dist[i][i] = 0;
+  for (let i = 0; i < n; i++) {
+    for (let j = i + 1; j < n; j++) {
+      const idx = n * i - (i * (i + 1)) / 2 + j - i - 1;
+      dist[i][j] = condensedDistances[idx];
+      dist[j][i] = condensedDistances[idx];
+    }
+  }
+  
+  const active = new Set(Array.from({length: n}, (_, i) => i));
+  const sizes = new Array(2 * n - 1).fill(1);
+  const result: number[][] = [];
+  
+  for (let step = 0; step < n - 1; step++) {
+    let minDist = Infinity;
+    let minI = -1, minJ = -1;
+    
+    for (const i of active) {
+      for (const j of active) {
+        if (j <= i) continue;
+        if (dist[i][j] < minDist) {
+          minDist = dist[i][j];
+          minI = i;
+          minJ = j;
+        }
+      }
+    }
+    
+    const newClusterId = n + step;
+    const sizeNew = sizes[minI] + sizes[minJ];
+    sizes[newClusterId] = sizeNew;
+    
+    result.push([minI, minJ, minDist, sizeNew]);
+    
+    while (dist.length <= newClusterId) {
+      dist.push(new Array(dist[0]?.length ?? 0).fill(Infinity));
+    }
+    for (const row of dist) {
+      while (row.length <= newClusterId) row.push(Infinity);
+    }
+    dist[newClusterId][newClusterId] = 0;
+    
+    for (const k of active) {
+      if (k === minI || k === minJ) continue;
+      let newDist: number;
+      if (method === "single") {
+        newDist = Math.min(dist[minI][k], dist[minJ][k]);
+      } else if (method === "complete") {
+        newDist = Math.max(dist[minI][k], dist[minJ][k]);
+      } else { // average (UPGMA)
+        newDist = (dist[minI][k] * sizes[minI] + dist[minJ][k] * sizes[minJ]) / sizeNew;
+      }
+      dist[newClusterId][k] = newDist;
+      dist[k][newClusterId] = newDist;
+    }
+    
+    active.delete(minI);
+    active.delete(minJ);
+    active.add(newClusterId);
+  }
+  
+  return result;
+}
+
+export class IndexResult {
+    linkageMatrix: number[][];
+    clusterNodes: Map<number, ClusterNode>;
+    uniqueNodes: Map<number, ClusterNode>;
+    reorderedContexts: (number[] | string[])[];
+    originalContexts: (number[] | string[])[];
+    stats: NodeStats;
+    searchPaths: number[][] | null;
+    
+    // Legacy attributes for backward compatibility
+    reorderedPrompts: (number[] | string[])[];
+    originalPrompts: (number[] | string[])[];
+
+    constructor(
+        linkageMatrix: number[][],
+        clusterNodes: Map<number, ClusterNode>,
+        uniqueNodes: Map<number, ClusterNode>,
+        reorderedContexts: (number[] | string[])[],
+        originalContexts: (number[] | string[])[],
+        stats: NodeStats,
+        searchPaths: number[][] | null = null
+    ) {
+        this.linkageMatrix = linkageMatrix;
+        this.clusterNodes = clusterNodes;
+        this.uniqueNodes = uniqueNodes;
+        this.reorderedContexts = reorderedContexts;
+        this.originalContexts = originalContexts;
+        this.stats = stats;
+        this.searchPaths = searchPaths;
+
+        this.reorderedPrompts = this.reorderedContexts;
+        this.originalPrompts = this.originalContexts;
+    }
+
+    printTree(): void {
+        console.log("\n--- Unique Cluster Tree Nodes ---");
+        const sortedKeys = Array.from(this.uniqueNodes.keys()).sort((a, b) => a - b);
+        for (const nodeId of sortedKeys) {
+            const node = this.uniqueNodes.get(nodeId);
+            if (!node) continue;
+            console.log(`ClusterNode ${nodeId}`);
+            console.log(`  Content: [${node.docIds.join(', ')}]`);
+            console.log(`  Original indices: [${Array.from(node.originalIndices).sort((a, b) => a - b).join(', ')}]`);
+            if (node.searchPath && node.searchPath.length > 0) {
+                const pathStr = "[" + node.searchPath.join("][") + "]";
+                console.log(`  Search path (child indices from root): ${pathStr}`);
+            } else {
+                console.log(`  Search path: (root node)`);
+            }
+            if (!node.isLeaf) {
+                console.log(`  Children: [${node.children.join(', ')}]`);
+                console.log(`  Merge distance: ${node.mergeDistance.toFixed(4)}`);
+            }
+            console.log("-".repeat(40));
+        }
+    }
+}
+
+export interface ContextIndexOptions {
+    linkageMethod?: "single" | "complete" | "average";
+    useGpu?: boolean;
+    alpha?: number;
+    numWorkers?: number | null;
+    batchSize?: number;
+}
+
+export class ContextIndex {
+    linkageMethod: "single" | "complete" | "average";
+    useGpu: boolean;
+    alpha: number;
+    numWorkers: number | null;
+    batchSize: number;
+
+    nodeManager: NodeManager;
+    contextOrderer: IntraContextOrderer;
+
+    _strToId: Map<string, number>;
+    _idToStr: Map<number, string>;
+    _nextStrId: number;
+    _isStringInput: boolean;
+
+    constructor(options: ContextIndexOptions = {}) {
+        this.linkageMethod = options.linkageMethod || "average";
+        this.useGpu = false;
+        this.alpha = options.alpha !== undefined ? options.alpha : 0.001;
+        this.numWorkers = options.numWorkers || null;
+        this.batchSize = options.batchSize || 1000;
+
+        this.nodeManager = new NodeManager();
+        this.contextOrderer = new IntraContextOrderer();
+
+        this._strToId = new Map<string, number>();
+        this._idToStr = new Map<number, string>();
+        this._nextStrId = 0;
+        this._isStringInput = false;
+    }
+
+    _convertToInt(contexts: (number[] | string[])[]): number[][] {
+        if (!contexts || contexts.length === 0 || !contexts[0] || contexts[0].length === 0) {
+            return contexts as number[][];
+        }
+        if (typeof contexts[0][0] === "string") {
+            this._isStringInput = true;
+            const converted: number[][] = [];
+            for (const ctx of contexts as string[][]) {
+                const convertedCtx: number[] = [];
+                for (const item of ctx) {
+                    let sid = this._strToId.get(item);
+                    if (sid === undefined) {
+                        sid = this._nextStrId;
+                        this._strToId.set(item, sid);
+                        this._idToStr.set(sid, item);
+                        this._nextStrId += 1;
+                    }
+                    convertedCtx.push(sid);
+                }
+                converted.push(convertedCtx);
+            }
+            return converted;
+        }
+        return contexts as number[][];
+    }
+
+    _convertToStr(contexts: number[][]): string[][] {
+        if (!this._isStringInput || !contexts || contexts.length === 0) {
+            return contexts as any;
+        }
+        if (contexts[0] && typeof contexts[0][0] === "string") {
+            return contexts as any;
+        }
+        const result: string[][] = [];
+        for (const ctx of contexts) {
+            const strCtx: string[] = [];
+            for (const i of ctx) {
+                strCtx.push(this._idToStr.get(i) as string);
+            }
+            result.push(strCtx);
+        }
+        return result;
+    }
+
+    fitTransform(contexts: (number[] | string[])[]): IndexResult {
+        const intContexts = this._convertToInt(contexts);
+        const n = intContexts.length;
+
+        if (n < 2) {
+            return this._handleSinglePrompt(intContexts);
+        }
+
+        const condensedDistances = this._computeDistanceMatrix(intContexts);
+        const linkageMatrix = linkage(condensedDistances, n, this.linkageMethod);
+
+        this._buildTree(intContexts, linkageMatrix);
+        
+        this.nodeManager.cleanupEmptyNodes();
+        this.nodeManager.updateSearchPaths();
+
+        const reorderedContexts = this.contextOrderer.reorderContexts(
+            intContexts,
+            this.nodeManager.uniqueNodes
+        );
+
+        const searchPaths = this.contextOrderer.extractSearchPaths(
+            this.nodeManager.uniqueNodes,
+            intContexts.length
+        );
+
+        const stats = this.nodeManager.getNodeStats();
+
+        return new IndexResult(
+            linkageMatrix,
+            this.nodeManager.clusterNodes,
+            this.nodeManager.uniqueNodes,
+            reorderedContexts,
+            intContexts,
+            stats,
+            searchPaths
+        );
+    }
+
+    _computeDistanceMatrix(contexts: number[][]): Float64Array {
+        return computeDistanceMatrixCpu(contexts, this.alpha);
+    }
+
+    _handleSinglePrompt(contexts: number[][]): IndexResult {
+        for (let i = 0; i < contexts.length; i++) {
+            const prompt = contexts[i];
+            const node = this.nodeManager.createLeafNode(i, prompt);
+            node.docIds = [...prompt];
+        }
+
+        const leafIds = Array.from(this.nodeManager.uniqueNodes.keys());
+        const virtualRootId = leafIds.length > 0 ? Math.max(...leafIds) + 1 : 0;
+        
+        let freqSum = 0;
+        for (const nid of leafIds) {
+            const n = this.nodeManager.uniqueNodes.get(nid);
+            if (n) freqSum += n.frequency;
+        }
+
+        const virtualRoot = new ClusterNode(
+            virtualRootId,
+            new Set<number>(),
+            new Set<number>(),
+            0.0,
+            leafIds,
+            null,
+            freqSum
+        );
+        this.nodeManager.uniqueNodes.set(virtualRootId, virtualRoot);
+
+        for (const nid of leafIds) {
+            const n = this.nodeManager.uniqueNodes.get(nid);
+            if (n) {
+                n.parent = virtualRootId;
+            }
+        }
+
+        this.nodeManager.updateSearchPaths();
+
+        const searchPaths = this.contextOrderer.extractSearchPaths(
+            this.nodeManager.uniqueNodes,
+            contexts.length
+        );
+
+        const reorderedContexts = contexts.map(c => [...c]);
+
+        return new IndexResult(
+            [],
+            this.nodeManager.clusterNodes,
+            this.nodeManager.uniqueNodes,
+            reorderedContexts,
+            contexts,
+            this.nodeManager.getNodeStats(),
+            searchPaths
+        );
+    }
+
+    _buildTree(contexts: number[][], linkageMatrix: number[][]): void {
+        const n = contexts.length;
+
+        for (let i = 0; i < n; i++) {
+            this.nodeManager.createLeafNode(i, contexts[i]);
+        }
+
+        for (let i = 0; i < linkageMatrix.length; i++) {
+            const [idx1, idx2, distance] = linkageMatrix[i];
+            const newNodeId = n + i;
+            this.nodeManager.createInternalNode(
+                newNodeId,
+                Math.floor(idx1),
+                Math.floor(idx2),
+                distance
+            );
+        }
+    }
+}
+
+export function buildContextIndex(
+    contexts: (number[] | string[])[],
+    options: ContextIndexOptions = {}
+): IndexResult {
+    const indexer = new ContextIndex(options);
+    const result = indexer.fitTransform(contexts);
+
+    if (indexer._isStringInput) {
+        result.reorderedContexts = indexer._convertToStr(result.reorderedContexts as number[][]);
+        result.originalContexts = indexer._convertToStr(result.originalContexts as number[][]);
+        result.reorderedPrompts = result.reorderedContexts;
+        result.originalPrompts = result.originalContexts;
+    }
+
+    return result;
+}
diff --git a/openclaw-plugin/src/engine/inter-scheduler.ts b/openclaw-plugin/src/engine/inter-scheduler.ts
new file mode 100644
index 0000000..702eebc
--- /dev/null
+++ b/openclaw-plugin/src/engine/inter-scheduler.ts
@@ -0,0 +1,116 @@
+import type { ClusterNode } from './tree-nodes.js';
+
+export interface ClusteringResult {
+    reorderedPrompts: number[][];
+    originalPrompts: number[][];
+    searchPaths: number[][];
+}
+
+export class InterContextScheduler {
+    scheduleContexts(
+        clusteringResult: ClusteringResult
+    ): [number[][], number[][], number[], Array<[number, number[]]>] {
+        const reorderedContexts = clusteringResult.reorderedPrompts;
+        const originalContexts = clusteringResult.originalPrompts;
+        const searchPaths = clusteringResult.searchPaths;
+
+        const groupsByRoot = this._groupByRootPrefix(searchPaths);
+        const sortedGroups = this._sortGroupsByPathLength(groupsByRoot, searchPaths, reorderedContexts);
+
+        const allGroupsWithInfo: Array<[number, number[]]> = [];
+        for (const groupIndices of sortedGroups) {
+            allGroupsWithInfo.push([0, groupIndices]);
+        }
+
+        allGroupsWithInfo.sort((a, b) => {
+            const sizeDiff = b[1].length - a[1].length;
+            if (sizeDiff !== 0) {
+                return sizeDiff;
+            }
+
+            const aFirst = a[1].length > 0 ? a[1][0] : Number.POSITIVE_INFINITY;
+            const bFirst = b[1].length > 0 ? b[1][0] : Number.POSITIVE_INFINITY;
+            return aFirst - bFirst;
+        });
+
+        const finalIndexMapping = allGroupsWithInfo.flatMap(([, group]) => group);
+
+        const scheduledReordered = finalIndexMapping.map((idx) => reorderedContexts[idx]);
+        const scheduledOriginals = finalIndexMapping.map((idx) => originalContexts[idx]);
+
+        return [scheduledReordered, scheduledOriginals, finalIndexMapping, allGroupsWithInfo];
+    }
+
+    _groupByRootPrefix(searchPaths: number[][]): Map<number, number[]> {
+        const groups = new Map<number, number[]>();
+
+        for (let contextIdx = 0; contextIdx < searchPaths.length; contextIdx += 1) {
+            const path = searchPaths[contextIdx];
+            const groupKey = path.length >= 1 ? path[0] : -1;
+
+            const existing = groups.get(groupKey);
+            if (existing) {
+                existing.push(contextIdx);
+            } else {
+                groups.set(groupKey, [contextIdx]);
+            }
+        }
+
+        return groups;
+    }
+
+    _sortGroupsByPathLength(
+        groupsByRoot: Map<number, number[]>,
+        searchPaths: number[][],
+        contexts: number[][]
+    ): number[][] {
+        void contexts;
+        const sortedGroups: number[][] = [];
+
+        for (const groupIndices of groupsByRoot.values()) {
+            const sortedGroup = [...groupIndices].sort((a, b) => {
+                const lengthDiff = searchPaths[b].length - searchPaths[a].length;
+                if (lengthDiff !== 0) {
+                    return lengthDiff;
+                }
+
+                const lexCompare = this._compareNumberArrays(searchPaths[a], searchPaths[b]);
+                if (lexCompare !== 0) {
+                    return lexCompare;
+                }
+
+                return a - b;
+            });
+
+            sortedGroups.push(sortedGroup);
+        }
+
+        return sortedGroups;
+    }
+
+    reorderPrompts(
+        clusteringResult: ClusteringResult
+    ): [number[][], number[][], number[], Array<[number, number[]]>] {
+        return this.scheduleContexts(clusteringResult);
+    }
+
+    _reorderSinglePrompt(
+        promptIndex: number,
+        originalPrompt: number[],
+        uniqueNodes: Map<number, ClusterNode>
+    ): number[] {
+        void promptIndex;
+        void uniqueNodes;
+        return [...originalPrompt];
+    }
+
+    private _compareNumberArrays(a: number[], b: number[]): number {
+        const minLength = Math.min(a.length, b.length);
+        for (let i = 0; i < minLength; i += 1) {
+            if (a[i] !== b[i]) {
+                return a[i] - b[i];
+            }
+        }
+        return a.length - b.length;
+    }
+}
diff --git a/openclaw-plugin/src/engine/intra-ordering.ts b/openclaw-plugin/src/engine/intra-ordering.ts
new file mode 100644
index 0000000..0d2eed0
--- /dev/null
+++ b/openclaw-plugin/src/engine/intra-ordering.ts
@@ -0,0 +1,349 @@
+import type { ClusterNode } from './tree-nodes.js';
+
+export class IntraContextOrderer {
+    reorderContexts(originalContexts: number[][], uniqueNodes: Map<number, ClusterNode>): number[][] {
+        let rootNode: ClusterNode | null = null;
+        for (const node of uniqueNodes.values()) {
+            if (node.isRoot) {
+                rootNode = node;
+                break;
+            }
+        }
+
+        if (!rootNode) {
+            return originalContexts;
+        }
+
+        for (const node of uniqueNodes.values()) {
+            if (node.isLeaf && node.originalIndices.size > 0) {
+                const firstIdx = Math.min(...node.originalIndices);
+                if (firstIdx < originalContexts.length) {
+                    this._setNodeDocs(node, [...originalContexts[firstIdx]]);
+                }
+            }
+        }
+
+        const queue: number[] = [rootNode.nodeId];
+        const visited = new Set<number>();
+
+        while (queue.length > 0) {
+            const nodeId = queue.shift();
+            if (nodeId === undefined || visited.has(nodeId) || !uniqueNodes.has(nodeId)) {
+                continue;
+            }
+
+            visited.add(nodeId);
+            const node = uniqueNodes.get(nodeId);
+            if (!node) {
+                continue;
+            }
+
+            if (!node.isRoot && node.parent !== null) {
+                const parentNode = uniqueNodes.get(node.parent);
+                if (parentNode) {
+                    const parentDocs = this._getNodeDocs(parentNode);
+                    const nodeDocs = this._getNodeDocs(node);
+                    if (parentDocs.length > 0 && nodeDocs.length > 0) {
+                        this._setNodeDocs(node, this._reorderWithParentPrefix(nodeDocs, parentDocs));
+                    }
+                }
+            }
+
+            for (const childId of node.children) {
+                if (uniqueNodes.has(childId)) {
+                    queue.push(childId);
+                }
+            }
+        }
+
+        const reorderedContexts: number[][] = [];
+        for (let i = 0; i < originalContexts.length; i += 1) {
+            const leafNode = this._findLeafNode(i, uniqueNodes);
+            if (leafNode) {
+                const leafDocs = this._getNodeDocs(leafNode);
+                if (leafDocs.length > 0) {
+                    reorderedContexts.push(leafDocs);
+                    continue;
+                }
+            }
+
+            reorderedContexts.push([...originalContexts[i]]);
+        }
+
+        return reorderedContexts;
+    }
+
+    _updateTreeAndReorderNodes(uniqueNodes: Map<number, ClusterNode>, reorderedContexts: number[][]): void {
+        let rootNode: ClusterNode | null = null;
+        for (const node of uniqueNodes.values()) {
+            if (node.isRoot) {
+                rootNode = node;
+                break;
+            }
+        }
+
+        for (const node of uniqueNodes.values()) {
+            if (node.isLeaf && node.originalIndices.size > 0) {
+                const firstIdx = Math.min(...node.originalIndices);
+                if (firstIdx < reorderedContexts.length) {
+                    this._setNodeDocs(node, [...reorderedContexts[firstIdx]]);
+                }
+            }
+        }
+
+        if (!rootNode) {
+            return;
+        }
+
+        const queue: Array<[number, boolean]> = [];
+        for (const childId of rootNode.children) {
+            if (uniqueNodes.has(childId)) {
+                queue.push([childId, true]);
+            }
+        }
+
+        while (queue.length > 0) {
+            const item = queue.shift();
+            if (!item) {
+                continue;
+            }
+
+            const [nodeId, isChildOfRoot] = item;
+            const node = uniqueNodes.get(nodeId);
+            if (!node) {
+                continue;
+            }
+
+            if (!isChildOfRoot && node.parent !== null) {
+                const parentNode = uniqueNodes.get(node.parent);
+                if (parentNode) {
+                    const parentDocs = this._getNodeDocs(parentNode);
+                    const nodeDocs = this._getNodeDocs(node);
+                    if (parentDocs.length > 0 && nodeDocs.length > 0) {
+                        this._setNodeDocs(node, this._reorderWithParentPrefix(nodeDocs, parentDocs));
+                    }
+                }
+            }
+
+            for (const childId of node.children) {
+                if (uniqueNodes.has(childId)) {
+                    queue.push([childId, false]);
+                }
+            }
+        }
+    }
+
+    _reorderWithParentPrefix(nodeDocs: number[], parentDocs: number[]): number[] {
+        if (parentDocs.length === 0) {
+            return nodeDocs;
+        }
+
+        const result = [...parentDocs];
+        const parentSet = new Set(parentDocs);
+
+        for (const doc of nodeDocs) {
+            if (!parentSet.has(doc)) {
+                result.push(doc);
+            }
+        }
+
+        return result;
+    }
+
+    _reorderContextWithTreePrefix(
+        contextIndex: number,
+        originalContext: number[],
+        uniqueNodes: Map<number, ClusterNode>
+    ): number[] {
+        const leafNode = this._findLeafNode(contextIndex, uniqueNodes);
+        if (!leafNode) {
+            return [...originalContext];
+        }
+
+        const prefixDocs: number[] = [];
+        const visited = new Set<number>();
+        let currentNode: ClusterNode | undefined = leafNode;
+
+        const ancestors: ClusterNode[] = [];
+        while (currentNode && !currentNode.isRoot) {
+            if (visited.has(currentNode.nodeId)) {
+                break;
+            }
+
+            visited.add(currentNode.nodeId);
+            ancestors.push(currentNode);
+
+            if (currentNode.parent !== null && uniqueNodes.has(currentNode.parent)) {
+                currentNode = uniqueNodes.get(currentNode.parent);
+            } else {
+                break;
+            }
+        }
+
+        ancestors.reverse();
+
+        const seenDocs = new Set<number>();
+        for (const ancestor of ancestors) {
+            const ancestorDocs = this._getNodeDocs(ancestor);
+            for (const doc of ancestorDocs) {
+                if (!seenDocs.has(doc)) {
+                    prefixDocs.push(doc);
+                    seenDocs.add(doc);
+                }
+            }
+        }
+
+        const result = [...prefixDocs];
+        for (const doc of originalContext) {
+            if (!seenDocs.has(doc)) {
+                result.push(doc);
+                seenDocs.add(doc);
+            }
+        }
+
+        return result;
+    }
+
+    extractSearchPaths(uniqueNodes: Map<number, ClusterNode>, numContexts: number): number[][] {
+        const searchPaths: number[][] = Array.from({ length: numContexts }, () => []);
+
+        const contextToLeaf = new Map<number, number>();
+        for (const [nodeId, node] of uniqueNodes.entries()) {
+            if (!node.isLeaf) {
+                continue;
+            }
+
+            for (const origIdx of node.originalIndices) {
+                contextToLeaf.set(origIdx, nodeId);
+            }
+        }
+
+        for (let contextIdx = 0; contextIdx < numContexts; contextIdx += 1) {
+            const leafId = contextToLeaf.get(contextIdx);
+            if (leafId === undefined) {
+                searchPaths[contextIdx] = [];
+                continue;
+            }
+
+            const childIndices: number[] = [];
+            let currentId: number | null = leafId;
+            const visited = new Set<number>();
+
+            while (currentId !== null) {
+                if (visited.has(currentId)) {
+                    break;
+                }
+                visited.add(currentId);
+
+                const currentNode = uniqueNodes.get(currentId);
+                if (!currentNode) {
+                    break;
+                }
+
+                if (currentNode.parent !== null) {
+                    const parentNode = uniqueNodes.get(currentNode.parent);
+                    if (parentNode) {
+                        const childIndex = parentNode.children.indexOf(currentId);
+                        if (childIndex !== -1) {
+                            childIndices.push(childIndex);
+                        }
+                    }
+                }
+
+                currentId = currentNode.parent;
+            }
+
+            searchPaths[contextIdx] = [...childIndices].reverse();
+        }
+
+        return searchPaths;
+    }
+
+    _reorderSingleContext(
+        contextIndex: number,
+        originalContext: number[],
+        uniqueNodes: Map<number, ClusterNode>
+    ): number[] {
+        const originalSet = new Set(originalContext);
+
+        const leafNode = this._findLeafNode(contextIndex, uniqueNodes);
+        if (!leafNode) {
+            return [...originalContext];
+        }
+
+        if (leafNode.isRoot) {
+            return Array.from(leafNode.content).sort((a, b) => a - b);
+        }
+
+        if (leafNode.frequency > 1) {
+            const prefixContent = leafNode.content;
+            const prefixList = Array.from(prefixContent).sort((a, b) => a - b);
+            const remainingList = Array.from(originalSet)
+                .filter((value) => !prefixContent.has(value))
+                .sort((a, b) => a - b);
+            return [...prefixList, ...remainingList];
+        }
+
+        const bestNode = this._findBestAncestor(leafNode, uniqueNodes);
+        if (!bestNode) {
+            return [...originalContext];
+        }
+
+        const prefixContent = bestNode.content;
+        const prefixList = Array.from(prefixContent).sort((a, b) => a - b);
+        const remainingList = Array.from(originalSet)
+            .filter((value) => !prefixContent.has(value))
+            .sort((a, b) => a - b);
+        return [...prefixList, ...remainingList];
+    }
+
+    _findLeafNode(contextIndex: number, uniqueNodes: Map<number, ClusterNode>): ClusterNode | null {
+        for (const node of uniqueNodes.values()) {
+            if (node.isLeaf && node.originalIndices.has(contextIndex)) {
+                return node;
+            }
+        }
+
+        return null;
+    }
+
+    _findBestAncestor(startNode: ClusterNode, uniqueNodes: Map<number, ClusterNode>): ClusterNode | null {
+        let currentNode: ClusterNode = startNode;
+
+        while (currentNode.parent !== null) {
+            const parentId = currentNode.parent;
+            const parentNode = uniqueNodes.get(parentId);
+            if (!parentNode) {
+                return null;
+            }
+
+            if (parentNode.frequency > 1 && !parentNode.isEmpty) {
+                return parentNode;
+            }
+
+            currentNode = parentNode;
+        }
+
+        return null;
+    }
+
+    reorderPrompts(originalPrompts: number[][], uniqueNodes: Map<number, ClusterNode>): number[][] {
+        return this.reorderContexts(originalPrompts, uniqueNodes);
+    }
+
+    _reorderSinglePrompt(
+        promptIndex: number,
+        originalPrompt: number[],
+        uniqueNodes: Map<number, ClusterNode>
+    ): number[] {
+        return this._reorderSingleContext(promptIndex, originalPrompt, uniqueNodes);
+    }
+
+    private _getNodeDocs(node: ClusterNode): number[] {
+        return Array.from(node.content);
+    }
+
+    private _setNodeDocs(node: ClusterNode, docs: number[]): void {
+        node.content = new Set(docs);
+    }
+}
diff --git a/openclaw-plugin/src/engine/live-index.ts b/openclaw-plugin/src/engine/live-index.ts
new file mode 100644
index 0000000..777b1e8
--- /dev/null
+++ b/openclaw-plugin/src/engine/live-index.ts
@@ -0,0 +1,1232 @@
+import { ContextIndex, IndexResult } from './index-construction.js';
+import { ClusterNode, NodeManager } from './tree-nodes.js';
+import { NodeMetadata } from './metadata.js';
+import { InterContextScheduler } from './inter-scheduler.js';
+import { IntraContextOrderer } from './intra-ordering.js';
+import { computeDistanceSingle, computeDistancesBatch } from './compute-distance.js';
+import { ConversationTracker, type DeduplicationResult } from './conversation-tracker.js';
+import { EvictionHeap } from './eviction-heap.js';
+import crypto from 'crypto';
+
+export function computePrefixLength(list1: number[], list2: number[]): number {
+    let length = 0;
+    const minLen = Math.min(list1.length, list2.length);
+    for (let i = 0; i < minLen; i++) {
+        if (list1[i] === list2[i]) {
+            length++;
+        } else {
+            break;
+        }
+    }
+    return length;
+}
+
+export class ContextPilot extends ContextIndex {
+    metadata: Map<number, NodeMetadata> = new Map();
+    interScheduler = new InterContextScheduler();
+    
+    protected _requestToNode: Map<string, number | null> = new Map();
+    protected _nextRequestCounter: number = 0;
+    
+    protected _conversations: Map<string, { seenDocs: Set<any>; turnCount: number }> = new Map();
+    protected _hasExplicitConversation: boolean = false;
+    
+    isLive: boolean = false;
+    initialResult: any = null;
+    scheduledResult: any = null;
+    
+    nodes: Map<number, ClusterNode> = new Map();
+    rootId: number | null = null;
+    nextNodeId: number = 0;
+    
+    liveStats = {
+        totalSearches: 0,
+        totalInsertions: 0,
+        totalEvictions: 0,
+        totalSearchTimeUs: 0,
+        totalTraversalTimeUs: 0,
+        totalRemovals: 0
+    };
+    
+    static readonly _DEFAULT_CONVERSATION = "_default";
+
+    constructor(alpha: number = 0.001, useGpu: boolean = false, linkageMethod: string = "average", batchSize: number = 10000) {
+        super(alpha, useGpu, linkageMethod, batchSize);
+    }
+
+    getAllRequestIds(): Set<string> {
+        return new Set(this._requestToNode.keys());
+    }
+
+    reset(): void {
+        this.metadata.clear();
+        this._requestToNode.clear();
+        this._nextRequestCounter = 0;
+        this.isLive = false;
+        this.initialResult = null;
+        this.scheduledResult = null;
+        this.nodes.clear();
+        this.rootId = null;
+        this.nextNodeId = 0;
+        this.liveStats = {
+            totalSearches: 0,
+            totalInsertions: 0,
+            totalEvictions: 0,
+            totalSearchTimeUs: 0,
+            totalTraversalTimeUs: 0,
+            totalRemovals: 0
+        };
+    }
+
+    buildAndSchedule(contexts: number[][], initialTokensPerContext: number = 0): any {
+        this.initialResult = this.fitTransform(contexts);
+        
+        const [scheduledReordered, scheduledOriginals, finalMapping, groups] = 
+            this.interScheduler.scheduleContexts(this.initialResult);
+            
+        this.scheduledResult = {
+            reordered_contexts: scheduledReordered,
+            original_indices: finalMapping,
+            scheduled_originals: scheduledOriginals,
+            groups: groups,
+            clustering_result: this.initialResult
+        };
+        
+        const [requestIdMapping, requestIdsOrdered] = this._initializeLiveMetadata(
+            initialTokensPerContext,
+            contexts.length
+        );
+        
+        this.scheduledResult['request_id_mapping'] = requestIdMapping;
+        this.scheduledResult['request_ids'] = requestIdsOrdered;
+        
+        this.isLive = true;
+        
+        return this.scheduledResult;
+    }
+
+    reorder(contexts: any, initialTokensPerContext: number = 0, conversationId?: string): [any[], number[]] {
+        if (contexts && !Array.isArray(contexts[0])) {
+            contexts = [contexts];
+        }
+
+        const result = this.buildIncremental(contexts, initialTokensPerContext);
+        const reordered = result.reordered_contexts;
+
+        const cid = conversationId || ContextPilot._DEFAULT_CONVERSATION;
+        if (conversationId !== undefined && conversationId !== null) {
+            this._hasExplicitConversation = true;
+        }
+
+        let conv = this._conversations.get(cid);
+        if (!conv) {
+            conv = { seenDocs: new Set(), turnCount: 0 };
+            this._conversations.set(cid, conv);
+        }
+
+        for (const ctx of reordered) {
+            for (const doc of ctx) {
+                conv.seenDocs.add(doc);
+            }
+        }
+        conv.turnCount += 1;
+
+        return [reordered, result.original_indices];
+    }
+
+    optimize(docs: string[], query: string, conversationId?: string, systemInstruction?: string): any[] {
+        const [reordered, _indices] = this.reorder(docs, 0, conversationId);
+        const reorderedDocs = reordered[0];
+        
+        const systemContent = [systemInstruction, ...reorderedDocs].filter(Boolean).join("\n\n");
+        
+        return [
+            { role: "system", content: systemContent },
+            { role: "user", content: query }
+        ];
+    }
+
+    optimizeBatch(allDocs: string[][], allQueries: string[], systemInstruction?: string): [any[][], number[]] {
+        if (allDocs.length !== allQueries.length) {
+            throw new Error(`all_docs (${allDocs.length}) and all_queries (${allQueries.length}) must have the same length.`);
+        }
+
+        const [reorderedContexts, order] = this.reorder(allDocs);
+        const messagesBatch: any[][] = [];
+
+        for (let i = 0; i < reorderedContexts.length; i++) {
+            const ctx = reorderedContexts[i];
+            const origIdx = order[i];
+            
+            const systemContent = [systemInstruction, ...ctx].filter(Boolean).join("\n\n");
+            messagesBatch.push([
+                { role: "system", content: systemContent },
+                { role: "user", content: allQueries[origIdx] }
+            ]);
+        }
+
+        return [messagesBatch, order];
+    }
+
+    deduplicate(contexts: any[][], conversationId: string, hintTemplate?: string): any[] {
+        if (!conversationId) {
+            throw new Error("conversation_id is required for .deduplicate().");
+        }
+
+        const template = hintTemplate || "Please refer to [Doc {doc_id}] from the previous conversation.";
+
+        if (!this._conversations.has(conversationId)) {
+            throw new Error(`No prior .reorder() call found for conversation_id='${conversationId}'.`);
+        }
+
+        const conv = this._conversations.get(conversationId)!;
+        const seen = conv.seenDocs;
+        const results: any[] = [];
+
+        for (const ctx of contexts) {
+            const overlapping = ctx.filter(d => seen.has(d));
+            const newDocs = ctx.filter(d => !seen.has(d));
+            const hints = overlapping.map(d => template.replace("{doc_id}", String(d)));
+
+            results.push({
+                new_docs: newDocs,
+                overlapping_docs: overlapping,
+                reference_hints: hints,
+                deduplicated_docs: newDocs
+            });
+
+            for (const d of ctx) {
+                seen.add(d);
+            }
+        }
+
+        conv.turnCount += 1;
+        return results;
+    }
+
+    buildIncremental(contexts: any[][], initialTokensPerContext: number = 0): any {
+        // @ts-ignore - Assuming inherited from ContextIndex
+        const convertedContexts = this._convertToInt ? this._convertToInt(contexts) : contexts;
+
+        if (!this.isLive) {
+            const result = this.buildAndSchedule(convertedContexts, initialTokensPerContext);
+            const reordered = result.reordered_contexts || convertedContexts;
+            // @ts-ignore
+            const stringReordered = this._convertToStr ? this._convertToStr(reordered) : reordered;
+            
+            return {
+                request_ids: result.request_ids || [],
+                reordered_contexts: stringReordered,
+                matched_count: 0,
+                inserted_count: convertedContexts.length,
+                merged_count: 0,
+                original_indices: result.original_indices || Array.from({ length: convertedContexts.length }, (_, i) => i),
+                groups: result.groups || []
+            };
+        }
+
+        const matchedContexts: any[] = [];
+        const unmatchedContexts: any[] = [];
+
+        const searchResults = this.searchBatch(convertedContexts);
+
+        for (let i = 0; i < convertedContexts.length; i++) {
+            const context = convertedContexts[i];
+            let [searchPath, matchedNodeId, overlapCount, hasPrefix] = searchResults[i];
+
+            if (overlapCount > 0 && matchedNodeId >= 0 && matchedNodeId !== this.rootId) {
+                const matchedNode = this.nodes.get(matchedNodeId);
+                let nodeDocs: number[] | null = null;
+                
+                if (this.metadata.has(matchedNodeId) && this.metadata.get(matchedNodeId)!.docIds) {
+                    nodeDocs = this.metadata.get(matchedNodeId)!.docIds as number[];
+                } else if (matchedNode && matchedNode.docIds) {
+                    nodeDocs = matchedNode.docIds as number[];
+                }
+
+                let reordered = context;
+                if (nodeDocs) {
+                    reordered = this._reorderWithPrefix(context, nodeDocs);
+                } else {
+                    hasPrefix = true;
+                }
+                
+                matchedContexts.push([i, reordered, searchPath, hasPrefix]);
+            } else {
+                unmatchedContexts.push([i, context]);
+            }
+        }
+
+        const requestIds: (string | null)[] = new Array(convertedContexts.length).fill(null);
+        const reorderedContexts: any[] = new Array(convertedContexts.length).fill(null);
+        const contextInfo: any[] = [];
+
+        for (const [origIdx, reordered, searchPath, hasPrefix] of matchedContexts) {
+            const matchedNode = this.traverse(searchPath);
+            let newNodeId: number, newSearchPath: number[], requestId: string;
+
+            if (hasPrefix && matchedNode && matchedNode.isLeaf) {
+                [newNodeId, newSearchPath, requestId] = this._splitLeafAndInsert(
+                    reordered, matchedNode, searchPath, initialTokensPerContext
+                );
+            } else if (hasPrefix) {
+                [newNodeId, newSearchPath, requestId] = this.insert(
+                    reordered, searchPath, initialTokensPerContext
+                );
+            } else {
+                const insertPath = searchPath.length > 0 ? searchPath.slice(0, -1) : searchPath;
+                [newNodeId, newSearchPath, requestId] = this.insert(
+                    reordered, insertPath, initialTokensPerContext
+                );
+            }
+            
+            requestIds[origIdx] = requestId;
+            reorderedContexts[origIdx] = reordered;
+            contextInfo.push([origIdx, requestId, newSearchPath]);
+        }
+
+        let mergedCount = 0;
+        if (unmatchedContexts.length > 0) {
+            const unmatchedOnly = unmatchedContexts.map(x => x[1]);
+            
+            const tempIndex = new ContextPilot(
+                this.alpha,
+                // @ts-ignore
+                this.useGpu,
+                // @ts-ignore
+                this.linkageMethod,
+                // @ts-ignore
+                this.batchSize
+            );
+            
+            const tempResult = tempIndex.fitTransform(unmatchedOnly);
+            
+            const [mergedRequestIds, mergedSearchPaths] = this._mergeIndex(
+                tempResult,
+                unmatchedContexts,
+                initialTokensPerContext
+            );
+
+            for (let i = 0; i < unmatchedContexts.length; i++) {
+                const [origIdx, origContext] = unmatchedContexts[i];
+                requestIds[origIdx] = mergedRequestIds[i];
+                
+                if (tempResult.reordered_contexts && i < tempResult.reordered_contexts.length) {
+                    reorderedContexts[origIdx] = tempResult.reordered_contexts[i];
+                } else {
+                    reorderedContexts[origIdx] = origContext;
+                }
+                
+                contextInfo.push([origIdx, mergedRequestIds[i], mergedSearchPaths[i]]);
+            }
+            
+            mergedCount = unmatchedContexts.length;
+        }
+
+        const scheduledOrder = this._scheduleIncremental(contextInfo);
+        const groups = this._groupByPathPrefix(contextInfo);
+
+        // @ts-ignore
+        const finalReorderedStr = this._convertToStr ? this._convertToStr(reorderedContexts) : reorderedContexts;
+
+        return {
+            request_ids: requestIds,
+            reordered_contexts: finalReorderedStr,
+            matched_count: matchedContexts.length,
+            inserted_count: convertedContexts.length,
+            merged_count: mergedCount,
+            original_indices: scheduledOrder,
+            groups: groups
+        };
+    }
+
+    _reorderWithPrefix(context: number[], prefix: number[]): number[] {
+        const contextSet = new Set(context);
+        const result: number[] = [];
+        const prefixUsed = new Set<number>();
+
+        for (const elem of prefix) {
+            if (contextSet.has(elem) && !prefixUsed.has(elem)) {
+                result.push(elem);
+                prefixUsed.add(elem);
+            }
+        }
+
+        for (const elem of context) {
+            if (!prefixUsed.has(elem)) {
+                result.push(elem);
+            }
+        }
+
+        return result;
+    }
+
+    _mergeIndex(tempResult: any, unmatchedInfo: any[], initialTokens: number): [string[], number[][]] {
+        const requestIds: string[] = [];
+        const searchPaths: number[][] = [];
+        
+        const uniqueNodes = tempResult.unique_nodes || tempResult.uniqueNodes;
+        let tempRoot: any = null;
+        
+        if (uniqueNodes) {
+            for (const node of uniqueNodes.values()) {
+                if (node.isRoot) {
+                    tempRoot = node;
+                    break;
+                }
+            }
+        }
+
+        const fallbackInsert = () => {
+            for (const [origIdx, context] of unmatchedInfo) {
+                const [newNodeId, newPath, reqId] = this.insert(context, [], initialTokens);
+                requestIds.push(reqId);
+                searchPaths.push(newPath);
+            }
+        };
+
+        if (!tempRoot || this.rootId === null) {
+            fallbackInsert();
+            return [requestIds, searchPaths];
+        }
+
+        const globalRoot = this.nodes.get(this.rootId);
+        if (!globalRoot) {
+            fallbackInsert();
+            return [requestIds, searchPaths];
+        }
+
+        const nodeIdMap = new Map<number, number>();
+        const baseChildIdx = globalRoot.children.length;
+
+        for (let childIdx = 0; childIdx < tempRoot.children.length; childIdx++) {
+            const tempChildId = tempRoot.children[childIdx];
+            const newChildIdx = baseChildIdx + childIdx;
+            this._copySubtree(
+                uniqueNodes,
+                tempChildId,
+                this.rootId,
+                nodeIdMap,
+                initialTokens,
+                [newChildIdx]
+            );
+        }
+
+        for (let i = 0; i < unmatchedInfo.length; i++) {
+            const [origIdx, context] = unmatchedInfo[i];
+            let tempLeafId: number | null = null;
+            
+            for (const [nodeId, node] of uniqueNodes.entries()) {
+                if (node.isLeaf && node.originalIndices && node.originalIndices.has(i)) {
+                    tempLeafId = nodeId;
+                    break;
+                }
+            }
+
+            if (tempLeafId !== null && nodeIdMap.has(tempLeafId)) {
+                const newNodeId = nodeIdMap.get(tempLeafId)!;
+                if (this.metadata.has(newNodeId)) {
+                    const meta = this.metadata.get(newNodeId)!;
+                    requestIds.push(meta.requestId!);
+                    searchPaths.push(meta.searchPath);
+                    continue;
+                }
+            }
+
+            const [newNodeId, newPath, reqId] = this.insert(context, [], initialTokens);
+            requestIds.push(reqId);
+            searchPaths.push(newPath);
+        }
+
+        return [requestIds, searchPaths];
+    }
+
+    _copySubtree(sourceNodes: Map<number, any>, sourceNodeId: number, parentId: number, 
+                 nodeIdMap: Map<number, number>, initialTokens: number, searchPath: number[]): void {
+        const sourceNode = sourceNodes.get(sourceNodeId);
+        if (!sourceNode) return;
+
+        const newNodeId = this.nextNodeId++;
+        const content = sourceNode.docIds ? [...sourceNode.docIds] : (sourceNode.content ? [...sourceNode.content] : []);
+        const originalIndices = sourceNode.originalIndices ? new Set(sourceNode.originalIndices) : new Set<number>();
+        
+        const newNode = new ClusterNode(
+            newNodeId,
+            content,
+            [],
+            parentId,
+            originalIndices
+        );
+        
+        if (sourceNode.docIds) {
+            newNode.docIds = [...sourceNode.docIds];
+        }
+
+        this.nodes.set(newNodeId, newNode);
+        nodeIdMap.set(sourceNodeId, newNodeId);
+
+        const parentNode = this.nodes.get(parentId);
+        if (parentNode) {
+            parentNode.addChild(newNodeId);
+        }
+
+        const isLeaf = sourceNode.isLeaf || sourceNode.is_leaf;
+        const requestId = isLeaf ? `req-${crypto.randomUUID().replace(/-/g, '').substring(0, 12)}` : null;
+
+        const parentTokens = this.metadata.has(parentId) ? this.metadata.get(parentId)!.totalTokens : 0;
+        
+        const metadata = new NodeMetadata(
+            newNodeId,
+            isLeaf ? initialTokens : 0,
+            isLeaf ? Math.max(0, initialTokens - parentTokens) : 0,
+            searchPath,
+            sourceNode.docIds ? [...sourceNode.docIds] : null,
+            isLeaf,
+            requestId
+        );
+        
+        this.metadata.set(newNodeId, metadata);
+
+        if (isLeaf && requestId) {
+            this._requestToNode.set(requestId, newNodeId);
+        }
+
+        if (sourceNode.children) {
+            for (let childIdx = 0; childIdx < sourceNode.children.length; childIdx++) {
+                const childId = sourceNode.children[childIdx];
+                const childSearchPath = [...searchPath, childIdx];
+                this._copySubtree(
+                    sourceNodes, childId, newNodeId,
+                    nodeIdMap, initialTokens, childSearchPath
+                );
+            }
+        }
+    }
+
+    _scheduleIncremental(contextInfo: any[]): number[] {
+        const groups = new Map<number, any[]>();
+
+        for (const [ctxIdx, reqId, path] of contextInfo) {
+            const groupKey = path && path.length > 0 ? path[0] : -1;
+            if (!groups.has(groupKey)) {
+                groups.set(groupKey, []);
+            }
+            groups.get(groupKey)!.push({ ctxIdx, len: path ? path.length : 0 });
+        }
+
+        const scheduled: number[] = [];
+        const sortedKeys = Array.from(groups.keys()).sort((a, b) => a - b);
+
+        for (const groupKey of sortedKeys) {
+            const items = groups.get(groupKey)!;
+            items.sort((a, b) => b.len - a.len);
+            scheduled.push(...items.map(item => item.ctxIdx));
+        }
+
+        return scheduled;
+    }
+
+    _groupByPathPrefix(contextInfo: any[]): [number, number[]][] {
+        const groups = new Map<number, number[]>();
+
+        for (const [ctxIdx, reqId, path] of contextInfo) {
+            const groupKey = path && path.length > 0 ? path[0] : -1;
+            if (!groups.has(groupKey)) {
+                groups.set(groupKey, []);
+            }
+            groups.get(groupKey)!.push(ctxIdx);
+        }
+
+        const result: [number, number[]][] = [];
+        for (const [groupKey, indices] of groups.entries()) {
+            result.push([indices.length, indices]);
+        }
+
+        result.sort((a, b) => b[0] - a[0]);
+        return result;
+    }
+
+    scheduleOnly(contexts: number[][]): any {
+        const result = this.fitTransform(contexts);
+        
+        const [scheduledReordered, scheduledOriginals, finalMapping, groups] = 
+            this.interScheduler.scheduleContexts(result);
+            
+        return {
+            reordered_contexts: scheduledReordered,
+            original_indices: finalMapping,
+            scheduled_originals: scheduledOriginals,
+            groups: groups,
+            stats: {
+                total_nodes: result.stats?.total_nodes || result.stats?.totalNodes,
+                leaf_nodes: result.stats?.leaf_nodes || result.stats?.leafNodes,
+                num_contexts: contexts.length,
+                num_groups: groups.length
+            }
+        };
+    }
+
+    _initializeLiveMetadata(initialTokensPerContext: number, numInputContexts?: number): [Record<string, number>, (string | null)[]] {
+        if (!this.initialResult) {
+            throw new Error("Must call fitTransform() before initializing metadata");
+        }
+
+        const uniqueNodes = this.initialResult.unique_nodes || this.initialResult.uniqueNodes;
+        const reorderedContexts = this.initialResult.reordered_contexts || this.initialResult.reorderedContexts;
+        const requestIdMapping: Record<string, number> = {};
+
+        this.nodes = uniqueNodes;
+
+        for (const [nodeId, node] of uniqueNodes.entries()) {
+            if (node.isRoot || node.is_root) {
+                this.rootId = nodeId;
+                break;
+            }
+        }
+
+        this.nextNodeId = uniqueNodes.size > 0 ? Math.max(...Array.from(uniqueNodes.keys())) + 1 : 0;
+        let leafCounter = 0;
+        const originalIndexToRequestId = new Map<number, string>();
+
+        for (const [nodeId, node] of uniqueNodes.entries()) {
+            const searchPath = this._computeSearchPath(nodeId);
+            const isLeaf = node.isLeaf || node.is_leaf;
+            
+            let totalTokens = 0;
+            let requestId: string | null = null;
+
+            if (isLeaf) {
+                totalTokens = initialTokensPerContext;
+                requestId = `req-${crypto.randomUUID().replace(/-/g, '').substring(0, 12)}`;
+                leafCounter++;
+
+                if (node.originalIndices || node.original_indices) {
+                    const indices = node.originalIndices || node.original_indices;
+                    for (const origIdx of indices) {
+                        originalIndexToRequestId.set(origIdx, requestId);
+                    }
+                }
+            }
+
+            let parentTokens = 0;
+            if (node.parent !== null && this.metadata.has(node.parent)) {
+                parentTokens = this.metadata.get(node.parent)!.totalTokens;
+            }
+            const extraTokens = Math.max(0, totalTokens - parentTokens);
+
+            let leafDocIds: number[] | null = null;
+            if (isLeaf && (node.originalIndices || node.original_indices)) {
+                const indices = Array.from((node.originalIndices || node.original_indices) as Set<number>);
+                if (indices.length > 0) {
+                    const firstOrigIdx = Math.min(...indices);
+                    if (reorderedContexts && firstOrigIdx < reorderedContexts.length) {
+                        leafDocIds = reorderedContexts[firstOrigIdx];
+                    } else {
+                        leafDocIds = node.docIds || node.doc_ids;
+                    }
+                }
+            } else {
+                leafDocIds = node.docIds || node.doc_ids;
+            }
+
+            const metadata = new NodeMetadata(
+                nodeId,
+                totalTokens,
+                extraTokens,
+                searchPath,
+                leafDocIds,
+                isLeaf,
+                requestId
+            );
+
+            this.metadata.set(nodeId, metadata);
+
+            if (isLeaf && requestId) {
+                this._requestToNode.set(requestId, nodeId);
+                requestIdMapping[requestId] = nodeId;
+            }
+        }
+
+        this.nextNodeId = this.nodes.size > 0 ? Math.max(...Array.from(this.nodes.keys())) + 1 : 0;
+        this._nextRequestCounter = leafCounter;
+
+        const numContexts = numInputContexts !== undefined ? numInputContexts : originalIndexToRequestId.size;
+        const requestIdsOrdered: (string | null)[] = [];
+        
+        for (let i = 0; i < numContexts; i++) {
+            requestIdsOrdered.push(originalIndexToRequestId.get(i) || null);
+        }
+
+        return [requestIdMapping, requestIdsOrdered];
+    }
+
+    trackRequest(requestId: string): void {
+        if (!this._requestToNode.has(requestId)) {
+            this._requestToNode.set(requestId, null);
+        }
+    }
+
+    removeRequests(requestIds: Set<string>): any {
+        const evictedNodes: number[] = [];
+        const notFound: string[] = [];
+
+        for (const requestId of requestIds) {
+            if (!this._requestToNode.has(requestId)) {
+                notFound.push(requestId);
+                continue;
+            }
+
+            const nodeId = this._requestToNode.get(requestId);
+            this._requestToNode.delete(requestId);
+
+            if (nodeId !== null && nodeId !== undefined) {
+                evictedNodes.push(nodeId);
+                this._removeNodeAndPrune(nodeId);
+            }
+        }
+
+        this.liveStats.totalEvictions += evictedNodes.length;
+
+        const arrayReqs = Array.from(requestIds);
+        return {
+            removed_count: evictedNodes.length,
+            evicted_node_ids: evictedNodes,
+            evicted_request_ids: arrayReqs.filter(id => !notFound.includes(id)),
+            not_found: notFound,
+            nodes_remaining: this.nodes.size,
+            requests_remaining: this._requestToNode.size
+        };
+    }
+
+    removeRequestById(requestId: string): boolean {
+        const result = this.removeRequests(new Set([requestId]));
+        return result.evicted_node_ids.length > 0;
+    }
+
+    getRequestNode(requestId: string): number | null {
+        return this._requestToNode.get(requestId) ?? null;
+    }
+
+    _collectAllNodeDocs(): [number[], number[][], Record<number, number[]>] {
+        const nodeIds: number[] = [];
+        const nodeDocsList: number[][] = [];
+        const nodeIdToPath: Record<number, number[]> = {};
+
+        if (this.rootId === null) return [nodeIds, nodeDocsList, nodeIdToPath];
+
+        const queue: [number, number[]][] = [[this.rootId, []]];
+
+        while (queue.length > 0) {
+            const [nodeId, path] = queue.shift()!;
+
+            if (!this.nodes.has(nodeId)) continue;
+
+            const node = this.nodes.get(nodeId)!;
+            const nodeMeta = this.metadata.get(nodeId);
+
+            let docs: number[] | null = null;
+            if (nodeMeta && nodeMeta.docIds) {
+                docs = nodeMeta.docIds;
+            } else if (node.docIds) {
+                docs = node.docIds;
+            }
+
+            if (docs) {
+                nodeIds.push(nodeId);
+                nodeDocsList.push(docs);
+                nodeIdToPath[nodeId] = path;
+            }
+
+            if (!node.isLeaf && node.children) {
+                for (let idx = 0; idx < node.children.length; idx++) {
+                    queue.push([node.children[idx], [...path, idx]]);
+                }
+            }
+        }
+
+        return [nodeIds, nodeDocsList, nodeIdToPath];
+    }
+
+    _getNodeDocs(nodeId: number): number[] | null {
+        const meta = this.metadata.get(nodeId);
+        if (meta && meta.docIds) return meta.docIds;
+        const node = this.nodes.get(nodeId);
+        if (node && node.docIds) return node.docIds;
+        return null;
+    }
+
+    _searchSingleHierarchical(context: number[]): [number[], number, number, boolean] {
+        const contextSet = new Set(context);
+        let currentId = this.rootId;
+        let currentPath: number[] = [];
+
+        while (true) {
+            if (currentId === null) return [[], -1, 0, false];
+            const currentNode = this.nodes.get(currentId);
+            
+            if (!currentNode || currentNode.isLeaf || !currentNode.children || currentNode.children.length === 0) {
+                const docs = this._getNodeDocs(currentId);
+                if (docs && currentId !== this.rootId) {
+                    const overlap = Array.from(contextSet).filter(x => new Set(docs).has(x)).length;
+                    const hasPrefix = overlap > 0 ? contextSet.has(docs[0]) : false;
+                    return [currentPath, currentId, overlap, hasPrefix];
+                }
+                return [[], -1, 0, false];
+            }
+
+            const childIds: number[] = [];
+            const childDocsList: number[][] = [];
+            const childIndices: number[] = [];
+
+            for (let idx = 0; idx < currentNode.children.length; idx++) {
+                const childId = currentNode.children[idx];
+                const docs = this._getNodeDocs(childId);
+                if (docs) {
+                    childIds.push(childId);
+                    childDocsList.push(docs);
+                    childIndices.push(idx);
+                }
+            }
+
+            if (childIds.length === 0) return [[], -1, 0, false];
+
+            const distances = computeDistancesBatch([context], childDocsList, this.alpha);
+            
+            let bestJ = -1;
+            let bestDistance = Infinity;
+            let bestOverlap = 0;
+
+            for (let j = 0; j < childIds.length; j++) {
+                const docs = childDocsList[j];
+                const overlap = Array.from(contextSet).filter(x => new Set(docs).has(x)).length;
+                if (overlap === 0) continue;
+                
+                const dist = Array.isArray(distances[0]) ? distances[0][j] : distances[j];
+                
+                if (dist < bestDistance) {
+                    bestDistance = dist;
+                    bestOverlap = overlap;
+                    bestJ = j;
+                }
+            }
+
+            if (bestJ < 0) {
+                if (currentId !== this.rootId) {
+                    const docs = this._getNodeDocs(currentId);
+                    if (docs) {
+                        const overlap = Array.from(contextSet).filter(x => new Set(docs).has(x)).length;
+                        return [currentPath, currentId, overlap, true];
+                    }
+                }
+                return [[], -1, 0, false];
+            }
+
+            const bestChildId = childIds[bestJ];
+            const bestChildIdx = childIndices[bestJ];
+            const bestDocs = childDocsList[bestJ];
+            const childPath = [...currentPath, bestChildIdx];
+
+            if (contextSet.has(bestDocs[0])) {
+                const bestChildNode = this.nodes.get(bestChildId);
+                if (bestChildNode && !bestChildNode.isLeaf && bestChildNode.children && bestChildNode.children.length > 0) {
+                    currentId = bestChildId;
+                    currentPath = childPath;
+                    continue;
+                } else {
+                    return [childPath, bestChildId, bestOverlap, true];
+                }
+            } else {
+                return [childPath, bestChildId, bestOverlap, false];
+            }
+        }
+    }
+
+    searchBatch(contexts: number[][]): [number[], number, number, boolean][] {
+        const startTime = globalThis.performance ? globalThis.performance.now() : Date.now();
+
+        if (this.rootId === null || contexts.length === 0) {
+            return contexts.map(() => [[], -1, 0, false]);
+        }
+
+        const results = contexts.map(ctx => this._searchSingleHierarchical(ctx));
+
+        const endTime = globalThis.performance ? globalThis.performance.now() : Date.now();
+        const elapsedUs = (endTime - startTime) * 1000;
+        
+        this.liveStats.totalSearches += contexts.length;
+        this.liveStats.totalSearchTimeUs += elapsedUs;
+
+        return results;
+    }
+
+    search(context: number[], updateAccess: boolean = true): [number[], number, number, boolean] {
+        const results = this.searchBatch([context]);
+        const [searchPath, nodeId, overlap, hasPrefix] = results[0];
+
+        if (updateAccess && nodeId >= 0 && this.metadata.has(nodeId)) {
+            this.metadata.get(nodeId)!.updateAccessTime();
+        }
+
+        return [searchPath, nodeId, overlap, hasPrefix];
+    }
+
+    traverse(searchPath: number[]): ClusterNode | null {
+        const startTime = globalThis.performance ? globalThis.performance.now() : Date.now();
+
+        if (this.rootId === null) return null;
+
+        let currentId = this.rootId;
+
+        for (const childIdx of searchPath) {
+            if (!this.nodes.has(currentId)) return null;
+
+            const currentNode = this.nodes.get(currentId)!;
+
+            if (!currentNode.children || childIdx >= currentNode.children.length) {
+                return null;
+            }
+
+            currentId = currentNode.children[childIdx];
+        }
+
+        const endTime = globalThis.performance ? globalThis.performance.now() : Date.now();
+        const elapsedUs = (endTime - startTime) * 1000;
+        this.liveStats.totalTraversalTimeUs += elapsedUs;
+
+        return this.nodes.get(currentId) || null;
+    }
+
+    insert(context: number[], searchPath: number[], totalTokens: number = 0): [number, number[], string] {
+        const startTime = globalThis.performance ? globalThis.performance.now() : Date.now();
+
+        let matchedNode = this.traverse(searchPath);
+
+        if (!matchedNode) {
+            matchedNode = this.nodes.get(this.rootId!)!;
+            searchPath = [];
+        }
+
+        let newNodeId: number, newSearchPath: number[], requestId: string;
+
+        if (matchedNode.isLeaf) {
+            [newNodeId, newSearchPath, requestId] = this._insertAtLeaf(
+                context, matchedNode, searchPath, totalTokens
+            );
+        } else {
+            [newNodeId, newSearchPath, requestId] = this._insertAtInternal(
+                context, matchedNode, searchPath, totalTokens
+            );
+        }
+
+        const endTime = globalThis.performance ? globalThis.performance.now() : Date.now();
+        this.liveStats.totalInsertions += 1;
+
+        return [newNodeId, newSearchPath, requestId];
+    }
+
+    _insertAtInternal(context: number[], parentNode: ClusterNode, searchPath: number[], totalTokens: number): [number, number[], string] {
+        const requestId = `req-${crypto.randomUUID().replace(/-/g, '').substring(0, 12)}`;
+        
+        const newNodeId = this.nextNodeId++;
+        const newNode = new ClusterNode(
+            newNodeId,
+            context,
+            [],
+            parentNode.nodeId,
+            new Set([newNodeId])
+        );
+
+        this.nodes.set(newNodeId, newNode);
+        parentNode.addChild(newNodeId);
+
+        const parentTokens = this.metadata.has(parentNode.nodeId) ? this.metadata.get(parentNode.nodeId)!.totalTokens : 0;
+        const newSearchPath = [...searchPath, parentNode.children.length - 1];
+
+        const metadata = new NodeMetadata(
+            newNodeId,
+            totalTokens,
+            Math.max(0, totalTokens - parentTokens),
+            newSearchPath,
+            context,
+            true,
+            requestId
+        );
+
+        this.metadata.set(newNodeId, metadata);
+        this._requestToNode.set(requestId, newNodeId);
+
+        return [newNodeId, newSearchPath, requestId];
+    }
+
+    _insertAtLeaf(context: number[], leafNode: ClusterNode, searchPath: number[], totalTokens: number): [number, number[], string] {
+        const requestId = `req-${crypto.randomUUID().replace(/-/g, '').substring(0, 12)}`;
+        
+        let parentNode: ClusterNode;
+        let parentSearchPath: number[];
+
+        if (leafNode.parent === null) {
+            parentNode = this.nodes.get(this.rootId!)!;
+            parentSearchPath = [];
+        } else {
+            parentNode = this.nodes.get(leafNode.parent)!;
+            parentSearchPath = searchPath.length > 0 ? searchPath.slice(0, -1) : [];
+        }
+
+        const newLeafId = this.nextNodeId++;
+        const newLeaf = new ClusterNode(
+            newLeafId,
+            context,
+            [],
+            parentNode.nodeId,
+            new Set([newLeafId])
+        );
+
+        this.nodes.set(newLeafId, newLeaf);
+        parentNode.addChild(newLeafId);
+
+        const newSearchPath = [...parentSearchPath, parentNode.children.length - 1];
+        const parentTokens = this.metadata.has(parentNode.nodeId) ? this.metadata.get(parentNode.nodeId)!.totalTokens : 0;
+
+        const newMetadata = new NodeMetadata(
+            newLeafId,
+            totalTokens,
+            Math.max(0, totalTokens - parentTokens),
+            newSearchPath,
+            context,
+            true,
+            requestId
+        );
+
+        this.metadata.set(newLeafId, newMetadata);
+        this._requestToNode.set(requestId, newLeafId);
+
+        return [newLeafId, newSearchPath, requestId];
+    }
+
+    _splitLeafAndInsert(context: number[], leafNode: ClusterNode, searchPath: number[], totalTokens: number): [number, number[], string] {
+        const matchedDocs = this._getNodeDocs(leafNode.nodeId);
+
+        if (!matchedDocs) {
+            return this._insertAtLeaf(context, leafNode, searchPath, totalTokens);
+        }
+
+        const sharedPrefix: number[] = [];
+        for (let i = 0; i < Math.min(matchedDocs.length, context.length); i++) {
+            if (matchedDocs[i] === context[i]) {
+                sharedPrefix.push(matchedDocs[i]);
+            } else {
+                break;
+            }
+        }
+
+        if (sharedPrefix.length === 0) {
+            return this._insertAtLeaf(context, leafNode, searchPath, totalTokens);
+        }
+
+        if (sharedPrefix.length === matchedDocs.length && new Set(matchedDocs).size === new Set(context).size && 
+            [...new Set(matchedDocs)].every(d => new Set(context).has(d))) {
+            return this._insertAtLeaf(context, leafNode, searchPath, totalTokens);
+        }
+
+        let parentId = leafNode.parent;
+        if (parentId === null) {
+            parentId = this.rootId!;
+        }
+        const parentNode = this.nodes.get(parentId)!;
+        const parentSearchPath = searchPath.length > 0 ? searchPath.slice(0, -1) : [];
+
+        const leafChildIdx = parentNode.children.indexOf(leafNode.nodeId);
+
+        const newInternalId = this.nextNodeId++;
+        const allContent = new Set([...leafNode.content, ...context]);
+        
+        const newInternal = new ClusterNode(
+            newInternalId,
+            Array.from(allContent),
+            [leafNode.nodeId],
+            parentId,
+            new Set()
+        );
+        newInternal.docIds = [...sharedPrefix];
+
+        this.nodes.set(newInternalId, newInternal);
+
+        parentNode.children[leafChildIdx] = newInternalId;
+        leafNode.parent = newInternalId;
+
+        const parentTokens = this.metadata.has(parentId) ? this.metadata.get(parentId)!.totalTokens : 0;
+        const leafMeta = this.metadata.get(leafNode.nodeId);
+        const leafTotal = leafMeta ? leafMeta.totalTokens : 0;
+
+        let internalTokens = parentTokens;
+        if (matchedDocs && matchedDocs.length > 0) {
+            const prefixRatio = sharedPrefix.length / matchedDocs.length;
+            internalTokens = Math.floor(parentTokens + (leafTotal - parentTokens) * prefixRatio);
+        }
+
+        const internalPath = [...parentSearchPath, leafChildIdx];
+
+        const internalMeta = new NodeMetadata(
+            newInternalId,
+            internalTokens,
+            Math.max(0, internalTokens - parentTokens),
+            internalPath,
+            [...sharedPrefix],
+            false,
+            null
+        );
+        this.metadata.set(newInternalId, internalMeta);
+
+        if (leafMeta) {
+            leafMeta.extraTokens = Math.max(0, leafTotal - internalTokens);
+            leafMeta.searchPath = [...internalPath, 0];
+        }
+
+        const requestId = `req-${crypto.randomUUID().replace(/-/g, '').substring(0, 12)}`;
+        const newLeafId = this.nextNodeId++;
+
+        const newLeaf = new ClusterNode(
+            newLeafId,
+            context,
+            [],
+            newInternalId,
+            new Set([newLeafId])
+        );
+        newLeaf.docIds = [...context];
+
+        this.nodes.set(newLeafId, newLeaf);
+        newInternal.addChild(newLeafId);
+
+        const newLeafPath = [...internalPath, 1];
+
+        const newLeafMeta = new NodeMetadata(
+            newLeafId,
+            totalTokens,
+            Math.max(0, totalTokens - internalTokens),
+            newLeafPath,
+            [...context],
+            true,
+            requestId
+        );
+
+        this.metadata.set(newLeafId, newLeafMeta);
+        this._requestToNode.set(requestId, newLeafId);
+
+        return [newLeafId, newLeafPath, requestId];
+    }
+
+    updateNode(searchPath: number[], tokenDelta: number): boolean {
+        const node = this.traverse(searchPath);
+        
+        if (!node || !this.metadata.has(node.nodeId)) {
+            return false;
+        }
+
+        const metadata = this.metadata.get(node.nodeId)!;
+
+        if (tokenDelta > 0) {
+            metadata.addTokens(tokenDelta);
+        } else {
+            metadata.removeTokens(Math.abs(tokenDelta));
+        }
+
+        return true;
+    }
+
+    _removeNode(nodeId: number): void {
+        this._removeNodeAndPrune(nodeId);
+    }
+
+    _removeNodeAndPrune(nodeId: number): number {
+        if (!this.nodes.has(nodeId)) {
+            return 0;
+        }
+
+        let nodesPruned = 0;
+        const node = this.nodes.get(nodeId)!;
+        const parentId = node.parent;
+
+        if (parentId !== null && this.nodes.has(parentId)) {
+            const parent = this.nodes.get(parentId)!;
+            const idx = parent.children.indexOf(nodeId);
+            if (idx > -1) {
+                parent.children.splice(idx, 1);
+            }
+
+            if (parent.children.length === 0 && !parent.isRoot) {
+                nodesPruned += 1;
+                nodesPruned += this._removeNodeAndPrune(parentId);
+            }
+        }
+
+        this.nodes.delete(nodeId);
+
+        if (this.metadata.has(nodeId)) {
+            this.metadata.delete(nodeId);
+        }
+
+        return nodesPruned;
+    }
+
+    _computeSearchPath(nodeId: number): number[] {
+        if (nodeId === this.rootId) return [];
+
+        const path: number[] = [];
+        let currentId: number | null = nodeId;
+        const visited = new Set<number>();
+
+        while (currentId !== this.rootId && currentId !== null) {
+            if (visited.has(currentId)) break;
+            visited.add(currentId);
+
+            const node = this.nodes.get(currentId);
+            if (!node || node.parent === null) break;
+
+            const parent = this.nodes.get(node.parent);
+            if (!parent) break;
+
+            const childIdx = parent.children.indexOf(currentId);
+            if (childIdx === -1) break;
+
+            path.push(childIdx);
+            currentId = node.parent;
+        }
+
+        return path.reverse();
+    }
+
+    _findCommonPrefix(list1: number[], list2: number[]): number[] {
+        const prefix: number[] = [];
+        const minLen = Math.min(list1.length, list2.length);
+        for (let i = 0; i < minLen; i++) {
+            if (list1[i] === list2[i]) {
+                prefix.push(list1[i]);
+            } else {
+                break;
+            }
+        }
+        return prefix;
+    }
+
+    getStats(): any {
+        const avgSearchTime = this.liveStats.totalSearches > 0 
+            ? this.liveStats.totalSearchTimeUs / this.liveStats.totalSearches 
+            : 0;
+
+        let totalTokens = 0;
+        for (const meta of this.metadata.values()) {
+            totalTokens += meta.extraTokens;
+        }
+
+        return {
+            num_nodes: this.nodes.size,
+            active_nodes: this.metadata.size,
+            total_tokens: totalTokens,
+            num_requests: this._requestToNode.size,
+            total_searches: this.liveStats.totalSearches,
+            total_insertions: this.liveStats.totalInsertions,
+            total_removals: this.liveStats.totalRemovals,
+            avg_search_time_us: avgSearchTime
+        };
+    }
+}
diff --git a/openclaw-plugin/src/engine/metadata.ts b/openclaw-plugin/src/engine/metadata.ts
new file mode 100644
index 0000000..d2bf562
--- /dev/null
+++ b/openclaw-plugin/src/engine/metadata.ts
@@ -0,0 +1,82 @@
+export interface NodeMetadataInit {
+    totalTokens?: number;
+    extraTokens?: number;
+    lastAccessTime?: number;
+    searchPath?: number[];
+    isActive?: boolean;
+    isLeaf?: boolean;
+    docIds?: number[] | null;
+    requestId?: string | null;
+}
+
+export class NodeMetadata {
+    nodeId: number;
+    totalTokens: number;
+    extraTokens: number;
+    lastAccessTime: number;
+    searchPath: number[];
+    isActive: boolean;
+    isLeaf: boolean;
+    docIds: number[] | null;
+    requestId: string | null;
+
+    constructor(nodeId: number, init: NodeMetadataInit = {}) {
+        this.nodeId = nodeId;
+        this.totalTokens = init.totalTokens ?? 0;
+        this.extraTokens = init.extraTokens ?? 0;
+        this.lastAccessTime = init.lastAccessTime ?? Date.now() / 1000;
+        this.searchPath = init.searchPath ?? [];
+        this.isActive = init.isActive ?? true;
+        this.isLeaf = init.isLeaf ?? false;
+        this.docIds = init.docIds ?? null;
+        this.requestId = init.requestId ?? null;
+    }
+
+    updateAccessTime(): void {
+        this.lastAccessTime = Date.now() / 1000;
+    }
+
+    addTokens(delta: number): void {
+        this.totalTokens += delta;
+        this.extraTokens += delta;
+        this.updateAccessTime();
+    }
+
+    removeTokens(delta: number): number {
+        if (delta <= 0) {
+            return 0;
+        }
+
+        let tokensRemoved = Math.min(delta, this.extraTokens);
+        this.extraTokens -= tokensRemoved;
+        this.totalTokens -= tokensRemoved;
+
+        const remaining = delta - tokensRemoved;
+        if (remaining > 0) {
+            const actualRemoved = Math.min(remaining, this.totalTokens);
+            this.totalTokens -= actualRemoved;
+            tokensRemoved += actualRemoved;
+        }
+
+        return tokensRemoved;
+    }
+
+    isEmpty(): boolean {
+        return this.totalTokens <= 0;
+    }
+
+    lessThan(other: NodeMetadata): boolean {
+        return this.lastAccessTime < other.lastAccessTime;
+    }
+
+    toString(): string {
+        const req = this.requestId ? `, request_id=${this.requestId}` : "";
+        return (
+            `NodeMetadata(id=${this.nodeId}, ` +
+            `total_tokens=${this.totalTokens}, ` +
+            `extra_tokens=${this.extraTokens}, ` +
+            `is_leaf=${this.isLeaf}${req}, ` +
+            `active=${this.isActive})`
+        );
+    }
+}
diff --git a/openclaw-plugin/src/engine/tree-nodes.ts b/openclaw-plugin/src/engine/tree-nodes.ts
new file mode 100644
index 0000000..3f9b380
--- /dev/null
+++ b/openclaw-plugin/src/engine/tree-nodes.ts
@@ -0,0 +1,334 @@
+export class ClusterNode {
+    nodeId: number;
+    content: Set<number>;
+    originalIndices: Set<number>;
+    distance: number;
+    children: number[];
+    parent: number | null;
+    frequency: number;
+    mergeDistance: number;
+    searchPath: number[];
+
+    constructor(
+        nodeId: number,
+        content: Set<number>,
+        originalIndices: Set<number> = new Set([nodeId]),
+        distance: number = 0.0,
+        children: number[] = [],
+        parent: number | null = null,
+        frequency: number = 1
+    ) {
+        this.nodeId = nodeId;
+        this.content = content instanceof Set ? new Set(content) : new Set(content);
+        this.originalIndices = originalIndices;
+        this.distance = distance;
+        this.children = children;
+        this.parent = parent;
+        this.frequency = frequency;
+        this.mergeDistance = distance;
+        this.searchPath = [];
+    }
+
+    get isLeaf(): boolean {
+        return this.children.length === 0;
+    }
+
+    get isRoot(): boolean {
+        return this.parent === null;
+    }
+
+    get isEmpty(): boolean {
+        return this.content.size === 0;
+    }
+
+    get docIds(): number[] {
+        return Array.from(this.content).sort((a, b) => a - b);
+    }
+
+    set docIds(value: number[]) {
+        this.content = new Set(value);
+    }
+
+    addChild(childId: number): void {
+        if (!this.children.includes(childId) && childId !== this.nodeId) {
+            this.children.push(childId);
+        }
+    }
+
+    removeChild(childId: number): void {
+        const idx = this.children.indexOf(childId);
+        if (idx !== -1) {
+            this.children.splice(idx, 1);
+        }
+    }
+
+    updateFrequency(additionalFrequency: number): void {
+        this.frequency += additionalFrequency;
+    }
+
+    mergeWith(otherNode: ClusterNode): void {
+        this.content = new Set(Array.from(this.content).filter((v) => otherNode.content.has(v)));
+        this.originalIndices = new Set([...this.originalIndices, ...otherNode.originalIndices]);
+        this.frequency += otherNode.frequency;
+    }
+
+    getDepth(): number {
+        return this.searchPath.length;
+    }
+}
+
+export interface NodeStats {
+    totalNodes: number;
+    leafNodes: number;
+    rootNodes: number;
+    internalNodes: number;
+}
+
+export class NodeManager {
+    clusterNodes: Map<number, ClusterNode>;
+    uniqueNodes: Map<number, ClusterNode>;
+    redirects: Map<number, number>;
+    contentToNodeId: Map<string, number>;
+
+    constructor() {
+        this.clusterNodes = new Map<number, ClusterNode>();
+        this.uniqueNodes = new Map<number, ClusterNode>();
+        this.redirects = new Map<number, number>();
+        this.contentToNodeId = new Map<string, number>();
+    }
+
+    private contentKey(content: Set<number>): string {
+        return Array.from(content).sort((a, b) => a - b).join(',');
+    }
+
+    createLeafNode(nodeId: number, promptContent: Iterable<number>): ClusterNode {
+        const contentSet = promptContent instanceof Set ? new Set(promptContent) : new Set(promptContent);
+        const key = this.contentKey(contentSet);
+
+        const canonicalId = this.contentToNodeId.get(key);
+        if (canonicalId !== undefined) {
+            const canonicalNode = this.uniqueNodes.get(canonicalId);
+            if (!canonicalNode) {
+                throw new Error(`Missing canonical leaf node for id ${canonicalId}`);
+            }
+
+            canonicalNode.updateFrequency(1);
+            canonicalNode.originalIndices.add(nodeId);
+
+            this.redirects.set(nodeId, canonicalId);
+            this.clusterNodes.set(nodeId, canonicalNode);
+            return canonicalNode;
+        }
+
+        const node = new ClusterNode(nodeId, contentSet);
+        this.clusterNodes.set(nodeId, node);
+        this.uniqueNodes.set(nodeId, node);
+        this.contentToNodeId.set(key, nodeId);
+        return node;
+    }
+
+    createInternalNode(
+        nodeId: number,
+        child1Id: number,
+        child2Id: number,
+        distance: number
+    ): ClusterNode {
+        const canonicalChild1Id = this.redirects.get(child1Id) ?? child1Id;
+        const canonicalChild2Id = this.redirects.get(child2Id) ?? child2Id;
+
+        if (canonicalChild1Id === canonicalChild2Id) {
+            this.redirects.set(nodeId, canonicalChild1Id);
+            const canonicalNode = this.uniqueNodes.get(canonicalChild1Id);
+            if (!canonicalNode) {
+                throw new Error(`Missing canonical child node for id ${canonicalChild1Id}`);
+            }
+            this.clusterNodes.set(nodeId, canonicalNode);
+            return canonicalNode;
+        }
+
+        const child1 = this.uniqueNodes.get(canonicalChild1Id);
+        const child2 = this.uniqueNodes.get(canonicalChild2Id);
+        if (!child1 || !child2) {
+            throw new Error(
+                `Missing child nodes for internal node ${nodeId}: ${canonicalChild1Id}, ${canonicalChild2Id}`
+            );
+        }
+
+        const intersectionContent = new Set(
+            Array.from(child1.content).filter((v) => child2.content.has(v))
+        );
+        const key = this.contentKey(intersectionContent);
+
+        const existingId = this.contentToNodeId.get(key);
+        if (existingId !== undefined && intersectionContent.size > 0) {
+            if (existingId !== canonicalChild1Id && existingId !== canonicalChild2Id) {
+                const existingNode = this.uniqueNodes.get(existingId);
+                if (!existingNode) {
+                    throw new Error(`Missing existing node for id ${existingId}`);
+                }
+
+                existingNode.addChild(canonicalChild1Id);
+                existingNode.addChild(canonicalChild2Id);
+                existingNode.frequency = Math.max(
+                    existingNode.frequency,
+                    child1.frequency + child2.frequency
+                );
+                existingNode.originalIndices = new Set([
+                    ...existingNode.originalIndices,
+                    ...child1.originalIndices,
+                    ...child2.originalIndices
+                ]);
+
+                child1.parent = existingId;
+                child2.parent = existingId;
+
+                this.redirects.set(nodeId, existingId);
+                this.clusterNodes.set(nodeId, existingNode);
+                return existingNode;
+            }
+        }
+
+        const combinedIndices = new Set([...child1.originalIndices, ...child2.originalIndices]);
+        const node = new ClusterNode(
+            nodeId,
+            intersectionContent,
+            combinedIndices,
+            distance,
+            [canonicalChild1Id, canonicalChild2Id],
+            null,
+            child1.frequency + child2.frequency
+        );
+
+        this.clusterNodes.set(nodeId, node);
+        this.uniqueNodes.set(nodeId, node);
+
+        if (intersectionContent.size > 0) {
+            this.contentToNodeId.set(key, nodeId);
+        }
+
+        child1.parent = nodeId;
+        child2.parent = nodeId;
+
+        return node;
+    }
+
+    cleanupEmptyNodes(): void {
+        const emptyNodeIds = Array.from(this.uniqueNodes.entries())
+            .filter(([_, node]) => node.isEmpty)
+            .map(([nodeId]) => nodeId);
+
+        if (emptyNodeIds.length === 0) {
+            return;
+        }
+
+        const sortedEmptyIds = emptyNodeIds.sort((a, b) => b - a);
+
+        for (const emptyId of sortedEmptyIds) {
+            const emptyNode = this.uniqueNodes.get(emptyId);
+            if (!emptyNode) {
+                continue;
+            }
+
+            const parentId = emptyNode.parent;
+            const childrenIds = [...emptyNode.children];
+
+            if (parentId !== null) {
+                const parentNode = this.uniqueNodes.get(parentId);
+                if (parentNode) {
+                    parentNode.removeChild(emptyId);
+                    for (const childId of childrenIds) {
+                        if (this.uniqueNodes.has(childId)) {
+                            parentNode.addChild(childId);
+                        }
+                    }
+                }
+            }
+
+            for (const childId of childrenIds) {
+                const childNode = this.uniqueNodes.get(childId);
+                if (childNode) {
+                    childNode.parent = parentId;
+                }
+            }
+
+            this.uniqueNodes.delete(emptyId);
+        }
+
+        for (const node of this.uniqueNodes.values()) {
+            if (node.parent !== null && !this.uniqueNodes.has(node.parent)) {
+                node.parent = null;
+            }
+        }
+    }
+
+    getNodeStats(): NodeStats {
+        const totalNodes = this.uniqueNodes.size;
+        let leafNodes = 0;
+        let rootNodes = 0;
+
+        for (const node of this.uniqueNodes.values()) {
+            if (node.isLeaf) {
+                leafNodes += 1;
+            }
+            if (node.isRoot) {
+                rootNodes += 1;
+            }
+        }
+
+        return {
+            totalNodes,
+            leafNodes,
+            rootNodes,
+            internalNodes: totalNodes - leafNodes
+        };
+    }
+
+    updateSearchPaths(): void {
+        const rootNodes = Array.from(this.uniqueNodes.values()).filter((node) => node.isRoot);
+
+        if (rootNodes.length === 0) {
+            return;
+        }
+
+        if (rootNodes.length === 1) {
+            const root = rootNodes[0];
+            root.searchPath = [];
+            this._updatePathsFromNode(root);
+            return;
+        }
+
+        const currentMaxId = Math.max(...Array.from(this.uniqueNodes.keys()));
+        const virtualRootId = currentMaxId + 1;
+        const virtualRoot = new ClusterNode(
+            virtualRootId,
+            new Set<number>(),
+            new Set<number>(),
+            0.0,
+            rootNodes.map((node) => node.nodeId),
+            null,
+            rootNodes.reduce((sum, node) => sum + node.frequency, 0)
+        );
+        virtualRoot.searchPath = [];
+
+        this.uniqueNodes.set(virtualRootId, virtualRoot);
+
+        for (const node of rootNodes) {
+            node.parent = virtualRootId;
+        }
+
+        this._updatePathsFromNode(virtualRoot);
+    }
+
+    _updatePathsFromNode(node: ClusterNode): void {
+        for (let childIndex = 0; childIndex < node.children.length; childIndex += 1) {
+            const childId = node.children[childIndex];
+            const childNode = this.uniqueNodes.get(childId);
+            if (!childNode) {
+                continue;
+            }
+
+            childNode.searchPath = [...node.searchPath, childIndex];
+            this._updatePathsFromNode(childNode);
+        }
+    }
+}

From 86ab671a27ed605a8233faa44652ff2756894b86 Mon Sep 17 00:00:00 2001
From: SecretSettler <sean.jiang01@outlook.com>
Date: Thu, 26 Mar 2026 13:45:58 +0000
Subject: [PATCH 5/8] feat: wire full ContextPilot engine + SGLang mode into
 plugin
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

index.ts now supports three backend modes:
- anthropic: in-process ContextPilot engine (clustering + reorder + dedup + cache_control)
- openai: in-process engine (same pipeline, OpenAI cache is automatic)
- sglang: remote ContextPilotIndexClient → index server for cache-aware reorder,
  in-process dedup, no cache_control injection (RadixAttention handles caching)

Config additions:
- backendProvider now accepts 'sglang'
- indexServerUrl: URL for ContextPilot index server (default: http://localhost:8765)

contextpilot_status tool shows engine stats (cloud) or server health (sglang)
---
 openclaw-plugin/openclaw.plugin.json |   7 +-
 openclaw-plugin/src/index.ts         | 210 ++++++++++++++++++++++-----
 2 files changed, 176 insertions(+), 41 deletions(-)

diff --git a/openclaw-plugin/openclaw.plugin.json b/openclaw-plugin/openclaw.plugin.json
index c1b5f9a..94ec378 100644
--- a/openclaw-plugin/openclaw.plugin.json
+++ b/openclaw-plugin/openclaw.plugin.json
@@ -27,10 +27,15 @@
     "properties": {
       "backendProvider": {
         "type": "string",
-        "enum": ["anthropic", "openai"],
+        "enum": ["anthropic", "openai", "sglang"],
         "description": "Backend LLM provider type",
         "default": "anthropic"
       },
+      "indexServerUrl": {
+        "type": "string",
+        "description": "ContextPilot index server URL (used in SGLang mode)",
+        "default": "http://localhost:8765"
+      },
       "scope": {
         "type": "string",
         "enum": ["all", "system", "tool_results"],
diff --git a/openclaw-plugin/src/index.ts b/openclaw-plugin/src/index.ts
index 58fd015..fbd1384 100644
--- a/openclaw-plugin/src/index.ts
+++ b/openclaw-plugin/src/index.ts
@@ -9,9 +9,80 @@ import { createProviderApiKeyAuthMethod } from "openclaw/plugin-sdk/provider-aut
 import { injectCacheControl } from "./engine/cache-control.js";
 import { dedupChatCompletions, dedupResponsesApi } from "./engine/dedup.js";
 import { getFormatHandler, type InterceptConfig } from "./engine/extract.js";
-import { ReorderState } from "./engine/reorder.js";
+import { ContextPilotIndexClient } from "./engine/http-client.js";
+import { ContextPilot } from "./engine/live-index.js";
 
 const PROVIDER_ID = "contextpilot";
+type BackendProvider = "anthropic" | "openai" | "sglang";
+
+function parseBackendProvider(value: unknown): BackendProvider {
+  if (value === "openai" || value === "sglang") {
+    return value;
+  }
+  return "anthropic";
+}
+
+function parseScope(value: unknown): "all" | "system" | "tool_results" {
+  if (value === "system" || value === "tool_results" || value === "all") {
+    return value;
+  }
+  return "all";
+}
+
+function detectApiFormat(
+  body: Record<string, unknown>,
+  backendProvider: BackendProvider,
+): "openai_chat" | "anthropic_messages" {
+  if (backendProvider === "anthropic") {
+    return "anthropic_messages";
+  }
+  if (backendProvider === "openai") {
+    return "openai_chat";
+  }
+  return "system" in body ? "anthropic_messages" : "openai_chat";
+}
+
+function reorderWithEngine(engine: ContextPilot, docs: string[]): string[] {
+  const [reordered] = engine.reorder(docs);
+  if (!Array.isArray(reordered) || !Array.isArray(reordered[0])) {
+    return docs;
+  }
+  const candidate = reordered[0];
+  if (!candidate.every((entry) => typeof entry === "string")) {
+    return docs;
+  }
+  return candidate as string[];
+}
+
+async function reorderWithClient(
+  client: ContextPilotIndexClient,
+  docs: string[],
+): Promise<string[]> {
+  const encodedDocs = docs.map((doc) => Array.from(doc, (ch) => ch.charCodeAt(0)));
+  const result = await client.reorder(encodedDocs, 0.001, false, "average");
+
+  if (result === null) {
+    return docs;
+  }
+
+  const [, originalIndices] = result;
+  if (!Array.isArray(originalIndices) || originalIndices.length !== docs.length) {
+    return docs;
+  }
+
+  const reordered = originalIndices.map((index) => {
+    if (typeof index !== "number" || index < 0 || index >= docs.length) {
+      return null;
+    }
+    return docs[index];
+  });
+
+  return reordered.includes(null) ? docs : (reordered as string[]);
+}
+
+function formatJson(value: unknown): string {
+  return value === null || value === undefined ? "unavailable" : JSON.stringify(value);
+}
 
 export default definePluginEntry({
   id: "contextpilot",
@@ -19,13 +90,15 @@ export default definePluginEntry({
   description: "Optimizes LLM requests in-process via extraction, dedup, caching, and reordering.",
   register: (api) => {
     const config = {
-      backendProvider: api.pluginConfig?.backendProvider === "openai" ? "openai" : "anthropic",
-      scope: ["system", "tool_results", "all"].includes(String(api.pluginConfig?.scope))
-        ? String(api.pluginConfig?.scope)
-        : "all",
+      backendProvider: parseBackendProvider(api.pluginConfig?.backendProvider),
+      scope: parseScope(api.pluginConfig?.scope),
+      indexServerUrl: String(api.pluginConfig?.indexServerUrl || "http://localhost:8765"),
     };
 
-    const reorderState = new ReorderState();
+    const isSglang = config.backendProvider === "sglang";
+    const engine = isSglang ? null : new ContextPilot(0.001, false, "average");
+    const client = isSglang ? new ContextPilotIndexClient(config.indexServerUrl) : null;
+
     let requestCount = 0;
     let totalCharsSaved = 0;
 
@@ -33,24 +106,43 @@ export default definePluginEntry({
       id: PROVIDER_ID,
       label: "ContextPilot",
       docsPath: "/providers/contextpilot",
-      envVars: [config.backendProvider === "anthropic" ? "ANTHROPIC_API_KEY" : "OPENAI_API_KEY"],
-      auth: [
-        createProviderApiKeyAuthMethod({
-          providerId: PROVIDER_ID,
-          methodId: "api-key",
-          label: config.backendProvider === "anthropic" ? "Anthropic API key" : "OpenAI API key",
-          hint: "API key for the backend LLM provider",
-          optionKey: config.backendProvider === "anthropic" ? "anthropicApiKey" : "openaiApiKey",
-          flagName: config.backendProvider === "anthropic" ? "--anthropic-api-key" : "--openai-api-key",
-          envVar: config.backendProvider === "anthropic" ? "ANTHROPIC_API_KEY" : "OPENAI_API_KEY",
-          promptMessage: "Enter your API key",
-          defaultModel:
-            config.backendProvider === "anthropic"
-              ? "contextpilot/claude-sonnet-4-6"
-              : "contextpilot/gpt-4o",
-        }),
-      ],
+      envVars: isSglang
+        ? []
+        : [config.backendProvider === "anthropic" ? "ANTHROPIC_API_KEY" : "OPENAI_API_KEY"],
+      auth: isSglang
+        ? []
+        : [
+          createProviderApiKeyAuthMethod({
+            providerId: PROVIDER_ID,
+            methodId: "api-key",
+            label: config.backendProvider === "anthropic" ? "Anthropic API key" : "OpenAI API key",
+            hint: "API key for the backend LLM provider",
+            optionKey: config.backendProvider === "anthropic" ? "anthropicApiKey" : "openaiApiKey",
+            flagName: config.backendProvider === "anthropic" ? "--anthropic-api-key" : "--openai-api-key",
+            envVar: config.backendProvider === "anthropic" ? "ANTHROPIC_API_KEY" : "OPENAI_API_KEY",
+            promptMessage: "Enter your API key",
+            defaultModel:
+              config.backendProvider === "anthropic"
+                ? "contextpilot/claude-sonnet-4-6"
+                : "contextpilot/gpt-4o",
+          }),
+        ],
       resolveDynamicModel: (ctx: ProviderResolveDynamicModelContext) => {
+        if (config.backendProvider === "sglang") {
+          return {
+            id: ctx.modelId,
+            name: ctx.modelId,
+            provider: PROVIDER_ID,
+            baseUrl: config.indexServerUrl,
+            api: "openai-completions",
+            reasoning: false,
+            input: ["text", "image"] as Array<"text" | "image">,
+            cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
+            contextWindow: 200000,
+            maxTokens: 16384,
+          };
+        }
+
         const isAnthropic = config.backendProvider === "anthropic";
         return {
           id: ctx.modelId,
@@ -76,9 +168,7 @@ export default definePluginEntry({
           }
 
           const body = structuredClone(request.body) as Record<string, unknown>;
-          const apiFormat = config.backendProvider === "anthropic"
-            ? "anthropic_messages"
-            : "openai_chat";
+          const apiFormat = detectApiFormat(body, config.backendProvider);
 
           const interceptConfig: InterceptConfig = {
             enabled: true,
@@ -93,17 +183,30 @@ export default definePluginEntry({
           const handler = getFormatHandler(apiFormat);
           const multi = handler.extractAll(body, interceptConfig);
 
+          const reorderDocs = async (docs: string[]): Promise<string[]> => {
+            if (docs.length < 2) {
+              return docs;
+            }
+            if (client) {
+              return reorderWithClient(client, docs);
+            }
+            if (engine) {
+              return reorderWithEngine(engine, docs);
+            }
+            return docs;
+          };
+
           if (multi.systemExtraction) {
             const [extraction, sysIdx] = multi.systemExtraction;
             if (extraction.documents.length >= 2) {
-              const [reordered] = reorderState.reorder(extraction.documents);
+              const reordered = await reorderDocs(extraction.documents);
               handler.reconstructSystem(body, extraction, reordered, sysIdx);
             }
           }
 
           for (const [extraction, location] of multi.toolExtractions) {
             if (extraction.documents.length >= 2) {
-              const [reordered] = reorderState.reorder(extraction.documents);
+              const reordered = await reorderDocs(extraction.documents);
               handler.reconstructToolResult(body, extraction, reordered, location);
             }
           }
@@ -117,10 +220,9 @@ export default definePluginEntry({
             totalCharsSaved += dedupResult.charsSaved;
           }
 
-          const optimizedBody = injectCacheControl(
-            body,
-            config.backendProvider === "anthropic" ? "anthropic" : "openai",
-          );
+          const optimizedBody = isSglang
+            ? body
+            : injectCacheControl(body, config.backendProvider === "anthropic" ? "anthropic" : "openai");
 
           requestCount++;
 
@@ -131,6 +233,12 @@ export default definePluginEntry({
         };
       },
       augmentModelCatalog: () => {
+        if (config.backendProvider === "sglang") {
+          return [
+            { id: "default", name: "SGLang Default (ContextPilot)", provider: PROVIDER_ID },
+          ];
+        }
+
         const isAnthropic = config.backendProvider === "anthropic";
         if (isAnthropic) {
           return [
@@ -154,18 +262,40 @@ export default definePluginEntry({
       description: "Report ContextPilot engine state",
       parameters: Type.Object({}),
       async execute(_toolCallId: string, _params: unknown) {
+        const lines = [
+          "ContextPilot Engine Status:",
+          `  Backend: ${config.backendProvider}`,
+          `  Scope: ${config.scope}`,
+          `  Requests optimized: ${requestCount}`,
+          `  Total chars saved: ${totalCharsSaved.toLocaleString()}`,
+        ];
+
+        if (engine) {
+          const stats = engine.getStats();
+          lines.push("  Mode: cloud-api (in-process ContextPilot engine)");
+          lines.push(`  Live index: ${engine.isLive ? "active" : "warming"}`);
+          lines.push(`  Nodes: ${Number(stats.num_nodes ?? 0)}`);
+          lines.push(`  Active nodes: ${Number(stats.active_nodes ?? 0)}`);
+          lines.push(`  Requests tracked: ${Number(stats.num_requests ?? 0)}`);
+          lines.push(`  Total searches: ${Number(stats.total_searches ?? 0)}`);
+          lines.push(`  Total insertions: ${Number(stats.total_insertions ?? 0)}`);
+          lines.push(`  Total removals: ${Number(stats.total_removals ?? 0)}`);
+          lines.push(`  Avg search time (us): ${Number(stats.avg_search_time_us ?? 0).toFixed(2)}`);
+        }
+
+        if (client) {
+          const [health, remoteStats] = await Promise.all([client.health(), client.getStats()]);
+          lines.push("  Mode: sglang (remote ContextPilot index)");
+          lines.push(`  Index server URL: ${config.indexServerUrl}`);
+          lines.push(`  Index server health: ${formatJson(health)}`);
+          lines.push(`  Index server stats: ${formatJson(remoteStats)}`);
+        }
+
         return {
           content: [
             {
               type: "text" as const,
-              text: [
-                "ContextPilot Engine Status:",
-                "  Mode: in-process (native TypeScript)",
-                `  Requests optimized: ${requestCount}`,
-                `  Total chars saved: ${totalCharsSaved.toLocaleString()}`,
-                `  Backend: ${config.backendProvider}`,
-                `  Scope: ${config.scope}`,
-              ].join("\n"),
+              text: lines.join("\n"),
             },
           ],
         };

From f60b8fb8e54a5ec37d93f89702f6249ed13c01ef Mon Sep 17 00:00:00 2001
From: dalongbao <ryantsui786@gmail.com>
Date: Wed, 1 Apr 2026 23:56:31 +0100
Subject: [PATCH 6/8] fix: working plugin

---
 .gitignore                                  |   1 +
 contextpilot/server/http_server.py          | 314 +++++++++--------
 openclaw-plugin/README.md                   | 128 ++++---
 openclaw-plugin/benchmark.sh                | 183 ++++++++++
 openclaw-plugin/openclaw.plugin.json        |  33 +-
 openclaw-plugin/package-lock.json           |  22 ++
 openclaw-plugin/package.json                |  11 +-
 openclaw-plugin/src/engine/cache-control.ts |  27 ++
 openclaw-plugin/src/engine/dedup.ts         |  28 +-
 openclaw-plugin/src/engine/tree-nodes.ts    |   6 +-
 openclaw-plugin/src/index.ts                | 357 ++++++++------------
 openclaw-plugin/test-e2e.ts                 | 188 +++++++++++
 12 files changed, 835 insertions(+), 463 deletions(-)
 create mode 100755 openclaw-plugin/benchmark.sh
 create mode 100644 openclaw-plugin/package-lock.json
 create mode 100644 openclaw-plugin/test-e2e.ts

diff --git a/.gitignore b/.gitignore
index 55ae470..e4d3908 100644
--- a/.gitignore
+++ b/.gitignore
@@ -8,3 +8,4 @@ dist/
 */.DS_Store
 *.DS_Store
 
+node_modules/
diff --git a/contextpilot/server/http_server.py b/contextpilot/server/http_server.py
index c71af9a..82ecadd 100644
--- a/contextpilot/server/http_server.py
+++ b/contextpilot/server/http_server.py
@@ -25,6 +25,7 @@
 import os
 import re
 import uuid
+from dataclasses import dataclass, field as dc_field
 from typing import List, Dict, Any, Optional, cast
 from contextlib import asynccontextmanager
 
@@ -105,13 +106,9 @@
 # skip-old / dedup-new / reorder-new behaviour.  Single-conversation
 # model (one user at a time).  Resets when the system prompt changes.
 
-from dataclasses import dataclass, field as dc_field
-
-
 @dataclass
 class _InterceptConvState:
-    """Global intercept state for the current conversation."""
-
+    """Per-session intercept state for a single conversation."""
     # Cached copy of the full messages array after modification (reorder/dedup).
     # On subsequent turns, old messages are replaced with these cached versions
     # so the inference engine's prefix cache sees identical tokens.
@@ -132,7 +129,10 @@ class _InterceptConvState:
     last_message_count: int = 0
 
 
-_intercept_state = _InterceptConvState()
+# Per-session state dict keyed by session fingerprint (hash of first user msg).
+# This allows concurrent multi-user sessions to each maintain their own state.
+_intercept_states: dict[str, _InterceptConvState] = {}
+_MAX_TRACKED_SESSIONS = 64  # LRU eviction threshold
 
 # TTFT tracking for averages across a session
 _ttft_history: List[float] = []
@@ -876,19 +876,13 @@ async def reset_index():
 
     After reset, you must call /reorder again before other operations.
     """
-    global \
-        _index, \
-        _str_to_id, \
-        _id_to_str, \
-        _next_str_id, \
-        _intercept_index, \
-        _intercept_state
+    global _index, _str_to_id, _id_to_str, _next_str_id, _intercept_index, _intercept_states
 
     # Reset conversation tracker
     reset_conversation_tracker()
 
-    # Reset intercept conversation state
-    _intercept_state = _InterceptConvState()
+    # Reset all per-session intercept states
+    _intercept_states.clear()
     _intercept_index = None
 
     # Reset string-to-ID mapping
@@ -1186,31 +1180,72 @@ def _hash_text(text: str) -> str:
     return hashlib.sha256(text.encode("utf-8", errors="replace")).hexdigest()[:16]
 
 
+def _session_fingerprint(body: Dict[str, Any]) -> str:
+    """Derive a session fingerprint from the first user message.
+
+    In a multi-turn conversation, messages grow but the first user message
+    stays constant.  Hashing it gives a stable per-session key that lets
+    concurrent users each maintain their own intercept state.
+    """
+    msgs = body.get("messages") or []
+    # Find the first user message (usually msg[0] or msg[1] after system)
+    for msg in msgs[:3]:
+        if isinstance(msg, dict) and msg.get("role") == "user":
+            content = msg.get("content", "")
+            if isinstance(content, list):
+                # OpenAI format: [{type: text, text: "..."}]
+                parts = [p.get("text", "") for p in content
+                         if isinstance(p, dict)]
+                content = "".join(parts)
+            return _hash_text(str(content))
+    # Fallback: hash all messages (shouldn't happen in practice)
+    return _hash_text(json.dumps(msgs[:2], sort_keys=True))
+
+
 def _get_intercept_state(body: Dict[str, Any]) -> _InterceptConvState:
-    """Return the global intercept state, resetting if the conversation changed.
+    """Return per-session intercept state, creating or resetting as needed.
+
+    Uses the first user message as a session fingerprint so concurrent
+    multi-user sessions each get their own state.
 
     Detection: in a multi-turn agent conversation the messages array only
     grows.  If the count drops, either a new session started or the host
-    compacted old messages.  Either way, reset all state: the old KV cache
+    compacted old messages.  Either way, reset state: the old KV cache
     entries are gone (compaction rewrites content), so cached_messages,
     seen_doc_hashes, and reorder state are all invalid.
     """
-    global _intercept_state
+    global _intercept_states
+    session_key = _session_fingerprint(body)
     msg_count = len(body.get("messages") or [])
-    if msg_count < _intercept_state.last_message_count:
+
+    state = _intercept_states.get(session_key)
+
+    if state is None:
+        # New session
+        state = _InterceptConvState()
+        state.system_processed = True
         logger.info(
-            f"Intercept: message count dropped "
-            f"({msg_count} < {_intercept_state.last_message_count}), "
-            f"resetting all state (compaction or new session)"
+            f"Intercept: new session {session_key[:8]}… "
+            f"({msg_count} msgs, {len(_intercept_states)} active sessions)"
         )
-        _intercept_state = _InterceptConvState()
-        # Skip reorder for the first post-compaction tool result:
-        # prefix cache is fully invalidated, nothing to align with.
-        # Go straight to dedup mode so docs are registered for future turns.
-        _intercept_state.first_tool_result_done = True
-        _intercept_state.system_processed = True
-    _intercept_state.last_message_count = msg_count
-    return _intercept_state
+        # Evict oldest sessions if over limit
+        if len(_intercept_states) >= _MAX_TRACKED_SESSIONS:
+            oldest_key = next(iter(_intercept_states))
+            del _intercept_states[oldest_key]
+            logger.info(f"Intercept: evicted session {oldest_key[:8]}…")
+        _intercept_states[session_key] = state
+    elif msg_count < state.last_message_count:
+        logger.info(
+            f"Intercept: session {session_key[:8]}… message count dropped "
+            f"({msg_count} < {state.last_message_count}), "
+            f"resetting state (compaction or restart)"
+        )
+        state = _InterceptConvState()
+        state.system_processed = True
+        _intercept_states[session_key] = state
+
+    state.last_message_count = msg_count
+    return state
 
 
 def _deduplicate_docs(docs: List[str], state: _InterceptConvState) -> tuple:
@@ -1261,6 +1296,16 @@ def _strip_external_content_ids(body: Any) -> Any:
 _OPENAI_CHAT = "openai_chat"
 _ANTHROPIC_MESSAGES = "anthropic_messages"
 
+# Hop-by-hop headers that must not be forwarded by proxies.
+_HOP_BY_HOP = frozenset((
+    "host", "connection", "keep-alive", "transfer-encoding",
+    "te", "trailer", "upgrade", "proxy-authorization",
+    "proxy-authenticate", "content-length",
+))
+
+# Previous message hashes for prefix divergence detection.
+_debug_prev_msg_hashes: List[str] = []
+
 
 def _doc_preview(doc: str, max_len: int = 60) -> str:
     """Truncate a document string for log preview."""
@@ -1368,14 +1413,12 @@ async def _intercept_and_forward(request: Request, api_format: str):
     total_reordered = 0
     total_deduped = 0
     total_slimmed = 0
-    tool_results_skipped = 0  # TODO: never incremented — wire up or remove
-    _chars_before_slim = 0
-    _chars_after_slim = 0
+    chars_before_slim = 0
+    chars_after_slim = 0
     system_count = 0
     tool_result_count = 0
-    reorder_details = []  # collect per-source reorder info
+    reorder_details = []
     _dedup_result = DedupResult()
-    state = _intercept_state
 
     # ── Debug: log conversation shape, divergence, and tool_result details ──
     _debug_messages = body.get("messages") or []
@@ -1383,12 +1426,11 @@ async def _intercept_and_forward(request: Request, api_format: str):
 
     # Per-message hashes for this request
     _debug_msg_hashes = []
-    if logger.isEnabledFor(logging.DEBUG):
-        for m in _debug_messages:
-            h = hashlib.sha256(
-                json.dumps(m, sort_keys=True, ensure_ascii=False).encode()
-            ).hexdigest()[:12]
-            _debug_msg_hashes.append(h)
+    for m in _debug_messages:
+        h = hashlib.sha256(
+            json.dumps(m, sort_keys=True, ensure_ascii=False).encode()
+        ).hexdigest()[:12]
+        _debug_msg_hashes.append(h)
 
     # Build tool_call_id → function name mapping from assistant messages
     _tool_call_names = {}
@@ -1421,7 +1463,7 @@ async def _intercept_and_forward(request: Request, api_format: str):
             _chars = len(_content_str)
             _is_compacted = "[compacted:" in _content_str
             _preview = _content_str[:150].replace("\n", "\\n")
-            logger.info(
+            logger.debug(
                 f"  msg[{idx}] role={_role} fn={_fn_label} "
                 f"tool_call_id={_tc_id} "
                 f"chars={_chars} compacted={_is_compacted} "
@@ -1439,43 +1481,18 @@ async def _intercept_and_forward(request: Request, api_format: str):
                     _chars = len(_tc_str)
                     _is_compacted = "[compacted:" in _tc_str
                     _preview = _tc_str[:150].replace("\n", "\\n")
-                    logger.info(
+                    logger.debug(
                         f"  msg[{idx}].content[{bi}] type=tool_result "
                         f"tool_use_id={_tu_id} chars={_chars} "
                         f"compacted={_is_compacted} preview: {_preview}"
                     )
 
-    global _debug_prev_msg_hashes
-    if "_debug_prev_msg_hashes" not in globals():
-        _debug_prev_msg_hashes = []
-
-    _prev_n = len(_debug_prev_msg_hashes)
-    if _prev_n > 0 and _prev_n <= _debug_msg_count:
-        _first_diff = None
-        for idx in range(_prev_n):
-            if _debug_msg_hashes[idx] != _debug_prev_msg_hashes[idx]:
-                _first_diff = idx
-                break
-        if _first_diff is not None:
-            _diff_msg = _debug_messages[_first_diff]
-            _diff_role = _diff_msg.get("role", "?")
-            _diff_content = str(_diff_msg.get("content", ""))
-            logger.warning(
-                f"Intercept PREFIX MISMATCH at msg[{_first_diff}] "
-                f"(role={_diff_role}), "
-                f"hash was {_debug_prev_msg_hashes[_first_diff]} "
-                f"now {_debug_msg_hashes[_first_diff]}. "
-                f"Content preview ({len(_diff_content)} chars): "
-                f"{_diff_content[:300]}..."
-            )
-        else:
-            logger.info(
-                f"Intercept: {_debug_msg_count} msgs (prev={_prev_n}), "
-                f"prefix[:{_prev_n}] MATCH, "
-                f"{_debug_msg_count - _prev_n} new msgs"
-            )
-    else:
-        logger.info(f"Intercept: {_debug_msg_count} msgs (first request or reset)")
+    # Per-session debug logging (uses session fingerprint, not global state)
+    _session_key = _session_fingerprint(body)
+    _session_tag = _session_key[:8]
+    logger.info(
+        f"Intercept: session={_session_tag} {_debug_msg_count} msgs"
+    )
 
     _debug_prev_msg_hashes = list(_debug_msg_hashes)
 
@@ -1484,7 +1501,7 @@ async def _intercept_and_forward(request: Request, api_format: str):
 
     if config.enabled:
         try:
-            # body is already a fresh copy from _strip_external_content_ids
+            body = copy.deepcopy(body)
 
             # ── Conversation-aware state (single-conversation model) ──
             state = _get_intercept_state(body)
@@ -1493,15 +1510,44 @@ async def _intercept_and_forward(request: Request, api_format: str):
             # On subsequent turns, the host sends original (unmodified)
             # messages.  Replace them with our cached modified versions
             # so the inference engine's prefix cache sees identical tokens.
+            # IMPORTANT: Only replace if the old messages actually match
+            # (same session/user).  Without this check, concurrent requests
+            # from different sessions would get cross-contaminated.
             old_msg_count = len(state.cached_messages)
             if old_msg_count > 0:
                 msgs = body.get("messages", [])
                 if len(msgs) >= old_msg_count:
-                    msgs[:old_msg_count] = copy.deepcopy(state.cached_messages)
-                    logger.info(
-                        f"Intercept: replaced {old_msg_count} old messages "
-                        f"with cached versions for prefix cache consistency"
-                    )
+                    # Verify prefix match before replacing
+                    prefix_ok = True
+                    for _ci in range(old_msg_count):
+                        _cached_h = hashlib.sha256(
+                            json.dumps(state.cached_messages[_ci],
+                                       sort_keys=True,
+                                       ensure_ascii=False).encode()
+                        ).hexdigest()[:16]
+                        _current_h = hashlib.sha256(
+                            json.dumps(msgs[_ci],
+                                       sort_keys=True,
+                                       ensure_ascii=False).encode()
+                        ).hexdigest()[:16]
+                        if _cached_h != _current_h:
+                            prefix_ok = False
+                            break
+                    if prefix_ok:
+                        msgs[:old_msg_count] = copy.deepcopy(
+                            state.cached_messages)
+                        logger.info(
+                            f"Intercept: replaced {old_msg_count} old "
+                            f"messages with cached versions for prefix "
+                            f"cache consistency"
+                        )
+                    else:
+                        logger.info(
+                            f"Intercept: prefix mismatch at msg[{_ci}], "
+                            f"skipping cached message replay "
+                            f"(different session/user)"
+                        )
+                        old_msg_count = 0
                 handler.restore_system(body, state.cached_system)
 
             multi = handler.extract_all(body, config)
@@ -1523,7 +1569,7 @@ async def _intercept_and_forward(request: Request, api_format: str):
                             }
                         )
                         handler.reconstruct_system(
-                            body, extraction, reordered_docs, sys_idx, config
+                            body, extraction, reordered_docs, sys_idx
                         )
                         total_reordered += len(extraction.documents)
                         system_count = 1
@@ -1570,8 +1616,8 @@ async def _intercept_and_forward(request: Request, api_format: str):
                                 f"previous tool result ({orig_chars} chars). "
                                 f"Refer to the earlier result above.]"
                             ]
-                            _chars_before_slim += orig_chars
-                            _chars_after_slim += len(new_docs[0])
+                            chars_before_slim += orig_chars
+                            chars_after_slim += len(new_docs[0])
                             total_slimmed += deduped
                         reorder_details.append(
                             {
@@ -1626,13 +1672,8 @@ async def _intercept_and_forward(request: Request, api_format: str):
                         single_doc.tool_call_id
                     )
 
-            if (
-                total_reordered > 0
-                or total_deduped > 0
-                or total_slimmed > 0
-                or tool_results_skipped > 0
-            ):
-                saved = _chars_before_slim - _chars_after_slim
+            if total_reordered > 0 or total_deduped > 0 or total_slimmed > 0:
+                saved = chars_before_slim - chars_after_slim
                 saved_tokens = saved // 4 if saved > 0 else 0
                 logger.info(
                     f"Intercept ({api_format}): reordered {total_reordered}, "
@@ -1648,8 +1689,8 @@ async def _intercept_and_forward(request: Request, api_format: str):
                     _dedup_result = dedup_responses_api(body, chunk_modulus=_chunk_modulus)
 
                 if _dedup_result.chars_saved > 0:
-                    _chars_before_slim += _dedup_result.chars_before
-                    _chars_after_slim += _dedup_result.chars_after
+                    chars_before_slim += _dedup_result.chars_before
+                    chars_after_slim += _dedup_result.chars_after
                     logger.info(
                         f"Dedup ({api_format}): "
                         f"blocks={_dedup_result.blocks_deduped}/{_dedup_result.blocks_total}, "
@@ -1697,22 +1738,6 @@ async def _intercept_and_forward(request: Request, api_format: str):
     else:
         target_url = f"{infer_api_url}{handler.target_path()}"
 
-    # Build outbound headers: forward everything except X-ContextPilot-*
-    # and hop-by-hop headers that must not be forwarded by proxies.
-    _HOP_BY_HOP = frozenset(
-        (
-            "host",
-            "connection",
-            "keep-alive",
-            "transfer-encoding",
-            "te",
-            "trailer",
-            "upgrade",
-            "proxy-authorization",
-            "proxy-authenticate",
-            "content-length",
-        )
-    )
     if _cloud_mode and _cloud_adapter is not None and _cloud_api_key:
         outbound_headers = _cloud_adapter.get_auth_headers(_cloud_api_key)
     else:
@@ -1732,34 +1757,30 @@ async def _intercept_and_forward(request: Request, api_format: str):
         total_reordered > 0
         or total_deduped > 0
         or total_slimmed > 0
-        or tool_results_skipped > 0
         or _dedup_result.chars_saved > 0
     )
     if _has_activity:
-        cp_response_headers["X-ContextPilot-Result"] = json.dumps(
-            {
-                "intercepted": True,
-                "documents_reordered": total_reordered > 0,
-                "total_documents": total_reordered,
-                "documents_deduplicated": total_deduped,
-                "documents_slimmed": total_slimmed,
-                "chars_before_slim": _chars_before_slim,
-                "chars_after_slim": _chars_after_slim,
-                "chars_saved": _chars_before_slim - _chars_after_slim,
-                "tool_results_skipped": tool_results_skipped,
-                "message_count": state.last_message_count,
-                "sources": {
-                    "system": system_count,
-                    "tool_results": tool_result_count,
-                },
-                "reorder_details": reorder_details,
-                "dedup": {
-                    "blocks_deduped": _dedup_result.blocks_deduped,
-                    "blocks_total": _dedup_result.blocks_total,
-                    "chars_saved": _dedup_result.chars_saved,
-                },
-            }
-        )
+        cp_response_headers["X-ContextPilot-Result"] = json.dumps({
+            "intercepted": True,
+            "documents_reordered": total_reordered > 0,
+            "total_documents": total_reordered,
+            "documents_deduplicated": total_deduped,
+            "documents_slimmed": total_slimmed,
+            "chars_before_slim": chars_before_slim,
+            "chars_after_slim": chars_after_slim,
+            "chars_saved": chars_before_slim - chars_after_slim,
+            "message_count": state.last_message_count,
+            "sources": {
+                "system": system_count,
+                "tool_results": tool_result_count,
+            },
+            "reorder_details": reorder_details,
+            "dedup": {
+                "blocks_deduped": _dedup_result.blocks_deduped,
+                "blocks_total": _dedup_result.blocks_total,
+                "chars_saved": _dedup_result.chars_saved,
+            },
+        })
 
     is_stream = body.get("stream", False)
 
@@ -1785,7 +1806,7 @@ async def _stream_with_headers():
                     async for chunk in resp.content.iter_any():
                         if not _ttft_logged:
                             _ttft_ms = (time.monotonic() - _request_start) * 1000
-                            _saved = _chars_before_slim - _chars_after_slim
+                            _saved = chars_before_slim - chars_after_slim
                             _log_ttft(_ttft_ms, total_slimmed, _saved)
                             _ttft_logged = True
                         yield chunk
@@ -1795,12 +1816,9 @@ async def _stream_with_headers():
             status, fwd_headers = cast(tuple[int, Dict[str, str]], first_event)
 
             async def _stream_content_only():
-                try:
-                    async for event in stream_iter:
-                        if isinstance(event, bytes):
-                            yield event
-                finally:
-                    await stream_iter.aclose()
+                async for event in stream_iter:
+                    if isinstance(event, bytes):
+                        yield event
 
             return StreamingResponse(
                 _stream_content_only(),
@@ -1814,13 +1832,9 @@ async def _stream_content_only():
                 target_url, json=body, headers=outbound_headers
             ) as resp:
                 _ttft_ms = (time.monotonic() - _request_start) * 1000
-                _saved = _chars_before_slim - _chars_after_slim
+                _saved = chars_before_slim - chars_after_slim
                 _log_ttft(_ttft_ms, total_slimmed, _saved)
-                try:
-                    result = await resp.json()
-                except (json.JSONDecodeError, aiohttp.ContentTypeError):
-                    text = await resp.text()
-                    raise HTTPException(status_code=resp.status, detail=text[:500])
+                result = await resp.json()
 
                 # ── Cloud mode: track cache metrics from response ──
                 if (
@@ -1858,7 +1872,7 @@ async def _stream_content_only():
 
     except aiohttp.ClientError as e:
         logger.error(f"Error forwarding intercepted request: {e}")
-        raise HTTPException(status_code=502, detail="Backend connection error")
+        raise HTTPException(status_code=502, detail=f"Backend error: {str(e)}")
 
 
 @app.post("/v1/chat/completions")
@@ -1938,9 +1952,9 @@ async def proxy_engine(path: str, request: Request):
                     body["rid"] = request_id
                     body["request_id"] = request_id
 
-            body.setdefault("temperature", 0)
+            body["temperature"] = 0
             if _cloud_mode:
-                body.setdefault("top_p", 0)
+                body["top_p"] = 0
 
             dedup_result = DedupResult()
             try:
@@ -2153,7 +2167,7 @@ def main():
         os.environ["CONTEXTPILOT_CLOUD_API_KEY"] = args.cloud_api_key
 
     # Also set global config for direct access
-    global _max_tokens, _infer_api_url, _tokenizer, _model_name, _stateless_mode, _chunk_modulus
+    global _max_tokens, _infer_api_url, _tokenizer, _model_name, _stateless_mode
     _max_tokens = args.max_tokens
     _infer_api_url = args.infer_api_url.rstrip("/")
     _stateless_mode = args.stateless
diff --git a/openclaw-plugin/README.md b/openclaw-plugin/README.md
index 851533c..c36fc7c 100644
--- a/openclaw-plugin/README.md
+++ b/openclaw-plugin/README.md
@@ -1,90 +1,108 @@
-# @contextpilot/openclaw-plugin
+# @contextpilot/contextpilot
 
-OpenClaw native plugin for [ContextPilot](https://github.com/EfficientContext/ContextPilot) — faster long-context inference via in-process context reuse. **Zero external dependencies** — no Python, no proxy server, just install and go.
+OpenClaw plugin for [ContextPilot](https://github.com/EfficientContext/ContextPilot) — faster long-context inference via in-process context optimization. **Zero external dependencies** — no Python, no proxy server, just install and go.
 
 ## What It Does
 
-ContextPilot optimizes every LLM request by:
+ContextPilot registers as an OpenClaw **Context Engine** and optimizes every LLM request by:
 
-1. **Extracting** documents from system prompts and tool results
+1. **Extracting** documents from tool results
 2. **Reordering** documents for maximum prefix cache sharing across turns
 3. **Deduplicating** repeated content blocks with compact reference hints
-4. **Injecting** provider-specific cache control markers (Anthropic `cache_control`)
+4. **Injecting** cache control markers (Anthropic `cache_control: { type: "ephemeral" }`)
 
-All processing happens in-process inside the OpenClaw plugin — no external services needed.
+All processing happens in-process — no external services needed.
 
 ## Installation
 
+### From npm (when published)
+
 ```bash
-openclaw plugins install @contextpilot/openclaw-plugin
+openclaw plugins install @contextpilot/contextpilot
+```
+
+### From local path (development)
+
+Add to `~/.openclaw/openclaw.json`:
+
+```json
+{
+  "plugins": {
+    "load": {
+      "paths": [
+        "/path/to/ContextPilot/openclaw-plugin"
+      ]
+    }
+  }
+}
 ```
 
 ## Configuration
 
-In `~/.openclaw/openclaw.json`:
+In `~/.openclaw/openclaw.json`, enable the plugin and set it as the context engine:
 
-```json5
+```json
 {
-  plugins: {
-    entries: {
+  "plugins": {
+    "slots": {
+      "contextEngine": "contextpilot"
+    },
+    "entries": {
       "contextpilot": {
-        enabled: true,
-        config: {
-          // "anthropic" (default) or "openai"
-          "backendProvider": "anthropic",
-          
-          // What to optimize: "all" (default), "system", or "tool_results"
+        "enabled": true,
+        "config": {
           "scope": "all"
         }
       }
     }
+  },
+  "tools": {
+    "allow": ["contextpilot"]
   }
 }
 ```
 
-Set your API key:
-
-```bash
-export ANTHROPIC_API_KEY="sk-ant-xxx"
-# or
-export OPENAI_API_KEY="sk-xxx"
-```
-
-Then select a ContextPilot model (e.g., `contextpilot/claude-sonnet-4-6`) and start using OpenClaw.
+### Scope Options
 
-## Available Models
+| Scope | Tool Results | Description |
+|:------|:------------:|:------------|
+| `all` (default) | Optimized | Optimize all tool results |
+| `tool_results` | Optimized | Same as `all` |
 
-### Anthropic backend (default)
-
-| Model ID | Name |
-|----------|------|
-| `contextpilot/claude-opus-4-6` | Claude Opus 4.6 (ContextPilot) |
-| `contextpilot/claude-sonnet-4-6` | Claude Sonnet 4.6 (ContextPilot) |
-
-### OpenAI backend
-
-| Model ID | Name |
-|----------|------|
-| `contextpilot/gpt-4o` | GPT-4o (ContextPilot) |
-| `contextpilot/gpt-4o-mini` | GPT-4o Mini (ContextPilot) |
-
-Any model ID works via dynamic resolution — use `contextpilot/<any-model-id>`.
+> **Note:** System prompt optimization is not currently available — OpenClaw's context engine API does not expose the system prompt to plugins.
 
 ## How It Works
 
 ```
-OpenClaw request
+OpenClaw agent request
   ↓
-ContextPilot Plugin (wrapStreamFn)
-  ├─ Extract documents from system/tool_results
+ContextPilot Context Engine (assemble hook)
+  ├─ Convert OpenClaw message format (toolResult → tool_result)
+  ├─ Extract documents from tool results
   ├─ Reorder for prefix cache sharing
   ├─ Deduplicate repeated blocks
   ├─ Inject cache_control markers
   ↓
-Optimized request → LLM Backend (Anthropic/OpenAI)
+Optimized context → LLM Backend
 ```
 
-The plugin registers as an OpenClaw provider and uses `wrapStreamFn` to intercept requests before they reach the backend. All optimization is done in-process in TypeScript.
+The plugin registers as an OpenClaw Context Engine using `api.registerContextEngine()`. The `assemble()` hook intercepts context assembly before each LLM call.
+
+## Files
+
+```
+openclaw-plugin/
+├── openclaw.plugin.json   # Plugin manifest (id: "contextpilot")
+├── package.json           # npm package (@contextpilot/contextpilot)
+├── src/
+│   ├── index.ts           # Plugin entry point
+│   └── engine/
+│       ├── cache-control.ts   # Cache control injection
+│       ├── dedup.ts           # Content deduplication
+│       ├── extract.ts         # Document extraction
+│       └── live-index.ts      # Reordering engine
+└── tsconfig.json
+```
 
 ## Agent Tool
 
@@ -92,13 +110,17 @@ The plugin registers as an OpenClaw provider and uses `wrapStreamFn` to intercep
 |------|-------------|
 | `contextpilot_status` | Check engine status, request count, and chars saved |
 
-## Scope Control
+> **Note:** The status tool is registered but may not be visible to agents due to OpenClaw plugin API limitations.
+
+## Verifying It Works
 
-| Scope | System Prompt | Tool Results |
-|:---:|:---:|:---:|
-| `all` (default) | Optimized | Optimized |
-| `system` | Optimized | Untouched |
-| `tool_results` | Untouched | Optimized |
+Check the gateway logs for ContextPilot output:
+
+```
+[ContextPilot] assemble() called with 84 messages
+[ContextPilot] Extractions found - system: 0 tool: 1 singleDoc: 3
+[ContextPilot] Optimization complete. Chars saved: 2389
+```
 
 ## License
 
diff --git a/openclaw-plugin/benchmark.sh b/openclaw-plugin/benchmark.sh
new file mode 100755
index 0000000..b913178
--- /dev/null
+++ b/openclaw-plugin/benchmark.sh
@@ -0,0 +1,183 @@
+#!/bin/bash
+#
+# ContextPilot OpenClaw Plugin Benchmark
+# Compares token usage and cache hits with and without the plugin
+#
+# Usage: ./benchmark.sh [num_iterations]
+#
+
+set -e
+
+NUM_ITERATIONS=${1:-3}
+OPENCLAW_CONFIG="$HOME/.openclaw/openclaw.json"
+BACKUP_CONFIG="$HOME/.openclaw/openclaw.json.backup"
+GATEWAY_LOG="/tmp/gw-benchmark.log"
+
+# Test that triggers multiple file reads to show dedup benefit
+TEST_FILES=(
+    "/home/ryan/ContextPilot/openclaw-plugin/src/engine/dedup.ts"
+    "/home/ryan/ContextPilot/openclaw-plugin/src/engine/cache-control.ts"
+    "/home/ryan/ContextPilot/openclaw-plugin/src/index.ts"
+)
+
+echo "=========================================="
+echo "ContextPilot OpenClaw Plugin Benchmark"
+echo "=========================================="
+echo "Iterations: $NUM_ITERATIONS"
+echo ""
+
+# Backup config
+cp "$OPENCLAW_CONFIG" "$BACKUP_CONFIG"
+
+cleanup() {
+    echo ""
+    echo "Restoring original config..."
+    cp "$BACKUP_CONFIG" "$OPENCLAW_CONFIG"
+    rm -f "$BACKUP_CONFIG"
+    pkill -9 -f "openclaw gateway" 2>/dev/null || true
+}
+trap cleanup EXIT
+
+restart_gateway() {
+    pkill -9 -f "openclaw gateway" 2>/dev/null || true
+    sleep 2
+    openclaw gateway > "$GATEWAY_LOG" 2>&1 &
+    sleep 5
+}
+
+run_multi_read_test() {
+    local label=$1
+
+    echo "Running $label test..."
+    echo "  Reading ${#TEST_FILES[@]} files multiple times to trigger dedup..."
+
+    # First, read all files
+    for f in "${TEST_FILES[@]}"; do
+        openclaw agent --agent main --message "Read $f" > /dev/null 2>&1
+    done
+
+    # Then read them again (should trigger dedup on second pass)
+    for f in "${TEST_FILES[@]}"; do
+        openclaw agent --agent main --message "Read $f again and count lines" > /dev/null 2>&1
+    done
+
+    echo "  Done."
+}
+
+extract_stats() {
+    local log_file=$1
+
+    # Extract chars saved
+    local chars_saved=$(grep -oP "Chars saved: \K\d+" "$log_file" 2>/dev/null | tail -1 || echo "0")
+
+    # Extract cache stats from usage blocks
+    local cache_read=$(grep -oP '"cacheRead": \K\d+' "$log_file" 2>/dev/null | tail -1 || echo "0")
+    local cache_write=$(grep -oP '"cacheWrite": \K\d+' "$log_file" 2>/dev/null | tail -1 || echo "0")
+    local input_tokens=$(grep -oP '"input": \K\d+' "$log_file" 2>/dev/null | tail -1 || echo "0")
+
+    echo "$chars_saved $cache_read $cache_write $input_tokens"
+}
+
+# ==========================================
+# Test WITH ContextPilot enabled
+# ==========================================
+echo "----------------------------------------"
+echo "Test 1: WITH ContextPilot enabled"
+echo "----------------------------------------"
+
+# Ensure plugin is enabled
+python3 << 'PYTHON'
+import json
+config_path = "$HOME/.openclaw/openclaw.json".replace("$HOME", __import__("os").environ["HOME"])
+with open(config_path, 'r') as f:
+    config = json.load(f)
+if 'plugins' not in config:
+    config['plugins'] = {}
+if 'slots' not in config['plugins']:
+    config['plugins']['slots'] = {}
+config['plugins']['slots']['contextEngine'] = 'contextpilot'
+if 'entries' not in config['plugins']:
+    config['plugins']['entries'] = {}
+if 'contextpilot' not in config['plugins']['entries']:
+    config['plugins']['entries']['contextpilot'] = {}
+config['plugins']['entries']['contextpilot']['enabled'] = True
+with open(config_path, 'w') as f:
+    json.dump(config, f, indent=2)
+PYTHON
+
+restart_gateway
+run_multi_read_test "WITH_CONTEXTPILOT"
+
+WITH_STATS=$(extract_stats "$GATEWAY_LOG")
+WITH_CHARS=$(echo $WITH_STATS | cut -d' ' -f1)
+WITH_CACHE_READ=$(echo $WITH_STATS | cut -d' ' -f2)
+WITH_CACHE_WRITE=$(echo $WITH_STATS | cut -d' ' -f3)
+WITH_INPUT=$(echo $WITH_STATS | cut -d' ' -f4)
+
+echo ""
+echo "  Chars saved by dedup: $WITH_CHARS"
+echo "  Cache read tokens: $WITH_CACHE_READ"
+echo "  Cache write tokens: $WITH_CACHE_WRITE"
+echo "  Input tokens: $WITH_INPUT"
+
+# ==========================================
+# Test WITHOUT ContextPilot (disabled)
+# ==========================================
+echo ""
+echo "----------------------------------------"
+echo "Test 2: WITHOUT ContextPilot (disabled)"
+echo "----------------------------------------"
+
+# Disable the plugin
+python3 << 'PYTHON'
+import json
+config_path = "$HOME/.openclaw/openclaw.json".replace("$HOME", __import__("os").environ["HOME"])
+with open(config_path, 'r') as f:
+    config = json.load(f)
+if 'plugins' in config:
+    if 'slots' in config['plugins']:
+        config['plugins']['slots'].pop('contextEngine', None)
+    if 'entries' in config['plugins'] and 'contextpilot' in config['plugins']['entries']:
+        config['plugins']['entries']['contextpilot']['enabled'] = False
+with open(config_path, 'w') as f:
+    json.dump(config, f, indent=2)
+PYTHON
+
+restart_gateway
+run_multi_read_test "WITHOUT_CONTEXTPILOT"
+
+WITHOUT_STATS=$(extract_stats "$GATEWAY_LOG")
+WITHOUT_CHARS=$(echo $WITHOUT_STATS | cut -d' ' -f1)
+WITHOUT_CACHE_READ=$(echo $WITHOUT_STATS | cut -d' ' -f2)
+WITHOUT_CACHE_WRITE=$(echo $WITHOUT_STATS | cut -d' ' -f3)
+WITHOUT_INPUT=$(echo $WITHOUT_STATS | cut -d' ' -f4)
+
+echo ""
+echo "  Chars saved by dedup: $WITHOUT_CHARS (expected: 0)"
+echo "  Cache read tokens: $WITHOUT_CACHE_READ"
+echo "  Cache write tokens: $WITHOUT_CACHE_WRITE"
+echo "  Input tokens: $WITHOUT_INPUT"
+
+# ==========================================
+# Results Summary
+# ==========================================
+echo ""
+echo "=========================================="
+echo "RESULTS SUMMARY"
+echo "=========================================="
+echo ""
+echo "                        WITH      WITHOUT"
+echo "                     ContextPilot  Plugin"
+echo "----------------------------------------"
+printf "Chars deduped:       %8s    %8s\n" "$WITH_CHARS" "$WITHOUT_CHARS"
+printf "Cache read tokens:   %8s    %8s\n" "$WITH_CACHE_READ" "$WITHOUT_CACHE_READ"
+printf "Cache write tokens:  %8s    %8s\n" "$WITH_CACHE_WRITE" "$WITHOUT_CACHE_WRITE"
+printf "Input tokens:        %8s    %8s\n" "$WITH_INPUT" "$WITHOUT_INPUT"
+echo ""
+
+if [ "$WITH_CHARS" -gt "0" ]; then
+    echo "ContextPilot deduplication saved $WITH_CHARS characters"
+    # Rough estimate: 4 chars per token
+    tokens_saved=$((WITH_CHARS / 4))
+    echo "Estimated token savings: ~$tokens_saved tokens"
+fi
diff --git a/openclaw-plugin/openclaw.plugin.json b/openclaw-plugin/openclaw.plugin.json
index 94ec378..f9faee6 100644
--- a/openclaw-plugin/openclaw.plugin.json
+++ b/openclaw-plugin/openclaw.plugin.json
@@ -1,41 +1,12 @@
 {
   "id": "contextpilot",
   "name": "ContextPilot",
-  "description": "Faster long-context inference via in-process context reuse — reorders, deduplicates, and injects cache control for maximum prefix cache sharing. No external dependencies.",
-  "version": "0.2.0",
-  "providers": ["contextpilot"],
-  "providerAuthEnvVars": {
-    "contextpilot": ["ANTHROPIC_API_KEY", "OPENAI_API_KEY"]
-  },
-  "providerAuthChoices": [
-    {
-      "provider": "contextpilot",
-      "method": "api-key",
-      "choiceId": "contextpilot-api-key",
-      "choiceLabel": "Backend API key (Anthropic or OpenAI)",
-      "groupId": "contextpilot",
-      "groupLabel": "ContextPilot",
-      "cliFlag": "--anthropic-api-key",
-      "cliOption": "--anthropic-api-key <key>",
-      "cliDescription": "API key for the backend LLM provider",
-      "onboardingScopes": ["text-inference"]
-    }
-  ],
+  "description": "Faster long-context inference via context reuse — reorders, deduplicates, and injects cache control for maximum prefix cache sharing.",
+  "version": "0.3.0",
   "configSchema": {
     "type": "object",
     "additionalProperties": false,
     "properties": {
-      "backendProvider": {
-        "type": "string",
-        "enum": ["anthropic", "openai", "sglang"],
-        "description": "Backend LLM provider type",
-        "default": "anthropic"
-      },
-      "indexServerUrl": {
-        "type": "string",
-        "description": "ContextPilot index server URL (used in SGLang mode)",
-        "default": "http://localhost:8765"
-      },
       "scope": {
         "type": "string",
         "enum": ["all", "system", "tool_results"],
diff --git a/openclaw-plugin/package-lock.json b/openclaw-plugin/package-lock.json
new file mode 100644
index 0000000..aeda12e
--- /dev/null
+++ b/openclaw-plugin/package-lock.json
@@ -0,0 +1,22 @@
+{
+  "name": "@contextpilot/openclaw-plugin",
+  "version": "0.2.0",
+  "lockfileVersion": 3,
+  "requires": true,
+  "packages": {
+    "": {
+      "name": "@contextpilot/openclaw-plugin",
+      "version": "0.2.0",
+      "license": "Apache-2.0",
+      "dependencies": {
+        "@sinclair/typebox": "^0.34.49"
+      }
+    },
+    "node_modules/@sinclair/typebox": {
+      "version": "0.34.49",
+      "resolved": "https://registry.npmjs.org/@sinclair/typebox/-/typebox-0.34.49.tgz",
+      "integrity": "sha512-brySQQs7Jtn0joV8Xh9ZV/hZb9Ozb0pmazDIASBkYKCjXrXU3mpcFahmK/z4YDhGkQvP9mWJbVyahdtU5wQA+A==",
+      "license": "MIT"
+    }
+  }
+}
diff --git a/openclaw-plugin/package.json b/openclaw-plugin/package.json
index 58defc7..7f03fc8 100644
--- a/openclaw-plugin/package.json
+++ b/openclaw-plugin/package.json
@@ -1,5 +1,5 @@
 {
-  "name": "@contextpilot/openclaw-plugin",
+  "name": "@contextpilot/contextpilot",
   "version": "0.2.0",
   "description": "ContextPilot plugin for OpenClaw — faster long-context inference via in-process context reuse. Zero external dependencies.",
   "type": "module",
@@ -21,11 +21,16 @@
     "llm"
   ],
   "openclaw": {
-    "extensions": ["./src/index.ts"]
+    "extensions": [
+      "./src/index.ts"
+    ]
   },
   "files": [
     "src/",
     "openclaw.plugin.json",
     "README.md"
-  ]
+  ],
+  "dependencies": {
+    "@sinclair/typebox": "^0.34.49"
+  }
 }
diff --git a/openclaw-plugin/src/engine/cache-control.ts b/openclaw-plugin/src/engine/cache-control.ts
index 53d48e7..6ab3901 100644
--- a/openclaw-plugin/src/engine/cache-control.ts
+++ b/openclaw-plugin/src/engine/cache-control.ts
@@ -102,6 +102,30 @@ function injectToolResultCacheControl(
         }
 
         const message = msg as MessageBlock;
+
+        // Handle OpenClaw's toolResult role (content is the tool result itself)
+        if (message.role === 'toolResult') {
+            const toolResultContent = message.content ?? '';
+            let totalChars = 0;
+
+            if (typeof toolResultContent === 'string') {
+                totalChars = toolResultContent.length;
+            } else if (Array.isArray(toolResultContent)) {
+                totalChars = toolResultContent.reduce((sum, inner) => {
+                    if (isRecord(inner) && inner.type === 'text') {
+                        return sum + (typeof inner.text === 'string' ? inner.text.length : 0);
+                    }
+                    return sum;
+                }, 0);
+            }
+
+            if (totalChars >= MIN_CONTENT_LENGTH_FOR_CACHE) {
+                (message as any).cache_control = cc;
+            }
+            continue;
+        }
+
+        // Handle Anthropic's user message with tool_result blocks
         if (message.role !== 'user' || !Array.isArray(message.content)) {
             continue;
         }
@@ -121,6 +145,9 @@ function injectToolResultCacheControl(
 }
 
 export function injectAnthropicCacheControl(body: Record<string, unknown>): Record<string, unknown> {
+    if (!body || typeof body !== 'object') {
+        return body ?? {};
+    }
     const copiedBody = structuredClone(body);
     injectSystemCacheControl(copiedBody, CACHE_CONTROL_EPHEMERAL);
     injectToolResultCacheControl(copiedBody, CACHE_CONTROL_EPHEMERAL);
diff --git a/openclaw-plugin/src/engine/dedup.ts b/openclaw-plugin/src/engine/dedup.ts
index 14d79f4..eb3acb0 100644
--- a/openclaw-plugin/src/engine/dedup.ts
+++ b/openclaw-plugin/src/engine/dedup.ts
@@ -172,11 +172,22 @@ export function dedupChatCompletions(body: ChatCompletionsBody, opts: DedupOptio
 
     for (let idx = 0; idx < messages.length; idx++) {
         const msg = messages[idx];
-        if (!msg || typeof msg !== 'object' || msg.role !== 'tool') {
+        if (!msg || typeof msg !== 'object') {
+            continue;
+        }
+        // Support both OpenAI 'tool' role and OpenClaw 'toolResult' role
+        if (msg.role !== 'tool' && msg.role !== 'toolResult') {
             continue;
         }
 
-        const content = msg.content || '';
+        // For toolResult role, content might be an array of {type: "text", text: "..."} blocks
+        let content = msg.content || '';
+        if (Array.isArray(content)) {
+            content = content
+                .filter((b: any) => b?.type === 'text')
+                .map((b: any) => b.text || '')
+                .join('\n');
+        }
         if (typeof content !== 'string' || content.length < minContentChars) {
             continue;
         }
@@ -234,7 +245,18 @@ export function dedupChatCompletions(body: ChatCompletionsBody, opts: DedupOptio
         if (dedupedInThis > 0) {
             const originalLen = content.length;
             const newContent = newBlocks.join('\n\n');
-            msg.content = newContent;
+
+            // Preserve original content format
+            if (Array.isArray(msg.content)) {
+                // For array content, update the first text block
+                const textBlockIdx = msg.content.findIndex((b: any) => b?.type === 'text');
+                if (textBlockIdx >= 0) {
+                    (msg.content as any[])[textBlockIdx].text = newContent;
+                }
+            } else {
+                msg.content = newContent;
+            }
+
             const newLen = newContent.length;
             result.charsBefore += originalLen;
             result.charsAfter += newLen;
diff --git a/openclaw-plugin/src/engine/tree-nodes.ts b/openclaw-plugin/src/engine/tree-nodes.ts
index 3f9b380..e7b3c7a 100644
--- a/openclaw-plugin/src/engine/tree-nodes.ts
+++ b/openclaw-plugin/src/engine/tree-nodes.ts
@@ -30,7 +30,7 @@ export class ClusterNode {
     }
 
     get isLeaf(): boolean {
-        return this.children.length === 0;
+        return !Array.isArray(this.children) || this.children.length === 0;
     }
 
     get isRoot(): boolean {
@@ -50,6 +50,10 @@ export class ClusterNode {
     }
 
     addChild(childId: number): void {
+        // Defensive: ensure children is an array
+        if (!Array.isArray(this.children)) {
+            this.children = [];
+        }
         if (!this.children.includes(childId) && childId !== this.nodeId) {
             this.children.push(childId);
         }
diff --git a/openclaw-plugin/src/index.ts b/openclaw-plugin/src/index.ts
index fbd1384..e3736ff 100644
--- a/openclaw-plugin/src/index.ts
+++ b/openclaw-plugin/src/index.ts
@@ -1,47 +1,21 @@
 import { Type } from "@sinclair/typebox";
-import {
-  definePluginEntry,
-  type ProviderResolveDynamicModelContext,
-  type ProviderWrapStreamFnContext,
-} from "openclaw/plugin-sdk/plugin-entry";
-import { createProviderApiKeyAuthMethod } from "openclaw/plugin-sdk/provider-auth";
+import { definePluginEntry } from "openclaw/plugin-sdk/plugin-entry";
+import { delegateCompactionToRuntime } from "openclaw/plugin-sdk/core";
 
 import { injectCacheControl } from "./engine/cache-control.js";
-import { dedupChatCompletions, dedupResponsesApi } from "./engine/dedup.js";
+import { dedupChatCompletions } from "./engine/dedup.js";
 import { getFormatHandler, type InterceptConfig } from "./engine/extract.js";
-import { ContextPilotIndexClient } from "./engine/http-client.js";
 import { ContextPilot } from "./engine/live-index.js";
 
-const PROVIDER_ID = "contextpilot";
-type BackendProvider = "anthropic" | "openai" | "sglang";
+type Scope = "all" | "system" | "tool_results";
 
-function parseBackendProvider(value: unknown): BackendProvider {
-  if (value === "openai" || value === "sglang") {
-    return value;
-  }
-  return "anthropic";
-}
-
-function parseScope(value: unknown): "all" | "system" | "tool_results" {
+function parseScope(value: unknown): Scope {
   if (value === "system" || value === "tool_results" || value === "all") {
     return value;
   }
   return "all";
 }
 
-function detectApiFormat(
-  body: Record<string, unknown>,
-  backendProvider: BackendProvider,
-): "openai_chat" | "anthropic_messages" {
-  if (backendProvider === "anthropic") {
-    return "anthropic_messages";
-  }
-  if (backendProvider === "openai") {
-    return "openai_chat";
-  }
-  return "system" in body ? "anthropic_messages" : "openai_chat";
-}
-
 function reorderWithEngine(engine: ContextPilot, docs: string[]): string[] {
   const [reordered] = engine.reorder(docs);
   if (!Array.isArray(reordered) || !Array.isArray(reordered[0])) {
@@ -54,243 +28,182 @@ function reorderWithEngine(engine: ContextPilot, docs: string[]): string[] {
   return candidate as string[];
 }
 
-async function reorderWithClient(
-  client: ContextPilotIndexClient,
-  docs: string[],
-): Promise<string[]> {
-  const encodedDocs = docs.map((doc) => Array.from(doc, (ch) => ch.charCodeAt(0)));
-  const result = await client.reorder(encodedDocs, 0.001, false, "average");
-
-  if (result === null) {
-    return docs;
-  }
-
-  const [, originalIndices] = result;
-  if (!Array.isArray(originalIndices) || originalIndices.length !== docs.length) {
-    return docs;
-  }
-
-  const reordered = originalIndices.map((index) => {
-    if (typeof index !== "number" || index < 0 || index >= docs.length) {
-      return null;
-    }
-    return docs[index];
-  });
-
-  return reordered.includes(null) ? docs : (reordered as string[]);
-}
-
-function formatJson(value: unknown): string {
-  return value === null || value === undefined ? "unavailable" : JSON.stringify(value);
+interface Message {
+  role: string;
+  content: unknown;
 }
 
 export default definePluginEntry({
   id: "contextpilot",
   name: "ContextPilot",
-  description: "Optimizes LLM requests in-process via extraction, dedup, caching, and reordering.",
+  description: "Optimizes context via reordering, deduplication, and cache control injection.",
   register: (api) => {
     const config = {
-      backendProvider: parseBackendProvider(api.pluginConfig?.backendProvider),
       scope: parseScope(api.pluginConfig?.scope),
-      indexServerUrl: String(api.pluginConfig?.indexServerUrl || "http://localhost:8765"),
     };
 
-    const isSglang = config.backendProvider === "sglang";
-    const engine = isSglang ? null : new ContextPilot(0.001, false, "average");
-    const client = isSglang ? new ContextPilotIndexClient(config.indexServerUrl) : null;
+    // Initialize the ContextPilot engine for reordering
+    const engine = new ContextPilot(0.001, false, "average");
 
-    let requestCount = 0;
+    let assembleCount = 0;
     let totalCharsSaved = 0;
 
-    api.registerProvider({
-      id: PROVIDER_ID,
-      label: "ContextPilot",
-      docsPath: "/providers/contextpilot",
-      envVars: isSglang
-        ? []
-        : [config.backendProvider === "anthropic" ? "ANTHROPIC_API_KEY" : "OPENAI_API_KEY"],
-      auth: isSglang
-        ? []
-        : [
-          createProviderApiKeyAuthMethod({
-            providerId: PROVIDER_ID,
-            methodId: "api-key",
-            label: config.backendProvider === "anthropic" ? "Anthropic API key" : "OpenAI API key",
-            hint: "API key for the backend LLM provider",
-            optionKey: config.backendProvider === "anthropic" ? "anthropicApiKey" : "openaiApiKey",
-            flagName: config.backendProvider === "anthropic" ? "--anthropic-api-key" : "--openai-api-key",
-            envVar: config.backendProvider === "anthropic" ? "ANTHROPIC_API_KEY" : "OPENAI_API_KEY",
-            promptMessage: "Enter your API key",
-            defaultModel:
-              config.backendProvider === "anthropic"
-                ? "contextpilot/claude-sonnet-4-6"
-                : "contextpilot/gpt-4o",
-          }),
-        ],
-      resolveDynamicModel: (ctx: ProviderResolveDynamicModelContext) => {
-        if (config.backendProvider === "sglang") {
-          return {
-            id: ctx.modelId,
-            name: ctx.modelId,
-            provider: PROVIDER_ID,
-            baseUrl: config.indexServerUrl,
-            api: "openai-completions",
-            reasoning: false,
-            input: ["text", "image"] as Array<"text" | "image">,
-            cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
-            contextWindow: 200000,
-            maxTokens: 16384,
-          };
-        }
+    // Register as a Context Engine - this intercepts context assembly
+    api.registerContextEngine("contextpilot", () => ({
+      info: {
+        id: "contextpilot",
+        name: "ContextPilot",
+        ownsCompaction: false,
+      },
 
-        const isAnthropic = config.backendProvider === "anthropic";
-        return {
-          id: ctx.modelId,
-          name: ctx.modelId,
-          provider: PROVIDER_ID,
-          baseUrl: isAnthropic ? "https://api.anthropic.com/v1" : "https://api.openai.com/v1",
-          api: isAnthropic ? "anthropic-messages" : "openai-completions",
-          reasoning: false,
-          input: ["text", "image"] as Array<"text" | "image">,
-          cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
-          contextWindow: 200000,
-          maxTokens: 16384,
-        };
+      async ingest() {
+        return { ingested: true };
       },
-      wrapStreamFn: (ctx: ProviderWrapStreamFnContext) => {
-        const originalStreamFn = ctx.streamFn;
-        if (!originalStreamFn) return undefined;
 
-        return async (params) => {
-          const request = params as { body?: unknown };
-          if (!request.body) {
-            return originalStreamFn(params);
-          }
+      async assemble({ messages, system }: { messages: Message[]; system?: string }) {
+        const interceptConfig: InterceptConfig = {
+          enabled: true,
+          mode: "auto",
+          tag: "document",
+          separator: "---",
+          alpha: 0.001,
+          linkageMethod: "average",
+          scope: config.scope,
+        };
 
-          const body = structuredClone(request.body) as Record<string, unknown>;
-          const apiFormat = detectApiFormat(body, config.backendProvider);
+        // OpenClaw uses role: "toolResult" instead of Anthropic's user+tool_result blocks
+        // Convert to Anthropic format for our extractors
+        const convertedMessages = messages.map((msg, idx) => {
+          if (msg.role === "toolResult") {
+            const content = typeof msg.content === "string"
+              ? msg.content
+              : Array.isArray(msg.content)
+                ? (msg.content as any[]).map(b => b?.text || "").join("\n")
+                : "";
+            return {
+              role: "user",
+              content: [{
+                type: "tool_result",
+                tool_use_id: (msg as any).tool_use_id || (msg as any).toolUseId || `tool_${idx}`,
+                content: content,
+              }],
+            };
+          }
+          return msg;
+        });
 
-          const interceptConfig: InterceptConfig = {
-            enabled: true,
-            mode: "auto",
-            tag: "document",
-            separator: "---",
-            alpha: 0.001,
-            linkageMethod: "average",
-            scope: config.scope,
-          };
+        const convertedBody: Record<string, unknown> = {
+          messages: convertedMessages,
+          system: system,
+        };
 
-          const handler = getFormatHandler(apiFormat);
-          const multi = handler.extractAll(body, interceptConfig);
+        const handler = getFormatHandler("anthropic_messages");
+        const multi = handler.extractAll(convertedBody, interceptConfig);
 
-          const reorderDocs = async (docs: string[]): Promise<string[]> => {
-            if (docs.length < 2) {
-              return docs;
-            }
-            if (client) {
-              return reorderWithClient(client, docs);
-            }
-            if (engine) {
-              return reorderWithEngine(engine, docs);
-            }
+        const reorderDocs = (docs: string[]): string[] => {
+          if (docs.length < 2) {
             return docs;
-          };
-
-          if (multi.systemExtraction) {
-            const [extraction, sysIdx] = multi.systemExtraction;
-            if (extraction.documents.length >= 2) {
-              const reordered = await reorderDocs(extraction.documents);
-              handler.reconstructSystem(body, extraction, reordered, sysIdx);
-            }
           }
+          return reorderWithEngine(engine, docs);
+        };
 
-          for (const [extraction, location] of multi.toolExtractions) {
-            if (extraction.documents.length >= 2) {
-              const reordered = await reorderDocs(extraction.documents);
-              handler.reconstructToolResult(body, extraction, reordered, location);
-            }
+        // Reorder documents in system prompt
+        if (multi.systemExtraction) {
+          const [extraction, sysIdx] = multi.systemExtraction;
+          if (extraction.documents.length >= 2) {
+            const reordered = reorderDocs(extraction.documents);
+            handler.reconstructSystem(convertedBody, extraction, reordered, sysIdx);
           }
+        }
 
-          if (apiFormat === "openai_chat") {
-            const dedupResult = dedupChatCompletions(body);
-            totalCharsSaved += dedupResult.charsSaved;
+        // Reorder documents in tool results
+        for (const [extraction, location] of multi.toolExtractions) {
+          if (extraction.documents.length >= 2) {
+            const reordered = reorderDocs(extraction.documents);
+            handler.reconstructToolResult(convertedBody, extraction, reordered, location);
           }
-          if (body.input && Array.isArray(body.input)) {
-            const dedupResult = dedupResponsesApi(body);
-            totalCharsSaved += dedupResult.charsSaved;
+        }
+
+        // Map converted messages back to original format (toolResult role)
+        const finalMessages = (convertedBody.messages as any[]).map((msg, idx) => {
+          const original = messages[idx];
+          if (original?.role === "toolResult") {
+            const block = Array.isArray(msg.content) ? msg.content[0] : null;
+            const extractedContent = block?.content;
+
+            if (Array.isArray(original.content)) {
+              const newContentArray = (original.content as any[]).map(b => {
+                if (b?.type === "text" && typeof extractedContent === "string") {
+                  return { ...b, text: extractedContent };
+                }
+                return b;
+              });
+              return { ...original, content: newContentArray };
+            } else if (typeof extractedContent === "string") {
+              return { ...original, content: extractedContent };
+            }
+            return original;
           }
+          return msg;
+        });
+
+        // Build final body with potentially reordered messages
+        const finalBody: Record<string, unknown> = {
+          messages: finalMessages,
+          system: system,
+        };
+
+        // Deduplicate repeated content
+        const dedupResult = dedupChatCompletions(finalBody);
+        totalCharsSaved += dedupResult.charsSaved;
 
-          const optimizedBody = isSglang
-            ? body
-            : injectCacheControl(body, config.backendProvider === "anthropic" ? "anthropic" : "openai");
+        // Inject cache control markers
+        const optimizedBody = injectCacheControl(finalBody, "anthropic");
 
-          requestCount++;
+        assembleCount++;
 
-          return originalStreamFn({
-            ...params,
-            body: optimizedBody,
-          });
+        // Log savings periodically (every 5 requests or when significant savings)
+        if (dedupResult.charsSaved > 0 || assembleCount % 5 === 0) {
+          const estimatedTokensSaved = Math.round(totalCharsSaved / 4);
+          const estimatedCostSaved = (estimatedTokensSaved * 0.003 / 1000).toFixed(4); // $3/MTok input
+          console.error(`[ContextPilot] Stats: ${assembleCount} requests, ${totalCharsSaved.toLocaleString()} chars saved (~${estimatedTokensSaved.toLocaleString()} tokens, ~$${estimatedCostSaved})`);
+        }
+
+        // Return optimized messages
+        return {
+          messages: (optimizedBody.messages as Message[]) || messages,
+          system: optimizedBody.system as string | undefined,
+          estimatedTokens: 0,
         };
       },
-      augmentModelCatalog: () => {
-        if (config.backendProvider === "sglang") {
-          return [
-            { id: "default", name: "SGLang Default (ContextPilot)", provider: PROVIDER_ID },
-          ];
-        }
 
-        const isAnthropic = config.backendProvider === "anthropic";
-        if (isAnthropic) {
-          return [
-            { id: "claude-opus-4-6", name: "Claude Opus 4.6 (ContextPilot)", provider: PROVIDER_ID },
-            {
-              id: "claude-sonnet-4-6",
-              name: "Claude Sonnet 4.6 (ContextPilot)",
-              provider: PROVIDER_ID,
-            },
-          ];
-        }
-        return [
-          { id: "gpt-4o", name: "GPT-4o (ContextPilot)", provider: PROVIDER_ID },
-          { id: "gpt-4o-mini", name: "GPT-4o Mini (ContextPilot)", provider: PROVIDER_ID },
-        ];
+      async compact(params) {
+        return await delegateCompactionToRuntime(params);
       },
-    });
+    }));
 
+    // Register status tool
     api.registerTool({
       name: "contextpilot_status",
       description: "Report ContextPilot engine state",
       parameters: Type.Object({}),
       async execute(_toolCallId: string, _params: unknown) {
+        const stats = engine.getStats();
         const lines = [
           "ContextPilot Engine Status:",
-          `  Backend: ${config.backendProvider}`,
           `  Scope: ${config.scope}`,
-          `  Requests optimized: ${requestCount}`,
+          `  Contexts assembled: ${assembleCount}`,
           `  Total chars saved: ${totalCharsSaved.toLocaleString()}`,
+          `  Live index: ${engine.isLive ? "active" : "warming"}`,
+          `  Nodes: ${Number(stats.num_nodes ?? 0)}`,
+          `  Active nodes: ${Number(stats.active_nodes ?? 0)}`,
+          `  Requests tracked: ${Number(stats.num_requests ?? 0)}`,
+          `  Total searches: ${Number(stats.total_searches ?? 0)}`,
+          `  Total insertions: ${Number(stats.total_insertions ?? 0)}`,
+          `  Total removals: ${Number(stats.total_removals ?? 0)}`,
+          `  Avg search time (us): ${Number(stats.avg_search_time_us ?? 0).toFixed(2)}`,
         ];
 
-        if (engine) {
-          const stats = engine.getStats();
-          lines.push("  Mode: cloud-api (in-process ContextPilot engine)");
-          lines.push(`  Live index: ${engine.isLive ? "active" : "warming"}`);
-          lines.push(`  Nodes: ${Number(stats.num_nodes ?? 0)}`);
-          lines.push(`  Active nodes: ${Number(stats.active_nodes ?? 0)}`);
-          lines.push(`  Requests tracked: ${Number(stats.num_requests ?? 0)}`);
-          lines.push(`  Total searches: ${Number(stats.total_searches ?? 0)}`);
-          lines.push(`  Total insertions: ${Number(stats.total_insertions ?? 0)}`);
-          lines.push(`  Total removals: ${Number(stats.total_removals ?? 0)}`);
-          lines.push(`  Avg search time (us): ${Number(stats.avg_search_time_us ?? 0).toFixed(2)}`);
-        }
-
-        if (client) {
-          const [health, remoteStats] = await Promise.all([client.health(), client.getStats()]);
-          lines.push("  Mode: sglang (remote ContextPilot index)");
-          lines.push(`  Index server URL: ${config.indexServerUrl}`);
-          lines.push(`  Index server health: ${formatJson(health)}`);
-          lines.push(`  Index server stats: ${formatJson(remoteStats)}`);
-        }
-
         return {
           content: [
             {
diff --git a/openclaw-plugin/test-e2e.ts b/openclaw-plugin/test-e2e.ts
new file mode 100644
index 0000000..6b2f6f6
--- /dev/null
+++ b/openclaw-plugin/test-e2e.ts
@@ -0,0 +1,188 @@
+#!/usr/bin/env npx tsx
+/**
+ * E2E test for ContextPilot plugin
+ *
+ * Run: npx tsx test-e2e.ts
+ * Requires: ANTHROPIC_API_KEY in environment
+ */
+
+import { ContextPilot } from './src/engine/live-index.js';
+import { getFormatHandler, type InterceptConfig } from './src/engine/extract.js';
+import { injectCacheControl } from './src/engine/cache-control.js';
+import { dedupChatCompletions } from './src/engine/dedup.js';
+
+const ANTHROPIC_API_KEY = process.env.ANTHROPIC_API_KEY;
+if (!ANTHROPIC_API_KEY) {
+  console.error('Error: ANTHROPIC_API_KEY not set');
+  process.exit(1);
+}
+
+// Simulated system prompt with multiple documents (like Claude Code's context)
+const systemPromptWithDocs = `You are a helpful coding assistant.
+
+<documents>
+<document>
+# File: src/index.ts
+export function main() {
+  console.log("Hello world");
+  const result = processData(getData());
+  return result;
+}
+
+function getData() {
+  return { items: [1, 2, 3, 4, 5] };
+}
+
+function processData(data: { items: number[] }) {
+  return data.items.map(x => x * 2);
+}
+</document>
+<document>
+# File: src/utils.ts
+export function formatOutput(data: number[]): string {
+  return data.join(', ');
+}
+
+export function validateInput(input: unknown): boolean {
+  return Array.isArray(input) && input.every(x => typeof x === 'number');
+}
+
+export function calculateSum(numbers: number[]): number {
+  return numbers.reduce((a, b) => a + b, 0);
+}
+</document>
+<document>
+# File: README.md
+# My Project
+
+This is a sample project demonstrating the ContextPilot optimization.
+
+## Installation
+npm install
+
+## Usage
+npm start
+
+## Features
+- Data processing
+- Input validation
+- Output formatting
+</document>
+</documents>
+
+Answer questions about the code above.`;
+
+// Build Anthropic Messages API request body
+const requestBody = {
+  model: 'claude-sonnet-4-6',
+  max_tokens: 256,
+  system: systemPromptWithDocs,
+  messages: [
+    {
+      role: 'user',
+      content: 'What does the main function do? Be brief.'
+    }
+  ]
+};
+
+async function runTest() {
+  console.log('=== ContextPilot E2E Test ===\n');
+
+  // 1. Initialize engine
+  const engine = new ContextPilot(0.001, false, 'average');
+  console.log('1. Engine initialized');
+
+  // 2. Extract documents
+  const interceptConfig: InterceptConfig = {
+    enabled: true,
+    mode: 'auto',
+    tag: 'document',
+    separator: '---',
+    alpha: 0.001,
+    linkageMethod: 'average',
+    scope: 'all'
+  };
+
+  const body = structuredClone(requestBody);
+  const handler = getFormatHandler('anthropic_messages');
+  const multi = handler.extractAll(body, interceptConfig);
+
+  console.log(`2. Extracted ${multi.totalDocuments} documents from system prompt`);
+
+  // 3. Reorder documents
+  if (multi.systemExtraction) {
+    const [extraction, sysIdx] = multi.systemExtraction;
+    console.log(`   Original order: ${extraction.documents.map((_, i) => i).join(', ')}`);
+
+    if (extraction.documents.length >= 2) {
+      const [reordered] = engine.reorder(extraction.documents);
+      if (Array.isArray(reordered) && Array.isArray(reordered[0])) {
+        const reorderedDocs = reordered[0] as string[];
+        handler.reconstructSystem(body, extraction, reorderedDocs, sysIdx);
+        console.log('3. Documents reordered for prefix cache optimization');
+      }
+    }
+  }
+
+  // 4. Inject cache control
+  const optimizedBody = injectCacheControl(body, 'anthropic');
+  console.log('4. Cache control markers injected');
+
+  // 5. Show optimization summary
+  const originalLen = JSON.stringify(requestBody).length;
+  const optimizedLen = JSON.stringify(optimizedBody).length;
+  console.log(`\n=== Optimization Summary ===`);
+  console.log(`Original request size: ${originalLen} chars`);
+  console.log(`Optimized request size: ${optimizedLen} chars`);
+  console.log(`Cache control added: ${JSON.stringify(optimizedBody).includes('cache_control')}`);
+
+  // 6. Make real API call
+  console.log('\n=== Making API Call ===');
+  console.log('Calling Anthropic API with optimized request...\n');
+
+  const response = await fetch('https://api.anthropic.com/v1/messages', {
+    method: 'POST',
+    headers: {
+      'Content-Type': 'application/json',
+      'x-api-key': ANTHROPIC_API_KEY,
+      'anthropic-version': '2023-06-01',
+      'anthropic-beta': 'prompt-caching-2024-07-31'
+    },
+    body: JSON.stringify(optimizedBody)
+  });
+
+  const result = await response.json();
+
+  if (result.error) {
+    console.error('API Error:', result.error);
+    process.exit(1);
+  }
+
+  console.log('=== Response ===');
+  console.log('Model:', result.model);
+  console.log('Stop reason:', result.stop_reason);
+  console.log('\nAssistant:', result.content?.[0]?.text || '(no text)');
+
+  console.log('\n=== Usage ===');
+  console.log('Input tokens:', result.usage?.input_tokens);
+  console.log('Output tokens:', result.usage?.output_tokens);
+  if (result.usage?.cache_creation_input_tokens) {
+    console.log('Cache creation tokens:', result.usage.cache_creation_input_tokens);
+  }
+  if (result.usage?.cache_read_input_tokens) {
+    console.log('Cache read tokens:', result.usage.cache_read_input_tokens);
+  }
+
+  console.log('\n=== Engine Stats ===');
+  const stats = engine.getStats();
+  console.log('Nodes:', stats.num_nodes);
+  console.log('Active nodes:', stats.active_nodes);
+  console.log('Total insertions:', stats.total_insertions);
+
+  console.log('\n✓ E2E test complete');
+}
+
+runTest().catch(err => {
+  console.error('Test failed:', err);
+  process.exit(1);
+});

From 5b408db634b7f947a35602383006c4f3dcc40dc8 Mon Sep 17 00:00:00 2001
From: dalongbao <ryantsui786@gmail.com>
Date: Fri, 3 Apr 2026 16:10:51 +0100
Subject: [PATCH 7/8] openclaw plugin bench

---
 openclaw-plugin/README.md    |  18 ++-
 openclaw-plugin/benchmark.sh | 242 +++++++++++++++++------------------
 2 files changed, 131 insertions(+), 129 deletions(-)

diff --git a/openclaw-plugin/README.md b/openclaw-plugin/README.md
index c36fc7c..edd6dbd 100644
--- a/openclaw-plugin/README.md
+++ b/openclaw-plugin/README.md
@@ -114,14 +114,24 @@ openclaw-plugin/
 
 ## Verifying It Works
 
-Check the gateway logs for ContextPilot output:
+Check the gateway logs:
 
 ```
-[ContextPilot] assemble() called with 84 messages
-[ContextPilot] Extractions found - system: 0 tool: 1 singleDoc: 3
-[ContextPilot] Optimization complete. Chars saved: 2389
+[ContextPilot] Stats: 5 requests, 28,356 chars saved (~7,089 tokens, ~$0.0213)
 ```
 
+## Expected Savings
+
+Savings depend on conversation length and repeated content:
+
+| Scenario | Chars Saved | Token Reduction |
+|:---------|------------:|----------------:|
+| Short session (few tool calls) | 0-5K | ~0-5% |
+| Medium session (10+ file reads) | 20-50K | ~10-20% |
+| Long session (repeated large files) | 100K+ | ~30-50% |
+
+Run `./benchmark.sh` to measure with/without comparison on your workload.
+
 ## License
 
 Apache-2.0
diff --git a/openclaw-plugin/benchmark.sh b/openclaw-plugin/benchmark.sh
index b913178..02dd583 100755
--- a/openclaw-plugin/benchmark.sh
+++ b/openclaw-plugin/benchmark.sh
@@ -1,183 +1,175 @@
 #!/bin/bash
 #
-# ContextPilot OpenClaw Plugin Benchmark
-# Compares token usage and cache hits with and without the plugin
-#
-# Usage: ./benchmark.sh [num_iterations]
+# ContextPilot Token Usage Benchmark
+# Compares prefill/input tokens with and without the plugin
 #
 
 set -e
 
-NUM_ITERATIONS=${1:-3}
 OPENCLAW_CONFIG="$HOME/.openclaw/openclaw.json"
-BACKUP_CONFIG="$HOME/.openclaw/openclaw.json.backup"
-GATEWAY_LOG="/tmp/gw-benchmark.log"
+BACKUP_CONFIG="$HOME/.openclaw/openclaw.json.bak"
+LOG_WITH="/tmp/gw-with-cp.log"
+LOG_WITHOUT="/tmp/gw-without-cp.log"
 
-# Test that triggers multiple file reads to show dedup benefit
-TEST_FILES=(
-    "/home/ryan/ContextPilot/openclaw-plugin/src/engine/dedup.ts"
-    "/home/ryan/ContextPilot/openclaw-plugin/src/engine/cache-control.ts"
-    "/home/ryan/ContextPilot/openclaw-plugin/src/index.ts"
-)
+TEST_FILE="/home/ryan/ContextPilot/openclaw-plugin/src/engine/dedup.ts"
 
 echo "=========================================="
-echo "ContextPilot OpenClaw Plugin Benchmark"
+echo "ContextPilot Token Usage Benchmark"
 echo "=========================================="
-echo "Iterations: $NUM_ITERATIONS"
-echo ""
 
 # Backup config
 cp "$OPENCLAW_CONFIG" "$BACKUP_CONFIG"
 
 cleanup() {
     echo ""
-    echo "Restoring original config..."
+    echo "Restoring config..."
     cp "$BACKUP_CONFIG" "$OPENCLAW_CONFIG"
     rm -f "$BACKUP_CONFIG"
-    pkill -9 -f "openclaw gateway" 2>/dev/null || true
+    openclaw gateway stop 2>/dev/null || pkill -9 -f "openclaw" 2>/dev/null || true
 }
 trap cleanup EXIT
 
-restart_gateway() {
-    pkill -9 -f "openclaw gateway" 2>/dev/null || true
-    sleep 2
-    openclaw gateway > "$GATEWAY_LOG" 2>&1 &
-    sleep 5
+enable_contextpilot() {
+    python3 << 'PYTHON'
+import json, os
+path = os.path.expanduser("~/.openclaw/openclaw.json")
+with open(path) as f: c = json.load(f)
+c.setdefault('plugins', {}).setdefault('slots', {})['contextEngine'] = 'contextpilot'
+c['plugins'].setdefault('entries', {}).setdefault('contextpilot', {})['enabled'] = True
+with open(path, 'w') as f: json.dump(c, f, indent=2)
+PYTHON
 }
 
-run_multi_read_test() {
-    local label=$1
-
-    echo "Running $label test..."
-    echo "  Reading ${#TEST_FILES[@]} files multiple times to trigger dedup..."
-
-    # First, read all files
-    for f in "${TEST_FILES[@]}"; do
-        openclaw agent --agent main --message "Read $f" > /dev/null 2>&1
-    done
+disable_contextpilot() {
+    python3 << 'PYTHON'
+import json, os
+path = os.path.expanduser("~/.openclaw/openclaw.json")
+with open(path) as f: c = json.load(f)
+if 'plugins' in c:
+    c['plugins'].get('slots', {}).pop('contextEngine', None)
+    if 'contextpilot' in c['plugins'].get('entries', {}):
+        c['plugins']['entries']['contextpilot']['enabled'] = False
+with open(path, 'w') as f: json.dump(c, f, indent=2)
+PYTHON
+}
 
-    # Then read them again (should trigger dedup on second pass)
-    for f in "${TEST_FILES[@]}"; do
-        openclaw agent --agent main --message "Read $f again and count lines" > /dev/null 2>&1
-    done
+restart_gateway() {
+    local logfile=$1
+    echo "  Stopping gateway..."
+    openclaw gateway stop 2>/dev/null || true
+    pkill -9 -f "openclaw" 2>/dev/null || true
+    sleep 3
+    echo "  Starting gateway..."
+    openclaw gateway > "$logfile" 2>&1 &
+    sleep 6
+    if ! pgrep -f "openclaw" > /dev/null; then
+        echo "  ERROR: Gateway failed to start"
+        cat "$logfile" | tail -10
+        exit 1
+    fi
+    echo "  Gateway running."
+}
 
+run_test_sequence() {
+    echo "  Reading file 3 times to build up context..."
+    timeout 60 openclaw agent --agent main --message "Read $TEST_FILE and count functions" > /dev/null 2>&1 || true
+    timeout 60 openclaw agent --agent main --message "Read $TEST_FILE again" > /dev/null 2>&1 || true
+    timeout 60 openclaw agent --agent main --message "Read $TEST_FILE one more time and summarize" > /dev/null 2>&1 || true
     echo "  Done."
 }
 
-extract_stats() {
-    local log_file=$1
-
-    # Extract chars saved
-    local chars_saved=$(grep -oP "Chars saved: \K\d+" "$log_file" 2>/dev/null | tail -1 || echo "0")
-
-    # Extract cache stats from usage blocks
-    local cache_read=$(grep -oP '"cacheRead": \K\d+' "$log_file" 2>/dev/null | tail -1 || echo "0")
-    local cache_write=$(grep -oP '"cacheWrite": \K\d+' "$log_file" 2>/dev/null | tail -1 || echo "0")
-    local input_tokens=$(grep -oP '"input": \K\d+' "$log_file" 2>/dev/null | tail -1 || echo "0")
+extract_last_usage() {
+    local logfile=$1
+    # Find the last complete usage block and extract values
+    local input=$(grep '"input":' "$logfile" 2>/dev/null | tail -1 | grep -oP '\d+' || echo "0")
+    local cache_read=$(grep '"cacheRead":' "$logfile" 2>/dev/null | tail -1 | grep -oP '\d+' || echo "0")
+    local cache_write=$(grep '"cacheWrite":' "$logfile" 2>/dev/null | tail -1 | grep -oP '\d+' || echo "0")
+    echo "$input $cache_read $cache_write"
+}
 
-    echo "$chars_saved $cache_read $cache_write $input_tokens"
+extract_chars_saved() {
+    local logfile=$1
+    # Look for ContextPilot stats line
+    grep "Stats:" "$logfile" 2>/dev/null | tail -1 | grep -oP '\d+(?= chars saved)' || echo "0"
 }
 
 # ==========================================
-# Test WITH ContextPilot enabled
+# Test WITH ContextPilot
 # ==========================================
-echo "----------------------------------------"
+echo ""
 echo "Test 1: WITH ContextPilot enabled"
 echo "----------------------------------------"
+enable_contextpilot
+restart_gateway "$LOG_WITH"
+run_test_sequence
 
-# Ensure plugin is enabled
-python3 << 'PYTHON'
-import json
-config_path = "$HOME/.openclaw/openclaw.json".replace("$HOME", __import__("os").environ["HOME"])
-with open(config_path, 'r') as f:
-    config = json.load(f)
-if 'plugins' not in config:
-    config['plugins'] = {}
-if 'slots' not in config['plugins']:
-    config['plugins']['slots'] = {}
-config['plugins']['slots']['contextEngine'] = 'contextpilot'
-if 'entries' not in config['plugins']:
-    config['plugins']['entries'] = {}
-if 'contextpilot' not in config['plugins']['entries']:
-    config['plugins']['entries']['contextpilot'] = {}
-config['plugins']['entries']['contextpilot']['enabled'] = True
-with open(config_path, 'w') as f:
-    json.dump(config, f, indent=2)
-PYTHON
-
-restart_gateway
-run_multi_read_test "WITH_CONTEXTPILOT"
-
-WITH_STATS=$(extract_stats "$GATEWAY_LOG")
-WITH_CHARS=$(echo $WITH_STATS | cut -d' ' -f1)
-WITH_CACHE_READ=$(echo $WITH_STATS | cut -d' ' -f2)
-WITH_CACHE_WRITE=$(echo $WITH_STATS | cut -d' ' -f3)
-WITH_INPUT=$(echo $WITH_STATS | cut -d' ' -f4)
+WITH_USAGE=$(extract_last_usage "$LOG_WITH")
+WITH_INPUT=$(echo $WITH_USAGE | cut -d' ' -f1)
+WITH_CACHE_READ=$(echo $WITH_USAGE | cut -d' ' -f2)
+WITH_CACHE_WRITE=$(echo $WITH_USAGE | cut -d' ' -f3)
+WITH_CHARS=$(extract_chars_saved "$LOG_WITH")
 
 echo ""
-echo "  Chars saved by dedup: $WITH_CHARS"
-echo "  Cache read tokens: $WITH_CACHE_READ"
-echo "  Cache write tokens: $WITH_CACHE_WRITE"
-echo "  Input tokens: $WITH_INPUT"
+echo "  Results:"
+echo "    Input tokens:  $WITH_INPUT"
+echo "    Cache read:    $WITH_CACHE_READ"
+echo "    Cache write:   $WITH_CACHE_WRITE"
+echo "    Chars deduped: $WITH_CHARS"
 
 # ==========================================
-# Test WITHOUT ContextPilot (disabled)
+# Test WITHOUT ContextPilot
 # ==========================================
 echo ""
+echo "Test 2: WITHOUT ContextPilot"
 echo "----------------------------------------"
-echo "Test 2: WITHOUT ContextPilot (disabled)"
-echo "----------------------------------------"
+disable_contextpilot
+restart_gateway "$LOG_WITHOUT"
+run_test_sequence
 
-# Disable the plugin
-python3 << 'PYTHON'
-import json
-config_path = "$HOME/.openclaw/openclaw.json".replace("$HOME", __import__("os").environ["HOME"])
-with open(config_path, 'r') as f:
-    config = json.load(f)
-if 'plugins' in config:
-    if 'slots' in config['plugins']:
-        config['plugins']['slots'].pop('contextEngine', None)
-    if 'entries' in config['plugins'] and 'contextpilot' in config['plugins']['entries']:
-        config['plugins']['entries']['contextpilot']['enabled'] = False
-with open(config_path, 'w') as f:
-    json.dump(config, f, indent=2)
-PYTHON
-
-restart_gateway
-run_multi_read_test "WITHOUT_CONTEXTPILOT"
-
-WITHOUT_STATS=$(extract_stats "$GATEWAY_LOG")
-WITHOUT_CHARS=$(echo $WITHOUT_STATS | cut -d' ' -f1)
-WITHOUT_CACHE_READ=$(echo $WITHOUT_STATS | cut -d' ' -f2)
-WITHOUT_CACHE_WRITE=$(echo $WITHOUT_STATS | cut -d' ' -f3)
-WITHOUT_INPUT=$(echo $WITHOUT_STATS | cut -d' ' -f4)
+WITHOUT_USAGE=$(extract_last_usage "$LOG_WITHOUT")
+WITHOUT_INPUT=$(echo $WITHOUT_USAGE | cut -d' ' -f1)
+WITHOUT_CACHE_READ=$(echo $WITHOUT_USAGE | cut -d' ' -f2)
+WITHOUT_CACHE_WRITE=$(echo $WITHOUT_USAGE | cut -d' ' -f3)
 
 echo ""
-echo "  Chars saved by dedup: $WITHOUT_CHARS (expected: 0)"
-echo "  Cache read tokens: $WITHOUT_CACHE_READ"
-echo "  Cache write tokens: $WITHOUT_CACHE_WRITE"
-echo "  Input tokens: $WITHOUT_INPUT"
+echo "  Results:"
+echo "    Input tokens:  $WITHOUT_INPUT"
+echo "    Cache read:    $WITHOUT_CACHE_READ"
+echo "    Cache write:   $WITHOUT_CACHE_WRITE"
+echo "    Chars deduped: 0 (plugin disabled)"
 
 # ==========================================
-# Results Summary
+# Summary
 # ==========================================
 echo ""
 echo "=========================================="
-echo "RESULTS SUMMARY"
+echo "COMPARISON"
 echo "=========================================="
 echo ""
-echo "                        WITH      WITHOUT"
-echo "                     ContextPilot  Plugin"
-echo "----------------------------------------"
-printf "Chars deduped:       %8s    %8s\n" "$WITH_CHARS" "$WITHOUT_CHARS"
-printf "Cache read tokens:   %8s    %8s\n" "$WITH_CACHE_READ" "$WITHOUT_CACHE_READ"
-printf "Cache write tokens:  %8s    %8s\n" "$WITH_CACHE_WRITE" "$WITHOUT_CACHE_WRITE"
-printf "Input tokens:        %8s    %8s\n" "$WITH_INPUT" "$WITHOUT_INPUT"
+printf "%-20s %12s %12s\n" "" "WITH CP" "WITHOUT CP"
+printf "%-20s %12s %12s\n" "--------------------" "------------" "------------"
+printf "%-20s %12s %12s\n" "Input tokens" "$WITH_INPUT" "$WITHOUT_INPUT"
+printf "%-20s %12s %12s\n" "Cache read" "$WITH_CACHE_READ" "$WITHOUT_CACHE_READ"
+printf "%-20s %12s %12s\n" "Cache write" "$WITH_CACHE_WRITE" "$WITHOUT_CACHE_WRITE"
+printf "%-20s %12s %12s\n" "Chars deduped" "$WITH_CHARS" "0"
 echo ""
 
-if [ "$WITH_CHARS" -gt "0" ]; then
-    echo "ContextPilot deduplication saved $WITH_CHARS characters"
-    # Rough estimate: 4 chars per token
+# Calculate differences
+if [ "$WITH_INPUT" -gt 0 ] && [ "$WITHOUT_INPUT" -gt 0 ]; then
+    if [ "$WITH_INPUT" -lt "$WITHOUT_INPUT" ]; then
+        diff=$((WITHOUT_INPUT - WITH_INPUT))
+        pct=$((diff * 100 / WITHOUT_INPUT))
+        echo ">>> ContextPilot reduced input tokens by $diff ($pct% savings)"
+    elif [ "$WITH_INPUT" -gt "$WITHOUT_INPUT" ]; then
+        diff=$((WITH_INPUT - WITHOUT_INPUT))
+        pct=$((diff * 100 / WITHOUT_INPUT))
+        echo ">>> ContextPilot added $diff tokens ($pct% overhead)"
+    else
+        echo ">>> No difference in input tokens"
+    fi
+fi
+
+if [ "$WITH_CHARS" -gt 0 ]; then
     tokens_saved=$((WITH_CHARS / 4))
-    echo "Estimated token savings: ~$tokens_saved tokens"
+    echo ">>> Deduplication removed ~$tokens_saved tokens worth of repeated content"
 fi

From a388ae59c80cede82d939d3b2a53b0794c0edfb3 Mon Sep 17 00:00:00 2001
From: tsuiusi <ryantsui786@gmail.com>
Date: Tue, 7 Apr 2026 00:09:39 +0200
Subject: [PATCH 8/8] fix for tests

---
 contextpilot/server/http_server.py            | 109 ++++++++------
 openclaw-plugin/benchmark.sh                  |  27 ++--
 openclaw-plugin/openclaw.plugin.json          |   4 +-
 .../src/engine/compute-distance.ts            |  10 --
 .../src/engine/conversation-tracker.ts        |  12 +-
 openclaw-plugin/src/engine/dedup.ts           |   3 +-
 openclaw-plugin/src/engine/eviction-heap.ts   |   4 +-
 openclaw-plugin/src/engine/inter-scheduler.ts |   6 +-
 openclaw-plugin/src/engine/live-index.ts      | 138 +++++++++---------
 tests/test_http_intercept.py                  |  11 +-
 10 files changed, 169 insertions(+), 155 deletions(-)

diff --git a/contextpilot/server/http_server.py b/contextpilot/server/http_server.py
index 82ecadd..38f9b1e 100644
--- a/contextpilot/server/http_server.py
+++ b/contextpilot/server/http_server.py
@@ -129,9 +129,10 @@ class _InterceptConvState:
     last_message_count: int = 0
 
 
-# Per-session state dict keyed by session fingerprint (hash of first user msg).
+# Per-session state dict keyed by session fingerprint (system prompt + first user msg).
 # This allows concurrent multi-user sessions to each maintain their own state.
 _intercept_states: dict[str, _InterceptConvState] = {}
+_intercept_states_lock = asyncio.Lock()
 _MAX_TRACKED_SESSIONS = 64  # LRU eviction threshold
 
 # TTFT tracking for averages across a session
@@ -1181,32 +1182,47 @@ def _hash_text(text: str) -> str:
 
 
 def _session_fingerprint(body: Dict[str, Any]) -> str:
-    """Derive a session fingerprint from the first user message.
+    """Derive a session fingerprint from the system prompt + first user message.
 
-    In a multi-turn conversation, messages grow but the first user message
-    stays constant.  Hashing it gives a stable per-session key that lets
-    concurrent users each maintain their own intercept state.
+    In a multi-turn conversation, messages grow but the system prompt and
+    first user message stay constant.  Hashing both gives a stable per-session
+    key that lets concurrent users each maintain their own intercept state,
+    even if different users share the same first user message.
     """
     msgs = body.get("messages") or []
+    parts_to_hash: list[str] = []
+
+    # Include system prompt for differentiation between sessions
+    system = body.get("system")
+    if system:
+        parts_to_hash.append(str(system)[:500])
+
     # Find the first user message (usually msg[0] or msg[1] after system)
-    for msg in msgs[:3]:
-        if isinstance(msg, dict) and msg.get("role") == "user":
+    for msg in msgs[:5]:
+        if isinstance(msg, dict) and msg.get("role") == "system":
+            parts_to_hash.append(str(msg.get("content", ""))[:500])
+        elif isinstance(msg, dict) and msg.get("role") == "user":
             content = msg.get("content", "")
             if isinstance(content, list):
                 # OpenAI format: [{type: text, text: "..."}]
-                parts = [p.get("text", "") for p in content
-                         if isinstance(p, dict)]
-                content = "".join(parts)
-            return _hash_text(str(content))
-    # Fallback: hash all messages (shouldn't happen in practice)
-    return _hash_text(json.dumps(msgs[:2], sort_keys=True))
+                text_parts = [p.get("text", "") for p in content
+                              if isinstance(p, dict)]
+                content = "".join(text_parts)
+            parts_to_hash.append(str(content))
+            break
+
+    if not parts_to_hash:
+        # Fallback: hash first two messages
+        return _hash_text(json.dumps(msgs[:2], sort_keys=True))
+
+    return _hash_text("\x00".join(parts_to_hash))
 
 
-def _get_intercept_state(body: Dict[str, Any]) -> _InterceptConvState:
+async def _get_intercept_state(body: Dict[str, Any]) -> _InterceptConvState:
     """Return per-session intercept state, creating or resetting as needed.
 
-    Uses the first user message as a session fingerprint so concurrent
-    multi-user sessions each get their own state.
+    Uses the system prompt + first user message as a session fingerprint so
+    concurrent multi-user sessions each get their own state.
 
     Detection: in a multi-turn agent conversation the messages array only
     grows.  If the count drops, either a new session started or the host
@@ -1218,31 +1234,32 @@ def _get_intercept_state(body: Dict[str, Any]) -> _InterceptConvState:
     session_key = _session_fingerprint(body)
     msg_count = len(body.get("messages") or [])
 
-    state = _intercept_states.get(session_key)
+    async with _intercept_states_lock:
+        state = _intercept_states.get(session_key)
 
-    if state is None:
-        # New session
-        state = _InterceptConvState()
-        state.system_processed = True
-        logger.info(
-            f"Intercept: new session {session_key[:8]}… "
-            f"({msg_count} msgs, {len(_intercept_states)} active sessions)"
-        )
-        # Evict oldest sessions if over limit
-        if len(_intercept_states) >= _MAX_TRACKED_SESSIONS:
-            oldest_key = next(iter(_intercept_states))
-            del _intercept_states[oldest_key]
-            logger.info(f"Intercept: evicted session {oldest_key[:8]}…")
-        _intercept_states[session_key] = state
-    elif msg_count < state.last_message_count:
-        logger.info(
-            f"Intercept: session {session_key[:8]}… message count dropped "
-            f"({msg_count} < {state.last_message_count}), "
-            f"resetting state (compaction or restart)"
-        )
-        state = _InterceptConvState()
-        state.system_processed = True
-        _intercept_states[session_key] = state
+        if state is None:
+            # New session
+            state = _InterceptConvState()
+            state.system_processed = True
+            logger.info(
+                f"Intercept: new session {session_key[:8]}… "
+                f"({msg_count} msgs, {len(_intercept_states)} active sessions)"
+            )
+            # Evict oldest sessions if over limit
+            if len(_intercept_states) >= _MAX_TRACKED_SESSIONS:
+                oldest_key = next(iter(_intercept_states))
+                del _intercept_states[oldest_key]
+                logger.info(f"Intercept: evicted session {oldest_key[:8]}…")
+            _intercept_states[session_key] = state
+        elif msg_count < state.last_message_count:
+            logger.info(
+                f"Intercept: session {session_key[:8]}… message count dropped "
+                f"({msg_count} < {state.last_message_count}), "
+                f"resetting state (compaction or restart)"
+            )
+            state = _InterceptConvState()
+            state.system_processed = True
+            _intercept_states[session_key] = state
 
     state.last_message_count = msg_count
     return state
@@ -1303,8 +1320,6 @@ def _strip_external_content_ids(body: Any) -> Any:
     "proxy-authenticate", "content-length",
 ))
 
-# Previous message hashes for prefix divergence detection.
-_debug_prev_msg_hashes: List[str] = []
 
 
 def _doc_preview(doc: str, max_len: int = 60) -> str:
@@ -1494,8 +1509,6 @@ async def _intercept_and_forward(request: Request, api_format: str):
         f"Intercept: session={_session_tag} {_debug_msg_count} msgs"
     )
 
-    _debug_prev_msg_hashes = list(_debug_msg_hashes)
-
     # ── Format handler (strategy pattern) ────────────────────────────
     handler = get_format_handler(api_format)
 
@@ -1504,7 +1517,7 @@ async def _intercept_and_forward(request: Request, api_format: str):
             body = copy.deepcopy(body)
 
             # ── Conversation-aware state (single-conversation model) ──
-            state = _get_intercept_state(body)
+            state = await _get_intercept_state(body)
 
             # ── Replace old messages with cached (modified) versions ──
             # On subsequent turns, the host sends original (unmodified)
@@ -1952,9 +1965,9 @@ async def proxy_engine(path: str, request: Request):
                     body["rid"] = request_id
                     body["request_id"] = request_id
 
-            body["temperature"] = 0
+            body.setdefault("temperature", 0)
             if _cloud_mode:
-                body["top_p"] = 0
+                body.setdefault("top_p", 0)
 
             dedup_result = DedupResult()
             try:
@@ -2167,7 +2180,7 @@ def main():
         os.environ["CONTEXTPILOT_CLOUD_API_KEY"] = args.cloud_api_key
 
     # Also set global config for direct access
-    global _max_tokens, _infer_api_url, _tokenizer, _model_name, _stateless_mode
+    global _max_tokens, _infer_api_url, _tokenizer, _model_name, _stateless_mode, _chunk_modulus
     _max_tokens = args.max_tokens
     _infer_api_url = args.infer_api_url.rstrip("/")
     _stateless_mode = args.stateless
diff --git a/openclaw-plugin/benchmark.sh b/openclaw-plugin/benchmark.sh
index 02dd583..a703866 100755
--- a/openclaw-plugin/benchmark.sh
+++ b/openclaw-plugin/benchmark.sh
@@ -11,7 +11,8 @@ BACKUP_CONFIG="$HOME/.openclaw/openclaw.json.bak"
 LOG_WITH="/tmp/gw-with-cp.log"
 LOG_WITHOUT="/tmp/gw-without-cp.log"
 
-TEST_FILE="/home/ryan/ContextPilot/openclaw-plugin/src/engine/dedup.ts"
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+TEST_FILE="${SCRIPT_DIR}/src/engine/dedup.ts"
 
 echo "=========================================="
 echo "ContextPilot Token Usage Benchmark"
@@ -25,7 +26,7 @@ cleanup() {
     echo "Restoring config..."
     cp "$BACKUP_CONFIG" "$OPENCLAW_CONFIG"
     rm -f "$BACKUP_CONFIG"
-    openclaw gateway stop 2>/dev/null || pkill -9 -f "openclaw" 2>/dev/null || true
+    openclaw gateway stop 2>/dev/null || pkill -f "openclaw gateway" 2>/dev/null || true
 }
 trap cleanup EXIT
 
@@ -57,7 +58,7 @@ restart_gateway() {
     local logfile=$1
     echo "  Stopping gateway..."
     openclaw gateway stop 2>/dev/null || true
-    pkill -9 -f "openclaw" 2>/dev/null || true
+    pkill -f "openclaw gateway" 2>/dev/null || true
     sleep 3
     echo "  Starting gateway..."
     openclaw gateway > "$logfile" 2>&1 &
@@ -81,16 +82,16 @@ run_test_sequence() {
 extract_last_usage() {
     local logfile=$1
     # Find the last complete usage block and extract values
-    local input=$(grep '"input":' "$logfile" 2>/dev/null | tail -1 | grep -oP '\d+' || echo "0")
-    local cache_read=$(grep '"cacheRead":' "$logfile" 2>/dev/null | tail -1 | grep -oP '\d+' || echo "0")
-    local cache_write=$(grep '"cacheWrite":' "$logfile" 2>/dev/null | tail -1 | grep -oP '\d+' || echo "0")
+    local input=$(grep '"input":' "$logfile" 2>/dev/null | tail -1 | sed 's/[^0-9]//g' || echo "0")
+    local cache_read=$(grep '"cacheRead":' "$logfile" 2>/dev/null | tail -1 | sed 's/[^0-9]//g' || echo "0")
+    local cache_write=$(grep '"cacheWrite":' "$logfile" 2>/dev/null | tail -1 | sed 's/[^0-9]//g' || echo "0")
     echo "$input $cache_read $cache_write"
 }
 
 extract_chars_saved() {
     local logfile=$1
     # Look for ContextPilot stats line
-    grep "Stats:" "$logfile" 2>/dev/null | tail -1 | grep -oP '\d+(?= chars saved)' || echo "0"
+    grep "Stats:" "$logfile" 2>/dev/null | tail -1 | sed -n 's/.*\([0-9][0-9,]*\) chars saved.*/\1/p' | tr -d ',' || echo "0"
 }
 
 # ==========================================
@@ -104,9 +105,9 @@ restart_gateway "$LOG_WITH"
 run_test_sequence
 
 WITH_USAGE=$(extract_last_usage "$LOG_WITH")
-WITH_INPUT=$(echo $WITH_USAGE | cut -d' ' -f1)
-WITH_CACHE_READ=$(echo $WITH_USAGE | cut -d' ' -f2)
-WITH_CACHE_WRITE=$(echo $WITH_USAGE | cut -d' ' -f3)
+WITH_INPUT=$(echo "$WITH_USAGE" | cut -d' ' -f1)
+WITH_CACHE_READ=$(echo "$WITH_USAGE" | cut -d' ' -f2)
+WITH_CACHE_WRITE=$(echo "$WITH_USAGE" | cut -d' ' -f3)
 WITH_CHARS=$(extract_chars_saved "$LOG_WITH")
 
 echo ""
@@ -127,9 +128,9 @@ restart_gateway "$LOG_WITHOUT"
 run_test_sequence
 
 WITHOUT_USAGE=$(extract_last_usage "$LOG_WITHOUT")
-WITHOUT_INPUT=$(echo $WITHOUT_USAGE | cut -d' ' -f1)
-WITHOUT_CACHE_READ=$(echo $WITHOUT_USAGE | cut -d' ' -f2)
-WITHOUT_CACHE_WRITE=$(echo $WITHOUT_USAGE | cut -d' ' -f3)
+WITHOUT_INPUT=$(echo "$WITHOUT_USAGE" | cut -d' ' -f1)
+WITHOUT_CACHE_READ=$(echo "$WITHOUT_USAGE" | cut -d' ' -f2)
+WITHOUT_CACHE_WRITE=$(echo "$WITHOUT_USAGE" | cut -d' ' -f3)
 
 echo ""
 echo "  Results:"
diff --git a/openclaw-plugin/openclaw.plugin.json b/openclaw-plugin/openclaw.plugin.json
index f9faee6..a8c336e 100644
--- a/openclaw-plugin/openclaw.plugin.json
+++ b/openclaw-plugin/openclaw.plugin.json
@@ -2,14 +2,14 @@
   "id": "contextpilot",
   "name": "ContextPilot",
   "description": "Faster long-context inference via context reuse — reorders, deduplicates, and injects cache control for maximum prefix cache sharing.",
-  "version": "0.3.0",
+  "version": "0.2.0",
   "configSchema": {
     "type": "object",
     "additionalProperties": false,
     "properties": {
       "scope": {
         "type": "string",
-        "enum": ["all", "system", "tool_results"],
+        "enum": ["all", "tool_results"],
         "description": "Which messages ContextPilot optimizes",
         "default": "all"
       }
diff --git a/openclaw-plugin/src/engine/compute-distance.ts b/openclaw-plugin/src/engine/compute-distance.ts
index 8aad2d5..5ae024f 100644
--- a/openclaw-plugin/src/engine/compute-distance.ts
+++ b/openclaw-plugin/src/engine/compute-distance.ts
@@ -60,21 +60,11 @@ export function computeDistancesBatch(
         return Array.from({ length: nQueries }, () => new Array<number>(nTargets).fill(0));
     }
 
-    const totalPairs = nQueries * nTargets;
     const distances: number[][] = Array.from(
         { length: nQueries },
         () => new Array<number>(nTargets).fill(1.0)
     );
 
-    if (totalPairs < 1000) {
-        for (let i = 0; i < nQueries; i += 1) {
-            for (let j = 0; j < nTargets; j += 1) {
-                distances[i][j] = computeDistanceSingle(queries[i], targets[j], alpha);
-            }
-        }
-        return distances;
-    }
-
     for (let i = 0; i < nQueries; i += 1) {
         for (let j = 0; j < nTargets; j += 1) {
             distances[i][j] = computeDistanceSingle(queries[i], targets[j], alpha);
diff --git a/openclaw-plugin/src/engine/conversation-tracker.ts b/openclaw-plugin/src/engine/conversation-tracker.ts
index 845ee68..5e56f39 100644
--- a/openclaw-plugin/src/engine/conversation-tracker.ts
+++ b/openclaw-plugin/src/engine/conversation-tracker.ts
@@ -26,16 +26,18 @@ export interface ConversationTrackerStats {
 export class ConversationTracker {
     private _requests: Map<string, RequestHistory>;
     private _hintTemplate: string;
+    private _maxTrackedRequests: number;
     private _stats: {
         totalRequests: number;
         totalDedupCalls: number;
         totalDocsDeduplicated: number;
     };
 
-    constructor(hintTemplate?: string) {
+    constructor(hintTemplate?: string, maxTrackedRequests: number = 256) {
         this._requests = new Map<string, RequestHistory>();
         this._hintTemplate =
             hintTemplate ?? "Please refer to [Doc {doc_id}] from the previous conversation turn.";
+        this._maxTrackedRequests = maxTrackedRequests;
         this._stats = {
             totalRequests: 0,
             totalDedupCalls: 0,
@@ -63,6 +65,14 @@ export class ConversationTracker {
         this._requests.set(requestId, history);
         this._stats.totalRequests += 1;
 
+        // LRU eviction: remove oldest entries when over limit
+        if (this._requests.size > this._maxTrackedRequests) {
+            const oldest = this._requests.keys().next().value;
+            if (oldest !== undefined) {
+                this._requests.delete(oldest);
+            }
+        }
+
         return history;
     }
 
diff --git a/openclaw-plugin/src/engine/dedup.ts b/openclaw-plugin/src/engine/dedup.ts
index eb3acb0..bb173f1 100644
--- a/openclaw-plugin/src/engine/dedup.ts
+++ b/openclaw-plugin/src/engine/dedup.ts
@@ -75,7 +75,8 @@ function emptyDedupResult(): DedupResult {
 export function hashString(str: string): number {
     let h = 5381;
     for (let i = 0; i < str.length; i++) {
-        h = ((h << 5) + h + str.charCodeAt(i)) & 0xFFFFFFFF;
+        // Use Math.imul for safe 32-bit multiplication to avoid float overflow
+        h = (Math.imul(h, 33) + str.charCodeAt(i)) | 0;
     }
     return h >>> 0;
 }
diff --git a/openclaw-plugin/src/engine/eviction-heap.ts b/openclaw-plugin/src/engine/eviction-heap.ts
index 69de6ff..2c61c43 100644
--- a/openclaw-plugin/src/engine/eviction-heap.ts
+++ b/openclaw-plugin/src/engine/eviction-heap.ts
@@ -215,9 +215,7 @@ export class EvictionHeap {
             this._metadata.delete(nodeId);
         }
 
-        if (this._inHeap.has(nodeId)) {
-            this._inHeap.set(nodeId, false);
-        }
+        this._inHeap.delete(nodeId);
     }
 
     getNodeByRequestId(requestId: string): NodeMetadata | null {
diff --git a/openclaw-plugin/src/engine/inter-scheduler.ts b/openclaw-plugin/src/engine/inter-scheduler.ts
index 702eebc..88ad6e3 100644
--- a/openclaw-plugin/src/engine/inter-scheduler.ts
+++ b/openclaw-plugin/src/engine/inter-scheduler.ts
@@ -15,7 +15,7 @@ export class InterContextScheduler {
         const searchPaths = clusteringResult.searchPaths;
 
         const groupsByRoot = this._groupByRootPrefix(searchPaths);
-        const sortedGroups = this._sortGroupsByPathLength(groupsByRoot, searchPaths, reorderedContexts);
+        const sortedGroups = this._sortGroupsByPathLength(groupsByRoot, searchPaths);
 
         const allGroupsWithInfo: Array<[number, number[]]> = [];
         for (const groupIndices of sortedGroups) {
@@ -61,10 +61,8 @@ export class InterContextScheduler {
 
     _sortGroupsByPathLength(
         groupsByRoot: Map<number, number[]>,
-        searchPaths: number[][],
-        contexts: number[][]
+        searchPaths: number[][]
     ): number[][] {
-        void contexts;
         const sortedGroups: number[][] = [];
 
         for (const groupIndices of groupsByRoot.values()) {
diff --git a/openclaw-plugin/src/engine/live-index.ts b/openclaw-plugin/src/engine/live-index.ts
index 777b1e8..29ad83a 100644
--- a/openclaw-plugin/src/engine/live-index.ts
+++ b/openclaw-plugin/src/engine/live-index.ts
@@ -6,7 +6,7 @@ import { IntraContextOrderer } from './intra-ordering.js';
 import { computeDistanceSingle, computeDistancesBatch } from './compute-distance.js';
 import { ConversationTracker, type DeduplicationResult } from './conversation-tracker.js';
 import { EvictionHeap } from './eviction-heap.js';
-import crypto from 'crypto';
+import * as crypto from 'node:crypto';
 
 export function computePrefixLength(list1: number[], list2: number[]): number {
     let length = 0;
@@ -50,8 +50,8 @@ export class ContextPilot extends ContextIndex {
     
     static readonly _DEFAULT_CONVERSATION = "_default";
 
-    constructor(alpha: number = 0.001, useGpu: boolean = false, linkageMethod: string = "average", batchSize: number = 10000) {
-        super(alpha, useGpu, linkageMethod, batchSize);
+    constructor(alpha: number = 0.001, useGpu: boolean = false, linkageMethod: "single" | "complete" | "average" = "average", batchSize: number = 10000) {
+        super({ alpha, useGpu, linkageMethod, batchSize });
     }
 
     getAllRequestIds(): Set<string> {
@@ -448,14 +448,15 @@ export class ContextPilot extends ContextIndex {
 
         const newNodeId = this.nextNodeId++;
         const content = sourceNode.docIds ? [...sourceNode.docIds] : (sourceNode.content ? [...sourceNode.content] : []);
-        const originalIndices = sourceNode.originalIndices ? new Set(sourceNode.originalIndices) : new Set<number>();
-        
+        const originalIndices: Set<number> = sourceNode.originalIndices ? new Set<number>(sourceNode.originalIndices) : new Set<number>();
+
         const newNode = new ClusterNode(
             newNodeId,
-            content,
+            new Set<number>(content),
+            originalIndices,
+            0.0,
             [],
-            parentId,
-            originalIndices
+            parentId
         );
         
         if (sourceNode.docIds) {
@@ -475,15 +476,14 @@ export class ContextPilot extends ContextIndex {
 
         const parentTokens = this.metadata.has(parentId) ? this.metadata.get(parentId)!.totalTokens : 0;
         
-        const metadata = new NodeMetadata(
-            newNodeId,
-            isLeaf ? initialTokens : 0,
-            isLeaf ? Math.max(0, initialTokens - parentTokens) : 0,
+        const metadata = new NodeMetadata(newNodeId, {
+            totalTokens: isLeaf ? initialTokens : 0,
+            extraTokens: isLeaf ? Math.max(0, initialTokens - parentTokens) : 0,
             searchPath,
-            sourceNode.docIds ? [...sourceNode.docIds] : null,
+            docIds: sourceNode.docIds ? [...sourceNode.docIds] : null,
             isLeaf,
-            requestId
-        );
+            requestId,
+        });
         
         this.metadata.set(newNodeId, metadata);
 
@@ -629,15 +629,14 @@ export class ContextPilot extends ContextIndex {
                 leafDocIds = node.docIds || node.doc_ids;
             }
 
-            const metadata = new NodeMetadata(
-                nodeId,
+            const metadata = new NodeMetadata(nodeId, {
                 totalTokens,
                 extraTokens,
                 searchPath,
-                leafDocIds,
+                docIds: leafDocIds,
                 isLeaf,
-                requestId
-            );
+                requestId,
+            });
 
             this.metadata.set(nodeId, metadata);
 
@@ -767,7 +766,8 @@ export class ContextPilot extends ContextIndex {
             if (!currentNode || currentNode.isLeaf || !currentNode.children || currentNode.children.length === 0) {
                 const docs = this._getNodeDocs(currentId);
                 if (docs && currentId !== this.rootId) {
-                    const overlap = Array.from(contextSet).filter(x => new Set(docs).has(x)).length;
+                    const docsSet = new Set(docs);
+                    const overlap = Array.from(contextSet).filter(x => docsSet.has(x)).length;
                     const hasPrefix = overlap > 0 ? contextSet.has(docs[0]) : false;
                     return [currentPath, currentId, overlap, hasPrefix];
                 }
@@ -798,7 +798,8 @@ export class ContextPilot extends ContextIndex {
 
             for (let j = 0; j < childIds.length; j++) {
                 const docs = childDocsList[j];
-                const overlap = Array.from(contextSet).filter(x => new Set(docs).has(x)).length;
+                const docsSet = new Set(docs);
+                const overlap = Array.from(contextSet).filter(x => docsSet.has(x)).length;
                 if (overlap === 0) continue;
                 
                 const dist = Array.isArray(distances[0]) ? distances[0][j] : distances[j];
@@ -814,7 +815,8 @@ export class ContextPilot extends ContextIndex {
                 if (currentId !== this.rootId) {
                     const docs = this._getNodeDocs(currentId);
                     if (docs) {
-                        const overlap = Array.from(contextSet).filter(x => new Set(docs).has(x)).length;
+                        const docsSet2 = new Set(docs);
+                        const overlap = Array.from(contextSet).filter(x => docsSet2.has(x)).length;
                         return [currentPath, currentId, overlap, true];
                     }
                 }
@@ -930,10 +932,11 @@ export class ContextPilot extends ContextIndex {
         const newNodeId = this.nextNodeId++;
         const newNode = new ClusterNode(
             newNodeId,
-            context,
+            new Set(context),
+            new Set([newNodeId]),
+            0.0,
             [],
-            parentNode.nodeId,
-            new Set([newNodeId])
+            parentNode.nodeId
         );
 
         this.nodes.set(newNodeId, newNode);
@@ -942,15 +945,14 @@ export class ContextPilot extends ContextIndex {
         const parentTokens = this.metadata.has(parentNode.nodeId) ? this.metadata.get(parentNode.nodeId)!.totalTokens : 0;
         const newSearchPath = [...searchPath, parentNode.children.length - 1];
 
-        const metadata = new NodeMetadata(
-            newNodeId,
+        const metadata = new NodeMetadata(newNodeId, {
             totalTokens,
-            Math.max(0, totalTokens - parentTokens),
-            newSearchPath,
-            context,
-            true,
-            requestId
-        );
+            extraTokens: Math.max(0, totalTokens - parentTokens),
+            searchPath: newSearchPath,
+            docIds: context,
+            isLeaf: true,
+            requestId,
+        });
 
         this.metadata.set(newNodeId, metadata);
         this._requestToNode.set(requestId, newNodeId);
@@ -975,10 +977,11 @@ export class ContextPilot extends ContextIndex {
         const newLeafId = this.nextNodeId++;
         const newLeaf = new ClusterNode(
             newLeafId,
-            context,
+            new Set(context),
+            new Set([newLeafId]),
+            0.0,
             [],
-            parentNode.nodeId,
-            new Set([newLeafId])
+            parentNode.nodeId
         );
 
         this.nodes.set(newLeafId, newLeaf);
@@ -987,15 +990,14 @@ export class ContextPilot extends ContextIndex {
         const newSearchPath = [...parentSearchPath, parentNode.children.length - 1];
         const parentTokens = this.metadata.has(parentNode.nodeId) ? this.metadata.get(parentNode.nodeId)!.totalTokens : 0;
 
-        const newMetadata = new NodeMetadata(
-            newLeafId,
+        const newMetadata = new NodeMetadata(newLeafId, {
             totalTokens,
-            Math.max(0, totalTokens - parentTokens),
-            newSearchPath,
-            context,
-            true,
-            requestId
-        );
+            extraTokens: Math.max(0, totalTokens - parentTokens),
+            searchPath: newSearchPath,
+            docIds: context,
+            isLeaf: true,
+            requestId,
+        });
 
         this.metadata.set(newLeafId, newMetadata);
         this._requestToNode.set(requestId, newLeafId);
@@ -1042,10 +1044,11 @@ export class ContextPilot extends ContextIndex {
         
         const newInternal = new ClusterNode(
             newInternalId,
-            Array.from(allContent),
+            allContent,
+            new Set(),
+            0.0,
             [leafNode.nodeId],
-            parentId,
-            new Set()
+            parentId
         );
         newInternal.docIds = [...sharedPrefix];
 
@@ -1066,15 +1069,14 @@ export class ContextPilot extends ContextIndex {
 
         const internalPath = [...parentSearchPath, leafChildIdx];
 
-        const internalMeta = new NodeMetadata(
-            newInternalId,
-            internalTokens,
-            Math.max(0, internalTokens - parentTokens),
-            internalPath,
-            [...sharedPrefix],
-            false,
-            null
-        );
+        const internalMeta = new NodeMetadata(newInternalId, {
+            totalTokens: internalTokens,
+            extraTokens: Math.max(0, internalTokens - parentTokens),
+            searchPath: internalPath,
+            docIds: [...sharedPrefix],
+            isLeaf: false,
+            requestId: null,
+        });
         this.metadata.set(newInternalId, internalMeta);
 
         if (leafMeta) {
@@ -1087,10 +1089,11 @@ export class ContextPilot extends ContextIndex {
 
         const newLeaf = new ClusterNode(
             newLeafId,
-            context,
+            new Set(context),
+            new Set([newLeafId]),
+            0.0,
             [],
-            newInternalId,
-            new Set([newLeafId])
+            newInternalId
         );
         newLeaf.docIds = [...context];
 
@@ -1099,15 +1102,14 @@ export class ContextPilot extends ContextIndex {
 
         const newLeafPath = [...internalPath, 1];
 
-        const newLeafMeta = new NodeMetadata(
-            newLeafId,
+        const newLeafMeta = new NodeMetadata(newLeafId, {
             totalTokens,
-            Math.max(0, totalTokens - internalTokens),
-            newLeafPath,
-            [...context],
-            true,
-            requestId
-        );
+            extraTokens: Math.max(0, totalTokens - internalTokens),
+            searchPath: newLeafPath,
+            docIds: [...context],
+            isLeaf: true,
+            requestId,
+        });
 
         this.metadata.set(newLeafId, newLeafMeta);
         this._requestToNode.set(requestId, newLeafId);
diff --git a/tests/test_http_intercept.py b/tests/test_http_intercept.py
index d746595..1b4be7b 100644
--- a/tests/test_http_intercept.py
+++ b/tests/test_http_intercept.py
@@ -118,18 +118,19 @@ def client(mock_session):
     original_session = http_mod._aiohttp_session
     original_url = http_mod._infer_api_url
     original_intercept_index = http_mod._intercept_index
-    original_state = http_mod._intercept_state
+    original_states = http_mod._intercept_states.copy()
     http_mod._aiohttp_session = mock_session
     http_mod._infer_api_url = "http://mock-backend:30000"
     http_mod._intercept_index = None  # reset so each test starts fresh
-    http_mod._intercept_state = http_mod._InterceptConvState()
+    http_mod._intercept_states.clear()
     try:
         yield TestClient(app, raise_server_exceptions=False)
     finally:
         http_mod._aiohttp_session = original_session
         http_mod._infer_api_url = original_url
         http_mod._intercept_index = original_intercept_index
-        http_mod._intercept_state = original_state
+        http_mod._intercept_states.clear()
+        http_mod._intercept_states.update(original_states)
 
 
 # ============================================================================
@@ -146,7 +147,7 @@ def _warmup(client, path, body):
     resp = client.post(path, json=body)
     assert resp.status_code == 200
     # Keep _intercept_index primed, but reset conversation tracking.
-    http_mod._intercept_state = http_mod._InterceptConvState()
+    http_mod._intercept_states.clear()
     return resp
 
 
@@ -1005,7 +1006,7 @@ def _make_body(marker_id):
         content1 = mock_session._last_json["messages"][3]["content"]
 
         # Reset intercept state for clean comparison
-        http_mod._intercept_state = http_mod._InterceptConvState()
+        http_mod._intercept_states.clear()
 
         # Request 2 with different id "bbbb"
         resp2 = client.post("/v1/chat/completions", json=_make_body("cccc2222dddd3333"))