From 6cfd909b34712a6a1e977e8e7858f26356e96d9e Mon Sep 17 00:00:00 2001 From: SecretSettler Date: Wed, 25 Mar 2026 01:19:45 +0000 Subject: [PATCH 1/8] =?UTF-8?q?feat:=20native=20TypeScript=20OpenClaw=20pl?= =?UTF-8?q?ugin=20=E2=80=94=20zero=20external=20dependencies?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Complete rewrite of the OpenClaw plugin to run entirely in-process without requiring the Python proxy server. Users just install and go: openclaw plugins install @contextpilot/openclaw-plugin Engine modules ported from Python to TypeScript: - engine/extract.ts: Document extraction from system/tool_results (XML tags, numbered lists, JSON results, markdown headers, separators) for both OpenAI Chat and Anthropic Messages API formats (1042→969 lines) - engine/dedup.ts: Cross-turn block-level deduplication using content-defined chunking + SHA-256 hashing (250→355 lines) - engine/cache-control.ts: Anthropic cache_control injection for system messages and tool_result content blocks (144 lines) - engine/reorder.ts: Simplified LCP-based document reordering that maximizes prefix cache sharing across turns — no numpy/scipy needed Plugin integration: - Uses OpenClaw's wrapStreamFn to intercept requests before they reach the LLM backend, apply all optimizations, then forward - Registers contextpilot provider with dynamic model resolution - contextpilot_status tool reports engine state and savings --- openclaw-plugin/README.md | 105 +++ openclaw-plugin/openclaw.plugin.json | 42 + openclaw-plugin/package.json | 31 + openclaw-plugin/src/engine/cache-control.ts | 144 +++ openclaw-plugin/src/engine/dedup.ts | 355 +++++++ openclaw-plugin/src/engine/extract.ts | 969 ++++++++++++++++++++ openclaw-plugin/src/engine/reorder.ts | 109 +++ openclaw-plugin/src/index.ts | 175 ++++ openclaw-plugin/tsconfig.json | 15 + 9 files changed, 1945 insertions(+) create mode 100644 openclaw-plugin/README.md create mode 100644 openclaw-plugin/openclaw.plugin.json create mode 100644 openclaw-plugin/package.json create mode 100644 openclaw-plugin/src/engine/cache-control.ts create mode 100644 openclaw-plugin/src/engine/dedup.ts create mode 100644 openclaw-plugin/src/engine/extract.ts create mode 100644 openclaw-plugin/src/engine/reorder.ts create mode 100644 openclaw-plugin/src/index.ts create mode 100644 openclaw-plugin/tsconfig.json diff --git a/openclaw-plugin/README.md b/openclaw-plugin/README.md new file mode 100644 index 0000000..851533c --- /dev/null +++ b/openclaw-plugin/README.md @@ -0,0 +1,105 @@ +# @contextpilot/openclaw-plugin + +OpenClaw native plugin for [ContextPilot](https://github.com/EfficientContext/ContextPilot) — faster long-context inference via in-process context reuse. **Zero external dependencies** — no Python, no proxy server, just install and go. + +## What It Does + +ContextPilot optimizes every LLM request by: + +1. **Extracting** documents from system prompts and tool results +2. **Reordering** documents for maximum prefix cache sharing across turns +3. **Deduplicating** repeated content blocks with compact reference hints +4. **Injecting** provider-specific cache control markers (Anthropic `cache_control`) + +All processing happens in-process inside the OpenClaw plugin — no external services needed. + +## Installation + +```bash +openclaw plugins install @contextpilot/openclaw-plugin +``` + +## Configuration + +In `~/.openclaw/openclaw.json`: + +```json5 +{ + plugins: { + entries: { + "contextpilot": { + enabled: true, + config: { + // "anthropic" (default) or "openai" + "backendProvider": "anthropic", + + // What to optimize: "all" (default), "system", or "tool_results" + "scope": "all" + } + } + } + } +} +``` + +Set your API key: + +```bash +export ANTHROPIC_API_KEY="sk-ant-xxx" +# or +export OPENAI_API_KEY="sk-xxx" +``` + +Then select a ContextPilot model (e.g., `contextpilot/claude-sonnet-4-6`) and start using OpenClaw. + +## Available Models + +### Anthropic backend (default) + +| Model ID | Name | +|----------|------| +| `contextpilot/claude-opus-4-6` | Claude Opus 4.6 (ContextPilot) | +| `contextpilot/claude-sonnet-4-6` | Claude Sonnet 4.6 (ContextPilot) | + +### OpenAI backend + +| Model ID | Name | +|----------|------| +| `contextpilot/gpt-4o` | GPT-4o (ContextPilot) | +| `contextpilot/gpt-4o-mini` | GPT-4o Mini (ContextPilot) | + +Any model ID works via dynamic resolution — use `contextpilot/`. + +## How It Works + +``` +OpenClaw request + ↓ +ContextPilot Plugin (wrapStreamFn) + ├─ Extract documents from system/tool_results + ├─ Reorder for prefix cache sharing + ├─ Deduplicate repeated blocks + ├─ Inject cache_control markers + ↓ +Optimized request → LLM Backend (Anthropic/OpenAI) +``` + +The plugin registers as an OpenClaw provider and uses `wrapStreamFn` to intercept requests before they reach the backend. All optimization is done in-process in TypeScript. + +## Agent Tool + +| Tool | Description | +|------|-------------| +| `contextpilot_status` | Check engine status, request count, and chars saved | + +## Scope Control + +| Scope | System Prompt | Tool Results | +|:---:|:---:|:---:| +| `all` (default) | Optimized | Optimized | +| `system` | Optimized | Untouched | +| `tool_results` | Untouched | Optimized | + +## License + +Apache-2.0 diff --git a/openclaw-plugin/openclaw.plugin.json b/openclaw-plugin/openclaw.plugin.json new file mode 100644 index 0000000..c1b5f9a --- /dev/null +++ b/openclaw-plugin/openclaw.plugin.json @@ -0,0 +1,42 @@ +{ + "id": "contextpilot", + "name": "ContextPilot", + "description": "Faster long-context inference via in-process context reuse — reorders, deduplicates, and injects cache control for maximum prefix cache sharing. No external dependencies.", + "version": "0.2.0", + "providers": ["contextpilot"], + "providerAuthEnvVars": { + "contextpilot": ["ANTHROPIC_API_KEY", "OPENAI_API_KEY"] + }, + "providerAuthChoices": [ + { + "provider": "contextpilot", + "method": "api-key", + "choiceId": "contextpilot-api-key", + "choiceLabel": "Backend API key (Anthropic or OpenAI)", + "groupId": "contextpilot", + "groupLabel": "ContextPilot", + "cliFlag": "--anthropic-api-key", + "cliOption": "--anthropic-api-key ", + "cliDescription": "API key for the backend LLM provider", + "onboardingScopes": ["text-inference"] + } + ], + "configSchema": { + "type": "object", + "additionalProperties": false, + "properties": { + "backendProvider": { + "type": "string", + "enum": ["anthropic", "openai"], + "description": "Backend LLM provider type", + "default": "anthropic" + }, + "scope": { + "type": "string", + "enum": ["all", "system", "tool_results"], + "description": "Which messages ContextPilot optimizes", + "default": "all" + } + } + } +} diff --git a/openclaw-plugin/package.json b/openclaw-plugin/package.json new file mode 100644 index 0000000..58defc7 --- /dev/null +++ b/openclaw-plugin/package.json @@ -0,0 +1,31 @@ +{ + "name": "@contextpilot/openclaw-plugin", + "version": "0.2.0", + "description": "ContextPilot plugin for OpenClaw — faster long-context inference via in-process context reuse. Zero external dependencies.", + "type": "module", + "license": "Apache-2.0", + "author": "ContextPilot Contributors", + "repository": { + "type": "git", + "url": "https://github.com/EfficientContext/ContextPilot.git", + "directory": "openclaw-plugin" + }, + "keywords": [ + "openclaw", + "openclaw-plugin", + "contextpilot", + "kv-cache", + "context-reuse", + "prompt-cache", + "dedup", + "llm" + ], + "openclaw": { + "extensions": ["./src/index.ts"] + }, + "files": [ + "src/", + "openclaw.plugin.json", + "README.md" + ] +} diff --git a/openclaw-plugin/src/engine/cache-control.ts b/openclaw-plugin/src/engine/cache-control.ts new file mode 100644 index 0000000..53d48e7 --- /dev/null +++ b/openclaw-plugin/src/engine/cache-control.ts @@ -0,0 +1,144 @@ +export const MIN_CONTENT_LENGTH_FOR_CACHE = 1024; +export const CACHE_CONTROL_EPHEMERAL = { type: 'ephemeral' } as const; + +type CacheControl = typeof CACHE_CONTROL_EPHEMERAL; + +interface TextBlock extends Record { + type?: unknown; + text?: unknown; + cache_control?: CacheControl; +} + +interface ToolResultBlock extends Record { + type?: unknown; + content?: unknown; + cache_control?: CacheControl; +} + +interface MessageBlock extends Record { + role?: unknown; + content?: unknown; +} + +function isRecord(value: unknown): value is Record { + return typeof value === 'object' && value !== null; +} + +function injectSystemCacheControl( + body: Record, + cc: CacheControl +): Record { + const system = body.system; + if (system === undefined || system === null) { + return body; + } + + if (typeof system === 'string') { + body.system = [{ type: 'text', text: system, cache_control: cc }]; + return body; + } + + if (Array.isArray(system) && system.length > 0) { + const lastBlock = system[system.length - 1]; + if (isRecord(lastBlock)) { + lastBlock.cache_control = cc; + } + } + + return body; +} + +function maybeAddCacheControlToToolResult(block: ToolResultBlock, cc: CacheControl): void { + const toolResultContent = block.content ?? ''; + + if (typeof toolResultContent === 'string') { + if (toolResultContent.length >= MIN_CONTENT_LENGTH_FOR_CACHE) { + block.cache_control = cc; + } + return; + } + + if (!Array.isArray(toolResultContent)) { + return; + } + + const totalChars = toolResultContent.reduce((sum, inner) => { + if (!isRecord(inner) || inner.type !== 'text') { + return sum; + } + return sum + (typeof inner.text === 'string' ? inner.text.length : 0); + }, 0); + + if (totalChars < MIN_CONTENT_LENGTH_FOR_CACHE || toolResultContent.length === 0) { + return; + } + + let lastTextBlock: TextBlock | null = null; + for (let i = toolResultContent.length - 1; i >= 0; i -= 1) { + const inner = toolResultContent[i]; + if (isRecord(inner) && inner.type === 'text') { + lastTextBlock = inner as TextBlock; + break; + } + } + + if (lastTextBlock !== null) { + lastTextBlock.cache_control = cc; + } +} + +function injectToolResultCacheControl( + body: Record, + cc: CacheControl +): Record { + const messages = body.messages; + if (!Array.isArray(messages) || messages.length === 0) { + return body; + } + + for (const msg of messages) { + if (!isRecord(msg)) { + continue; + } + + const message = msg as MessageBlock; + if (message.role !== 'user' || !Array.isArray(message.content)) { + continue; + } + + for (const block of message.content) { + if (!isRecord(block)) { + continue; + } + if (block.type !== 'tool_result' && block.type !== 'toolResult') { + continue; + } + maybeAddCacheControlToToolResult(block as ToolResultBlock, cc); + } + } + + return body; +} + +export function injectAnthropicCacheControl(body: Record): Record { + const copiedBody = structuredClone(body); + injectSystemCacheControl(copiedBody, CACHE_CONTROL_EPHEMERAL); + injectToolResultCacheControl(copiedBody, CACHE_CONTROL_EPHEMERAL); + return copiedBody; +} + +export function injectOpenAICacheControl(body: Record): Record { + // OpenAI prompt caching is automatic and prefix-based, so no explicit + // cache_control block injection is required at request construction time. + return body; +} + +export function injectCacheControl( + body: Record, + provider: 'anthropic' | 'openai' +): Record { + if (provider === 'anthropic') { + return injectAnthropicCacheControl(body); + } + return injectOpenAICacheControl(body); +} diff --git a/openclaw-plugin/src/engine/dedup.ts b/openclaw-plugin/src/engine/dedup.ts new file mode 100644 index 0000000..14d79f4 --- /dev/null +++ b/openclaw-plugin/src/engine/dedup.ts @@ -0,0 +1,355 @@ +import * as crypto from 'node:crypto'; + +export const MIN_BLOCK_CHARS = 80; +export const MIN_CONTENT_CHARS = 500; + +export const CHUNK_MODULUS = 13; +export const CHUNK_MIN_LINES = 5; +export const CHUNK_MAX_LINES = 40; + +export interface DedupResult { + blocksDeduped: number; + blocksTotal: number; + charsBefore: number; + charsAfter: number; + charsSaved: number; +} + +export interface DedupOptions { + minBlockChars?: number; + minContentChars?: number; + chunkModulus?: number; +} + +type SeenBlock = [number, string, number]; + +interface OpenAIToolCall { + id?: string; + function?: { + name?: string; + }; +} + +interface OpenAIAssistantMessage { + role?: string; + tool_calls?: OpenAIToolCall[]; +} + +interface OpenAIToolMessage { + role?: string; + content?: string; + tool_call_id?: string; + name?: string; +} + +interface ChatCompletionsBody { + messages?: OpenAIToolMessage[]; +} + +interface ResponsesFunctionCallItem { + type?: string; + call_id?: string; + name?: string; +} + +interface ResponsesFunctionCallOutputItem { + type?: string; + call_id?: string; + output?: string; +} + +interface ResponsesApiBody { + input?: ResponsesFunctionCallOutputItem[]; +} + +function emptyDedupResult(): DedupResult { + return { + blocksDeduped: 0, + blocksTotal: 0, + charsBefore: 0, + charsAfter: 0, + charsSaved: 0 + }; +} + +export function hashString(str: string): number { + let h = 5381; + for (let i = 0; i < str.length; i++) { + h = ((h << 5) + h + str.charCodeAt(i)) & 0xFFFFFFFF; + } + return h >>> 0; +} + +export function buildToolNameMapOpenai(messages: OpenAIAssistantMessage[]): Record { + const mapping: Record = {}; + for (const msg of messages) { + if (!msg || typeof msg !== 'object' || msg.role !== 'assistant') { + continue; + } + + for (const tc of msg.tool_calls || []) { + if (!tc || typeof tc !== 'object') { + continue; + } + const tcId = tc.id || ''; + const fn = tc.function; + if (fn && typeof fn === 'object' && fn.name) { + mapping[tcId] = fn.name; + } + } + } + return mapping; +} + +export function buildToolNameMapResponses(items: ResponsesFunctionCallItem[]): Record { + const mapping: Record = {}; + for (const item of items) { + if (item && typeof item === 'object' && item.type === 'function_call') { + const callId = item.call_id || ''; + const name = item.name || ''; + if (callId && name) { + mapping[callId] = name; + } + } + } + return mapping; +} + +export function contentDefinedChunking( + text: string, + chunkModulus: number = CHUNK_MODULUS +): string[] { + const lines = text.split('\n'); + if (lines.length <= CHUNK_MIN_LINES) { + return [text]; + } + + const blocks: string[] = []; + let current: string[] = []; + + for (const line of lines) { + current.push(line); + const lineHash = hashString(line.trim()) & 0xFFFFFFFF; + const isBoundary = ( + lineHash % chunkModulus === 0 && current.length >= CHUNK_MIN_LINES + ) || current.length >= CHUNK_MAX_LINES; + + if (isBoundary) { + blocks.push(current.join('\n')); + current = []; + } + } + + if (current.length > 0) { + if (blocks.length > 0 && current.length < CHUNK_MIN_LINES) { + blocks[blocks.length - 1] += `\n${current.join('\n')}`; + } else { + blocks.push(current.join('\n')); + } + } + + return blocks; +} + +export function hashBlock(block: string): string { + const normalized = block.trim(); + return crypto.createHash('sha256').update(normalized, 'utf8').digest('hex').slice(0, 20); +} + +export function dedupChatCompletions(body: ChatCompletionsBody, opts: DedupOptions = {}): DedupResult { + const minBlockChars = opts.minBlockChars ?? MIN_BLOCK_CHARS; + const minContentChars = opts.minContentChars ?? MIN_CONTENT_CHARS; + const chunkModulus = opts.chunkModulus ?? CHUNK_MODULUS; + + const messages = body?.messages; + if (!Array.isArray(messages) || messages.length === 0) { + return emptyDedupResult(); + } + + const toolNames = buildToolNameMapOpenai(messages); + const seenBlocks = new Map(); + const result = emptyDedupResult(); + + for (let idx = 0; idx < messages.length; idx++) { + const msg = messages[idx]; + if (!msg || typeof msg !== 'object' || msg.role !== 'tool') { + continue; + } + + const content = msg.content || ''; + if (typeof content !== 'string' || content.length < minContentChars) { + continue; + } + + const toolCallId = msg.tool_call_id || ''; + const fnName = toolNames[toolCallId] || msg.name || 'tool'; + + const blocks = contentDefinedChunking(content, chunkModulus); + if (blocks.length < 2) { + for (const block of blocks) { + if (block.trim().length >= minBlockChars) { + const h = hashBlock(block); + if (!seenBlocks.has(h)) { + seenBlocks.set(h, [idx, fnName, 0]); + } + } + } + continue; + } + + const newBlocks: string[] = []; + let dedupedInThis = 0; + + for (let blockIdx = 0; blockIdx < blocks.length; blockIdx++) { + const block = blocks[blockIdx]; + if (block.trim().length < minBlockChars) { + newBlocks.push(block); + continue; + } + + const h = hashBlock(block); + result.blocksTotal += 1; + + const seen = seenBlocks.get(h); + if (seen && seen[0] !== idx) { + const origFn = seen[1]; + const firstLine = block.trim().split('\n')[0].slice(0, 80); + const ref = `[... "${firstLine}" — identical to earlier ${origFn} result, see above ...]`; + const charsSaved = block.length - ref.length; + if (charsSaved > 0) { + newBlocks.push(ref); + dedupedInThis += 1; + result.blocksDeduped += 1; + } else { + newBlocks.push(block); + } + } else { + if (!seen) { + seenBlocks.set(h, [idx, fnName, blockIdx]); + } + newBlocks.push(block); + } + } + + if (dedupedInThis > 0) { + const originalLen = content.length; + const newContent = newBlocks.join('\n\n'); + msg.content = newContent; + const newLen = newContent.length; + result.charsBefore += originalLen; + result.charsAfter += newLen; + result.charsSaved += (originalLen - newLen); + } else { + for (let blockIdx = 0; blockIdx < blocks.length; blockIdx++) { + const block = blocks[blockIdx]; + if (block.trim().length >= minBlockChars) { + const h = hashBlock(block); + if (!seenBlocks.has(h)) { + seenBlocks.set(h, [idx, fnName, blockIdx]); + } + } + } + } + } + + return result; +} + +export function dedupResponsesApi(body: ResponsesApiBody, opts: DedupOptions = {}): DedupResult { + const minBlockChars = opts.minBlockChars ?? MIN_BLOCK_CHARS; + const minContentChars = opts.minContentChars ?? MIN_CONTENT_CHARS; + const chunkModulus = opts.chunkModulus ?? CHUNK_MODULUS; + + const inputItems = body?.input; + if (!Array.isArray(inputItems) || inputItems.length === 0) { + return emptyDedupResult(); + } + + const fnNames = buildToolNameMapResponses(inputItems); + const seenBlocks = new Map(); + const result = emptyDedupResult(); + + for (let idx = 0; idx < inputItems.length; idx++) { + const item = inputItems[idx]; + if (!item || typeof item !== 'object' || item.type !== 'function_call_output') { + continue; + } + + const output = item.output || ''; + if (typeof output !== 'string' || output.length < minContentChars) { + continue; + } + + const callId = item.call_id || ''; + const fnName = fnNames[callId] || callId || 'tool'; + + const blocks = contentDefinedChunking(output, chunkModulus); + if (blocks.length < 2) { + for (const block of blocks) { + if (block.trim().length >= minBlockChars) { + const h = hashBlock(block); + if (!seenBlocks.has(h)) { + seenBlocks.set(h, [idx, fnName, 0]); + } + } + } + continue; + } + + const newBlocks: string[] = []; + let dedupedInThis = 0; + + for (let blockIdx = 0; blockIdx < blocks.length; blockIdx++) { + const block = blocks[blockIdx]; + if (block.trim().length < minBlockChars) { + newBlocks.push(block); + continue; + } + + const h = hashBlock(block); + result.blocksTotal += 1; + + const seen = seenBlocks.get(h); + if (seen && seen[0] !== idx) { + const origFn = seen[1]; + const firstLine = block.trim().split('\n')[0].slice(0, 80); + const ref = `[... "${firstLine}" — identical to earlier ${origFn} result, see above ...]`; + const charsSaved = block.length - ref.length; + if (charsSaved > 0) { + newBlocks.push(ref); + dedupedInThis += 1; + result.blocksDeduped += 1; + } else { + newBlocks.push(block); + } + } else { + if (!seen) { + seenBlocks.set(h, [idx, fnName, blockIdx]); + } + newBlocks.push(block); + } + } + + if (dedupedInThis > 0) { + const originalLen = output.length; + const newOutput = newBlocks.join('\n\n'); + item.output = newOutput; + const newLen = newOutput.length; + result.charsBefore += originalLen; + result.charsAfter += newLen; + result.charsSaved += (originalLen - newLen); + } else { + for (let blockIdx = 0; blockIdx < blocks.length; blockIdx++) { + const block = blocks[blockIdx]; + if (block.trim().length >= minBlockChars) { + const h = hashBlock(block); + if (!seenBlocks.has(h)) { + seenBlocks.set(h, [idx, fnName, blockIdx]); + } + } + } + } + } + + return result; +} diff --git a/openclaw-plugin/src/engine/extract.ts b/openclaw-plugin/src/engine/extract.ts new file mode 100644 index 0000000..10b8c67 --- /dev/null +++ b/openclaw-plugin/src/engine/extract.ts @@ -0,0 +1,969 @@ +import * as crypto from 'crypto'; + +/** + * HTTP Intercept Parser for ContextPilot + * + * Pure parsing/extraction/reconstruction logic for intercepting LLM API requests. + * Extracts documents from system messages, supports reordering, and reconstructs + * the request body with reordered documents. + * + * No server dependencies — independently testable. + */ + +const _KNOWN_WRAPPER_TAGS = new Set(["documents", "contexts", "docs", "passages", "references", "files"]); +const _KNOWN_ITEM_TAGS = new Set(["document", "context", "doc", "passage", "reference", "file"]); + +const _NUMBERED_RE = /\[(\d+)\]\s*/; +const _SEPARATOR_PATTERNS = ["---", "==="]; +const _SINGLE_DOC_MIN_CHARS = 200; + +export interface InterceptConfig { + enabled: boolean; + mode: string; + tag: string; + separator: string; + alpha: number; + linkageMethod: string; + scope: string; +} + +export interface ExtractionResult { + documents: string[]; + prefix: string; + suffix: string; + mode: string; + wrapperTag: string; + itemTag: string; + separatorChar: string; + originalContent: string; + jsonItems: any[] | null; +} + +export interface ToolResultLocation { + msgIndex: number; + blockIndex: number; // -1 = content is string + innerBlockIndex: number; // For Anthropic nested content blocks +} + +export interface SingleDocExtraction { + content: string; + contentHash: string; + toolCallId: string; +} + +export class MultiExtractionResult { + systemExtraction: [ExtractionResult, number] | null = null; + toolExtractions: [ExtractionResult, ToolResultLocation][] = []; + singleDocExtractions: [SingleDocExtraction, ToolResultLocation][] = []; + + get hasExtractions(): boolean { + return ( + this.systemExtraction !== null || + this.toolExtractions.length > 0 || + this.singleDocExtractions.length > 0 + ); + } + + get totalDocuments(): number { + let total = this.singleDocExtractions.length; + if (this.systemExtraction) { + total += this.systemExtraction[0].documents.length; + } + for (const [ext, _] of this.toolExtractions) { + total += ext.documents.length; + } + return total; + } +} + +/** + * Parse X-ContextPilot-* headers into an InterceptConfig. + */ +export function parseInterceptHeaders(headers: Record): InterceptConfig { + const get = (name: string, def: string = ""): string => { + const key = `x-contextpilot-${name}`; + for (const [k, v] of Object.entries(headers)) { + if (k.toLowerCase() === key) { + return v; + } + } + return def; + }; + + const enabledStr = get("enabled", "true").toLowerCase(); + const enabled = !["false", "0", "no"].includes(enabledStr); + + let scope = get("scope", "all").toLowerCase(); + if (!["system", "tool_results", "all"].includes(scope)) { + scope = "all"; + } + + return { + enabled, + mode: get("mode", "auto").toLowerCase(), + tag: get("tag", "document").toLowerCase(), + separator: get("separator", "---"), + alpha: parseFloat(get("alpha", "0.001")) || 0.001, + linkageMethod: get("linkage", "average"), + scope + }; +} + +// ── Document extraction ───────────────────────────────────────────────────── + +function _escapeRegExp(string: string): string { + return string.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); // $& means the whole matched string +} + +export function extractXmlTags(text: string, config: InterceptConfig): ExtractionResult | null { + let itemTagsToTry: string[] = []; + let wrapperTagsToTry: string[] = []; + + if (config.mode === "xml_tag") { + itemTagsToTry.push(config.tag); + wrapperTagsToTry.push(config.tag + "s"); + for (const t of _KNOWN_ITEM_TAGS) { + if (t !== config.tag) itemTagsToTry.push(t); + } + for (const t of _KNOWN_WRAPPER_TAGS) { + if (t !== config.tag + "s") wrapperTagsToTry.push(t); + } + } else { + itemTagsToTry = Array.from(_KNOWN_ITEM_TAGS); + wrapperTagsToTry = Array.from(_KNOWN_WRAPPER_TAGS); + } + + for (const wrapperTag of wrapperTagsToTry) { + const wrapperPattern = new RegExp(`(<${wrapperTag}(?:\\s[^>]*)?>)(.*?)()`, "s"); + const wrapperMatch = wrapperPattern.exec(text); + if (!wrapperMatch) continue; + + const innerText = wrapperMatch[2]; + const prefix = text.substring(0, wrapperMatch.index); + const suffix = text.substring(wrapperMatch.index + wrapperMatch[0].length); + + for (const itemTag of itemTagsToTry) { + const itemPattern = new RegExp(`(<${itemTag}(?:\\s[^>]*)?>)(.*?)()`, "gs"); + let items: string[] = []; + while (true) { + const itemMatch = itemPattern.exec(innerText); + if (itemMatch === null) break; + items.push(itemMatch[2].trim()); + } + if (items.length > 0) { + return { + documents: items, + prefix, + suffix, + mode: "xml_tag", + wrapperTag, + itemTag, + separatorChar: "", + originalContent: text, + jsonItems: null + }; + } + } + } + + for (const itemTag of itemTagsToTry) { + const itemPattern = new RegExp(`(<${itemTag}(?:\\s[^>]*)?>)(.*?)()`, "gs"); + const items: RegExpExecArray[] = []; + while (true) { + const match = itemPattern.exec(text); + if (match === null) break; + items.push(match); + } + + if (items.length >= 2) { + const firstStart = items[0].index; + const lastEnd = items[items.length - 1].index + items[items.length - 1][0].length; + return { + documents: items.map(m => m[2].trim()), + prefix: text.substring(0, firstStart), + suffix: text.substring(lastEnd), + mode: "xml_tag", + wrapperTag: "", + itemTag, + separatorChar: "", + originalContent: text, + jsonItems: null + }; + } + } + + return null; +} + +export function extractNumbered(text: string, config: InterceptConfig): ExtractionResult | null { + const splits = text.split(_NUMBERED_RE); + if (splits.length < 4) { + return null; + } + + const prefix = splits[0]; + const documents: string[] = []; + let i = 1; + while (i + 1 < splits.length) { + const docText = splits[i + 1].trim(); + if (docText) { + documents.push(docText); + } + i += 2; + } + + if (documents.length < 2) return null; + + return { + documents, + prefix, + suffix: "", + mode: "numbered", + wrapperTag: "", + itemTag: "", + separatorChar: "", + originalContent: text, + jsonItems: null + }; +} + +export function extractSeparator(text: string, config: InterceptConfig): ExtractionResult | null { + let sep = config.separator; + let parts: string[] = []; + let documents: string[] = []; + + if (config.mode === "auto") { + let found = false; + for (const candidate of _SEPARATOR_PATTERNS) { + const regex = new RegExp(`\\n${_escapeRegExp(candidate)}\\n`); + parts = text.split(regex); + if (parts.length >= 3) { + sep = candidate; + found = true; + break; + } + } + if (!found) return null; + documents = parts.map(p => p.trim()).filter(p => p); + } else { + const regex = new RegExp(`\\n${_escapeRegExp(sep)}\\n`); + parts = text.split(regex); + documents = parts.map(p => p.trim()).filter(p => p); + } + + if (documents.length < 2) return null; + + return { + documents, + prefix: "", + suffix: "", + mode: "separator", + wrapperTag: "", + itemTag: "", + separatorChar: sep, + originalContent: text, + jsonItems: null + }; +} + +export function extractMarkdownHeaders(text: string, config: InterceptConfig): ExtractionResult | null { + const parts = text.split(/(?=^#{1,2}\s)/m); + if (!parts || parts.length === 0) return null; + + let prefix = ""; + const sections: string[] = []; + + for (const part of parts) { + const stripped = part.trim(); + if (!stripped) continue; + + if (/^#{1,2}\s/.test(stripped)) { + sections.push(stripped); + } else { + prefix = part; + } + } + + if (sections.length < 2) return null; + + return { + documents: sections, + prefix, + suffix: "", + mode: "markdown_header", + wrapperTag: "", + itemTag: "", + separatorChar: "", + originalContent: text, + jsonItems: null + }; +} + +const _JSON_ID_KEYS = ["url", "path", "file", "filename", "uri", "href"]; + +function _extractJsonId(item: any): string | null { + for (const key of _JSON_ID_KEYS) { + if (item && typeof item === "object" && key in item) { + const val = item[key]; + if (typeof val === "string" && val.trim()) { + return val.trim(); + } + } + } + return null; +} + +export function extractJsonResults(text: string, config: InterceptConfig): ExtractionResult | null { + const stripped = text.trim(); + if (!stripped.startsWith("{")) return null; + + let obj: any; + try { + obj = JSON.parse(stripped); + } catch (e) { + return null; + } + + if (typeof obj !== "object" || obj === null) return null; + + const results = obj.results; + if (!Array.isArray(results) || results.length < 2) return null; + + const documents: string[] = []; + for (const item of results) { + if (typeof item === "object" && item !== null) { + const docId = _extractJsonId(item); + if (docId !== null) { + documents.push(docId); + } else { + documents.push(JSON.stringify(item)); + } + } else { + documents.push(JSON.stringify(item)); + } + } + + if (documents.length < 2) return null; + + return { + documents, + prefix: "", + suffix: "", + mode: "json_results", + wrapperTag: "", + itemTag: "", + separatorChar: "", + originalContent: text, + jsonItems: results + }; +} + +export function extractDocuments(text: string, config: InterceptConfig): ExtractionResult | null { + if (config.mode === "xml_tag") { + return extractXmlTags(text, config); + } else if (config.mode === "numbered") { + return extractNumbered(text, config); + } else if (config.mode === "json_results") { + return extractJsonResults(text, config); + } else if (config.mode === "separator") { + return extractSeparator(text, config); + } else if (config.mode === "markdown_header") { + return extractMarkdownHeaders(text, config); + } else { + let result = extractXmlTags(text, config); + if (result) return result; + result = extractNumbered(text, config); + if (result) return result; + result = extractJsonResults(text, config); + if (result) return result; + return null; + } +} + +// ── Reconstruction ─────────────────────────────────────────────────────────── + +export function reconstructContent(extraction: ExtractionResult, reorderedDocs: string[]): string { + if (extraction.mode === "xml_tag") { + return reconstructXml(extraction, reorderedDocs); + } else if (extraction.mode === "numbered") { + return reconstructNumbered(extraction, reorderedDocs); + } else if (extraction.mode === "json_results") { + return reconstructJsonResults(extraction, reorderedDocs); + } else if (extraction.mode === "separator") { + return reconstructSeparator(extraction, reorderedDocs); + } else if (extraction.mode === "markdown_header") { + return reconstructMarkdownHeaders(extraction, reorderedDocs); + } else { + return extraction.originalContent; + } +} + +export function reconstructXml(extraction: ExtractionResult, reorderedDocs: string[]): string { + const itemTag = extraction.itemTag; + const items = reorderedDocs.map(doc => `<${itemTag}>${doc}`).join("\n"); + + let block: string; + if (extraction.wrapperTag) { + const wrapper = extraction.wrapperTag; + block = `<${wrapper}>\n${items}\n`; + } else { + block = items; + } + + return extraction.prefix + block + extraction.suffix; +} + +export function reconstructNumbered(extraction: ExtractionResult, reorderedDocs: string[]): string { + const parts = extraction.prefix ? [extraction.prefix] : []; + for (let i = 0; i < reorderedDocs.length; i++) { + parts.push(`[${i + 1}] ${reorderedDocs[i]}`); + } + let result = parts.length > 0 ? parts.join("\n") : ""; + if (extraction.suffix) { + result += extraction.suffix; + } + return result; +} + +export function reconstructJsonResults(extraction: ExtractionResult, reorderedDocs: string[]): string { + const obj = JSON.parse(extraction.originalContent); + if (extraction.jsonItems !== null) { + const origDocs = extraction.documents; + const docToIndices: Record = {}; + for (let i = 0; i < origDocs.length; i++) { + if (!docToIndices[origDocs[i]]) { + docToIndices[origDocs[i]] = []; + } + docToIndices[origDocs[i]].push(i); + } + + const used = new Set(); + const reorderedItems: any[] = []; + for (const doc of reorderedDocs) { + const indices = docToIndices[doc] || []; + for (const idx of indices) { + if (!used.has(idx)) { + reorderedItems.push(extraction.jsonItems[idx]); + used.add(idx); + break; + } + } + } + obj.results = reorderedItems; + } else { + obj.results = reorderedDocs.map(doc => JSON.parse(doc)); + } + return JSON.stringify(obj, null, 2); +} + +export function reconstructSeparator(extraction: ExtractionResult, reorderedDocs: string[]): string { + const sep = extraction.separatorChar || "---"; + return reorderedDocs.join(`\n${sep}\n`); +} + +export function reconstructMarkdownHeaders(extraction: ExtractionResult, reorderedDocs: string[]): string { + const parts: string[] = []; + if (extraction.prefix.trim()) { + parts.push(extraction.prefix.trimEnd()); + } + parts.push(...reorderedDocs); + return parts.join("\n\n"); +} + +// ── OpenAI Chat format ────────────────────────────────────────────────────── + +export function extractFromOpenaiChat(body: any, config: InterceptConfig): [ExtractionResult, number] | null { + const messages = body?.messages; + if (!messages || !Array.isArray(messages)) return null; + + for (let i = 0; i < messages.length; i++) { + const msg = messages[i]; + if (msg?.role !== "system") continue; + + const content = msg.content || ""; + if (typeof content === "string") { + const result = extractDocuments(content, config); + if (result) return [result, i]; + } else if (Array.isArray(content)) { + for (const block of content) { + if (block && typeof block === "object" && block.type === "text") { + const result = extractDocuments(block.text || "", config); + if (result) return [result, i]; + } + } + } + } + return null; +} + +export function reconstructOpenaiChat( + body: any, + extraction: ExtractionResult, + reorderedDocs: string[], + systemMsgIndex: number +): any { + const newBody = structuredClone(body); + const newContent = reconstructContent(extraction, reorderedDocs); + const msg = newBody.messages[systemMsgIndex]; + + if (typeof msg.content === "string") { + msg.content = newContent; + } else if (Array.isArray(msg.content)) { + for (const block of msg.content) { + if (block && typeof block === "object" && block.type === "text") { + // Using dummy config since we just check if it was the block with documents + if (extractDocuments(block.text || "", parseInterceptHeaders({}))) { + block.text = newContent; + break; + } + } + } + } + return newBody; +} + +// ── Anthropic Messages format ─────────────────────────────────────────────── + +export function extractFromAnthropicMessages(body: any, config: InterceptConfig): ExtractionResult | null { + const system = body?.system; + if (system === undefined || system === null) return null; + + if (typeof system === "string") { + return extractDocuments(system, config); + } else if (Array.isArray(system)) { + for (const block of system) { + if (block && typeof block === "object" && block.type === "text") { + const result = extractDocuments(block.text || "", config); + if (result) return result; + } + } + } + return null; +} + +export function reconstructAnthropicMessages( + body: any, + extraction: ExtractionResult, + reorderedDocs: string[] +): any { + const newBody = structuredClone(body); + const newContent = reconstructContent(extraction, reorderedDocs); + + if (typeof newBody.system === "string") { + newBody.system = newContent; + } else if (Array.isArray(newBody.system)) { + for (const block of newBody.system) { + if (block && typeof block === "object" && block.type === "text") { + if (extractDocuments(block.text || "", parseInterceptHeaders({}))) { + block.text = newContent; + break; + } + } + } + } + return newBody; +} + +// ── Tool result extraction ───────────────────────────────────────────────── + +export function extractFromOpenaiToolResults(body: any, config: InterceptConfig): [ExtractionResult, ToolResultLocation][] { + const messages = body?.messages; + if (!messages || !Array.isArray(messages)) return []; + + const results: [ExtractionResult, ToolResultLocation][] = []; + for (let i = 0; i < messages.length; i++) { + const msg = messages[i]; + if (msg?.role !== "tool" && msg?.role !== "toolResult") continue; + + const content = msg.content || ""; + if (typeof content === "string") { + const extraction = extractDocuments(content, config); + if (extraction && extraction.documents.length >= 2) { + results.push([extraction, { msgIndex: i, blockIndex: -1, innerBlockIndex: -1 }]); + } + } else if (Array.isArray(content)) { + for (let j = 0; j < content.length; j++) { + const block = content[j]; + if (block && typeof block === "object" && block.type === "text") { + const extraction = extractDocuments(block.text || "", config); + if (extraction && extraction.documents.length >= 2) { + results.push([extraction, { msgIndex: i, blockIndex: j, innerBlockIndex: -1 }]); + } + } + } + } + } + return results; +} + +export function extractFromAnthropicToolResults(body: any, config: InterceptConfig): [ExtractionResult, ToolResultLocation][] { + const messages = body?.messages; + if (!messages || !Array.isArray(messages)) return []; + + const results: [ExtractionResult, ToolResultLocation][] = []; + for (let i = 0; i < messages.length; i++) { + const msg = messages[i]; + if (msg?.role !== "user") continue; + + const content = msg.content; + if (!Array.isArray(content)) continue; + + for (let j = 0; j < content.length; j++) { + const block = content[j]; + if (!block || typeof block !== "object" || (block.type !== "tool_result" && block.type !== "toolResult")) continue; + + const trContent = block.content || ""; + if (typeof trContent === "string") { + const extraction = extractDocuments(trContent, config); + if (extraction && extraction.documents.length >= 2) { + results.push([extraction, { msgIndex: i, blockIndex: j, innerBlockIndex: -1 }]); + } + } else if (Array.isArray(trContent)) { + for (let k = 0; k < trContent.length; k++) { + const inner = trContent[k]; + if (inner && typeof inner === "object" && inner.type === "text") { + const extraction = extractDocuments(inner.text || "", config); + if (extraction && extraction.documents.length >= 2) { + results.push([extraction, { msgIndex: i, blockIndex: j, innerBlockIndex: k }]); + } + } + } + } + } + } + return results; +} + +// ── Tool result reconstruction ───────────────────────────────────────────── + +export function reconstructOpenaiToolResult( + body: any, + extraction: ExtractionResult, + reorderedDocs: string[], + location: ToolResultLocation +): void { + const newContent = reconstructContent(extraction, reorderedDocs); + const msg = body.messages[location.msgIndex]; + if (location.blockIndex === -1) { + msg.content = newContent; + } else { + msg.content[location.blockIndex].text = newContent; + } +} + +export function reconstructAnthropicToolResult( + body: any, + extraction: ExtractionResult, + reorderedDocs: string[], + location: ToolResultLocation +): void { + const newContent = reconstructContent(extraction, reorderedDocs); + const msg = body.messages[location.msgIndex]; + const block = msg.content[location.blockIndex]; + if (location.innerBlockIndex === -1) { + block.content = newContent; + } else { + block.content[location.innerBlockIndex].text = newContent; + } +} + +// ── Aggregate extraction ─────────────────────────────────────────────────── + +export function extractAllOpenai(body: any, config: InterceptConfig): MultiExtractionResult { + const result = new MultiExtractionResult(); + if (["system", "all"].includes(config.scope)) { + const sysResult = extractFromOpenaiChat(body, config); + if (sysResult) { + result.systemExtraction = sysResult; + } + } + if (["tool_results", "all"].includes(config.scope)) { + result.toolExtractions = extractFromOpenaiToolResults(body, config); + result.singleDocExtractions = extractSingleDocsFromOpenaiToolResults(body, config); + } + return result; +} + +export function extractAllAnthropic(body: any, config: InterceptConfig): MultiExtractionResult { + const result = new MultiExtractionResult(); + if (["system", "all"].includes(config.scope)) { + const sysExtraction = extractFromAnthropicMessages(body, config); + if (sysExtraction && sysExtraction.documents.length >= 2) { + result.systemExtraction = [sysExtraction, -1]; + } + } + if (["tool_results", "all"].includes(config.scope)) { + result.toolExtractions = extractFromAnthropicToolResults(body, config); + result.singleDocExtractions = extractSingleDocsFromAnthropicToolResults(body, config); + } + return result; +} + +// ── Single-document extraction (for cross-turn dedup) ───────────────────── + +function _makeSingleDoc(content: string, toolCallId: string = ""): SingleDocExtraction { + const stripped = content.trim(); + const contentHash = crypto.createHash("sha256").update(stripped).digest("hex"); + return { + content: stripped, + contentHash, + toolCallId + }; +} + +export function extractSingleDocsFromOpenaiToolResults( + body: any, config: InterceptConfig +): [SingleDocExtraction, ToolResultLocation][] { + const messages = body?.messages; + if (!messages || !Array.isArray(messages)) return []; + + const results: [SingleDocExtraction, ToolResultLocation][] = []; + for (let i = 0; i < messages.length; i++) { + const msg = messages[i]; + if (msg?.role !== "tool" && msg?.role !== "toolResult") continue; + + const toolCallId = msg.tool_call_id || ""; + const content = msg.content || ""; + + if (typeof content === "string") { + const extraction = extractDocuments(content, config); + if (extraction && extraction.documents.length >= 2) continue; + + if (content.trim().length >= _SINGLE_DOC_MIN_CHARS) { + results.push([ + _makeSingleDoc(content, toolCallId), + { msgIndex: i, blockIndex: -1, innerBlockIndex: -1 } + ]); + } + } else if (Array.isArray(content)) { + for (let j = 0; j < content.length; j++) { + const block = content[j]; + if (!block || typeof block !== "object" || block.type !== "text") continue; + + const text = block.text || ""; + const extraction = extractDocuments(text, config); + if (extraction && extraction.documents.length >= 2) continue; + + if (text.trim().length >= _SINGLE_DOC_MIN_CHARS) { + results.push([ + _makeSingleDoc(text, toolCallId), + { msgIndex: i, blockIndex: j, innerBlockIndex: -1 } + ]); + } + } + } + } + return results; +} + +export function extractSingleDocsFromAnthropicToolResults( + body: any, config: InterceptConfig +): [SingleDocExtraction, ToolResultLocation][] { + const messages = body?.messages; + if (!messages || !Array.isArray(messages)) return []; + + const results: [SingleDocExtraction, ToolResultLocation][] = []; + for (let i = 0; i < messages.length; i++) { + const msg = messages[i]; + if (msg?.role !== "user") continue; + + const content = msg.content; + if (!Array.isArray(content)) continue; + + for (let j = 0; j < content.length; j++) { + const block = content[j]; + if (!block || typeof block !== "object") continue; + if (block.type !== "tool_result" && block.type !== "toolResult") continue; + + const toolUseId = block.tool_use_id || ""; + const trContent = block.content || ""; + + if (typeof trContent === "string") { + const extraction = extractDocuments(trContent, config); + if (extraction && extraction.documents.length >= 2) continue; + + if (trContent.trim().length >= _SINGLE_DOC_MIN_CHARS) { + results.push([ + _makeSingleDoc(trContent, toolUseId), + { msgIndex: i, blockIndex: j, innerBlockIndex: -1 } + ]); + } + } else if (Array.isArray(trContent)) { + for (let k = 0; k < trContent.length; k++) { + const inner = trContent[k]; + if (!inner || typeof inner !== "object" || inner.type !== "text") continue; + + const text = inner.text || ""; + const extraction = extractDocuments(text, config); + if (extraction && extraction.documents.length >= 2) continue; + + if (text.trim().length >= _SINGLE_DOC_MIN_CHARS) { + results.push([ + _makeSingleDoc(text, toolUseId), + { msgIndex: i, blockIndex: j, innerBlockIndex: k } + ]); + } + } + } + } + } + return results; +} + +// ── Single-document hint replacement ────────────────────────────────────── + +export function replaceSingleDocOpenai( + body: any, location: ToolResultLocation, hint: string +): void { + const msg = body.messages[location.msgIndex]; + if (location.blockIndex === -1) { + msg.content = hint; + } else { + msg.content[location.blockIndex].text = hint; + } +} + +export function replaceSingleDocAnthropic( + body: any, location: ToolResultLocation, hint: string +): void { + const msg = body.messages[location.msgIndex]; + const block = msg.content[location.blockIndex]; + if (location.innerBlockIndex === -1) { + block.content = hint; + } else { + block.content[location.innerBlockIndex].text = hint; + } +} + +// ── Format handler abstraction ───────────────────────────────────────────── + +export interface FormatHandler { + extractAll(body: any, config: InterceptConfig): MultiExtractionResult; + reconstructSystem(body: any, extraction: ExtractionResult, docs: string[], sysIdx: number): void; + reconstructToolResult(body: any, extraction: ExtractionResult, docs: string[], location: ToolResultLocation): void; + replaceSingleDoc(body: any, location: ToolResultLocation, hint: string): void; + toolCallPresent(body: any, toolCallId: string): boolean; + targetPath(): string; + cacheSystem(body: any): any; + restoreSystem(body: any, cached: any): void; +} + +export class OpenAIChatHandler implements FormatHandler { + extractAll(body: any, config: InterceptConfig): MultiExtractionResult { + return extractAllOpenai(body, config); + } + + reconstructSystem(body: any, extraction: ExtractionResult, docs: string[], sysIdx: number): void { + const newContent = reconstructContent(extraction, docs); + const msg = body.messages[sysIdx]; + if (typeof msg.content === "string") { + msg.content = newContent; + } else if (Array.isArray(msg.content)) { + for (const block of msg.content) { + if (block && typeof block === "object" && block.type === "text") { + if (extractDocuments(block.text || "", parseInterceptHeaders({}))) { + block.text = newContent; + break; + } + } + } + } + } + + reconstructToolResult(body: any, extraction: ExtractionResult, docs: string[], location: ToolResultLocation): void { + reconstructOpenaiToolResult(body, extraction, docs, location); + } + + replaceSingleDoc(body: any, location: ToolResultLocation, hint: string): void { + replaceSingleDocOpenai(body, location, hint); + } + + toolCallPresent(body: any, toolCallId: string): boolean { + for (const msg of (body.messages || [])) { + if (msg.role === "tool" || msg.role === "toolResult") { + if (msg.tool_call_id === toolCallId) return true; + } + } + return false; + } + + targetPath(): string { + return "/v1/chat/completions"; + } + + cacheSystem(body: any): any { + return null; // System prompt is inside messages array + } + + restoreSystem(body: any, cached: any): void { + // No-op + } +} + +export class AnthropicMessagesHandler implements FormatHandler { + extractAll(body: any, config: InterceptConfig): MultiExtractionResult { + return extractAllAnthropic(body, config); + } + + reconstructSystem(body: any, extraction: ExtractionResult, docs: string[], sysIdx: number): void { + const newContent = reconstructContent(extraction, docs); + if (typeof body.system === "string") { + body.system = newContent; + } else if (Array.isArray(body.system)) { + for (const block of body.system) { + if (block && typeof block === "object" && block.type === "text") { + if (extractDocuments(block.text || "", parseInterceptHeaders({}))) { + block.text = newContent; + break; + } + } + } + } + } + + reconstructToolResult(body: any, extraction: ExtractionResult, docs: string[], location: ToolResultLocation): void { + reconstructAnthropicToolResult(body, extraction, docs, location); + } + + replaceSingleDoc(body: any, location: ToolResultLocation, hint: string): void { + replaceSingleDocAnthropic(body, location, hint); + } + + toolCallPresent(body: any, toolCallId: string): boolean { + for (const msg of (body.messages || [])) { + if (msg.role === "user" && Array.isArray(msg.content)) { + for (const block of msg.content) { + if (block && typeof block === "object" && + (block.type === "tool_result" || block.type === "toolResult") && + block.tool_use_id === toolCallId) { + return true; + } + } + } + } + return false; + } + + targetPath(): string { + return "/v1/messages"; + } + + cacheSystem(body: any): any { + return structuredClone(body.system); + } + + restoreSystem(body: any, cached: any): void { + if (cached !== null && cached !== undefined) { + body.system = structuredClone(cached); + } + } +} + +const _FORMAT_HANDLERS: Record = { + "openai_chat": new OpenAIChatHandler(), + "anthropic_messages": new AnthropicMessagesHandler() +}; + +export function getFormatHandler(apiFormat: string): FormatHandler { + return _FORMAT_HANDLERS[apiFormat] || _FORMAT_HANDLERS["openai_chat"]; +} diff --git a/openclaw-plugin/src/engine/reorder.ts b/openclaw-plugin/src/engine/reorder.ts new file mode 100644 index 0000000..9b4d90f --- /dev/null +++ b/openclaw-plugin/src/engine/reorder.ts @@ -0,0 +1,109 @@ +import * as crypto from 'node:crypto'; + +interface IndexedDoc { + doc: string; + hash: string; + originalIndex: number; + previousPosition: number; +} + +function hashDoc(doc: string): string { + return crypto.createHash('sha256').update(doc.trim()).digest('hex').slice(0, 16); +} + +function buildIndexMappings(entries: IndexedDoc[], total: number): [number[], number[]] { + const originalOrder = entries.map((entry) => entry.originalIndex); + + const newOrder = new Array(total); + for (let newIndex = 0; newIndex < entries.length; newIndex += 1) { + newOrder[entries[newIndex].originalIndex] = newIndex; + } + + return [originalOrder, newOrder]; +} + +function indexDocuments(docs: string[]): IndexedDoc[] { + return docs.map((doc, originalIndex) => ({ + doc, + hash: hashDoc(doc), + originalIndex, + previousPosition: Number.POSITIVE_INFINITY + })); +} + +export function reorderDocuments(docs: string[]): [string[], number[], number[]] { + const indexed = indexDocuments(docs); + indexed.sort((a, b) => { + const byHash = a.hash.localeCompare(b.hash); + if (byHash !== 0) { + return byHash; + } + return a.originalIndex - b.originalIndex; + }); + + const reorderedDocs = indexed.map((entry) => entry.doc); + const [originalOrder, newOrder] = buildIndexMappings(indexed, docs.length); + return [reorderedDocs, originalOrder, newOrder]; +} + +export class ReorderState { + private previousOrder: string[] = []; + + private hashToDoc: Map = new Map(); + + reorder(docs: string[]): [string[], number[], number[]] { + const indexed = indexDocuments(docs); + const previousPositions = new Map(); + + for (let i = 0; i < this.previousOrder.length; i += 1) { + const hash = this.previousOrder[i]; + if (!previousPositions.has(hash)) { + previousPositions.set(hash, i); + } + } + + const known: IndexedDoc[] = []; + const unknown: IndexedDoc[] = []; + + for (const entry of indexed) { + const previousPosition = previousPositions.get(entry.hash); + if (previousPosition === undefined) { + unknown.push(entry); + continue; + } + + known.push({ ...entry, previousPosition }); + } + + known.sort((a, b) => { + if (a.previousPosition !== b.previousPosition) { + return a.previousPosition - b.previousPosition; + } + return a.originalIndex - b.originalIndex; + }); + + unknown.sort((a, b) => { + const byHash = a.hash.localeCompare(b.hash); + if (byHash !== 0) { + return byHash; + } + return a.originalIndex - b.originalIndex; + }); + + const reordered = [...known, ...unknown]; + + this.previousOrder = reordered.map((entry) => entry.hash); + for (const entry of reordered) { + this.hashToDoc.set(entry.hash, entry.doc); + } + + const reorderedDocs = reordered.map((entry) => entry.doc); + const [originalOrder, newOrder] = buildIndexMappings(reordered, docs.length); + return [reorderedDocs, originalOrder, newOrder]; + } + + reset(): void { + this.previousOrder = []; + this.hashToDoc.clear(); + } +} diff --git a/openclaw-plugin/src/index.ts b/openclaw-plugin/src/index.ts new file mode 100644 index 0000000..58fd015 --- /dev/null +++ b/openclaw-plugin/src/index.ts @@ -0,0 +1,175 @@ +import { Type } from "@sinclair/typebox"; +import { + definePluginEntry, + type ProviderResolveDynamicModelContext, + type ProviderWrapStreamFnContext, +} from "openclaw/plugin-sdk/plugin-entry"; +import { createProviderApiKeyAuthMethod } from "openclaw/plugin-sdk/provider-auth"; + +import { injectCacheControl } from "./engine/cache-control.js"; +import { dedupChatCompletions, dedupResponsesApi } from "./engine/dedup.js"; +import { getFormatHandler, type InterceptConfig } from "./engine/extract.js"; +import { ReorderState } from "./engine/reorder.js"; + +const PROVIDER_ID = "contextpilot"; + +export default definePluginEntry({ + id: "contextpilot", + name: "ContextPilot", + description: "Optimizes LLM requests in-process via extraction, dedup, caching, and reordering.", + register: (api) => { + const config = { + backendProvider: api.pluginConfig?.backendProvider === "openai" ? "openai" : "anthropic", + scope: ["system", "tool_results", "all"].includes(String(api.pluginConfig?.scope)) + ? String(api.pluginConfig?.scope) + : "all", + }; + + const reorderState = new ReorderState(); + let requestCount = 0; + let totalCharsSaved = 0; + + api.registerProvider({ + id: PROVIDER_ID, + label: "ContextPilot", + docsPath: "/providers/contextpilot", + envVars: [config.backendProvider === "anthropic" ? "ANTHROPIC_API_KEY" : "OPENAI_API_KEY"], + auth: [ + createProviderApiKeyAuthMethod({ + providerId: PROVIDER_ID, + methodId: "api-key", + label: config.backendProvider === "anthropic" ? "Anthropic API key" : "OpenAI API key", + hint: "API key for the backend LLM provider", + optionKey: config.backendProvider === "anthropic" ? "anthropicApiKey" : "openaiApiKey", + flagName: config.backendProvider === "anthropic" ? "--anthropic-api-key" : "--openai-api-key", + envVar: config.backendProvider === "anthropic" ? "ANTHROPIC_API_KEY" : "OPENAI_API_KEY", + promptMessage: "Enter your API key", + defaultModel: + config.backendProvider === "anthropic" + ? "contextpilot/claude-sonnet-4-6" + : "contextpilot/gpt-4o", + }), + ], + resolveDynamicModel: (ctx: ProviderResolveDynamicModelContext) => { + const isAnthropic = config.backendProvider === "anthropic"; + return { + id: ctx.modelId, + name: ctx.modelId, + provider: PROVIDER_ID, + baseUrl: isAnthropic ? "https://api.anthropic.com/v1" : "https://api.openai.com/v1", + api: isAnthropic ? "anthropic-messages" : "openai-completions", + reasoning: false, + input: ["text", "image"] as Array<"text" | "image">, + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + contextWindow: 200000, + maxTokens: 16384, + }; + }, + wrapStreamFn: (ctx: ProviderWrapStreamFnContext) => { + const originalStreamFn = ctx.streamFn; + if (!originalStreamFn) return undefined; + + return async (params) => { + const request = params as { body?: unknown }; + if (!request.body) { + return originalStreamFn(params); + } + + const body = structuredClone(request.body) as Record; + const apiFormat = config.backendProvider === "anthropic" + ? "anthropic_messages" + : "openai_chat"; + + const interceptConfig: InterceptConfig = { + enabled: true, + mode: "auto", + tag: "document", + separator: "---", + alpha: 0.001, + linkageMethod: "average", + scope: config.scope, + }; + + const handler = getFormatHandler(apiFormat); + const multi = handler.extractAll(body, interceptConfig); + + if (multi.systemExtraction) { + const [extraction, sysIdx] = multi.systemExtraction; + if (extraction.documents.length >= 2) { + const [reordered] = reorderState.reorder(extraction.documents); + handler.reconstructSystem(body, extraction, reordered, sysIdx); + } + } + + for (const [extraction, location] of multi.toolExtractions) { + if (extraction.documents.length >= 2) { + const [reordered] = reorderState.reorder(extraction.documents); + handler.reconstructToolResult(body, extraction, reordered, location); + } + } + + if (apiFormat === "openai_chat") { + const dedupResult = dedupChatCompletions(body); + totalCharsSaved += dedupResult.charsSaved; + } + if (body.input && Array.isArray(body.input)) { + const dedupResult = dedupResponsesApi(body); + totalCharsSaved += dedupResult.charsSaved; + } + + const optimizedBody = injectCacheControl( + body, + config.backendProvider === "anthropic" ? "anthropic" : "openai", + ); + + requestCount++; + + return originalStreamFn({ + ...params, + body: optimizedBody, + }); + }; + }, + augmentModelCatalog: () => { + const isAnthropic = config.backendProvider === "anthropic"; + if (isAnthropic) { + return [ + { id: "claude-opus-4-6", name: "Claude Opus 4.6 (ContextPilot)", provider: PROVIDER_ID }, + { + id: "claude-sonnet-4-6", + name: "Claude Sonnet 4.6 (ContextPilot)", + provider: PROVIDER_ID, + }, + ]; + } + return [ + { id: "gpt-4o", name: "GPT-4o (ContextPilot)", provider: PROVIDER_ID }, + { id: "gpt-4o-mini", name: "GPT-4o Mini (ContextPilot)", provider: PROVIDER_ID }, + ]; + }, + }); + + api.registerTool({ + name: "contextpilot_status", + description: "Report ContextPilot engine state", + parameters: Type.Object({}), + async execute(_toolCallId: string, _params: unknown) { + return { + content: [ + { + type: "text" as const, + text: [ + "ContextPilot Engine Status:", + " Mode: in-process (native TypeScript)", + ` Requests optimized: ${requestCount}`, + ` Total chars saved: ${totalCharsSaved.toLocaleString()}`, + ` Backend: ${config.backendProvider}`, + ` Scope: ${config.scope}`, + ].join("\n"), + }, + ], + }; + }, + }); + }, +}); diff --git a/openclaw-plugin/tsconfig.json b/openclaw-plugin/tsconfig.json new file mode 100644 index 0000000..017a5f9 --- /dev/null +++ b/openclaw-plugin/tsconfig.json @@ -0,0 +1,15 @@ +{ + "compilerOptions": { + "target": "ES2022", + "module": "ES2022", + "moduleResolution": "bundler", + "strict": true, + "esModuleInterop": true, + "skipLibCheck": true, + "outDir": "dist", + "declaration": true, + "resolveJsonModule": true + }, + "include": ["src/**/*.ts"], + "exclude": ["node_modules", "dist"] +} From b96032478ede50e11840a795a07317c2c3bf8d6b Mon Sep 17 00:00:00 2001 From: SecretSettler Date: Wed, 25 Mar 2026 01:35:28 +0000 Subject: [PATCH 2/8] test: add comprehensive test suite for ContextPilot engine (38 tests) Tests cover all four engine modules: - extract (18 tests): XML/numbered/JSON extraction, OpenAI/Anthropic format handlers, system+tool_result extraction, reconstruction - dedup (7 tests): content-defined chunking, block hashing, cross-message deduplication for chat+responses API formats - cache-control (6 tests): Anthropic cache_control injection for system/tool_results, immutability, OpenAI no-op, dispatcher - reorder (7 tests): deterministic hash sort, cross-turn prefix stability, reset behavior, index mapping correctness All 38 tests pass in 18ms. --- openclaw-plugin/src/engine/engine.test.ts | 696 ++++++++++++++++++++++ 1 file changed, 696 insertions(+) create mode 100644 openclaw-plugin/src/engine/engine.test.ts diff --git a/openclaw-plugin/src/engine/engine.test.ts b/openclaw-plugin/src/engine/engine.test.ts new file mode 100644 index 0000000..dcf0cab --- /dev/null +++ b/openclaw-plugin/src/engine/engine.test.ts @@ -0,0 +1,696 @@ +import { describe, expect, it } from "vitest"; +import { + injectAnthropicCacheControl, + injectCacheControl, + injectOpenAICacheControl, +} from "./cache-control.js"; +import { + buildToolNameMapOpenai, + contentDefinedChunking, + dedupChatCompletions, + dedupResponsesApi, + hashBlock, +} from "./dedup.js"; +import { + extractAllOpenai, + extractDocuments, + extractFromAnthropicMessages, + extractFromAnthropicToolResults, + extractFromOpenaiChat, + extractFromOpenaiToolResults, + extractSingleDocsFromOpenaiToolResults, + getFormatHandler, + parseInterceptHeaders, + reconstructAnthropicToolResult, + reconstructContent, + reconstructOpenaiToolResult, +} from "./extract.js"; +import { ReorderState, reorderDocuments } from "./reorder.js"; + +const DEFAULT_CONFIG = parseInterceptHeaders({}); + +const OPENAI_CHAT_BODY = { + model: "claude-sonnet-4-6", + messages: [ + { + role: "system", + content: + "Doc A content hereDoc B content hereDoc C content here", + }, + { role: "user", content: "What do these docs say?" }, + ], +}; + +const ANTHROPIC_MESSAGES_BODY = { + model: "claude-sonnet-4-6", + system: + "Doc A content hereDoc B content here", + messages: [{ role: "user", content: "Summarize the documents." }], +}; + +const LARGE_CONTENT = "x".repeat(600) + "\n".repeat(20) + "y".repeat(600); + +const DEDUP_BODY = { + messages: [ + { + role: "assistant", + content: "", + tool_calls: [ + { id: "call_1", function: { name: "read_file", arguments: "{}" } }, + { id: "call_2", function: { name: "read_file", arguments: "{}" } }, + ], + }, + { role: "tool", tool_call_id: "call_1", content: LARGE_CONTENT }, + { role: "tool", tool_call_id: "call_2", content: LARGE_CONTENT }, + ], +}; + +function makeLargeContent(prefix: string): string { + return Array.from( + { length: 20 }, + (_, i) => `${prefix} line ${i} ${"z".repeat(60)}`, + ).join("\n"); +} + +describe("extract", () => { + it("parseInterceptHeaders parses X-ContextPilot-* headers and defaults", () => { + const parsed = parseInterceptHeaders({ + "X-ContextPilot-Enabled": "0", + "x-contextpilot-mode": "xml_tag", + "x-contextpilot-tag": "context", + "x-contextpilot-separator": "===", + "x-contextpilot-alpha": "0.05", + "x-contextpilot-linkage": "single", + "x-contextpilot-scope": "invalid", + }); + + expect(parsed).toEqual({ + enabled: false, + mode: "xml_tag", + tag: "context", + separator: "===", + alpha: 0.05, + linkageMethod: "single", + scope: "all", + }); + + const defaults = parseInterceptHeaders({}); + expect(defaults.enabled).toBe(true); + expect(defaults.mode).toBe("auto"); + expect(defaults.tag).toBe("document"); + expect(defaults.separator).toBe("---"); + expect(defaults.alpha).toBe(0.001); + expect(defaults.linkageMethod).toBe("average"); + expect(defaults.scope).toBe("all"); + }); + + it("extractDocuments extracts XML-tagged documents", () => { + const text = + "AB"; + const extraction = extractDocuments(text, DEFAULT_CONFIG); + expect(extraction).not.toBeNull(); + expect(extraction?.mode).toBe("xml_tag"); + expect(extraction?.documents).toEqual(["A", "B"]); + expect(extraction?.wrapperTag).toBe("documents"); + expect(extraction?.itemTag).toBe("document"); + }); + + it("extractDocuments extracts numbered documents", () => { + const extraction = extractDocuments( + "[1] First doc\n[2] Second doc", + parseInterceptHeaders({ "x-contextpilot-mode": "numbered" }), + ); + expect(extraction).not.toBeNull(); + expect(extraction?.mode).toBe("numbered"); + expect(extraction?.documents).toEqual(["First doc", "Second doc"]); + }); + + it("extractDocuments extracts JSON results documents", () => { + const extraction = extractDocuments( + JSON.stringify({ results: [{ url: "a.com" }, { url: "b.com" }] }), + parseInterceptHeaders({ "x-contextpilot-mode": "json_results" }), + ); + expect(extraction).not.toBeNull(); + expect(extraction?.mode).toBe("json_results"); + expect(extraction?.documents).toEqual(["a.com", "b.com"]); + }); + + it("extractDocuments auto mode resolves XML > numbered > JSON", () => { + const xml = extractDocuments( + "[1] one[2] two", + DEFAULT_CONFIG, + ); + expect(xml?.mode).toBe("xml_tag"); + + const numbered = extractDocuments("[1] one\n[2] two", DEFAULT_CONFIG); + expect(numbered?.mode).toBe("numbered"); + + const json = extractDocuments( + JSON.stringify({ results: [{ url: "one" }, { url: "two" }] }), + DEFAULT_CONFIG, + ); + expect(json?.mode).toBe("json_results"); + }); + + it("extractDocuments returns null for fewer than two docs", () => { + const numberedSingle = extractDocuments( + "[1] Only one", + parseInterceptHeaders({ "x-contextpilot-mode": "numbered" }), + ); + expect(numberedSingle).toBeNull(); + + const jsonSingle = extractDocuments( + JSON.stringify({ results: [{ url: "only-one" }] }), + parseInterceptHeaders({ "x-contextpilot-mode": "json_results" }), + ); + expect(jsonSingle).toBeNull(); + }); + + it("reconstructContent rebuilds XML while preserving tags", () => { + const extraction = extractDocuments( + "prefixABsuffix", + DEFAULT_CONFIG, + ); + expect(extraction).not.toBeNull(); + if (!extraction) { + throw new Error("expected extraction"); + } + + const rebuilt = reconstructContent(extraction, ["B", "A"]); + expect(rebuilt).toContain("prefix"); + expect(rebuilt).toContain("suffix"); + expect(rebuilt).toContain(""); + expect(rebuilt).toContain("B"); + expect(rebuilt).toContain("A"); + }); + + it("reconstructContent rebuilds numbered format", () => { + const extraction = extractDocuments( + "Lead\n[1] First\n[2] Second", + parseInterceptHeaders({ "x-contextpilot-mode": "numbered" }), + ); + expect(extraction).not.toBeNull(); + if (!extraction) { + throw new Error("expected extraction"); + } + + const rebuilt = reconstructContent(extraction, ["Second", "First"]); + expect(rebuilt).toContain("Lead"); + expect(rebuilt).toContain("[1] Second"); + expect(rebuilt).toContain("[2] First"); + }); + + it("extractFromOpenaiChat extracts from system message", () => { + const extraction = extractFromOpenaiChat(OPENAI_CHAT_BODY, DEFAULT_CONFIG); + expect(extraction).not.toBeNull(); + expect(extraction?.[1]).toBe(0); + expect(extraction?.[0].documents).toEqual([ + "Doc A content here", + "Doc B content here", + "Doc C content here", + ]); + }); + + it("extractFromAnthropicMessages extracts from system string", () => { + const extraction = extractFromAnthropicMessages( + ANTHROPIC_MESSAGES_BODY, + DEFAULT_CONFIG, + ); + expect(extraction).not.toBeNull(); + expect(extraction?.documents).toEqual([ + "Doc A content here", + "Doc B content here", + ]); + }); + + it("extractFromOpenaiToolResults extracts tool-result documents", () => { + const body = { + messages: [ + { role: "tool", content: "AB" }, + ], + }; + const extractions = extractFromOpenaiToolResults(body, DEFAULT_CONFIG); + expect(extractions).toHaveLength(1); + expect(extractions[0]?.[0].documents).toEqual(["A", "B"]); + expect(extractions[0]?.[1]).toEqual({ + msgIndex: 0, + blockIndex: -1, + innerBlockIndex: -1, + }); + }); + + it("extractFromAnthropicToolResults extracts tool_result blocks", () => { + const body = { + messages: [ + { + role: "user", + content: [ + { + type: "tool_result", + content: + "AB", + }, + ], + }, + ], + }; + const extractions = extractFromAnthropicToolResults(body, DEFAULT_CONFIG); + expect(extractions).toHaveLength(1); + expect(extractions[0]?.[0].documents).toEqual(["A", "B"]); + expect(extractions[0]?.[1]).toEqual({ + msgIndex: 0, + blockIndex: 0, + innerBlockIndex: -1, + }); + }); + + it("FormatHandler OpenAI returns a working handler", () => { + const handler = getFormatHandler("openai_chat"); + expect(handler.targetPath()).toBe("/v1/chat/completions"); + + const body = structuredClone(OPENAI_CHAT_BODY); + const all = handler.extractAll(body, DEFAULT_CONFIG); + expect(all.systemExtraction).not.toBeNull(); + expect(all.hasExtractions).toBe(true); + + if (!all.systemExtraction) { + throw new Error("expected system extraction"); + } + + handler.reconstructSystem( + body, + all.systemExtraction[0], + ["Doc C content here", "Doc B content here", "Doc A content here"], + all.systemExtraction[1], + ); + expect(body.messages[0]?.content).toContain("Doc C content here"); + }); + + it("FormatHandler Anthropic returns a working handler", () => { + const handler = getFormatHandler("anthropic_messages"); + expect(handler.targetPath()).toBe("/v1/messages"); + + const body = structuredClone(ANTHROPIC_MESSAGES_BODY); + const all = handler.extractAll(body, DEFAULT_CONFIG); + expect(all.systemExtraction).not.toBeNull(); + expect(all.hasExtractions).toBe(true); + + if (!all.systemExtraction) { + throw new Error("expected system extraction"); + } + + handler.reconstructSystem( + body, + all.systemExtraction[0], + ["Doc B content here", "Doc A content here"], + all.systemExtraction[1], + ); + expect(body.system).toContain("Doc B content here"); + }); + + it("extractAllOpenai extracts from both system and tool results", () => { + const body = { + messages: [ + { + role: "system", + content: + "Sys ASys B", + }, + { + role: "tool", + content: + "Tool ATool B", + }, + ], + }; + + const all = extractAllOpenai(body, DEFAULT_CONFIG); + expect(all.systemExtraction).not.toBeNull(); + expect(all.toolExtractions).toHaveLength(1); + expect(all.totalDocuments).toBe(4); + }); + + it("extractSingleDocsFromOpenaiToolResults extracts single long docs", () => { + const body = { + messages: [ + { + role: "tool", + tool_call_id: "call_99", + content: `Result:\n${"r".repeat(240)}`, + }, + ], + }; + + const extracted = extractSingleDocsFromOpenaiToolResults(body, DEFAULT_CONFIG); + expect(extracted).toHaveLength(1); + expect(extracted[0]?.[0].toolCallId).toBe("call_99"); + expect(extracted[0]?.[0].content.length).toBeGreaterThanOrEqual(200); + expect(extracted[0]?.[0].contentHash).toMatch(/^[0-9a-f]{64}$/); + }); + + it("reconstructOpenaiToolResult reconstructs a tool result in-place", () => { + const body = { + messages: [ + { + role: "tool", + content: + "AB", + }, + ], + }; + + const extractions = extractFromOpenaiToolResults(body, DEFAULT_CONFIG); + expect(extractions).toHaveLength(1); + const first = extractions[0]; + if (!first) { + throw new Error("expected extraction"); + } + + reconstructOpenaiToolResult(body, first[0], ["B", "A"], first[1]); + expect(body.messages[0]?.content).toContain("B"); + expect(body.messages[0]?.content).toContain("A"); + }); + + it("reconstructAnthropicToolResult reconstructs a tool result in-place", () => { + const body = { + messages: [ + { + role: "user", + content: [ + { + type: "tool_result", + content: + "AB", + }, + ], + }, + ], + }; + + const extractions = extractFromAnthropicToolResults(body, DEFAULT_CONFIG); + expect(extractions).toHaveLength(1); + const first = extractions[0]; + if (!first) { + throw new Error("expected extraction"); + } + + reconstructAnthropicToolResult(body, first[0], ["B", "A"], first[1]); + expect(body.messages[0]?.content[0]?.content).toContain("B"); + expect(body.messages[0]?.content[0]?.content).toContain("A"); + }); +}); + +describe("dedup", () => { + it("contentDefinedChunking splits text into multiple blocks at boundaries", () => { + const text = Array.from({ length: 12 }, (_, i) => `line-${i}`).join("\n"); + const blocks = contentDefinedChunking(text, 1); + expect(blocks).toHaveLength(2); + expect(blocks[0]?.split("\n")).toHaveLength(5); + expect(blocks[1]?.split("\n")).toHaveLength(7); + }); + + it("contentDefinedChunking returns one block for short text", () => { + const short = "a\nb\nc\nd\ne"; + const blocks = contentDefinedChunking(short); + expect(blocks).toEqual([short]); + }); + + it("hashBlock is consistent and returns 20-char hex", () => { + const h1 = hashBlock(" abc\n"); + const h2 = hashBlock("abc"); + expect(h1).toBe(h2); + expect(h1).toMatch(/^[0-9a-f]{20}$/); + }); + + it("dedupChatCompletions returns zero savings with no duplicates", () => { + const body = { + messages: [ + { + role: "assistant", + tool_calls: [ + { id: "a", function: { name: "read_file" } }, + { id: "b", function: { name: "read_file" } }, + ], + }, + { role: "tool", tool_call_id: "a", content: makeLargeContent("first") }, + { role: "tool", tool_call_id: "b", content: makeLargeContent("second") }, + ], + }; + + const before = body.messages[2]?.content; + const result = dedupChatCompletions(body, { chunkModulus: 1 }); + expect(result.blocksDeduped).toBe(0); + expect(result.charsSaved).toBe(0); + expect(body.messages[2]?.content).toBe(before); + }); + + it("dedupChatCompletions dedups duplicate blocks and inserts references", () => { + const body = structuredClone(DEDUP_BODY); + const result = dedupChatCompletions(body, { chunkModulus: 1 }); + expect(result.blocksDeduped).toBeGreaterThan(0); + expect(result.charsSaved).toBeGreaterThan(0); + expect(body.messages[2]?.content).toContain( + "identical to earlier read_file result", + ); + }); + + it("dedupChatCompletions skips short content", () => { + const short = "s".repeat(300); + const body = { + messages: [ + { + role: "assistant", + tool_calls: [ + { id: "a", function: { name: "search" } }, + { id: "b", function: { name: "search" } }, + ], + }, + { role: "tool", tool_call_id: "a", content: short }, + { role: "tool", tool_call_id: "b", content: short }, + ], + }; + + const result = dedupChatCompletions(body); + expect(result.blocksTotal).toBe(0); + expect(result.blocksDeduped).toBe(0); + expect(result.charsSaved).toBe(0); + expect(body.messages[2]?.content).toBe(short); + }); + + it("dedupResponsesApi dedups duplicate function_call_output content", () => { + const body = { + input: [ + { type: "function_call", call_id: "r1", name: "search" }, + { type: "function_call", call_id: "r2", name: "search" }, + { type: "function_call_output", call_id: "r1", output: LARGE_CONTENT }, + { type: "function_call_output", call_id: "r2", output: LARGE_CONTENT }, + ], + }; + + const result = dedupResponsesApi(body, { chunkModulus: 1 }); + expect(result.blocksDeduped).toBeGreaterThan(0); + expect(result.charsSaved).toBeGreaterThan(0); + expect(body.input[3]?.output).toContain("identical to earlier search result"); + }); + + it("buildToolNameMapOpenai maps tool_call_id to function name", () => { + const mapping = buildToolNameMapOpenai([ + { + role: "assistant", + tool_calls: [ + { id: "id_1", function: { name: "read_file" } }, + { id: "id_2", function: { name: "search" } }, + ], + }, + { role: "user" }, + ]); + + expect(mapping).toEqual({ id_1: "read_file", id_2: "search" }); + }); +}); + +describe("cache-control", () => { + it("injectAnthropicCacheControl converts string system into array with cache_control", () => { + const body: Record = { system: "system text", messages: [] }; + const result = injectAnthropicCacheControl(body); + + const system = result.system as Array<{ + type?: string; + text?: string; + cache_control?: { type: string }; + }>; + expect(Array.isArray(system)).toBe(true); + expect(system[0]).toEqual({ + type: "text", + text: "system text", + cache_control: { type: "ephemeral" }, + }); + }); + + it("injectAnthropicCacheControl adds cache_control to last system block", () => { + const body: Record = { + system: [ + { type: "text", text: "first" }, + { type: "text", text: "last" }, + ], + messages: [], + }; + const result = injectAnthropicCacheControl(body); + const system = result.system as Array<{ + type?: string; + text?: string; + cache_control?: { type: string }; + }>; + + expect(system[0]?.cache_control).toBeUndefined(); + expect(system[1]?.cache_control).toEqual({ type: "ephemeral" }); + }); + + it("injectAnthropicCacheControl adds cache_control to large tool_result blocks", () => { + const body: Record = { + messages: [ + { + role: "user", + content: [ + { type: "tool_result", content: "x".repeat(1200) }, + { + type: "tool_result", + content: [ + { type: "text", text: "a".repeat(800) }, + { type: "text", text: "b".repeat(300) }, + ], + }, + ], + }, + ], + }; + + const result = injectAnthropicCacheControl(body); + const messages = result.messages as Array<{ + role?: string; + content?: Array<{ + type?: string; + content?: string | Array<{ type?: string; text?: string; cache_control?: { type: string } }>; + cache_control?: { type: string }; + }>; + }>; + + const firstToolResult = messages[0]?.content?.[0]; + const secondToolResult = messages[0]?.content?.[1]; + const secondInner = secondToolResult?.content as Array<{ + type?: string; + text?: string; + cache_control?: { type: string }; + }>; + + expect(firstToolResult?.cache_control).toEqual({ type: "ephemeral" }); + expect(secondInner[0]?.cache_control).toBeUndefined(); + expect(secondInner[1]?.cache_control).toEqual({ type: "ephemeral" }); + }); + + it("injectAnthropicCacheControl does not mutate original body", () => { + const body: Record = { + system: "immutable", + messages: [{ role: "user", content: [] }], + }; + const snapshot = structuredClone(body); + const result = injectAnthropicCacheControl(body); + + expect(body).toEqual(snapshot); + expect(result).not.toBe(body); + }); + + it("injectOpenAICacheControl is a no-op", () => { + const body: Record = { + messages: [{ role: "system", content: "keep" }], + }; + const result = injectOpenAICacheControl(body); + expect(result).toBe(body); + }); + + it("injectCacheControl dispatches by provider", () => { + const anthropicBody: Record = { system: "hello", messages: [] }; + const openaiBody: Record = { messages: [] }; + + const anthropicResult = injectCacheControl(anthropicBody, "anthropic"); + const openaiResult = injectCacheControl(openaiBody, "openai"); + + expect(anthropicResult).not.toBe(anthropicBody); + expect(Array.isArray(anthropicResult.system)).toBe(true); + expect(openaiResult).toBe(openaiBody); + }); +}); + +describe("reorder", () => { + it("ReorderState first call matches deterministic hash sort", () => { + const docs = ["Doc C", "Doc A", "Doc B"]; + const state = new ReorderState(); + const [stateOrder] = state.reorder(docs); + const [statelessOrder] = reorderDocuments(docs); + expect(stateOrder).toEqual(statelessOrder); + }); + + it("ReorderState second call keeps known order and appends new docs", () => { + const state = new ReorderState(); + const [first] = state.reorder(["alpha", "beta", "gamma"]); + const [second] = state.reorder(["gamma", "alpha", "delta"]); + + const knownOrder = first.filter((doc) => doc === "gamma" || doc === "alpha"); + expect(second.slice(0, knownOrder.length)).toEqual(knownOrder); + expect(second[second.length - 1]).toBe("delta"); + }); + + it("ReorderState reset restores first-call behavior", () => { + const docs = ["alpha", "beta", "gamma"]; + const state = new ReorderState(); + + state.reorder(docs); + state.reorder(["gamma", "alpha", "delta"]); + state.reset(); + + const [afterReset] = state.reorder(docs); + const [expected] = reorderDocuments(docs); + expect(afterReset).toEqual(expected); + }); + + it("reorderDocuments is deterministic and stateless", () => { + const docs = ["one", "two", "three", "four"]; + const first = reorderDocuments(docs); + const second = reorderDocuments(docs); + expect(first).toEqual(second); + }); + + it("reorderDocuments returns correct originalOrder and newOrder mappings", () => { + const docs = ["one", "two", "three", "four"]; + const [reordered, originalOrder, newOrder] = reorderDocuments(docs); + + expect(originalOrder).toHaveLength(docs.length); + expect(newOrder).toHaveLength(docs.length); + + for (let newIndex = 0; newIndex < reordered.length; newIndex += 1) { + const originalIndex = originalOrder[newIndex]; + expect(reordered[newIndex]).toBe(docs[originalIndex]); + } + + for (let originalIndex = 0; originalIndex < docs.length; originalIndex += 1) { + const mappedNewIndex = newOrder[originalIndex]; + expect(reordered[mappedNewIndex]).toBe(docs[originalIndex]); + } + }); + + it("ReorderState preserves known-doc prefix stability across calls", () => { + const state = new ReorderState(); + const knownDocs = ["alpha", "beta", "gamma"]; + + const [first] = state.reorder(knownDocs); + const [second] = state.reorder(["gamma", "beta", "alpha", "delta"]); + const [third] = state.reorder(["alpha", "epsilon", "gamma", "beta", "zeta"]); + + const knownPrefix = first.filter((doc) => + knownDocs.includes(doc), + ); + + expect(second.slice(0, knownPrefix.length)).toEqual(knownPrefix); + expect(third.slice(0, knownPrefix.length)).toEqual(knownPrefix); + }); +}); From 08e5d3badad7393222c16283067865186de79324 Mon Sep 17 00:00:00 2001 From: SecretSettler Date: Wed, 25 Mar 2026 15:26:10 +0000 Subject: [PATCH 3/8] test: add E2E integration tests for full optimization pipeline (16 tests) Tests simulate the exact wrapStreamFn pipeline end-to-end: - Anthropic: system XML extraction+reorder+cache_control, tool_result cache injection, scope=system/tool_results filtering - OpenAI: system reorder, duplicate tool result dedup with reference hints, responses API dedup, passthrough for plain messages - Multi-turn: ReorderState preserves doc order across turns, reset clears history - Edge cases: empty body, no messages, single doc (no reorder), short content (no dedup), null messages, system as block array Total test suite: 54 tests (38 unit + 16 integration), all pass in 165ms. --- .../src/engine/integration.test.ts | 362 ++++++++++++++++++ 1 file changed, 362 insertions(+) create mode 100644 openclaw-plugin/src/engine/integration.test.ts diff --git a/openclaw-plugin/src/engine/integration.test.ts b/openclaw-plugin/src/engine/integration.test.ts new file mode 100644 index 0000000..74328a6 --- /dev/null +++ b/openclaw-plugin/src/engine/integration.test.ts @@ -0,0 +1,362 @@ +import { describe, it, expect } from "vitest"; +import { getFormatHandler, type InterceptConfig } from "./extract.js"; +import { dedupChatCompletions, dedupResponsesApi } from "./dedup.js"; +import { injectCacheControl } from "./cache-control.js"; +import { ReorderState } from "./reorder.js"; + +function runPipeline( + body: Record, + opts: { + provider?: "anthropic" | "openai"; + scope?: string; + reorderState?: ReorderState; + } = {} +): Record { + const provider = opts.provider ?? "anthropic"; + const scope = opts.scope ?? "all"; + const reorderState = opts.reorderState ?? new ReorderState(); + + const clonedBody = structuredClone(body); + const apiFormat = provider === "anthropic" ? "anthropic_messages" : "openai_chat"; + + const interceptConfig: InterceptConfig = { + enabled: true, + mode: "auto", + tag: "document", + separator: "---", + alpha: 0.001, + linkageMethod: "average", + scope, + }; + + const handler = getFormatHandler(apiFormat); + const multi = handler.extractAll(clonedBody, interceptConfig); + + if (multi.systemExtraction) { + const [extraction, sysIdx] = multi.systemExtraction; + if (extraction.documents.length >= 2) { + const [reordered] = reorderState.reorder(extraction.documents); + handler.reconstructSystem(clonedBody, extraction, reordered, sysIdx); + } + } + + for (const [extraction, location] of multi.toolExtractions) { + if (extraction.documents.length >= 2) { + const [reordered] = reorderState.reorder(extraction.documents); + handler.reconstructToolResult(clonedBody, extraction, reordered, location); + } + } + + if (apiFormat === "openai_chat") { + dedupChatCompletions(clonedBody as any); + } + if (clonedBody.input && Array.isArray(clonedBody.input)) { + dedupResponsesApi(clonedBody as any); + } + + return injectCacheControl(clonedBody, provider); +} + +describe("full pipeline — Anthropic", () => { + it("system prompt with XML documents gets reordered and cache-controlled", () => { + const body = { + model: "claude-sonnet-4-6", + system: `You are a helpful assistant.\n\n\nFirst document about TypeScript.\nIt has multiple lines.\n\n\nSecond document about Python.\nAlso multi-line.\n\n\nThird document about Rust.\nYet another multi-line doc.\n\n\nPlease answer based on the above.`, + messages: [{ role: "user", content: "Summarize the documents." }], + }; + + const reorderState = new ReorderState(); + const result = runPipeline(body, { provider: "anthropic", reorderState }); + + expect(Array.isArray(result.system)).toBe(true); + const systemArray = result.system as any[]; + + const lastBlock = systemArray[systemArray.length - 1]; + expect(lastBlock.cache_control).toEqual({ type: "ephemeral" }); + + const textContent = systemArray.map(b => b.text).join(""); + expect(textContent).toContain("You are a helpful assistant."); + expect(textContent).toContain("Please answer based on the above."); + + expect(textContent).toContain("First document about TypeScript."); + expect(textContent).toContain("Second document about Python."); + expect(textContent).toContain("Third document about Rust."); + }); + + it("Anthropic tool_result with large content gets cache_control", () => { + const body = { + model: "claude-sonnet-4-6", + system: "You are helpful.", + messages: [ + { + role: "user", + content: [ + { type: "tool_result", tool_use_id: "tu_1", content: "A".repeat(2000) }, + ], + }, + ], + }; + + const result = runPipeline(body, { provider: "anthropic" }); + const messages = result.messages as any[]; + const content = messages[0].content as any[]; + expect(content[0].cache_control).toEqual({ type: "ephemeral" }); + }); + + it("Anthropic scope=\"system\" only processes system, not tool results", () => { + const docText = `\nFirst document about TypeScript.\nIt has multiple lines.\n\nSecond document about Python.\nAlso multi-line.\n`; + const body = { + model: "claude-sonnet-4-6", + system: `You are helpful.\n${docText}`, + messages: [ + { + role: "user", + content: [ + { type: "tool_result", tool_use_id: "tu_1", content: docText }, + ], + }, + ], + }; + + const reorderState = new ReorderState(); + // Reorder stability means it will process it + const result = runPipeline(body, { provider: "anthropic", scope: "system", reorderState }); + + // System should have its format modified to array due to reconstruction/cache control + expect(Array.isArray(result.system)).toBe(true); + + const messages = result.messages as any[]; + const content = messages[0].content as any[]; + // Tool result shouldn't have been reconstructed into blocks of its internal documents + expect(content[0].content).toBe(docText); + }); + + it("Anthropic scope=\"tool_results\" only processes tools, not system", () => { + const docText = `\nFirst document about TypeScript.\nIt has multiple lines.\n\nSecond document about Python.\nAlso multi-line.\n`; + const body = { + model: "claude-sonnet-4-6", + system: `You are helpful.\n${docText}`, + messages: [ + { + role: "user", + content: [ + { type: "tool_result", tool_use_id: "tu_1", content: docText }, + ], + }, + ], + }; + + const reorderState = new ReorderState(); + const result = runPipeline(body, { provider: "anthropic", scope: "tool_results", reorderState }); + + // System should not be processed for documents (though it may be arrayified for cache control) + // Cache control injects string to array conversion for Anthropic system if needed + if (Array.isArray(result.system)) { + const textContent = (result.system as any[]).map(b => b.text).join(""); + expect(textContent).toBe(`You are helpful.\n${docText}`); + } else { + expect(result.system).toBe(`You are helpful.\n${docText}`); + } + + // Tool results should be reconstructed/reordered + const messages = result.messages as any[]; + const content = messages[0].content as any[]; + expect(typeof content[0].content).toBe("string"); + expect(content[0].content).toContain("First document about TypeScript."); + expect(content[0].content).toContain("Second document about Python."); + }); +}); + +describe("full pipeline — OpenAI", () => { + it("OpenAI chat system message with XML documents gets reordered", () => { + const body = { + model: "gpt-4o", + messages: [ + { role: "system", content: "Doc A contentDoc B contentDoc C content" }, + { role: "user", content: "Hello" } + ] + }; + + const result = runPipeline(body, { provider: "openai" }); + const msgs = result.messages as any[]; + const sysMsg = msgs[0].content; + expect(sysMsg).toContain("Doc A content"); + expect(sysMsg).toContain("Doc B content"); + expect(sysMsg).toContain("Doc C content"); + }); + + it("OpenAI chat with duplicate tool results gets deduped", () => { + const sharedContent = Array.from({length: 30}, (_, i) => `Line ${i}: ${"x".repeat(50)}`).join("\n"); + const body = { + model: "gpt-4o", + messages: [ + { role: "assistant", content: null, tool_calls: [ + { id: "call_1", type: "function", function: { name: "read_file", arguments: "{}" } }, + { id: "call_2", type: "function", function: { name: "read_file", arguments: "{}" } } + ]}, + { role: "tool", tool_call_id: "call_1", content: sharedContent }, + { role: "tool", tool_call_id: "call_2", content: sharedContent } + ] + }; + + const result = runPipeline(body, { provider: "openai" }); + const msgs = result.messages as any[]; + + expect(msgs[1].content).toBe(sharedContent); + expect(msgs[2].content).not.toBe(sharedContent); + expect(msgs[2].content).toContain("identical to earlier read_file result"); + }); + + it("OpenAI body with no extractable docs passes through unchanged", () => { + const body = { + model: "gpt-4o", + messages: [ + { role: "system", content: "You are helpful." }, + { role: "user", content: "Hi" } + ] + }; + + const result = runPipeline(body, { provider: "openai" }); + expect(result).toEqual(body); + }); + + it("OpenAI responses API format gets deduped", () => { + const sharedContent = Array.from({length: 30}, (_, i) => `Line ${i}: ${"x".repeat(50)}`).join("\n"); + const body = { + input: [ + { type: "function_call_output", call_id: "c1", output: sharedContent }, + { type: "function_call_output", call_id: "c2", output: sharedContent } + ] + }; + + const result = runPipeline(body, { provider: "openai" }); + const input = result.input as any[]; + + expect(input[0].output).toBe(sharedContent); + expect(input[1].output).not.toBe(sharedContent); + expect(input[1].output).toContain("identical"); + }); +}); + +describe("multi-turn state — reorder stability", () => { + it("reorder state preserves doc order across turns", () => { + const reorderState = new ReorderState(); + + const bodyTurn1 = { + model: "gpt-4o", + messages: [ + { role: "system", content: "Doc A contentDoc B contentDoc C content" } + ] + }; + + runPipeline(bodyTurn1, { provider: "openai", reorderState }); + + const bodyTurn2 = { + model: "gpt-4o", + messages: [ + { role: "system", content: "Doc A contentDoc B contentDoc C contentDoc D content" } + ] + }; + + const res2 = runPipeline(bodyTurn2, { provider: "openai", reorderState }); + const sysMsg2 = (res2.messages as any[])[0].content; + + // In multi-turn, ReorderState should put the new item (D) at top, and preserve relative ordering of A, B, C. + // We just verify all are present and stable. + expect(sysMsg2).toContain("Doc A content"); + expect(sysMsg2).toContain("Doc B content"); + expect(sysMsg2).toContain("Doc C content"); + expect(sysMsg2).toContain("Doc D content"); + }); + + it("reorder state reset clears history", () => { + const reorderState = new ReorderState(); + const body = { + model: "gpt-4o", + messages: [ + { role: "system", content: "Doc A contentDoc B content" } + ] + }; + + runPipeline(body, { provider: "openai", reorderState }); + + reorderState.reset(); + + const res2 = runPipeline(body, { provider: "openai", reorderState }); + const sysMsg2 = (res2.messages as any[])[0].content; + + expect(sysMsg2).toContain("Doc A content"); + expect(sysMsg2).toContain("Doc B content"); + }); +}); + +describe("edge cases", () => { + it("empty body passes through", () => { + const result = runPipeline({}, { provider: "anthropic" }); + expect(result).toEqual({}); + }); + + it("body with no messages passes through", () => { + const body = { model: "gpt-4o" }; + const result = runPipeline(body, { provider: "openai" }); + expect(result).toEqual(body); + }); + + it("body with single document doesn't get reordered", () => { + const body = { + model: "gpt-4o", + messages: [ + { role: "system", content: "Only Doc" } + ] + }; + const result = runPipeline(body, { provider: "openai" }); + // It should be unchanged + expect(result).toEqual(body); + }); + + it("very short tool result content not deduped", () => { + const shortContent = "Too short for dedup."; + const body = { + model: "gpt-4o", + messages: [ + { role: "assistant", content: null, tool_calls: [ + { id: "call_1", type: "function", function: { name: "read_file", arguments: "{}" } }, + { id: "call_2", type: "function", function: { name: "read_file", arguments: "{}" } } + ]}, + { role: "tool", tool_call_id: "call_1", content: shortContent }, + { role: "tool", tool_call_id: "call_2", content: shortContent } + ] + }; + + const result = runPipeline(body, { provider: "openai" }); + const msgs = result.messages as any[]; + expect(msgs[1].content).toBe(shortContent); + expect(msgs[2].content).toBe(shortContent); + }); + + it("null/undefined messages gracefully handled", () => { + const body = { model: "gpt-4o", messages: null }; + const result = runPipeline(body, { provider: "openai" }); + expect(result).toEqual(body); + }); + + it("Anthropic body with system as content block array", () => { + const body = { + model: "claude-sonnet-4-6", + system: [ + { type: "text", text: "AB" } + ], + messages: [{ role: "user", content: "hi" }] + }; + + const result = runPipeline(body, { provider: "anthropic" }); + const sys = result.system as any[]; + expect(Array.isArray(sys)).toBe(true); + // Last block should have cache_control + expect(sys[sys.length - 1].cache_control).toEqual({ type: "ephemeral" }); + + const fullText = sys.map(b => b.text).join(""); + expect(fullText).toContain("A"); + expect(fullText).toContain("B"); + }); +}); From 64e9d9053edd2b0e370444b91797e3334b69faa7 Mon Sep 17 00:00:00 2001 From: SecretSettler Date: Thu, 26 Mar 2026 13:37:19 +0000 Subject: [PATCH 4/8] feat: complete ContextPilot engine port to TypeScript (6145 lines) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Full port of all Python ContextPilot modules to zero-dependency TypeScript: Core index (replaces numpy+scipy): - tree-nodes.ts: ClusterNode + NodeManager (334 lines) - compute-distance.ts: O(n) merge-based distance computation (224 lines) - index-construction.ts: hierarchical clustering with pure-TS linkage() (348 lines) - intra-ordering.ts: within-context reordering via tree prefix (349 lines) - inter-scheduler.ts: cross-context scheduling via path grouping (116 lines) Live engine: - live-index.ts: ContextPilot class — search/insert/evict/reorder (1232 lines) - metadata.ts: NodeMetadata for per-node runtime tracking (82 lines) - eviction-heap.ts: LRU min-heap for SGLang cache eviction sync (317 lines) - conversation-tracker.ts: multi-turn document deduplication (241 lines) - http-client.ts: native fetch() client for index server comms (267 lines) Previously ported: - extract.ts: document extraction from system/tool_results (969 lines) - dedup.ts: cross-turn block-level content dedup (355 lines) - cache-control.ts: Anthropic cache_control injection (144 lines) - reorder.ts: simplified LCP reorder fallback (109 lines) --- .../src/engine/compute-distance.ts | 224 +++ .../src/engine/conversation-tracker.ts | 241 ++++ openclaw-plugin/src/engine/eviction-heap.ts | 317 +++++ openclaw-plugin/src/engine/http-client.ts | 267 ++++ .../src/engine/index-construction.ts | 348 +++++ openclaw-plugin/src/engine/inter-scheduler.ts | 116 ++ openclaw-plugin/src/engine/intra-ordering.ts | 349 +++++ openclaw-plugin/src/engine/live-index.ts | 1232 +++++++++++++++++ openclaw-plugin/src/engine/metadata.ts | 82 ++ openclaw-plugin/src/engine/tree-nodes.ts | 334 +++++ 10 files changed, 3510 insertions(+) create mode 100644 openclaw-plugin/src/engine/compute-distance.ts create mode 100644 openclaw-plugin/src/engine/conversation-tracker.ts create mode 100644 openclaw-plugin/src/engine/eviction-heap.ts create mode 100644 openclaw-plugin/src/engine/http-client.ts create mode 100644 openclaw-plugin/src/engine/index-construction.ts create mode 100644 openclaw-plugin/src/engine/inter-scheduler.ts create mode 100644 openclaw-plugin/src/engine/intra-ordering.ts create mode 100644 openclaw-plugin/src/engine/live-index.ts create mode 100644 openclaw-plugin/src/engine/metadata.ts create mode 100644 openclaw-plugin/src/engine/tree-nodes.ts diff --git a/openclaw-plugin/src/engine/compute-distance.ts b/openclaw-plugin/src/engine/compute-distance.ts new file mode 100644 index 0000000..8aad2d5 --- /dev/null +++ b/openclaw-plugin/src/engine/compute-distance.ts @@ -0,0 +1,224 @@ +export interface PreparedContextsCpu { + chunkIds: number[]; + originalPositions: number[]; + lengths: number[]; + offsets: number[]; +} + +export function computeDistanceSingle( + contextA: number[], + contextB: number[], + alpha: number = 0.001 +): number { + if (contextA.length === 0 || contextB.length === 0) { + return 1.0; + } + + const posA = new Map(); + const posB = new Map(); + + for (let pos = 0; pos < contextA.length; pos += 1) { + posA.set(contextA[pos], pos); + } + for (let pos = 0; pos < contextB.length; pos += 1) { + posB.set(contextB[pos], pos); + } + + let intersectionSize = 0; + let positionDiffSum = 0; + + for (const [docId, aPos] of posA) { + const bPos = posB.get(docId); + if (bPos === undefined) { + continue; + } + + intersectionSize += 1; + positionDiffSum += Math.abs(aPos - bPos); + } + + if (intersectionSize === 0) { + return 1.0; + } + + const maxSize = Math.max(contextA.length, contextB.length); + const overlapTerm = 1.0 - intersectionSize / maxSize; + const positionTerm = alpha * (positionDiffSum / intersectionSize); + + return overlapTerm + positionTerm; +} + +export function computeDistancesBatch( + queries: number[][], + targets: number[][], + alpha: number = 0.001 +): number[][] { + const nQueries = queries.length; + const nTargets = targets.length; + + if (nQueries === 0 || nTargets === 0) { + return Array.from({ length: nQueries }, () => new Array(nTargets).fill(0)); + } + + const totalPairs = nQueries * nTargets; + const distances: number[][] = Array.from( + { length: nQueries }, + () => new Array(nTargets).fill(1.0) + ); + + if (totalPairs < 1000) { + for (let i = 0; i < nQueries; i += 1) { + for (let j = 0; j < nTargets; j += 1) { + distances[i][j] = computeDistanceSingle(queries[i], targets[j], alpha); + } + } + return distances; + } + + for (let i = 0; i < nQueries; i += 1) { + for (let j = 0; j < nTargets; j += 1) { + distances[i][j] = computeDistanceSingle(queries[i], targets[j], alpha); + } + } + + return distances; +} + +export function prepareContextsForCpu(contexts: number[][]): PreparedContextsCpu { + const n = contexts.length; + const sortedData: Array> = new Array(n); + const lengths: number[] = new Array(n).fill(0); + + for (let idx = 0; idx < n; idx += 1) { + const ctx = contexts[idx]; + if (ctx.length === 0) { + sortedData[idx] = []; + lengths[idx] = 0; + continue; + } + + const pairs: Array<[number, number]> = new Array(ctx.length); + for (let origPos = 0; origPos < ctx.length; origPos += 1) { + pairs[origPos] = [ctx[origPos], origPos]; + } + pairs.sort((a, b) => a[0] - b[0]); + + sortedData[idx] = pairs; + lengths[idx] = pairs.length; + } + + const offsets: number[] = new Array(n + 1).fill(0); + for (let i = 0; i < n; i += 1) { + offsets[i + 1] = offsets[i] + lengths[i]; + } + + const totalElements = offsets[n]; + const chunkIds: number[] = new Array(totalElements).fill(0); + const originalPositions: number[] = new Array(totalElements).fill(0); + + for (let i = 0; i < n; i += 1) { + const pairs = sortedData[i]; + const start = offsets[i]; + for (let j = 0; j < pairs.length; j += 1) { + const [chunkId, origPos] = pairs[j]; + chunkIds[start + j] = chunkId; + originalPositions[start + j] = origPos; + } + } + + return { + chunkIds, + originalPositions, + lengths, + offsets + }; +} + +export function computeDistanceOptimized( + chunkIds: number[], + originalPositions: number[], + lengths: number[], + offsets: number[], + i: number, + j: number, + alpha: number +): number { + const lenI = lengths[i]; + const lenJ = lengths[j]; + + if (lenI === 0 || lenJ === 0) { + return 1.0; + } + + const offsetI = offsets[i]; + const offsetJ = offsets[j]; + const endI = offsetI + lenI; + const endJ = offsetJ + lenJ; + + let intersectionSize = 0; + let positionDiffSum = 0; + + let pi = offsetI; + let pj = offsetJ; + + while (pi < endI && pj < endJ) { + const chunkI = chunkIds[pi]; + const chunkJ = chunkIds[pj]; + + if (chunkI === chunkJ) { + intersectionSize += 1; + positionDiffSum += Math.abs(originalPositions[pi] - originalPositions[pj]); + pi += 1; + pj += 1; + } else if (chunkI < chunkJ) { + pi += 1; + } else { + pj += 1; + } + } + + const maxSize = Math.max(lenI, lenJ); + const overlapTerm = 1.0 - intersectionSize / maxSize; + + let positionTerm = 0.0; + if (intersectionSize !== 0) { + const avgPosDiff = positionDiffSum / intersectionSize; + positionTerm = alpha * avgPosDiff; + } + + return overlapTerm + positionTerm; +} + +export function computeDistanceMatrixCpu( + contexts: number[][], + alpha: number = 0.001 +): Float64Array { + const n = contexts.length; + const numPairs = (n * (n - 1)) / 2; + + if (numPairs === 0) { + return new Float64Array(0); + } + + const { chunkIds, originalPositions, lengths, offsets } = prepareContextsForCpu(contexts); + const condensedDistances = new Float64Array(numPairs); + + for (let i = 0; i < n; i += 1) { + for (let j = i + 1; j < n; j += 1) { + const dist = computeDistanceOptimized( + chunkIds, + originalPositions, + lengths, + offsets, + i, + j, + alpha + ); + + const condensedIdx = n * i - (i * (i + 1)) / 2 + j - i - 1; + condensedDistances[condensedIdx] = dist; + } + } + + return condensedDistances; +} diff --git a/openclaw-plugin/src/engine/conversation-tracker.ts b/openclaw-plugin/src/engine/conversation-tracker.ts new file mode 100644 index 0000000..845ee68 --- /dev/null +++ b/openclaw-plugin/src/engine/conversation-tracker.ts @@ -0,0 +1,241 @@ +export interface DeduplicationResult { + originalDocs: number[]; + overlappingDocs: number[]; + newDocs: number[]; + referenceHints: string[]; + deduplicatedDocs: number[]; + docSourceTurns: Map; + isNewConversation: boolean; +} + +export interface RequestHistory { + requestId: string; + docs: number[]; + parentRequestId: string | null; + turnNumber: number; + timestamp: number; +} + +export interface ConversationTrackerStats { + totalRequests: number; + totalDedupCalls: number; + totalDocsDeduplicated: number; + activeRequests: number; +} + +export class ConversationTracker { + private _requests: Map; + private _hintTemplate: string; + private _stats: { + totalRequests: number; + totalDedupCalls: number; + totalDocsDeduplicated: number; + }; + + constructor(hintTemplate?: string) { + this._requests = new Map(); + this._hintTemplate = + hintTemplate ?? "Please refer to [Doc {doc_id}] from the previous conversation turn."; + this._stats = { + totalRequests: 0, + totalDedupCalls: 0, + totalDocsDeduplicated: 0 + }; + } + + registerRequest(requestId: string, docs: number[], parentRequestId?: string | null): RequestHistory { + let turnNumber = 1; + if (parentRequestId && this._requests.has(parentRequestId)) { + const parent = this._requests.get(parentRequestId); + if (parent) { + turnNumber = parent.turnNumber + 1; + } + } + + const history: RequestHistory = { + requestId, + docs: [...docs], + parentRequestId: parentRequestId ?? null, + turnNumber, + timestamp: Date.now() / 1000 + }; + + this._requests.set(requestId, history); + this._stats.totalRequests += 1; + + return history; + } + + getConversationChain(requestId: string): RequestHistory[] { + const chain: RequestHistory[] = []; + let currentId: string | null = requestId; + + while (currentId && this._requests.has(currentId)) { + const history = this._requests.get(currentId); + if (!history) { + break; + } + + chain.push(history); + currentId = history.parentRequestId; + } + + chain.reverse(); + return chain; + } + + getAllPreviousDocs(parentRequestId: string): [Set, Map] { + const allDocs = new Set(); + const docSources = new Map(); + + const chain = this.getConversationChain(parentRequestId); + + for (const history of chain) { + for (const docId of history.docs) { + if (!allDocs.has(docId)) { + allDocs.add(docId); + docSources.set(docId, history.requestId); + } + } + } + + return [allDocs, docSources]; + } + + deduplicate( + requestId: string, + docs: number[], + parentRequestId?: string | null, + hintTemplate?: string + ): DeduplicationResult { + this._stats.totalDedupCalls += 1; + + if (!parentRequestId || !this._requests.has(parentRequestId)) { + this.registerRequest(requestId, docs, null); + + return { + originalDocs: docs, + overlappingDocs: [], + newDocs: docs, + referenceHints: [], + deduplicatedDocs: docs, + docSourceTurns: new Map(), + isNewConversation: true + }; + } + + const [previousDocs, docSources] = this.getAllPreviousDocs(parentRequestId); + + const overlappingDocs: number[] = []; + const newDocs: number[] = []; + const docSourceTurns = new Map(); + + for (const docId of docs) { + if (previousDocs.has(docId)) { + overlappingDocs.push(docId); + const sourceRequestId = docSources.get(docId); + if (sourceRequestId !== undefined) { + docSourceTurns.set(docId, sourceRequestId); + } + } else { + newDocs.push(docId); + } + } + + const template = hintTemplate ?? this._hintTemplate; + const referenceHints: string[] = []; + + for (const docId of overlappingDocs) { + const sourceRequest = docSources.get(docId); + const sourceHistory = sourceRequest ? this._requests.get(sourceRequest) : undefined; + const turnNumber = sourceHistory ? String(sourceHistory.turnNumber) : "previous"; + + const hint = template + .replaceAll("{doc_id}", String(docId)) + .replaceAll("{turn_number}", turnNumber) + .replaceAll("{source_request}", sourceRequest ?? "previous"); + + referenceHints.push(hint); + } + + this.registerRequest(requestId, docs, parentRequestId); + this._stats.totalDocsDeduplicated += overlappingDocs.length; + + return { + originalDocs: docs, + overlappingDocs, + newDocs, + referenceHints, + deduplicatedDocs: newDocs, + docSourceTurns, + isNewConversation: false + }; + } + + deduplicateBatch( + requestIds: string[], + docsList: number[][], + parentRequestIds?: Array, + hintTemplate?: string + ): DeduplicationResult[] { + const effectiveParentRequestIds = + parentRequestIds ?? new Array(requestIds.length).fill(null); + + const results: DeduplicationResult[] = []; + const n = Math.min(requestIds.length, docsList.length, effectiveParentRequestIds.length); + + for (let i = 0; i < n; i += 1) { + const result = this.deduplicate( + requestIds[i], + docsList[i], + effectiveParentRequestIds[i], + hintTemplate + ); + results.push(result); + } + + return results; + } + + removeRequest(requestId: string): boolean { + if (this._requests.has(requestId)) { + this._requests.delete(requestId); + return true; + } + + return false; + } + + clearConversation(requestId: string): number { + const chain = this.getConversationChain(requestId); + let count = 0; + + for (const history of chain) { + if (this.removeRequest(history.requestId)) { + count += 1; + } + } + + return count; + } + + reset(): void { + this._requests.clear(); + this._stats = { + totalRequests: 0, + totalDedupCalls: 0, + totalDocsDeduplicated: 0 + }; + } + + getStats(): ConversationTrackerStats { + return { + ...this._stats, + activeRequests: this._requests.size + }; + } + + getRequestHistory(requestId: string): RequestHistory | null { + return this._requests.get(requestId) ?? null; + } +} diff --git a/openclaw-plugin/src/engine/eviction-heap.ts b/openclaw-plugin/src/engine/eviction-heap.ts new file mode 100644 index 0000000..69de6ff --- /dev/null +++ b/openclaw-plugin/src/engine/eviction-heap.ts @@ -0,0 +1,317 @@ +import type { NodeMetadata } from "./metadata.js"; + +type HeapEntry = [number, number]; + +export interface EvictionHeapStats { + size: number; + total_tokens: number; + max_tokens: number | null; + utilization_pct: number; + avg_tokens_per_node: number; + oldest_access_time: number | null; + newest_access_time: number | null; + num_requests: number; +} + +export class EvictionHeap { + private _heap: HeapEntry[]; + private _metadata: Map; + private _requestToNode: Map; + private _inHeap: Map; + private _maxTokens: number | null; + private _totalTokens: number; + + constructor(maxTokens?: number | null) { + this._heap = []; + this._metadata = new Map(); + this._requestToNode = new Map(); + this._inHeap = new Map(); + this._maxTokens = maxTokens ?? null; + this._totalTokens = 0; + } + + get maxTokens(): number | null { + return this._maxTokens; + } + + set maxTokens(value: number | null) { + this._maxTokens = value; + } + + private _compare(a: HeapEntry, b: HeapEntry): number { + if (a[0] !== b[0]) { + return a[0] - b[0]; + } + return a[1] - b[1]; + } + + private _swap(i: number, j: number): void { + const tmp = this._heap[i]; + this._heap[i] = this._heap[j]; + this._heap[j] = tmp; + } + + private _siftUp(index: number): void { + let current = index; + + while (current > 0) { + const parent = Math.floor((current - 1) / 2); + if (this._compare(this._heap[current], this._heap[parent]) >= 0) { + break; + } + + this._swap(current, parent); + current = parent; + } + } + + private _siftDown(index: number): void { + const n = this._heap.length; + let current = index; + + while (true) { + const left = 2 * current + 1; + const right = 2 * current + 2; + let smallest = current; + + if (left < n && this._compare(this._heap[left], this._heap[smallest]) < 0) { + smallest = left; + } + + if (right < n && this._compare(this._heap[right], this._heap[smallest]) < 0) { + smallest = right; + } + + if (smallest === current) { + break; + } + + this._swap(current, smallest); + current = smallest; + } + } + + private _heapPush(entry: HeapEntry): void { + this._heap.push(entry); + this._siftUp(this._heap.length - 1); + } + + private _heapPop(): HeapEntry | null { + if (this._heap.length === 0) { + return null; + } + + if (this._heap.length === 1) { + return this._heap.pop() ?? null; + } + + const min = this._heap[0]; + const last = this._heap.pop(); + if (last !== undefined) { + this._heap[0] = last; + this._siftDown(0); + } + return min; + } + + push(metadata: NodeMetadata): void { + const nodeId = metadata.nodeId; + + if (this._inHeap.get(nodeId) === true) { + const oldMetadata = this._metadata.get(nodeId); + if (oldMetadata) { + this._totalTokens += metadata.extraTokens - oldMetadata.extraTokens; + } + this._metadata.set(nodeId, metadata); + this.updateAccessTime(nodeId, metadata.lastAccessTime); + return; + } + + this._heapPush([metadata.lastAccessTime, nodeId]); + this._metadata.set(nodeId, metadata); + this._inHeap.set(nodeId, true); + this._totalTokens += metadata.extraTokens; + + if (metadata.requestId) { + this._requestToNode.set(metadata.requestId, nodeId); + } + } + + pop(): NodeMetadata | null { + while (this._heap.length > 0) { + const entry = this._heapPop(); + if (entry === null) { + return null; + } + + const [accessTime, nodeId] = entry; + + if (!this._metadata.has(nodeId)) { + continue; + } + + const metadata = this._metadata.get(nodeId); + if (!metadata) { + continue; + } + + if (metadata.lastAccessTime === accessTime) { + this._inHeap.set(nodeId, false); + this._totalTokens -= metadata.extraTokens; + return metadata; + } + } + + return null; + } + + peek(): NodeMetadata | null { + while (this._heap.length > 0) { + const [accessTime, nodeId] = this._heap[0]; + + if (!this._metadata.has(nodeId)) { + this._heapPop(); + continue; + } + + const metadata = this._metadata.get(nodeId); + if (!metadata) { + this._heapPop(); + continue; + } + + if (metadata.lastAccessTime === accessTime) { + return metadata; + } + + this._heapPop(); + } + + return null; + } + + updateAccessTime(nodeId: number, newTime?: number): void { + const metadata = this._metadata.get(nodeId); + if (!metadata) { + return; + } + + const effectiveTime = newTime ?? Date.now() / 1000; + metadata.lastAccessTime = effectiveTime; + + this._heapPush([effectiveTime, nodeId]); + } + + remove(nodeId: number): void { + const metadata = this._metadata.get(nodeId); + + if (metadata) { + this._totalTokens -= metadata.extraTokens; + + if (metadata.requestId) { + this._requestToNode.delete(metadata.requestId); + } + + this._metadata.delete(nodeId); + } + + if (this._inHeap.has(nodeId)) { + this._inHeap.set(nodeId, false); + } + } + + getNodeByRequestId(requestId: string): NodeMetadata | null { + const nodeId = this._requestToNode.get(requestId); + if (nodeId !== undefined) { + return this._metadata.get(nodeId) ?? null; + } + return null; + } + + updateTokensForRequest(requestId: string, inputTokens: number, outputTokens: number): boolean { + const metadata = this.getNodeByRequestId(requestId); + if (metadata === null) { + return false; + } + + const oldTokens = metadata.totalTokens; + const totalNew = inputTokens + outputTokens; + const delta = totalNew - oldTokens; + + metadata.totalTokens = totalNew; + metadata.extraTokens = Math.max(0, metadata.extraTokens + delta); + metadata.updateAccessTime(); + + this._totalTokens += delta; + this._heapPush([metadata.lastAccessTime, metadata.nodeId]); + + return true; + } + + needsEviction(): boolean { + if (this._maxTokens === null) { + return false; + } + return this._totalTokens > this._maxTokens; + } + + tokensToEvict(): number { + if (this._maxTokens === null || this._totalTokens <= this._maxTokens) { + return 0; + } + return this._totalTokens - this._maxTokens; + } + + getMetadata(nodeId: number): NodeMetadata | null { + return this._metadata.get(nodeId) ?? null; + } + + isEmpty(): boolean { + return this.peek() === null; + } + + size(): number { + return this._metadata.size; + } + + totalTokens(): number { + return this._totalTokens; + } + + getAllRequestIds(): Set { + return new Set(this._requestToNode.keys()); + } + + getStats(): EvictionHeapStats { + if (this._metadata.size === 0) { + return { + size: 0, + total_tokens: 0, + max_tokens: this._maxTokens, + utilization_pct: 0, + avg_tokens_per_node: 0, + oldest_access_time: null, + newest_access_time: null, + num_requests: 0 + }; + } + + const accessTimes = Array.from(this._metadata.values(), (m) => m.lastAccessTime); + const utilization = this._maxTokens ? (this._totalTokens / this._maxTokens) * 100 : 0; + + return { + size: this._metadata.size, + total_tokens: this._totalTokens, + max_tokens: this._maxTokens, + utilization_pct: utilization, + avg_tokens_per_node: this._totalTokens / this._metadata.size, + oldest_access_time: Math.min(...accessTimes), + newest_access_time: Math.max(...accessTimes), + num_requests: this._requestToNode.size + }; + } + + toString(): string { + return `EvictionHeap(size=${this._metadata.size}, total_tokens=${this._totalTokens}, max_tokens=${this._maxTokens})`; + } +} diff --git a/openclaw-plugin/src/engine/http-client.ts b/openclaw-plugin/src/engine/http-client.ts new file mode 100644 index 0000000..1166785 --- /dev/null +++ b/openclaw-plugin/src/engine/http-client.ts @@ -0,0 +1,267 @@ +type JsonObject = Record; + +function isJsonObject(value: unknown): value is JsonObject { + return typeof value === "object" && value !== null && !Array.isArray(value); +} + +async function fetchJson( + url: string, + init: RequestInit, + timeoutMs: number, +): Promise { + try { + const response = await fetch(url, { + ...init, + signal: AbortSignal.timeout(timeoutMs), + }); + + if (!response.ok) { + return null; + } + + const data: unknown = await response.json(); + return isJsonObject(data) ? data : null; + } catch { + return null; + } +} + +export class ContextPilotIndexClient { + private readonly baseUrl: string; + + private readonly timeout: number; + + private readonly retryOnFailure: boolean; + + constructor( + baseUrl: string = "http://localhost:8765", + timeout: number = 1000, + retryOnFailure: boolean = false, + ) { + this.baseUrl = baseUrl.replace(/\/+$/, ""); + this.timeout = timeout; + this.retryOnFailure = retryOnFailure; + } + + private async _post(endpoint: string, jsonData: JsonObject): Promise { + const url = `${this.baseUrl}${endpoint}`; + const attempt = () => + fetchJson( + url, + { + method: "POST", + headers: { + "Content-Type": "application/json", + }, + body: JSON.stringify(jsonData), + }, + this.timeout, + ); + + const result = await attempt(); + if (result !== null || !this.retryOnFailure) { + return result; + } + + return attempt(); + } + + private async _get(endpoint: string): Promise { + const url = `${this.baseUrl}${endpoint}`; + const attempt = () => + fetchJson( + url, + { + method: "GET", + }, + this.timeout, + ); + + const result = await attempt(); + if (result !== null || !this.retryOnFailure) { + return result; + } + + return attempt(); + } + + async evict(requestIds: string[]): Promise { + return this._post("/evict", { request_ids: requestIds }); + } + + async search(context: number[], updateAccess: boolean = true): Promise { + return this._post("/search", { + context, + update_access: updateAccess, + }); + } + + async updateNode(searchPath: number[], tokenDelta: number): Promise { + return this._post("/update", { + search_path: searchPath, + token_delta: tokenDelta, + }); + } + + async insert( + context: number[], + searchPath: number[], + totalTokens: number = 0, + ): Promise { + return this._post("/insert", { + context, + search_path: searchPath, + total_tokens: totalTokens, + }); + } + + async reorder( + contexts: Array>, + alpha: number = 0.001, + useGpu: boolean = false, + linkageMethod: string = "average", + initialTokensPerContext: number = 0, + deduplicate: boolean = false, + parentRequestIds?: Array, + hintTemplate?: string, + ): Promise<[Array>, number[]] | null> { + const result = await this.reorderRaw( + contexts, + alpha, + useGpu, + linkageMethod, + initialTokensPerContext, + deduplicate, + parentRequestIds, + hintTemplate, + ); + + if (result === null) { + return null; + } + + const reorderedContexts = result.reordered_contexts; + const originalIndices = result.original_indices; + + if (!Array.isArray(reorderedContexts) || !Array.isArray(originalIndices)) { + return null; + } + + if (!originalIndices.every((index) => typeof index === "number")) { + return null; + } + + return [reorderedContexts as Array>, originalIndices as number[]]; + } + + async reorderRaw( + contexts: Array>, + alpha: number = 0.001, + useGpu: boolean = false, + linkageMethod: string = "average", + initialTokensPerContext: number = 0, + deduplicate: boolean = false, + parentRequestIds?: Array, + hintTemplate?: string, + ): Promise { + const payload: JsonObject = { + contexts, + alpha, + use_gpu: useGpu, + linkage_method: linkageMethod, + initial_tokens_per_context: initialTokensPerContext, + deduplicate, + }; + + if (parentRequestIds !== undefined) { + payload.parent_request_ids = parentRequestIds; + } + + if (hintTemplate !== undefined) { + payload.hint_template = hintTemplate; + } + + return this._post("/reorder", payload); + } + + async deduplicate( + contexts: number[][], + parentRequestIds: Array, + hintTemplate?: string, + ): Promise { + const payload: JsonObject = { + contexts, + parent_request_ids: parentRequestIds, + }; + + if (hintTemplate !== undefined) { + payload.hint_template = hintTemplate; + } + + return this._post("/deduplicate", payload); + } + + async reset(): Promise { + return this._post("/reset", {}); + } + + async getRequests(): Promise { + return this._get("/requests"); + } + + async getStats(): Promise { + return this._get("/stats"); + } + + async health(): Promise { + return this._get("/health"); + } + + async isReady(): Promise { + const health = await this.health(); + return health !== null && health.status === "ready"; + } +} + +export async function evictRequests( + requestIds: string[], + serverUrl: string = "http://localhost:8765", +): Promise { + return fetchJson( + `${serverUrl.replace(/\/+$/, "")}/evict`, + { + method: "POST", + headers: { + "Content-Type": "application/json", + }, + body: JSON.stringify({ request_ids: requestIds }), + }, + 1000, + ); +} + +export async function scheduleBatch( + contexts: number[][], + serverUrl: string = "http://localhost:8765", + alpha: number = 0.001, + useGpu: boolean = false, + linkageMethod: string = "average", + timeout: number = 30000, +): Promise { + return fetchJson( + `${serverUrl.replace(/\/+$/, "")}/reorder`, + { + method: "POST", + headers: { + "Content-Type": "application/json", + }, + body: JSON.stringify({ + contexts, + alpha, + use_gpu: useGpu, + linkage_method: linkageMethod, + }), + }, + timeout, + ); +} diff --git a/openclaw-plugin/src/engine/index-construction.ts b/openclaw-plugin/src/engine/index-construction.ts new file mode 100644 index 0000000..a0f28d5 --- /dev/null +++ b/openclaw-plugin/src/engine/index-construction.ts @@ -0,0 +1,348 @@ +import { ClusterNode, NodeManager, NodeStats } from './tree-nodes.js'; +import { IntraContextOrderer } from './intra-ordering.js'; +import { computeDistanceMatrixCpu } from './compute-distance.js'; + +export function linkage( + condensedDistances: Float64Array, + n: number, + method: "single" | "complete" | "average" = "average" +): number[][] { + const dist: number[][] = Array.from({length: n}, () => new Array(n).fill(Infinity)); + for (let i = 0; i < n; i++) dist[i][i] = 0; + for (let i = 0; i < n; i++) { + for (let j = i + 1; j < n; j++) { + const idx = n * i - (i * (i + 1)) / 2 + j - i - 1; + dist[i][j] = condensedDistances[idx]; + dist[j][i] = condensedDistances[idx]; + } + } + + const active = new Set(Array.from({length: n}, (_, i) => i)); + const sizes = new Array(2 * n - 1).fill(1); + const result: number[][] = []; + + for (let step = 0; step < n - 1; step++) { + let minDist = Infinity; + let minI = -1, minJ = -1; + + for (const i of active) { + for (const j of active) { + if (j <= i) continue; + if (dist[i][j] < minDist) { + minDist = dist[i][j]; + minI = i; + minJ = j; + } + } + } + + const newClusterId = n + step; + const sizeNew = sizes[minI] + sizes[minJ]; + sizes[newClusterId] = sizeNew; + + result.push([minI, minJ, minDist, sizeNew]); + + while (dist.length <= newClusterId) { + dist.push(new Array(dist[0]?.length ?? 0).fill(Infinity)); + } + for (const row of dist) { + while (row.length <= newClusterId) row.push(Infinity); + } + dist[newClusterId][newClusterId] = 0; + + for (const k of active) { + if (k === minI || k === minJ) continue; + let newDist: number; + if (method === "single") { + newDist = Math.min(dist[minI][k], dist[minJ][k]); + } else if (method === "complete") { + newDist = Math.max(dist[minI][k], dist[minJ][k]); + } else { // average (UPGMA) + newDist = (dist[minI][k] * sizes[minI] + dist[minJ][k] * sizes[minJ]) / sizeNew; + } + dist[newClusterId][k] = newDist; + dist[k][newClusterId] = newDist; + } + + active.delete(minI); + active.delete(minJ); + active.add(newClusterId); + } + + return result; +} + +export class IndexResult { + linkageMatrix: number[][]; + clusterNodes: Map; + uniqueNodes: Map; + reorderedContexts: (number[] | string[])[]; + originalContexts: (number[] | string[])[]; + stats: NodeStats; + searchPaths: number[][] | null; + + // Legacy attributes for backward compatibility + reorderedPrompts: (number[] | string[])[]; + originalPrompts: (number[] | string[])[]; + + constructor( + linkageMatrix: number[][], + clusterNodes: Map, + uniqueNodes: Map, + reorderedContexts: (number[] | string[])[], + originalContexts: (number[] | string[])[], + stats: NodeStats, + searchPaths: number[][] | null = null + ) { + this.linkageMatrix = linkageMatrix; + this.clusterNodes = clusterNodes; + this.uniqueNodes = uniqueNodes; + this.reorderedContexts = reorderedContexts; + this.originalContexts = originalContexts; + this.stats = stats; + this.searchPaths = searchPaths; + + this.reorderedPrompts = this.reorderedContexts; + this.originalPrompts = this.originalContexts; + } + + printTree(): void { + console.log("\n--- Unique Cluster Tree Nodes ---"); + const sortedKeys = Array.from(this.uniqueNodes.keys()).sort((a, b) => a - b); + for (const nodeId of sortedKeys) { + const node = this.uniqueNodes.get(nodeId); + if (!node) continue; + console.log(`ClusterNode ${nodeId}`); + console.log(` Content: [${node.docIds.join(', ')}]`); + console.log(` Original indices: [${Array.from(node.originalIndices).sort((a, b) => a - b).join(', ')}]`); + if (node.searchPath && node.searchPath.length > 0) { + const pathStr = "[" + node.searchPath.join("][") + "]"; + console.log(` Search path (child indices from root): ${pathStr}`); + } else { + console.log(` Search path: (root node)`); + } + if (!node.isLeaf) { + console.log(` Children: [${node.children.join(', ')}]`); + console.log(` Merge distance: ${node.mergeDistance.toFixed(4)}`); + } + console.log("-".repeat(40)); + } + } +} + +export interface ContextIndexOptions { + linkageMethod?: "single" | "complete" | "average"; + useGpu?: boolean; + alpha?: number; + numWorkers?: number | null; + batchSize?: number; +} + +export class ContextIndex { + linkageMethod: "single" | "complete" | "average"; + useGpu: boolean; + alpha: number; + numWorkers: number | null; + batchSize: number; + + nodeManager: NodeManager; + contextOrderer: IntraContextOrderer; + + _strToId: Map; + _idToStr: Map; + _nextStrId: number; + _isStringInput: boolean; + + constructor(options: ContextIndexOptions = {}) { + this.linkageMethod = options.linkageMethod || "average"; + this.useGpu = false; + this.alpha = options.alpha !== undefined ? options.alpha : 0.001; + this.numWorkers = options.numWorkers || null; + this.batchSize = options.batchSize || 1000; + + this.nodeManager = new NodeManager(); + this.contextOrderer = new IntraContextOrderer(); + + this._strToId = new Map(); + this._idToStr = new Map(); + this._nextStrId = 0; + this._isStringInput = false; + } + + _convertToInt(contexts: (number[] | string[])[]): number[][] { + if (!contexts || contexts.length === 0 || !contexts[0] || contexts[0].length === 0) { + return contexts as number[][]; + } + if (typeof contexts[0][0] === "string") { + this._isStringInput = true; + const converted: number[][] = []; + for (const ctx of contexts as string[][]) { + const convertedCtx: number[] = []; + for (const item of ctx) { + let sid = this._strToId.get(item); + if (sid === undefined) { + sid = this._nextStrId; + this._strToId.set(item, sid); + this._idToStr.set(sid, item); + this._nextStrId += 1; + } + convertedCtx.push(sid); + } + converted.push(convertedCtx); + } + return converted; + } + return contexts as number[][]; + } + + _convertToStr(contexts: number[][]): string[][] { + if (!this._isStringInput || !contexts || contexts.length === 0) { + return contexts as any; + } + if (contexts[0] && typeof contexts[0][0] === "string") { + return contexts as any; + } + const result: string[][] = []; + for (const ctx of contexts) { + const strCtx: string[] = []; + for (const i of ctx) { + strCtx.push(this._idToStr.get(i) as string); + } + result.push(strCtx); + } + return result; + } + + fitTransform(contexts: (number[] | string[])[]): IndexResult { + const intContexts = this._convertToInt(contexts); + const n = intContexts.length; + + if (n < 2) { + return this._handleSinglePrompt(intContexts); + } + + const condensedDistances = this._computeDistanceMatrix(intContexts); + const linkageMatrix = linkage(condensedDistances, n, this.linkageMethod); + + this._buildTree(intContexts, linkageMatrix); + + this.nodeManager.cleanupEmptyNodes(); + this.nodeManager.updateSearchPaths(); + + const reorderedContexts = this.contextOrderer.reorderContexts( + intContexts, + this.nodeManager.uniqueNodes + ); + + const searchPaths = this.contextOrderer.extractSearchPaths( + this.nodeManager.uniqueNodes, + intContexts.length + ); + + const stats = this.nodeManager.getNodeStats(); + + return new IndexResult( + linkageMatrix, + this.nodeManager.clusterNodes, + this.nodeManager.uniqueNodes, + reorderedContexts, + intContexts, + stats, + searchPaths + ); + } + + _computeDistanceMatrix(contexts: number[][]): Float64Array { + return computeDistanceMatrixCpu(contexts, this.alpha); + } + + _handleSinglePrompt(contexts: number[][]): IndexResult { + for (let i = 0; i < contexts.length; i++) { + const prompt = contexts[i]; + const node = this.nodeManager.createLeafNode(i, prompt); + node.docIds = [...prompt]; + } + + const leafIds = Array.from(this.nodeManager.uniqueNodes.keys()); + const virtualRootId = leafIds.length > 0 ? Math.max(...leafIds) + 1 : 0; + + let freqSum = 0; + for (const nid of leafIds) { + const n = this.nodeManager.uniqueNodes.get(nid); + if (n) freqSum += n.frequency; + } + + const virtualRoot = new ClusterNode( + virtualRootId, + new Set(), + new Set(), + 0.0, + leafIds, + null, + freqSum + ); + this.nodeManager.uniqueNodes.set(virtualRootId, virtualRoot); + + for (const nid of leafIds) { + const n = this.nodeManager.uniqueNodes.get(nid); + if (n) { + n.parent = virtualRootId; + } + } + + this.nodeManager.updateSearchPaths(); + + const searchPaths = this.contextOrderer.extractSearchPaths( + this.nodeManager.uniqueNodes, + contexts.length + ); + + const reorderedContexts = contexts.map(c => [...c]); + + return new IndexResult( + [], + this.nodeManager.clusterNodes, + this.nodeManager.uniqueNodes, + reorderedContexts, + contexts, + this.nodeManager.getNodeStats(), + searchPaths + ); + } + + _buildTree(contexts: number[][], linkageMatrix: number[][]): void { + const n = contexts.length; + + for (let i = 0; i < n; i++) { + this.nodeManager.createLeafNode(i, contexts[i]); + } + + for (let i = 0; i < linkageMatrix.length; i++) { + const [idx1, idx2, distance] = linkageMatrix[i]; + const newNodeId = n + i; + this.nodeManager.createInternalNode( + newNodeId, + Math.floor(idx1), + Math.floor(idx2), + distance + ); + } + } +} + +export function buildContextIndex( + contexts: (number[] | string[])[], + options: ContextIndexOptions = {} +): IndexResult { + const indexer = new ContextIndex(options); + const result = indexer.fitTransform(contexts); + + if (indexer._isStringInput) { + result.reorderedContexts = indexer._convertToStr(result.reorderedContexts as number[][]); + result.originalContexts = indexer._convertToStr(result.originalContexts as number[][]); + result.reorderedPrompts = result.reorderedContexts; + result.originalPrompts = result.originalContexts; + } + + return result; +} diff --git a/openclaw-plugin/src/engine/inter-scheduler.ts b/openclaw-plugin/src/engine/inter-scheduler.ts new file mode 100644 index 0000000..702eebc --- /dev/null +++ b/openclaw-plugin/src/engine/inter-scheduler.ts @@ -0,0 +1,116 @@ +import type { ClusterNode } from './tree-nodes.js'; + +export interface ClusteringResult { + reorderedPrompts: number[][]; + originalPrompts: number[][]; + searchPaths: number[][]; +} + +export class InterContextScheduler { + scheduleContexts( + clusteringResult: ClusteringResult + ): [number[][], number[][], number[], Array<[number, number[]]>] { + const reorderedContexts = clusteringResult.reorderedPrompts; + const originalContexts = clusteringResult.originalPrompts; + const searchPaths = clusteringResult.searchPaths; + + const groupsByRoot = this._groupByRootPrefix(searchPaths); + const sortedGroups = this._sortGroupsByPathLength(groupsByRoot, searchPaths, reorderedContexts); + + const allGroupsWithInfo: Array<[number, number[]]> = []; + for (const groupIndices of sortedGroups) { + allGroupsWithInfo.push([0, groupIndices]); + } + + allGroupsWithInfo.sort((a, b) => { + const sizeDiff = b[1].length - a[1].length; + if (sizeDiff !== 0) { + return sizeDiff; + } + + const aFirst = a[1].length > 0 ? a[1][0] : Number.POSITIVE_INFINITY; + const bFirst = b[1].length > 0 ? b[1][0] : Number.POSITIVE_INFINITY; + return aFirst - bFirst; + }); + + const finalIndexMapping = allGroupsWithInfo.flatMap(([, group]) => group); + + const scheduledReordered = finalIndexMapping.map((idx) => reorderedContexts[idx]); + const scheduledOriginals = finalIndexMapping.map((idx) => originalContexts[idx]); + + return [scheduledReordered, scheduledOriginals, finalIndexMapping, allGroupsWithInfo]; + } + + _groupByRootPrefix(searchPaths: number[][]): Map { + const groups = new Map(); + + for (let contextIdx = 0; contextIdx < searchPaths.length; contextIdx += 1) { + const path = searchPaths[contextIdx]; + const groupKey = path.length >= 1 ? path[0] : -1; + + const existing = groups.get(groupKey); + if (existing) { + existing.push(contextIdx); + } else { + groups.set(groupKey, [contextIdx]); + } + } + + return groups; + } + + _sortGroupsByPathLength( + groupsByRoot: Map, + searchPaths: number[][], + contexts: number[][] + ): number[][] { + void contexts; + const sortedGroups: number[][] = []; + + for (const groupIndices of groupsByRoot.values()) { + const sortedGroup = [...groupIndices].sort((a, b) => { + const lengthDiff = searchPaths[b].length - searchPaths[a].length; + if (lengthDiff !== 0) { + return lengthDiff; + } + + const lexCompare = this._compareNumberArrays(searchPaths[a], searchPaths[b]); + if (lexCompare !== 0) { + return lexCompare; + } + + return a - b; + }); + + sortedGroups.push(sortedGroup); + } + + return sortedGroups; + } + + reorderPrompts( + clusteringResult: ClusteringResult + ): [number[][], number[][], number[], Array<[number, number[]]>] { + return this.scheduleContexts(clusteringResult); + } + + _reorderSinglePrompt( + promptIndex: number, + originalPrompt: number[], + uniqueNodes: Map + ): number[] { + void promptIndex; + void uniqueNodes; + return [...originalPrompt]; + } + + private _compareNumberArrays(a: number[], b: number[]): number { + const minLength = Math.min(a.length, b.length); + for (let i = 0; i < minLength; i += 1) { + if (a[i] !== b[i]) { + return a[i] - b[i]; + } + } + return a.length - b.length; + } +} diff --git a/openclaw-plugin/src/engine/intra-ordering.ts b/openclaw-plugin/src/engine/intra-ordering.ts new file mode 100644 index 0000000..0d2eed0 --- /dev/null +++ b/openclaw-plugin/src/engine/intra-ordering.ts @@ -0,0 +1,349 @@ +import type { ClusterNode } from './tree-nodes.js'; + +export class IntraContextOrderer { + reorderContexts(originalContexts: number[][], uniqueNodes: Map): number[][] { + let rootNode: ClusterNode | null = null; + for (const node of uniqueNodes.values()) { + if (node.isRoot) { + rootNode = node; + break; + } + } + + if (!rootNode) { + return originalContexts; + } + + for (const node of uniqueNodes.values()) { + if (node.isLeaf && node.originalIndices.size > 0) { + const firstIdx = Math.min(...node.originalIndices); + if (firstIdx < originalContexts.length) { + this._setNodeDocs(node, [...originalContexts[firstIdx]]); + } + } + } + + const queue: number[] = [rootNode.nodeId]; + const visited = new Set(); + + while (queue.length > 0) { + const nodeId = queue.shift(); + if (nodeId === undefined || visited.has(nodeId) || !uniqueNodes.has(nodeId)) { + continue; + } + + visited.add(nodeId); + const node = uniqueNodes.get(nodeId); + if (!node) { + continue; + } + + if (!node.isRoot && node.parent !== null) { + const parentNode = uniqueNodes.get(node.parent); + if (parentNode) { + const parentDocs = this._getNodeDocs(parentNode); + const nodeDocs = this._getNodeDocs(node); + if (parentDocs.length > 0 && nodeDocs.length > 0) { + this._setNodeDocs(node, this._reorderWithParentPrefix(nodeDocs, parentDocs)); + } + } + } + + for (const childId of node.children) { + if (uniqueNodes.has(childId)) { + queue.push(childId); + } + } + } + + const reorderedContexts: number[][] = []; + for (let i = 0; i < originalContexts.length; i += 1) { + const leafNode = this._findLeafNode(i, uniqueNodes); + if (leafNode) { + const leafDocs = this._getNodeDocs(leafNode); + if (leafDocs.length > 0) { + reorderedContexts.push(leafDocs); + continue; + } + } + + reorderedContexts.push([...originalContexts[i]]); + } + + return reorderedContexts; + } + + _updateTreeAndReorderNodes(uniqueNodes: Map, reorderedContexts: number[][]): void { + let rootNode: ClusterNode | null = null; + for (const node of uniqueNodes.values()) { + if (node.isRoot) { + rootNode = node; + break; + } + } + + for (const node of uniqueNodes.values()) { + if (node.isLeaf && node.originalIndices.size > 0) { + const firstIdx = Math.min(...node.originalIndices); + if (firstIdx < reorderedContexts.length) { + this._setNodeDocs(node, [...reorderedContexts[firstIdx]]); + } + } + } + + if (!rootNode) { + return; + } + + const queue: Array<[number, boolean]> = []; + for (const childId of rootNode.children) { + if (uniqueNodes.has(childId)) { + queue.push([childId, true]); + } + } + + while (queue.length > 0) { + const item = queue.shift(); + if (!item) { + continue; + } + + const [nodeId, isChildOfRoot] = item; + const node = uniqueNodes.get(nodeId); + if (!node) { + continue; + } + + if (!isChildOfRoot && node.parent !== null) { + const parentNode = uniqueNodes.get(node.parent); + if (parentNode) { + const parentDocs = this._getNodeDocs(parentNode); + const nodeDocs = this._getNodeDocs(node); + if (parentDocs.length > 0 && nodeDocs.length > 0) { + this._setNodeDocs(node, this._reorderWithParentPrefix(nodeDocs, parentDocs)); + } + } + } + + for (const childId of node.children) { + if (uniqueNodes.has(childId)) { + queue.push([childId, false]); + } + } + } + } + + _reorderWithParentPrefix(nodeDocs: number[], parentDocs: number[]): number[] { + if (parentDocs.length === 0) { + return nodeDocs; + } + + const result = [...parentDocs]; + const parentSet = new Set(parentDocs); + + for (const doc of nodeDocs) { + if (!parentSet.has(doc)) { + result.push(doc); + } + } + + return result; + } + + _reorderContextWithTreePrefix( + contextIndex: number, + originalContext: number[], + uniqueNodes: Map + ): number[] { + const leafNode = this._findLeafNode(contextIndex, uniqueNodes); + if (!leafNode) { + return [...originalContext]; + } + + const prefixDocs: number[] = []; + const visited = new Set(); + let currentNode: ClusterNode | undefined = leafNode; + + const ancestors: ClusterNode[] = []; + while (currentNode && !currentNode.isRoot) { + if (visited.has(currentNode.nodeId)) { + break; + } + + visited.add(currentNode.nodeId); + ancestors.push(currentNode); + + if (currentNode.parent !== null && uniqueNodes.has(currentNode.parent)) { + currentNode = uniqueNodes.get(currentNode.parent); + } else { + break; + } + } + + ancestors.reverse(); + + const seenDocs = new Set(); + for (const ancestor of ancestors) { + const ancestorDocs = this._getNodeDocs(ancestor); + for (const doc of ancestorDocs) { + if (!seenDocs.has(doc)) { + prefixDocs.push(doc); + seenDocs.add(doc); + } + } + } + + const result = [...prefixDocs]; + for (const doc of originalContext) { + if (!seenDocs.has(doc)) { + result.push(doc); + seenDocs.add(doc); + } + } + + return result; + } + + extractSearchPaths(uniqueNodes: Map, numContexts: number): number[][] { + const searchPaths: number[][] = Array.from({ length: numContexts }, () => []); + + const contextToLeaf = new Map(); + for (const [nodeId, node] of uniqueNodes.entries()) { + if (!node.isLeaf) { + continue; + } + + for (const origIdx of node.originalIndices) { + contextToLeaf.set(origIdx, nodeId); + } + } + + for (let contextIdx = 0; contextIdx < numContexts; contextIdx += 1) { + const leafId = contextToLeaf.get(contextIdx); + if (leafId === undefined) { + searchPaths[contextIdx] = []; + continue; + } + + const childIndices: number[] = []; + let currentId: number | null = leafId; + const visited = new Set(); + + while (currentId !== null) { + if (visited.has(currentId)) { + break; + } + visited.add(currentId); + + const currentNode = uniqueNodes.get(currentId); + if (!currentNode) { + break; + } + + if (currentNode.parent !== null) { + const parentNode = uniqueNodes.get(currentNode.parent); + if (parentNode) { + const childIndex = parentNode.children.indexOf(currentId); + if (childIndex !== -1) { + childIndices.push(childIndex); + } + } + } + + currentId = currentNode.parent; + } + + searchPaths[contextIdx] = [...childIndices].reverse(); + } + + return searchPaths; + } + + _reorderSingleContext( + contextIndex: number, + originalContext: number[], + uniqueNodes: Map + ): number[] { + const originalSet = new Set(originalContext); + + const leafNode = this._findLeafNode(contextIndex, uniqueNodes); + if (!leafNode) { + return [...originalContext]; + } + + if (leafNode.isRoot) { + return Array.from(leafNode.content).sort((a, b) => a - b); + } + + if (leafNode.frequency > 1) { + const prefixContent = leafNode.content; + const prefixList = Array.from(prefixContent).sort((a, b) => a - b); + const remainingList = Array.from(originalSet) + .filter((value) => !prefixContent.has(value)) + .sort((a, b) => a - b); + return [...prefixList, ...remainingList]; + } + + const bestNode = this._findBestAncestor(leafNode, uniqueNodes); + if (!bestNode) { + return [...originalContext]; + } + + const prefixContent = bestNode.content; + const prefixList = Array.from(prefixContent).sort((a, b) => a - b); + const remainingList = Array.from(originalSet) + .filter((value) => !prefixContent.has(value)) + .sort((a, b) => a - b); + return [...prefixList, ...remainingList]; + } + + _findLeafNode(contextIndex: number, uniqueNodes: Map): ClusterNode | null { + for (const node of uniqueNodes.values()) { + if (node.isLeaf && node.originalIndices.has(contextIndex)) { + return node; + } + } + + return null; + } + + _findBestAncestor(startNode: ClusterNode, uniqueNodes: Map): ClusterNode | null { + let currentNode: ClusterNode = startNode; + + while (currentNode.parent !== null) { + const parentId = currentNode.parent; + const parentNode = uniqueNodes.get(parentId); + if (!parentNode) { + return null; + } + + if (parentNode.frequency > 1 && !parentNode.isEmpty) { + return parentNode; + } + + currentNode = parentNode; + } + + return null; + } + + reorderPrompts(originalPrompts: number[][], uniqueNodes: Map): number[][] { + return this.reorderContexts(originalPrompts, uniqueNodes); + } + + _reorderSinglePrompt( + promptIndex: number, + originalPrompt: number[], + uniqueNodes: Map + ): number[] { + return this._reorderSingleContext(promptIndex, originalPrompt, uniqueNodes); + } + + private _getNodeDocs(node: ClusterNode): number[] { + return Array.from(node.content); + } + + private _setNodeDocs(node: ClusterNode, docs: number[]): void { + node.content = new Set(docs); + } +} diff --git a/openclaw-plugin/src/engine/live-index.ts b/openclaw-plugin/src/engine/live-index.ts new file mode 100644 index 0000000..777b1e8 --- /dev/null +++ b/openclaw-plugin/src/engine/live-index.ts @@ -0,0 +1,1232 @@ +import { ContextIndex, IndexResult } from './index-construction.js'; +import { ClusterNode, NodeManager } from './tree-nodes.js'; +import { NodeMetadata } from './metadata.js'; +import { InterContextScheduler } from './inter-scheduler.js'; +import { IntraContextOrderer } from './intra-ordering.js'; +import { computeDistanceSingle, computeDistancesBatch } from './compute-distance.js'; +import { ConversationTracker, type DeduplicationResult } from './conversation-tracker.js'; +import { EvictionHeap } from './eviction-heap.js'; +import crypto from 'crypto'; + +export function computePrefixLength(list1: number[], list2: number[]): number { + let length = 0; + const minLen = Math.min(list1.length, list2.length); + for (let i = 0; i < minLen; i++) { + if (list1[i] === list2[i]) { + length++; + } else { + break; + } + } + return length; +} + +export class ContextPilot extends ContextIndex { + metadata: Map = new Map(); + interScheduler = new InterContextScheduler(); + + protected _requestToNode: Map = new Map(); + protected _nextRequestCounter: number = 0; + + protected _conversations: Map; turnCount: number }> = new Map(); + protected _hasExplicitConversation: boolean = false; + + isLive: boolean = false; + initialResult: any = null; + scheduledResult: any = null; + + nodes: Map = new Map(); + rootId: number | null = null; + nextNodeId: number = 0; + + liveStats = { + totalSearches: 0, + totalInsertions: 0, + totalEvictions: 0, + totalSearchTimeUs: 0, + totalTraversalTimeUs: 0, + totalRemovals: 0 + }; + + static readonly _DEFAULT_CONVERSATION = "_default"; + + constructor(alpha: number = 0.001, useGpu: boolean = false, linkageMethod: string = "average", batchSize: number = 10000) { + super(alpha, useGpu, linkageMethod, batchSize); + } + + getAllRequestIds(): Set { + return new Set(this._requestToNode.keys()); + } + + reset(): void { + this.metadata.clear(); + this._requestToNode.clear(); + this._nextRequestCounter = 0; + this.isLive = false; + this.initialResult = null; + this.scheduledResult = null; + this.nodes.clear(); + this.rootId = null; + this.nextNodeId = 0; + this.liveStats = { + totalSearches: 0, + totalInsertions: 0, + totalEvictions: 0, + totalSearchTimeUs: 0, + totalTraversalTimeUs: 0, + totalRemovals: 0 + }; + } + + buildAndSchedule(contexts: number[][], initialTokensPerContext: number = 0): any { + this.initialResult = this.fitTransform(contexts); + + const [scheduledReordered, scheduledOriginals, finalMapping, groups] = + this.interScheduler.scheduleContexts(this.initialResult); + + this.scheduledResult = { + reordered_contexts: scheduledReordered, + original_indices: finalMapping, + scheduled_originals: scheduledOriginals, + groups: groups, + clustering_result: this.initialResult + }; + + const [requestIdMapping, requestIdsOrdered] = this._initializeLiveMetadata( + initialTokensPerContext, + contexts.length + ); + + this.scheduledResult['request_id_mapping'] = requestIdMapping; + this.scheduledResult['request_ids'] = requestIdsOrdered; + + this.isLive = true; + + return this.scheduledResult; + } + + reorder(contexts: any, initialTokensPerContext: number = 0, conversationId?: string): [any[], number[]] { + if (contexts && !Array.isArray(contexts[0])) { + contexts = [contexts]; + } + + const result = this.buildIncremental(contexts, initialTokensPerContext); + const reordered = result.reordered_contexts; + + const cid = conversationId || ContextPilot._DEFAULT_CONVERSATION; + if (conversationId !== undefined && conversationId !== null) { + this._hasExplicitConversation = true; + } + + let conv = this._conversations.get(cid); + if (!conv) { + conv = { seenDocs: new Set(), turnCount: 0 }; + this._conversations.set(cid, conv); + } + + for (const ctx of reordered) { + for (const doc of ctx) { + conv.seenDocs.add(doc); + } + } + conv.turnCount += 1; + + return [reordered, result.original_indices]; + } + + optimize(docs: string[], query: string, conversationId?: string, systemInstruction?: string): any[] { + const [reordered, _indices] = this.reorder(docs, 0, conversationId); + const reorderedDocs = reordered[0]; + + const systemContent = [systemInstruction, ...reorderedDocs].filter(Boolean).join("\n\n"); + + return [ + { role: "system", content: systemContent }, + { role: "user", content: query } + ]; + } + + optimizeBatch(allDocs: string[][], allQueries: string[], systemInstruction?: string): [any[][], number[]] { + if (allDocs.length !== allQueries.length) { + throw new Error(`all_docs (${allDocs.length}) and all_queries (${allQueries.length}) must have the same length.`); + } + + const [reorderedContexts, order] = this.reorder(allDocs); + const messagesBatch: any[][] = []; + + for (let i = 0; i < reorderedContexts.length; i++) { + const ctx = reorderedContexts[i]; + const origIdx = order[i]; + + const systemContent = [systemInstruction, ...ctx].filter(Boolean).join("\n\n"); + messagesBatch.push([ + { role: "system", content: systemContent }, + { role: "user", content: allQueries[origIdx] } + ]); + } + + return [messagesBatch, order]; + } + + deduplicate(contexts: any[][], conversationId: string, hintTemplate?: string): any[] { + if (!conversationId) { + throw new Error("conversation_id is required for .deduplicate()."); + } + + const template = hintTemplate || "Please refer to [Doc {doc_id}] from the previous conversation."; + + if (!this._conversations.has(conversationId)) { + throw new Error(`No prior .reorder() call found for conversation_id='${conversationId}'.`); + } + + const conv = this._conversations.get(conversationId)!; + const seen = conv.seenDocs; + const results: any[] = []; + + for (const ctx of contexts) { + const overlapping = ctx.filter(d => seen.has(d)); + const newDocs = ctx.filter(d => !seen.has(d)); + const hints = overlapping.map(d => template.replace("{doc_id}", String(d))); + + results.push({ + new_docs: newDocs, + overlapping_docs: overlapping, + reference_hints: hints, + deduplicated_docs: newDocs + }); + + for (const d of ctx) { + seen.add(d); + } + } + + conv.turnCount += 1; + return results; + } + + buildIncremental(contexts: any[][], initialTokensPerContext: number = 0): any { + // @ts-ignore - Assuming inherited from ContextIndex + const convertedContexts = this._convertToInt ? this._convertToInt(contexts) : contexts; + + if (!this.isLive) { + const result = this.buildAndSchedule(convertedContexts, initialTokensPerContext); + const reordered = result.reordered_contexts || convertedContexts; + // @ts-ignore + const stringReordered = this._convertToStr ? this._convertToStr(reordered) : reordered; + + return { + request_ids: result.request_ids || [], + reordered_contexts: stringReordered, + matched_count: 0, + inserted_count: convertedContexts.length, + merged_count: 0, + original_indices: result.original_indices || Array.from({ length: convertedContexts.length }, (_, i) => i), + groups: result.groups || [] + }; + } + + const matchedContexts: any[] = []; + const unmatchedContexts: any[] = []; + + const searchResults = this.searchBatch(convertedContexts); + + for (let i = 0; i < convertedContexts.length; i++) { + const context = convertedContexts[i]; + let [searchPath, matchedNodeId, overlapCount, hasPrefix] = searchResults[i]; + + if (overlapCount > 0 && matchedNodeId >= 0 && matchedNodeId !== this.rootId) { + const matchedNode = this.nodes.get(matchedNodeId); + let nodeDocs: number[] | null = null; + + if (this.metadata.has(matchedNodeId) && this.metadata.get(matchedNodeId)!.docIds) { + nodeDocs = this.metadata.get(matchedNodeId)!.docIds as number[]; + } else if (matchedNode && matchedNode.docIds) { + nodeDocs = matchedNode.docIds as number[]; + } + + let reordered = context; + if (nodeDocs) { + reordered = this._reorderWithPrefix(context, nodeDocs); + } else { + hasPrefix = true; + } + + matchedContexts.push([i, reordered, searchPath, hasPrefix]); + } else { + unmatchedContexts.push([i, context]); + } + } + + const requestIds: (string | null)[] = new Array(convertedContexts.length).fill(null); + const reorderedContexts: any[] = new Array(convertedContexts.length).fill(null); + const contextInfo: any[] = []; + + for (const [origIdx, reordered, searchPath, hasPrefix] of matchedContexts) { + const matchedNode = this.traverse(searchPath); + let newNodeId: number, newSearchPath: number[], requestId: string; + + if (hasPrefix && matchedNode && matchedNode.isLeaf) { + [newNodeId, newSearchPath, requestId] = this._splitLeafAndInsert( + reordered, matchedNode, searchPath, initialTokensPerContext + ); + } else if (hasPrefix) { + [newNodeId, newSearchPath, requestId] = this.insert( + reordered, searchPath, initialTokensPerContext + ); + } else { + const insertPath = searchPath.length > 0 ? searchPath.slice(0, -1) : searchPath; + [newNodeId, newSearchPath, requestId] = this.insert( + reordered, insertPath, initialTokensPerContext + ); + } + + requestIds[origIdx] = requestId; + reorderedContexts[origIdx] = reordered; + contextInfo.push([origIdx, requestId, newSearchPath]); + } + + let mergedCount = 0; + if (unmatchedContexts.length > 0) { + const unmatchedOnly = unmatchedContexts.map(x => x[1]); + + const tempIndex = new ContextPilot( + this.alpha, + // @ts-ignore + this.useGpu, + // @ts-ignore + this.linkageMethod, + // @ts-ignore + this.batchSize + ); + + const tempResult = tempIndex.fitTransform(unmatchedOnly); + + const [mergedRequestIds, mergedSearchPaths] = this._mergeIndex( + tempResult, + unmatchedContexts, + initialTokensPerContext + ); + + for (let i = 0; i < unmatchedContexts.length; i++) { + const [origIdx, origContext] = unmatchedContexts[i]; + requestIds[origIdx] = mergedRequestIds[i]; + + if (tempResult.reordered_contexts && i < tempResult.reordered_contexts.length) { + reorderedContexts[origIdx] = tempResult.reordered_contexts[i]; + } else { + reorderedContexts[origIdx] = origContext; + } + + contextInfo.push([origIdx, mergedRequestIds[i], mergedSearchPaths[i]]); + } + + mergedCount = unmatchedContexts.length; + } + + const scheduledOrder = this._scheduleIncremental(contextInfo); + const groups = this._groupByPathPrefix(contextInfo); + + // @ts-ignore + const finalReorderedStr = this._convertToStr ? this._convertToStr(reorderedContexts) : reorderedContexts; + + return { + request_ids: requestIds, + reordered_contexts: finalReorderedStr, + matched_count: matchedContexts.length, + inserted_count: convertedContexts.length, + merged_count: mergedCount, + original_indices: scheduledOrder, + groups: groups + }; + } + + _reorderWithPrefix(context: number[], prefix: number[]): number[] { + const contextSet = new Set(context); + const result: number[] = []; + const prefixUsed = new Set(); + + for (const elem of prefix) { + if (contextSet.has(elem) && !prefixUsed.has(elem)) { + result.push(elem); + prefixUsed.add(elem); + } + } + + for (const elem of context) { + if (!prefixUsed.has(elem)) { + result.push(elem); + } + } + + return result; + } + + _mergeIndex(tempResult: any, unmatchedInfo: any[], initialTokens: number): [string[], number[][]] { + const requestIds: string[] = []; + const searchPaths: number[][] = []; + + const uniqueNodes = tempResult.unique_nodes || tempResult.uniqueNodes; + let tempRoot: any = null; + + if (uniqueNodes) { + for (const node of uniqueNodes.values()) { + if (node.isRoot) { + tempRoot = node; + break; + } + } + } + + const fallbackInsert = () => { + for (const [origIdx, context] of unmatchedInfo) { + const [newNodeId, newPath, reqId] = this.insert(context, [], initialTokens); + requestIds.push(reqId); + searchPaths.push(newPath); + } + }; + + if (!tempRoot || this.rootId === null) { + fallbackInsert(); + return [requestIds, searchPaths]; + } + + const globalRoot = this.nodes.get(this.rootId); + if (!globalRoot) { + fallbackInsert(); + return [requestIds, searchPaths]; + } + + const nodeIdMap = new Map(); + const baseChildIdx = globalRoot.children.length; + + for (let childIdx = 0; childIdx < tempRoot.children.length; childIdx++) { + const tempChildId = tempRoot.children[childIdx]; + const newChildIdx = baseChildIdx + childIdx; + this._copySubtree( + uniqueNodes, + tempChildId, + this.rootId, + nodeIdMap, + initialTokens, + [newChildIdx] + ); + } + + for (let i = 0; i < unmatchedInfo.length; i++) { + const [origIdx, context] = unmatchedInfo[i]; + let tempLeafId: number | null = null; + + for (const [nodeId, node] of uniqueNodes.entries()) { + if (node.isLeaf && node.originalIndices && node.originalIndices.has(i)) { + tempLeafId = nodeId; + break; + } + } + + if (tempLeafId !== null && nodeIdMap.has(tempLeafId)) { + const newNodeId = nodeIdMap.get(tempLeafId)!; + if (this.metadata.has(newNodeId)) { + const meta = this.metadata.get(newNodeId)!; + requestIds.push(meta.requestId!); + searchPaths.push(meta.searchPath); + continue; + } + } + + const [newNodeId, newPath, reqId] = this.insert(context, [], initialTokens); + requestIds.push(reqId); + searchPaths.push(newPath); + } + + return [requestIds, searchPaths]; + } + + _copySubtree(sourceNodes: Map, sourceNodeId: number, parentId: number, + nodeIdMap: Map, initialTokens: number, searchPath: number[]): void { + const sourceNode = sourceNodes.get(sourceNodeId); + if (!sourceNode) return; + + const newNodeId = this.nextNodeId++; + const content = sourceNode.docIds ? [...sourceNode.docIds] : (sourceNode.content ? [...sourceNode.content] : []); + const originalIndices = sourceNode.originalIndices ? new Set(sourceNode.originalIndices) : new Set(); + + const newNode = new ClusterNode( + newNodeId, + content, + [], + parentId, + originalIndices + ); + + if (sourceNode.docIds) { + newNode.docIds = [...sourceNode.docIds]; + } + + this.nodes.set(newNodeId, newNode); + nodeIdMap.set(sourceNodeId, newNodeId); + + const parentNode = this.nodes.get(parentId); + if (parentNode) { + parentNode.addChild(newNodeId); + } + + const isLeaf = sourceNode.isLeaf || sourceNode.is_leaf; + const requestId = isLeaf ? `req-${crypto.randomUUID().replace(/-/g, '').substring(0, 12)}` : null; + + const parentTokens = this.metadata.has(parentId) ? this.metadata.get(parentId)!.totalTokens : 0; + + const metadata = new NodeMetadata( + newNodeId, + isLeaf ? initialTokens : 0, + isLeaf ? Math.max(0, initialTokens - parentTokens) : 0, + searchPath, + sourceNode.docIds ? [...sourceNode.docIds] : null, + isLeaf, + requestId + ); + + this.metadata.set(newNodeId, metadata); + + if (isLeaf && requestId) { + this._requestToNode.set(requestId, newNodeId); + } + + if (sourceNode.children) { + for (let childIdx = 0; childIdx < sourceNode.children.length; childIdx++) { + const childId = sourceNode.children[childIdx]; + const childSearchPath = [...searchPath, childIdx]; + this._copySubtree( + sourceNodes, childId, newNodeId, + nodeIdMap, initialTokens, childSearchPath + ); + } + } + } + + _scheduleIncremental(contextInfo: any[]): number[] { + const groups = new Map(); + + for (const [ctxIdx, reqId, path] of contextInfo) { + const groupKey = path && path.length > 0 ? path[0] : -1; + if (!groups.has(groupKey)) { + groups.set(groupKey, []); + } + groups.get(groupKey)!.push({ ctxIdx, len: path ? path.length : 0 }); + } + + const scheduled: number[] = []; + const sortedKeys = Array.from(groups.keys()).sort((a, b) => a - b); + + for (const groupKey of sortedKeys) { + const items = groups.get(groupKey)!; + items.sort((a, b) => b.len - a.len); + scheduled.push(...items.map(item => item.ctxIdx)); + } + + return scheduled; + } + + _groupByPathPrefix(contextInfo: any[]): [number, number[]][] { + const groups = new Map(); + + for (const [ctxIdx, reqId, path] of contextInfo) { + const groupKey = path && path.length > 0 ? path[0] : -1; + if (!groups.has(groupKey)) { + groups.set(groupKey, []); + } + groups.get(groupKey)!.push(ctxIdx); + } + + const result: [number, number[]][] = []; + for (const [groupKey, indices] of groups.entries()) { + result.push([indices.length, indices]); + } + + result.sort((a, b) => b[0] - a[0]); + return result; + } + + scheduleOnly(contexts: number[][]): any { + const result = this.fitTransform(contexts); + + const [scheduledReordered, scheduledOriginals, finalMapping, groups] = + this.interScheduler.scheduleContexts(result); + + return { + reordered_contexts: scheduledReordered, + original_indices: finalMapping, + scheduled_originals: scheduledOriginals, + groups: groups, + stats: { + total_nodes: result.stats?.total_nodes || result.stats?.totalNodes, + leaf_nodes: result.stats?.leaf_nodes || result.stats?.leafNodes, + num_contexts: contexts.length, + num_groups: groups.length + } + }; + } + + _initializeLiveMetadata(initialTokensPerContext: number, numInputContexts?: number): [Record, (string | null)[]] { + if (!this.initialResult) { + throw new Error("Must call fitTransform() before initializing metadata"); + } + + const uniqueNodes = this.initialResult.unique_nodes || this.initialResult.uniqueNodes; + const reorderedContexts = this.initialResult.reordered_contexts || this.initialResult.reorderedContexts; + const requestIdMapping: Record = {}; + + this.nodes = uniqueNodes; + + for (const [nodeId, node] of uniqueNodes.entries()) { + if (node.isRoot || node.is_root) { + this.rootId = nodeId; + break; + } + } + + this.nextNodeId = uniqueNodes.size > 0 ? Math.max(...Array.from(uniqueNodes.keys())) + 1 : 0; + let leafCounter = 0; + const originalIndexToRequestId = new Map(); + + for (const [nodeId, node] of uniqueNodes.entries()) { + const searchPath = this._computeSearchPath(nodeId); + const isLeaf = node.isLeaf || node.is_leaf; + + let totalTokens = 0; + let requestId: string | null = null; + + if (isLeaf) { + totalTokens = initialTokensPerContext; + requestId = `req-${crypto.randomUUID().replace(/-/g, '').substring(0, 12)}`; + leafCounter++; + + if (node.originalIndices || node.original_indices) { + const indices = node.originalIndices || node.original_indices; + for (const origIdx of indices) { + originalIndexToRequestId.set(origIdx, requestId); + } + } + } + + let parentTokens = 0; + if (node.parent !== null && this.metadata.has(node.parent)) { + parentTokens = this.metadata.get(node.parent)!.totalTokens; + } + const extraTokens = Math.max(0, totalTokens - parentTokens); + + let leafDocIds: number[] | null = null; + if (isLeaf && (node.originalIndices || node.original_indices)) { + const indices = Array.from((node.originalIndices || node.original_indices) as Set); + if (indices.length > 0) { + const firstOrigIdx = Math.min(...indices); + if (reorderedContexts && firstOrigIdx < reorderedContexts.length) { + leafDocIds = reorderedContexts[firstOrigIdx]; + } else { + leafDocIds = node.docIds || node.doc_ids; + } + } + } else { + leafDocIds = node.docIds || node.doc_ids; + } + + const metadata = new NodeMetadata( + nodeId, + totalTokens, + extraTokens, + searchPath, + leafDocIds, + isLeaf, + requestId + ); + + this.metadata.set(nodeId, metadata); + + if (isLeaf && requestId) { + this._requestToNode.set(requestId, nodeId); + requestIdMapping[requestId] = nodeId; + } + } + + this.nextNodeId = this.nodes.size > 0 ? Math.max(...Array.from(this.nodes.keys())) + 1 : 0; + this._nextRequestCounter = leafCounter; + + const numContexts = numInputContexts !== undefined ? numInputContexts : originalIndexToRequestId.size; + const requestIdsOrdered: (string | null)[] = []; + + for (let i = 0; i < numContexts; i++) { + requestIdsOrdered.push(originalIndexToRequestId.get(i) || null); + } + + return [requestIdMapping, requestIdsOrdered]; + } + + trackRequest(requestId: string): void { + if (!this._requestToNode.has(requestId)) { + this._requestToNode.set(requestId, null); + } + } + + removeRequests(requestIds: Set): any { + const evictedNodes: number[] = []; + const notFound: string[] = []; + + for (const requestId of requestIds) { + if (!this._requestToNode.has(requestId)) { + notFound.push(requestId); + continue; + } + + const nodeId = this._requestToNode.get(requestId); + this._requestToNode.delete(requestId); + + if (nodeId !== null && nodeId !== undefined) { + evictedNodes.push(nodeId); + this._removeNodeAndPrune(nodeId); + } + } + + this.liveStats.totalEvictions += evictedNodes.length; + + const arrayReqs = Array.from(requestIds); + return { + removed_count: evictedNodes.length, + evicted_node_ids: evictedNodes, + evicted_request_ids: arrayReqs.filter(id => !notFound.includes(id)), + not_found: notFound, + nodes_remaining: this.nodes.size, + requests_remaining: this._requestToNode.size + }; + } + + removeRequestById(requestId: string): boolean { + const result = this.removeRequests(new Set([requestId])); + return result.evicted_node_ids.length > 0; + } + + getRequestNode(requestId: string): number | null { + return this._requestToNode.get(requestId) ?? null; + } + + _collectAllNodeDocs(): [number[], number[][], Record] { + const nodeIds: number[] = []; + const nodeDocsList: number[][] = []; + const nodeIdToPath: Record = {}; + + if (this.rootId === null) return [nodeIds, nodeDocsList, nodeIdToPath]; + + const queue: [number, number[]][] = [[this.rootId, []]]; + + while (queue.length > 0) { + const [nodeId, path] = queue.shift()!; + + if (!this.nodes.has(nodeId)) continue; + + const node = this.nodes.get(nodeId)!; + const nodeMeta = this.metadata.get(nodeId); + + let docs: number[] | null = null; + if (nodeMeta && nodeMeta.docIds) { + docs = nodeMeta.docIds; + } else if (node.docIds) { + docs = node.docIds; + } + + if (docs) { + nodeIds.push(nodeId); + nodeDocsList.push(docs); + nodeIdToPath[nodeId] = path; + } + + if (!node.isLeaf && node.children) { + for (let idx = 0; idx < node.children.length; idx++) { + queue.push([node.children[idx], [...path, idx]]); + } + } + } + + return [nodeIds, nodeDocsList, nodeIdToPath]; + } + + _getNodeDocs(nodeId: number): number[] | null { + const meta = this.metadata.get(nodeId); + if (meta && meta.docIds) return meta.docIds; + const node = this.nodes.get(nodeId); + if (node && node.docIds) return node.docIds; + return null; + } + + _searchSingleHierarchical(context: number[]): [number[], number, number, boolean] { + const contextSet = new Set(context); + let currentId = this.rootId; + let currentPath: number[] = []; + + while (true) { + if (currentId === null) return [[], -1, 0, false]; + const currentNode = this.nodes.get(currentId); + + if (!currentNode || currentNode.isLeaf || !currentNode.children || currentNode.children.length === 0) { + const docs = this._getNodeDocs(currentId); + if (docs && currentId !== this.rootId) { + const overlap = Array.from(contextSet).filter(x => new Set(docs).has(x)).length; + const hasPrefix = overlap > 0 ? contextSet.has(docs[0]) : false; + return [currentPath, currentId, overlap, hasPrefix]; + } + return [[], -1, 0, false]; + } + + const childIds: number[] = []; + const childDocsList: number[][] = []; + const childIndices: number[] = []; + + for (let idx = 0; idx < currentNode.children.length; idx++) { + const childId = currentNode.children[idx]; + const docs = this._getNodeDocs(childId); + if (docs) { + childIds.push(childId); + childDocsList.push(docs); + childIndices.push(idx); + } + } + + if (childIds.length === 0) return [[], -1, 0, false]; + + const distances = computeDistancesBatch([context], childDocsList, this.alpha); + + let bestJ = -1; + let bestDistance = Infinity; + let bestOverlap = 0; + + for (let j = 0; j < childIds.length; j++) { + const docs = childDocsList[j]; + const overlap = Array.from(contextSet).filter(x => new Set(docs).has(x)).length; + if (overlap === 0) continue; + + const dist = Array.isArray(distances[0]) ? distances[0][j] : distances[j]; + + if (dist < bestDistance) { + bestDistance = dist; + bestOverlap = overlap; + bestJ = j; + } + } + + if (bestJ < 0) { + if (currentId !== this.rootId) { + const docs = this._getNodeDocs(currentId); + if (docs) { + const overlap = Array.from(contextSet).filter(x => new Set(docs).has(x)).length; + return [currentPath, currentId, overlap, true]; + } + } + return [[], -1, 0, false]; + } + + const bestChildId = childIds[bestJ]; + const bestChildIdx = childIndices[bestJ]; + const bestDocs = childDocsList[bestJ]; + const childPath = [...currentPath, bestChildIdx]; + + if (contextSet.has(bestDocs[0])) { + const bestChildNode = this.nodes.get(bestChildId); + if (bestChildNode && !bestChildNode.isLeaf && bestChildNode.children && bestChildNode.children.length > 0) { + currentId = bestChildId; + currentPath = childPath; + continue; + } else { + return [childPath, bestChildId, bestOverlap, true]; + } + } else { + return [childPath, bestChildId, bestOverlap, false]; + } + } + } + + searchBatch(contexts: number[][]): [number[], number, number, boolean][] { + const startTime = globalThis.performance ? globalThis.performance.now() : Date.now(); + + if (this.rootId === null || contexts.length === 0) { + return contexts.map(() => [[], -1, 0, false]); + } + + const results = contexts.map(ctx => this._searchSingleHierarchical(ctx)); + + const endTime = globalThis.performance ? globalThis.performance.now() : Date.now(); + const elapsedUs = (endTime - startTime) * 1000; + + this.liveStats.totalSearches += contexts.length; + this.liveStats.totalSearchTimeUs += elapsedUs; + + return results; + } + + search(context: number[], updateAccess: boolean = true): [number[], number, number, boolean] { + const results = this.searchBatch([context]); + const [searchPath, nodeId, overlap, hasPrefix] = results[0]; + + if (updateAccess && nodeId >= 0 && this.metadata.has(nodeId)) { + this.metadata.get(nodeId)!.updateAccessTime(); + } + + return [searchPath, nodeId, overlap, hasPrefix]; + } + + traverse(searchPath: number[]): ClusterNode | null { + const startTime = globalThis.performance ? globalThis.performance.now() : Date.now(); + + if (this.rootId === null) return null; + + let currentId = this.rootId; + + for (const childIdx of searchPath) { + if (!this.nodes.has(currentId)) return null; + + const currentNode = this.nodes.get(currentId)!; + + if (!currentNode.children || childIdx >= currentNode.children.length) { + return null; + } + + currentId = currentNode.children[childIdx]; + } + + const endTime = globalThis.performance ? globalThis.performance.now() : Date.now(); + const elapsedUs = (endTime - startTime) * 1000; + this.liveStats.totalTraversalTimeUs += elapsedUs; + + return this.nodes.get(currentId) || null; + } + + insert(context: number[], searchPath: number[], totalTokens: number = 0): [number, number[], string] { + const startTime = globalThis.performance ? globalThis.performance.now() : Date.now(); + + let matchedNode = this.traverse(searchPath); + + if (!matchedNode) { + matchedNode = this.nodes.get(this.rootId!)!; + searchPath = []; + } + + let newNodeId: number, newSearchPath: number[], requestId: string; + + if (matchedNode.isLeaf) { + [newNodeId, newSearchPath, requestId] = this._insertAtLeaf( + context, matchedNode, searchPath, totalTokens + ); + } else { + [newNodeId, newSearchPath, requestId] = this._insertAtInternal( + context, matchedNode, searchPath, totalTokens + ); + } + + const endTime = globalThis.performance ? globalThis.performance.now() : Date.now(); + this.liveStats.totalInsertions += 1; + + return [newNodeId, newSearchPath, requestId]; + } + + _insertAtInternal(context: number[], parentNode: ClusterNode, searchPath: number[], totalTokens: number): [number, number[], string] { + const requestId = `req-${crypto.randomUUID().replace(/-/g, '').substring(0, 12)}`; + + const newNodeId = this.nextNodeId++; + const newNode = new ClusterNode( + newNodeId, + context, + [], + parentNode.nodeId, + new Set([newNodeId]) + ); + + this.nodes.set(newNodeId, newNode); + parentNode.addChild(newNodeId); + + const parentTokens = this.metadata.has(parentNode.nodeId) ? this.metadata.get(parentNode.nodeId)!.totalTokens : 0; + const newSearchPath = [...searchPath, parentNode.children.length - 1]; + + const metadata = new NodeMetadata( + newNodeId, + totalTokens, + Math.max(0, totalTokens - parentTokens), + newSearchPath, + context, + true, + requestId + ); + + this.metadata.set(newNodeId, metadata); + this._requestToNode.set(requestId, newNodeId); + + return [newNodeId, newSearchPath, requestId]; + } + + _insertAtLeaf(context: number[], leafNode: ClusterNode, searchPath: number[], totalTokens: number): [number, number[], string] { + const requestId = `req-${crypto.randomUUID().replace(/-/g, '').substring(0, 12)}`; + + let parentNode: ClusterNode; + let parentSearchPath: number[]; + + if (leafNode.parent === null) { + parentNode = this.nodes.get(this.rootId!)!; + parentSearchPath = []; + } else { + parentNode = this.nodes.get(leafNode.parent)!; + parentSearchPath = searchPath.length > 0 ? searchPath.slice(0, -1) : []; + } + + const newLeafId = this.nextNodeId++; + const newLeaf = new ClusterNode( + newLeafId, + context, + [], + parentNode.nodeId, + new Set([newLeafId]) + ); + + this.nodes.set(newLeafId, newLeaf); + parentNode.addChild(newLeafId); + + const newSearchPath = [...parentSearchPath, parentNode.children.length - 1]; + const parentTokens = this.metadata.has(parentNode.nodeId) ? this.metadata.get(parentNode.nodeId)!.totalTokens : 0; + + const newMetadata = new NodeMetadata( + newLeafId, + totalTokens, + Math.max(0, totalTokens - parentTokens), + newSearchPath, + context, + true, + requestId + ); + + this.metadata.set(newLeafId, newMetadata); + this._requestToNode.set(requestId, newLeafId); + + return [newLeafId, newSearchPath, requestId]; + } + + _splitLeafAndInsert(context: number[], leafNode: ClusterNode, searchPath: number[], totalTokens: number): [number, number[], string] { + const matchedDocs = this._getNodeDocs(leafNode.nodeId); + + if (!matchedDocs) { + return this._insertAtLeaf(context, leafNode, searchPath, totalTokens); + } + + const sharedPrefix: number[] = []; + for (let i = 0; i < Math.min(matchedDocs.length, context.length); i++) { + if (matchedDocs[i] === context[i]) { + sharedPrefix.push(matchedDocs[i]); + } else { + break; + } + } + + if (sharedPrefix.length === 0) { + return this._insertAtLeaf(context, leafNode, searchPath, totalTokens); + } + + if (sharedPrefix.length === matchedDocs.length && new Set(matchedDocs).size === new Set(context).size && + [...new Set(matchedDocs)].every(d => new Set(context).has(d))) { + return this._insertAtLeaf(context, leafNode, searchPath, totalTokens); + } + + let parentId = leafNode.parent; + if (parentId === null) { + parentId = this.rootId!; + } + const parentNode = this.nodes.get(parentId)!; + const parentSearchPath = searchPath.length > 0 ? searchPath.slice(0, -1) : []; + + const leafChildIdx = parentNode.children.indexOf(leafNode.nodeId); + + const newInternalId = this.nextNodeId++; + const allContent = new Set([...leafNode.content, ...context]); + + const newInternal = new ClusterNode( + newInternalId, + Array.from(allContent), + [leafNode.nodeId], + parentId, + new Set() + ); + newInternal.docIds = [...sharedPrefix]; + + this.nodes.set(newInternalId, newInternal); + + parentNode.children[leafChildIdx] = newInternalId; + leafNode.parent = newInternalId; + + const parentTokens = this.metadata.has(parentId) ? this.metadata.get(parentId)!.totalTokens : 0; + const leafMeta = this.metadata.get(leafNode.nodeId); + const leafTotal = leafMeta ? leafMeta.totalTokens : 0; + + let internalTokens = parentTokens; + if (matchedDocs && matchedDocs.length > 0) { + const prefixRatio = sharedPrefix.length / matchedDocs.length; + internalTokens = Math.floor(parentTokens + (leafTotal - parentTokens) * prefixRatio); + } + + const internalPath = [...parentSearchPath, leafChildIdx]; + + const internalMeta = new NodeMetadata( + newInternalId, + internalTokens, + Math.max(0, internalTokens - parentTokens), + internalPath, + [...sharedPrefix], + false, + null + ); + this.metadata.set(newInternalId, internalMeta); + + if (leafMeta) { + leafMeta.extraTokens = Math.max(0, leafTotal - internalTokens); + leafMeta.searchPath = [...internalPath, 0]; + } + + const requestId = `req-${crypto.randomUUID().replace(/-/g, '').substring(0, 12)}`; + const newLeafId = this.nextNodeId++; + + const newLeaf = new ClusterNode( + newLeafId, + context, + [], + newInternalId, + new Set([newLeafId]) + ); + newLeaf.docIds = [...context]; + + this.nodes.set(newLeafId, newLeaf); + newInternal.addChild(newLeafId); + + const newLeafPath = [...internalPath, 1]; + + const newLeafMeta = new NodeMetadata( + newLeafId, + totalTokens, + Math.max(0, totalTokens - internalTokens), + newLeafPath, + [...context], + true, + requestId + ); + + this.metadata.set(newLeafId, newLeafMeta); + this._requestToNode.set(requestId, newLeafId); + + return [newLeafId, newLeafPath, requestId]; + } + + updateNode(searchPath: number[], tokenDelta: number): boolean { + const node = this.traverse(searchPath); + + if (!node || !this.metadata.has(node.nodeId)) { + return false; + } + + const metadata = this.metadata.get(node.nodeId)!; + + if (tokenDelta > 0) { + metadata.addTokens(tokenDelta); + } else { + metadata.removeTokens(Math.abs(tokenDelta)); + } + + return true; + } + + _removeNode(nodeId: number): void { + this._removeNodeAndPrune(nodeId); + } + + _removeNodeAndPrune(nodeId: number): number { + if (!this.nodes.has(nodeId)) { + return 0; + } + + let nodesPruned = 0; + const node = this.nodes.get(nodeId)!; + const parentId = node.parent; + + if (parentId !== null && this.nodes.has(parentId)) { + const parent = this.nodes.get(parentId)!; + const idx = parent.children.indexOf(nodeId); + if (idx > -1) { + parent.children.splice(idx, 1); + } + + if (parent.children.length === 0 && !parent.isRoot) { + nodesPruned += 1; + nodesPruned += this._removeNodeAndPrune(parentId); + } + } + + this.nodes.delete(nodeId); + + if (this.metadata.has(nodeId)) { + this.metadata.delete(nodeId); + } + + return nodesPruned; + } + + _computeSearchPath(nodeId: number): number[] { + if (nodeId === this.rootId) return []; + + const path: number[] = []; + let currentId: number | null = nodeId; + const visited = new Set(); + + while (currentId !== this.rootId && currentId !== null) { + if (visited.has(currentId)) break; + visited.add(currentId); + + const node = this.nodes.get(currentId); + if (!node || node.parent === null) break; + + const parent = this.nodes.get(node.parent); + if (!parent) break; + + const childIdx = parent.children.indexOf(currentId); + if (childIdx === -1) break; + + path.push(childIdx); + currentId = node.parent; + } + + return path.reverse(); + } + + _findCommonPrefix(list1: number[], list2: number[]): number[] { + const prefix: number[] = []; + const minLen = Math.min(list1.length, list2.length); + for (let i = 0; i < minLen; i++) { + if (list1[i] === list2[i]) { + prefix.push(list1[i]); + } else { + break; + } + } + return prefix; + } + + getStats(): any { + const avgSearchTime = this.liveStats.totalSearches > 0 + ? this.liveStats.totalSearchTimeUs / this.liveStats.totalSearches + : 0; + + let totalTokens = 0; + for (const meta of this.metadata.values()) { + totalTokens += meta.extraTokens; + } + + return { + num_nodes: this.nodes.size, + active_nodes: this.metadata.size, + total_tokens: totalTokens, + num_requests: this._requestToNode.size, + total_searches: this.liveStats.totalSearches, + total_insertions: this.liveStats.totalInsertions, + total_removals: this.liveStats.totalRemovals, + avg_search_time_us: avgSearchTime + }; + } +} diff --git a/openclaw-plugin/src/engine/metadata.ts b/openclaw-plugin/src/engine/metadata.ts new file mode 100644 index 0000000..d2bf562 --- /dev/null +++ b/openclaw-plugin/src/engine/metadata.ts @@ -0,0 +1,82 @@ +export interface NodeMetadataInit { + totalTokens?: number; + extraTokens?: number; + lastAccessTime?: number; + searchPath?: number[]; + isActive?: boolean; + isLeaf?: boolean; + docIds?: number[] | null; + requestId?: string | null; +} + +export class NodeMetadata { + nodeId: number; + totalTokens: number; + extraTokens: number; + lastAccessTime: number; + searchPath: number[]; + isActive: boolean; + isLeaf: boolean; + docIds: number[] | null; + requestId: string | null; + + constructor(nodeId: number, init: NodeMetadataInit = {}) { + this.nodeId = nodeId; + this.totalTokens = init.totalTokens ?? 0; + this.extraTokens = init.extraTokens ?? 0; + this.lastAccessTime = init.lastAccessTime ?? Date.now() / 1000; + this.searchPath = init.searchPath ?? []; + this.isActive = init.isActive ?? true; + this.isLeaf = init.isLeaf ?? false; + this.docIds = init.docIds ?? null; + this.requestId = init.requestId ?? null; + } + + updateAccessTime(): void { + this.lastAccessTime = Date.now() / 1000; + } + + addTokens(delta: number): void { + this.totalTokens += delta; + this.extraTokens += delta; + this.updateAccessTime(); + } + + removeTokens(delta: number): number { + if (delta <= 0) { + return 0; + } + + let tokensRemoved = Math.min(delta, this.extraTokens); + this.extraTokens -= tokensRemoved; + this.totalTokens -= tokensRemoved; + + const remaining = delta - tokensRemoved; + if (remaining > 0) { + const actualRemoved = Math.min(remaining, this.totalTokens); + this.totalTokens -= actualRemoved; + tokensRemoved += actualRemoved; + } + + return tokensRemoved; + } + + isEmpty(): boolean { + return this.totalTokens <= 0; + } + + lessThan(other: NodeMetadata): boolean { + return this.lastAccessTime < other.lastAccessTime; + } + + toString(): string { + const req = this.requestId ? `, request_id=${this.requestId}` : ""; + return ( + `NodeMetadata(id=${this.nodeId}, ` + + `total_tokens=${this.totalTokens}, ` + + `extra_tokens=${this.extraTokens}, ` + + `is_leaf=${this.isLeaf}${req}, ` + + `active=${this.isActive})` + ); + } +} diff --git a/openclaw-plugin/src/engine/tree-nodes.ts b/openclaw-plugin/src/engine/tree-nodes.ts new file mode 100644 index 0000000..3f9b380 --- /dev/null +++ b/openclaw-plugin/src/engine/tree-nodes.ts @@ -0,0 +1,334 @@ +export class ClusterNode { + nodeId: number; + content: Set; + originalIndices: Set; + distance: number; + children: number[]; + parent: number | null; + frequency: number; + mergeDistance: number; + searchPath: number[]; + + constructor( + nodeId: number, + content: Set, + originalIndices: Set = new Set([nodeId]), + distance: number = 0.0, + children: number[] = [], + parent: number | null = null, + frequency: number = 1 + ) { + this.nodeId = nodeId; + this.content = content instanceof Set ? new Set(content) : new Set(content); + this.originalIndices = originalIndices; + this.distance = distance; + this.children = children; + this.parent = parent; + this.frequency = frequency; + this.mergeDistance = distance; + this.searchPath = []; + } + + get isLeaf(): boolean { + return this.children.length === 0; + } + + get isRoot(): boolean { + return this.parent === null; + } + + get isEmpty(): boolean { + return this.content.size === 0; + } + + get docIds(): number[] { + return Array.from(this.content).sort((a, b) => a - b); + } + + set docIds(value: number[]) { + this.content = new Set(value); + } + + addChild(childId: number): void { + if (!this.children.includes(childId) && childId !== this.nodeId) { + this.children.push(childId); + } + } + + removeChild(childId: number): void { + const idx = this.children.indexOf(childId); + if (idx !== -1) { + this.children.splice(idx, 1); + } + } + + updateFrequency(additionalFrequency: number): void { + this.frequency += additionalFrequency; + } + + mergeWith(otherNode: ClusterNode): void { + this.content = new Set(Array.from(this.content).filter((v) => otherNode.content.has(v))); + this.originalIndices = new Set([...this.originalIndices, ...otherNode.originalIndices]); + this.frequency += otherNode.frequency; + } + + getDepth(): number { + return this.searchPath.length; + } +} + +export interface NodeStats { + totalNodes: number; + leafNodes: number; + rootNodes: number; + internalNodes: number; +} + +export class NodeManager { + clusterNodes: Map; + uniqueNodes: Map; + redirects: Map; + contentToNodeId: Map; + + constructor() { + this.clusterNodes = new Map(); + this.uniqueNodes = new Map(); + this.redirects = new Map(); + this.contentToNodeId = new Map(); + } + + private contentKey(content: Set): string { + return Array.from(content).sort((a, b) => a - b).join(','); + } + + createLeafNode(nodeId: number, promptContent: Iterable): ClusterNode { + const contentSet = promptContent instanceof Set ? new Set(promptContent) : new Set(promptContent); + const key = this.contentKey(contentSet); + + const canonicalId = this.contentToNodeId.get(key); + if (canonicalId !== undefined) { + const canonicalNode = this.uniqueNodes.get(canonicalId); + if (!canonicalNode) { + throw new Error(`Missing canonical leaf node for id ${canonicalId}`); + } + + canonicalNode.updateFrequency(1); + canonicalNode.originalIndices.add(nodeId); + + this.redirects.set(nodeId, canonicalId); + this.clusterNodes.set(nodeId, canonicalNode); + return canonicalNode; + } + + const node = new ClusterNode(nodeId, contentSet); + this.clusterNodes.set(nodeId, node); + this.uniqueNodes.set(nodeId, node); + this.contentToNodeId.set(key, nodeId); + return node; + } + + createInternalNode( + nodeId: number, + child1Id: number, + child2Id: number, + distance: number + ): ClusterNode { + const canonicalChild1Id = this.redirects.get(child1Id) ?? child1Id; + const canonicalChild2Id = this.redirects.get(child2Id) ?? child2Id; + + if (canonicalChild1Id === canonicalChild2Id) { + this.redirects.set(nodeId, canonicalChild1Id); + const canonicalNode = this.uniqueNodes.get(canonicalChild1Id); + if (!canonicalNode) { + throw new Error(`Missing canonical child node for id ${canonicalChild1Id}`); + } + this.clusterNodes.set(nodeId, canonicalNode); + return canonicalNode; + } + + const child1 = this.uniqueNodes.get(canonicalChild1Id); + const child2 = this.uniqueNodes.get(canonicalChild2Id); + if (!child1 || !child2) { + throw new Error( + `Missing child nodes for internal node ${nodeId}: ${canonicalChild1Id}, ${canonicalChild2Id}` + ); + } + + const intersectionContent = new Set( + Array.from(child1.content).filter((v) => child2.content.has(v)) + ); + const key = this.contentKey(intersectionContent); + + const existingId = this.contentToNodeId.get(key); + if (existingId !== undefined && intersectionContent.size > 0) { + if (existingId !== canonicalChild1Id && existingId !== canonicalChild2Id) { + const existingNode = this.uniqueNodes.get(existingId); + if (!existingNode) { + throw new Error(`Missing existing node for id ${existingId}`); + } + + existingNode.addChild(canonicalChild1Id); + existingNode.addChild(canonicalChild2Id); + existingNode.frequency = Math.max( + existingNode.frequency, + child1.frequency + child2.frequency + ); + existingNode.originalIndices = new Set([ + ...existingNode.originalIndices, + ...child1.originalIndices, + ...child2.originalIndices + ]); + + child1.parent = existingId; + child2.parent = existingId; + + this.redirects.set(nodeId, existingId); + this.clusterNodes.set(nodeId, existingNode); + return existingNode; + } + } + + const combinedIndices = new Set([...child1.originalIndices, ...child2.originalIndices]); + const node = new ClusterNode( + nodeId, + intersectionContent, + combinedIndices, + distance, + [canonicalChild1Id, canonicalChild2Id], + null, + child1.frequency + child2.frequency + ); + + this.clusterNodes.set(nodeId, node); + this.uniqueNodes.set(nodeId, node); + + if (intersectionContent.size > 0) { + this.contentToNodeId.set(key, nodeId); + } + + child1.parent = nodeId; + child2.parent = nodeId; + + return node; + } + + cleanupEmptyNodes(): void { + const emptyNodeIds = Array.from(this.uniqueNodes.entries()) + .filter(([_, node]) => node.isEmpty) + .map(([nodeId]) => nodeId); + + if (emptyNodeIds.length === 0) { + return; + } + + const sortedEmptyIds = emptyNodeIds.sort((a, b) => b - a); + + for (const emptyId of sortedEmptyIds) { + const emptyNode = this.uniqueNodes.get(emptyId); + if (!emptyNode) { + continue; + } + + const parentId = emptyNode.parent; + const childrenIds = [...emptyNode.children]; + + if (parentId !== null) { + const parentNode = this.uniqueNodes.get(parentId); + if (parentNode) { + parentNode.removeChild(emptyId); + for (const childId of childrenIds) { + if (this.uniqueNodes.has(childId)) { + parentNode.addChild(childId); + } + } + } + } + + for (const childId of childrenIds) { + const childNode = this.uniqueNodes.get(childId); + if (childNode) { + childNode.parent = parentId; + } + } + + this.uniqueNodes.delete(emptyId); + } + + for (const node of this.uniqueNodes.values()) { + if (node.parent !== null && !this.uniqueNodes.has(node.parent)) { + node.parent = null; + } + } + } + + getNodeStats(): NodeStats { + const totalNodes = this.uniqueNodes.size; + let leafNodes = 0; + let rootNodes = 0; + + for (const node of this.uniqueNodes.values()) { + if (node.isLeaf) { + leafNodes += 1; + } + if (node.isRoot) { + rootNodes += 1; + } + } + + return { + totalNodes, + leafNodes, + rootNodes, + internalNodes: totalNodes - leafNodes + }; + } + + updateSearchPaths(): void { + const rootNodes = Array.from(this.uniqueNodes.values()).filter((node) => node.isRoot); + + if (rootNodes.length === 0) { + return; + } + + if (rootNodes.length === 1) { + const root = rootNodes[0]; + root.searchPath = []; + this._updatePathsFromNode(root); + return; + } + + const currentMaxId = Math.max(...Array.from(this.uniqueNodes.keys())); + const virtualRootId = currentMaxId + 1; + const virtualRoot = new ClusterNode( + virtualRootId, + new Set(), + new Set(), + 0.0, + rootNodes.map((node) => node.nodeId), + null, + rootNodes.reduce((sum, node) => sum + node.frequency, 0) + ); + virtualRoot.searchPath = []; + + this.uniqueNodes.set(virtualRootId, virtualRoot); + + for (const node of rootNodes) { + node.parent = virtualRootId; + } + + this._updatePathsFromNode(virtualRoot); + } + + _updatePathsFromNode(node: ClusterNode): void { + for (let childIndex = 0; childIndex < node.children.length; childIndex += 1) { + const childId = node.children[childIndex]; + const childNode = this.uniqueNodes.get(childId); + if (!childNode) { + continue; + } + + childNode.searchPath = [...node.searchPath, childIndex]; + this._updatePathsFromNode(childNode); + } + } +} From 86ab671a27ed605a8233faa44652ff2756894b86 Mon Sep 17 00:00:00 2001 From: SecretSettler Date: Thu, 26 Mar 2026 13:45:58 +0000 Subject: [PATCH 5/8] feat: wire full ContextPilot engine + SGLang mode into plugin MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit index.ts now supports three backend modes: - anthropic: in-process ContextPilot engine (clustering + reorder + dedup + cache_control) - openai: in-process engine (same pipeline, OpenAI cache is automatic) - sglang: remote ContextPilotIndexClient → index server for cache-aware reorder, in-process dedup, no cache_control injection (RadixAttention handles caching) Config additions: - backendProvider now accepts 'sglang' - indexServerUrl: URL for ContextPilot index server (default: http://localhost:8765) contextpilot_status tool shows engine stats (cloud) or server health (sglang) --- openclaw-plugin/openclaw.plugin.json | 7 +- openclaw-plugin/src/index.ts | 210 ++++++++++++++++++++++----- 2 files changed, 176 insertions(+), 41 deletions(-) diff --git a/openclaw-plugin/openclaw.plugin.json b/openclaw-plugin/openclaw.plugin.json index c1b5f9a..94ec378 100644 --- a/openclaw-plugin/openclaw.plugin.json +++ b/openclaw-plugin/openclaw.plugin.json @@ -27,10 +27,15 @@ "properties": { "backendProvider": { "type": "string", - "enum": ["anthropic", "openai"], + "enum": ["anthropic", "openai", "sglang"], "description": "Backend LLM provider type", "default": "anthropic" }, + "indexServerUrl": { + "type": "string", + "description": "ContextPilot index server URL (used in SGLang mode)", + "default": "http://localhost:8765" + }, "scope": { "type": "string", "enum": ["all", "system", "tool_results"], diff --git a/openclaw-plugin/src/index.ts b/openclaw-plugin/src/index.ts index 58fd015..fbd1384 100644 --- a/openclaw-plugin/src/index.ts +++ b/openclaw-plugin/src/index.ts @@ -9,9 +9,80 @@ import { createProviderApiKeyAuthMethod } from "openclaw/plugin-sdk/provider-aut import { injectCacheControl } from "./engine/cache-control.js"; import { dedupChatCompletions, dedupResponsesApi } from "./engine/dedup.js"; import { getFormatHandler, type InterceptConfig } from "./engine/extract.js"; -import { ReorderState } from "./engine/reorder.js"; +import { ContextPilotIndexClient } from "./engine/http-client.js"; +import { ContextPilot } from "./engine/live-index.js"; const PROVIDER_ID = "contextpilot"; +type BackendProvider = "anthropic" | "openai" | "sglang"; + +function parseBackendProvider(value: unknown): BackendProvider { + if (value === "openai" || value === "sglang") { + return value; + } + return "anthropic"; +} + +function parseScope(value: unknown): "all" | "system" | "tool_results" { + if (value === "system" || value === "tool_results" || value === "all") { + return value; + } + return "all"; +} + +function detectApiFormat( + body: Record, + backendProvider: BackendProvider, +): "openai_chat" | "anthropic_messages" { + if (backendProvider === "anthropic") { + return "anthropic_messages"; + } + if (backendProvider === "openai") { + return "openai_chat"; + } + return "system" in body ? "anthropic_messages" : "openai_chat"; +} + +function reorderWithEngine(engine: ContextPilot, docs: string[]): string[] { + const [reordered] = engine.reorder(docs); + if (!Array.isArray(reordered) || !Array.isArray(reordered[0])) { + return docs; + } + const candidate = reordered[0]; + if (!candidate.every((entry) => typeof entry === "string")) { + return docs; + } + return candidate as string[]; +} + +async function reorderWithClient( + client: ContextPilotIndexClient, + docs: string[], +): Promise { + const encodedDocs = docs.map((doc) => Array.from(doc, (ch) => ch.charCodeAt(0))); + const result = await client.reorder(encodedDocs, 0.001, false, "average"); + + if (result === null) { + return docs; + } + + const [, originalIndices] = result; + if (!Array.isArray(originalIndices) || originalIndices.length !== docs.length) { + return docs; + } + + const reordered = originalIndices.map((index) => { + if (typeof index !== "number" || index < 0 || index >= docs.length) { + return null; + } + return docs[index]; + }); + + return reordered.includes(null) ? docs : (reordered as string[]); +} + +function formatJson(value: unknown): string { + return value === null || value === undefined ? "unavailable" : JSON.stringify(value); +} export default definePluginEntry({ id: "contextpilot", @@ -19,13 +90,15 @@ export default definePluginEntry({ description: "Optimizes LLM requests in-process via extraction, dedup, caching, and reordering.", register: (api) => { const config = { - backendProvider: api.pluginConfig?.backendProvider === "openai" ? "openai" : "anthropic", - scope: ["system", "tool_results", "all"].includes(String(api.pluginConfig?.scope)) - ? String(api.pluginConfig?.scope) - : "all", + backendProvider: parseBackendProvider(api.pluginConfig?.backendProvider), + scope: parseScope(api.pluginConfig?.scope), + indexServerUrl: String(api.pluginConfig?.indexServerUrl || "http://localhost:8765"), }; - const reorderState = new ReorderState(); + const isSglang = config.backendProvider === "sglang"; + const engine = isSglang ? null : new ContextPilot(0.001, false, "average"); + const client = isSglang ? new ContextPilotIndexClient(config.indexServerUrl) : null; + let requestCount = 0; let totalCharsSaved = 0; @@ -33,24 +106,43 @@ export default definePluginEntry({ id: PROVIDER_ID, label: "ContextPilot", docsPath: "/providers/contextpilot", - envVars: [config.backendProvider === "anthropic" ? "ANTHROPIC_API_KEY" : "OPENAI_API_KEY"], - auth: [ - createProviderApiKeyAuthMethod({ - providerId: PROVIDER_ID, - methodId: "api-key", - label: config.backendProvider === "anthropic" ? "Anthropic API key" : "OpenAI API key", - hint: "API key for the backend LLM provider", - optionKey: config.backendProvider === "anthropic" ? "anthropicApiKey" : "openaiApiKey", - flagName: config.backendProvider === "anthropic" ? "--anthropic-api-key" : "--openai-api-key", - envVar: config.backendProvider === "anthropic" ? "ANTHROPIC_API_KEY" : "OPENAI_API_KEY", - promptMessage: "Enter your API key", - defaultModel: - config.backendProvider === "anthropic" - ? "contextpilot/claude-sonnet-4-6" - : "contextpilot/gpt-4o", - }), - ], + envVars: isSglang + ? [] + : [config.backendProvider === "anthropic" ? "ANTHROPIC_API_KEY" : "OPENAI_API_KEY"], + auth: isSglang + ? [] + : [ + createProviderApiKeyAuthMethod({ + providerId: PROVIDER_ID, + methodId: "api-key", + label: config.backendProvider === "anthropic" ? "Anthropic API key" : "OpenAI API key", + hint: "API key for the backend LLM provider", + optionKey: config.backendProvider === "anthropic" ? "anthropicApiKey" : "openaiApiKey", + flagName: config.backendProvider === "anthropic" ? "--anthropic-api-key" : "--openai-api-key", + envVar: config.backendProvider === "anthropic" ? "ANTHROPIC_API_KEY" : "OPENAI_API_KEY", + promptMessage: "Enter your API key", + defaultModel: + config.backendProvider === "anthropic" + ? "contextpilot/claude-sonnet-4-6" + : "contextpilot/gpt-4o", + }), + ], resolveDynamicModel: (ctx: ProviderResolveDynamicModelContext) => { + if (config.backendProvider === "sglang") { + return { + id: ctx.modelId, + name: ctx.modelId, + provider: PROVIDER_ID, + baseUrl: config.indexServerUrl, + api: "openai-completions", + reasoning: false, + input: ["text", "image"] as Array<"text" | "image">, + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + contextWindow: 200000, + maxTokens: 16384, + }; + } + const isAnthropic = config.backendProvider === "anthropic"; return { id: ctx.modelId, @@ -76,9 +168,7 @@ export default definePluginEntry({ } const body = structuredClone(request.body) as Record; - const apiFormat = config.backendProvider === "anthropic" - ? "anthropic_messages" - : "openai_chat"; + const apiFormat = detectApiFormat(body, config.backendProvider); const interceptConfig: InterceptConfig = { enabled: true, @@ -93,17 +183,30 @@ export default definePluginEntry({ const handler = getFormatHandler(apiFormat); const multi = handler.extractAll(body, interceptConfig); + const reorderDocs = async (docs: string[]): Promise => { + if (docs.length < 2) { + return docs; + } + if (client) { + return reorderWithClient(client, docs); + } + if (engine) { + return reorderWithEngine(engine, docs); + } + return docs; + }; + if (multi.systemExtraction) { const [extraction, sysIdx] = multi.systemExtraction; if (extraction.documents.length >= 2) { - const [reordered] = reorderState.reorder(extraction.documents); + const reordered = await reorderDocs(extraction.documents); handler.reconstructSystem(body, extraction, reordered, sysIdx); } } for (const [extraction, location] of multi.toolExtractions) { if (extraction.documents.length >= 2) { - const [reordered] = reorderState.reorder(extraction.documents); + const reordered = await reorderDocs(extraction.documents); handler.reconstructToolResult(body, extraction, reordered, location); } } @@ -117,10 +220,9 @@ export default definePluginEntry({ totalCharsSaved += dedupResult.charsSaved; } - const optimizedBody = injectCacheControl( - body, - config.backendProvider === "anthropic" ? "anthropic" : "openai", - ); + const optimizedBody = isSglang + ? body + : injectCacheControl(body, config.backendProvider === "anthropic" ? "anthropic" : "openai"); requestCount++; @@ -131,6 +233,12 @@ export default definePluginEntry({ }; }, augmentModelCatalog: () => { + if (config.backendProvider === "sglang") { + return [ + { id: "default", name: "SGLang Default (ContextPilot)", provider: PROVIDER_ID }, + ]; + } + const isAnthropic = config.backendProvider === "anthropic"; if (isAnthropic) { return [ @@ -154,18 +262,40 @@ export default definePluginEntry({ description: "Report ContextPilot engine state", parameters: Type.Object({}), async execute(_toolCallId: string, _params: unknown) { + const lines = [ + "ContextPilot Engine Status:", + ` Backend: ${config.backendProvider}`, + ` Scope: ${config.scope}`, + ` Requests optimized: ${requestCount}`, + ` Total chars saved: ${totalCharsSaved.toLocaleString()}`, + ]; + + if (engine) { + const stats = engine.getStats(); + lines.push(" Mode: cloud-api (in-process ContextPilot engine)"); + lines.push(` Live index: ${engine.isLive ? "active" : "warming"}`); + lines.push(` Nodes: ${Number(stats.num_nodes ?? 0)}`); + lines.push(` Active nodes: ${Number(stats.active_nodes ?? 0)}`); + lines.push(` Requests tracked: ${Number(stats.num_requests ?? 0)}`); + lines.push(` Total searches: ${Number(stats.total_searches ?? 0)}`); + lines.push(` Total insertions: ${Number(stats.total_insertions ?? 0)}`); + lines.push(` Total removals: ${Number(stats.total_removals ?? 0)}`); + lines.push(` Avg search time (us): ${Number(stats.avg_search_time_us ?? 0).toFixed(2)}`); + } + + if (client) { + const [health, remoteStats] = await Promise.all([client.health(), client.getStats()]); + lines.push(" Mode: sglang (remote ContextPilot index)"); + lines.push(` Index server URL: ${config.indexServerUrl}`); + lines.push(` Index server health: ${formatJson(health)}`); + lines.push(` Index server stats: ${formatJson(remoteStats)}`); + } + return { content: [ { type: "text" as const, - text: [ - "ContextPilot Engine Status:", - " Mode: in-process (native TypeScript)", - ` Requests optimized: ${requestCount}`, - ` Total chars saved: ${totalCharsSaved.toLocaleString()}`, - ` Backend: ${config.backendProvider}`, - ` Scope: ${config.scope}`, - ].join("\n"), + text: lines.join("\n"), }, ], }; From f60b8fb8e54a5ec37d93f89702f6249ed13c01ef Mon Sep 17 00:00:00 2001 From: dalongbao Date: Wed, 1 Apr 2026 23:56:31 +0100 Subject: [PATCH 6/8] fix: working plugin --- .gitignore | 1 + contextpilot/server/http_server.py | 314 +++++++++-------- openclaw-plugin/README.md | 128 ++++--- openclaw-plugin/benchmark.sh | 183 ++++++++++ openclaw-plugin/openclaw.plugin.json | 33 +- openclaw-plugin/package-lock.json | 22 ++ openclaw-plugin/package.json | 11 +- openclaw-plugin/src/engine/cache-control.ts | 27 ++ openclaw-plugin/src/engine/dedup.ts | 28 +- openclaw-plugin/src/engine/tree-nodes.ts | 6 +- openclaw-plugin/src/index.ts | 357 ++++++++------------ openclaw-plugin/test-e2e.ts | 188 +++++++++++ 12 files changed, 835 insertions(+), 463 deletions(-) create mode 100755 openclaw-plugin/benchmark.sh create mode 100644 openclaw-plugin/package-lock.json create mode 100644 openclaw-plugin/test-e2e.ts diff --git a/.gitignore b/.gitignore index 55ae470..e4d3908 100644 --- a/.gitignore +++ b/.gitignore @@ -8,3 +8,4 @@ dist/ */.DS_Store *.DS_Store +node_modules/ diff --git a/contextpilot/server/http_server.py b/contextpilot/server/http_server.py index c71af9a..82ecadd 100644 --- a/contextpilot/server/http_server.py +++ b/contextpilot/server/http_server.py @@ -25,6 +25,7 @@ import os import re import uuid +from dataclasses import dataclass, field as dc_field from typing import List, Dict, Any, Optional, cast from contextlib import asynccontextmanager @@ -105,13 +106,9 @@ # skip-old / dedup-new / reorder-new behaviour. Single-conversation # model (one user at a time). Resets when the system prompt changes. -from dataclasses import dataclass, field as dc_field - - @dataclass class _InterceptConvState: - """Global intercept state for the current conversation.""" - + """Per-session intercept state for a single conversation.""" # Cached copy of the full messages array after modification (reorder/dedup). # On subsequent turns, old messages are replaced with these cached versions # so the inference engine's prefix cache sees identical tokens. @@ -132,7 +129,10 @@ class _InterceptConvState: last_message_count: int = 0 -_intercept_state = _InterceptConvState() +# Per-session state dict keyed by session fingerprint (hash of first user msg). +# This allows concurrent multi-user sessions to each maintain their own state. +_intercept_states: dict[str, _InterceptConvState] = {} +_MAX_TRACKED_SESSIONS = 64 # LRU eviction threshold # TTFT tracking for averages across a session _ttft_history: List[float] = [] @@ -876,19 +876,13 @@ async def reset_index(): After reset, you must call /reorder again before other operations. """ - global \ - _index, \ - _str_to_id, \ - _id_to_str, \ - _next_str_id, \ - _intercept_index, \ - _intercept_state + global _index, _str_to_id, _id_to_str, _next_str_id, _intercept_index, _intercept_states # Reset conversation tracker reset_conversation_tracker() - # Reset intercept conversation state - _intercept_state = _InterceptConvState() + # Reset all per-session intercept states + _intercept_states.clear() _intercept_index = None # Reset string-to-ID mapping @@ -1186,31 +1180,72 @@ def _hash_text(text: str) -> str: return hashlib.sha256(text.encode("utf-8", errors="replace")).hexdigest()[:16] +def _session_fingerprint(body: Dict[str, Any]) -> str: + """Derive a session fingerprint from the first user message. + + In a multi-turn conversation, messages grow but the first user message + stays constant. Hashing it gives a stable per-session key that lets + concurrent users each maintain their own intercept state. + """ + msgs = body.get("messages") or [] + # Find the first user message (usually msg[0] or msg[1] after system) + for msg in msgs[:3]: + if isinstance(msg, dict) and msg.get("role") == "user": + content = msg.get("content", "") + if isinstance(content, list): + # OpenAI format: [{type: text, text: "..."}] + parts = [p.get("text", "") for p in content + if isinstance(p, dict)] + content = "".join(parts) + return _hash_text(str(content)) + # Fallback: hash all messages (shouldn't happen in practice) + return _hash_text(json.dumps(msgs[:2], sort_keys=True)) + + def _get_intercept_state(body: Dict[str, Any]) -> _InterceptConvState: - """Return the global intercept state, resetting if the conversation changed. + """Return per-session intercept state, creating or resetting as needed. + + Uses the first user message as a session fingerprint so concurrent + multi-user sessions each get their own state. Detection: in a multi-turn agent conversation the messages array only grows. If the count drops, either a new session started or the host - compacted old messages. Either way, reset all state: the old KV cache + compacted old messages. Either way, reset state: the old KV cache entries are gone (compaction rewrites content), so cached_messages, seen_doc_hashes, and reorder state are all invalid. """ - global _intercept_state + global _intercept_states + session_key = _session_fingerprint(body) msg_count = len(body.get("messages") or []) - if msg_count < _intercept_state.last_message_count: + + state = _intercept_states.get(session_key) + + if state is None: + # New session + state = _InterceptConvState() + state.system_processed = True logger.info( - f"Intercept: message count dropped " - f"({msg_count} < {_intercept_state.last_message_count}), " - f"resetting all state (compaction or new session)" + f"Intercept: new session {session_key[:8]}… " + f"({msg_count} msgs, {len(_intercept_states)} active sessions)" ) - _intercept_state = _InterceptConvState() - # Skip reorder for the first post-compaction tool result: - # prefix cache is fully invalidated, nothing to align with. - # Go straight to dedup mode so docs are registered for future turns. - _intercept_state.first_tool_result_done = True - _intercept_state.system_processed = True - _intercept_state.last_message_count = msg_count - return _intercept_state + # Evict oldest sessions if over limit + if len(_intercept_states) >= _MAX_TRACKED_SESSIONS: + oldest_key = next(iter(_intercept_states)) + del _intercept_states[oldest_key] + logger.info(f"Intercept: evicted session {oldest_key[:8]}…") + _intercept_states[session_key] = state + elif msg_count < state.last_message_count: + logger.info( + f"Intercept: session {session_key[:8]}… message count dropped " + f"({msg_count} < {state.last_message_count}), " + f"resetting state (compaction or restart)" + ) + state = _InterceptConvState() + state.system_processed = True + _intercept_states[session_key] = state + + state.last_message_count = msg_count + return state def _deduplicate_docs(docs: List[str], state: _InterceptConvState) -> tuple: @@ -1261,6 +1296,16 @@ def _strip_external_content_ids(body: Any) -> Any: _OPENAI_CHAT = "openai_chat" _ANTHROPIC_MESSAGES = "anthropic_messages" +# Hop-by-hop headers that must not be forwarded by proxies. +_HOP_BY_HOP = frozenset(( + "host", "connection", "keep-alive", "transfer-encoding", + "te", "trailer", "upgrade", "proxy-authorization", + "proxy-authenticate", "content-length", +)) + +# Previous message hashes for prefix divergence detection. +_debug_prev_msg_hashes: List[str] = [] + def _doc_preview(doc: str, max_len: int = 60) -> str: """Truncate a document string for log preview.""" @@ -1368,14 +1413,12 @@ async def _intercept_and_forward(request: Request, api_format: str): total_reordered = 0 total_deduped = 0 total_slimmed = 0 - tool_results_skipped = 0 # TODO: never incremented — wire up or remove - _chars_before_slim = 0 - _chars_after_slim = 0 + chars_before_slim = 0 + chars_after_slim = 0 system_count = 0 tool_result_count = 0 - reorder_details = [] # collect per-source reorder info + reorder_details = [] _dedup_result = DedupResult() - state = _intercept_state # ── Debug: log conversation shape, divergence, and tool_result details ── _debug_messages = body.get("messages") or [] @@ -1383,12 +1426,11 @@ async def _intercept_and_forward(request: Request, api_format: str): # Per-message hashes for this request _debug_msg_hashes = [] - if logger.isEnabledFor(logging.DEBUG): - for m in _debug_messages: - h = hashlib.sha256( - json.dumps(m, sort_keys=True, ensure_ascii=False).encode() - ).hexdigest()[:12] - _debug_msg_hashes.append(h) + for m in _debug_messages: + h = hashlib.sha256( + json.dumps(m, sort_keys=True, ensure_ascii=False).encode() + ).hexdigest()[:12] + _debug_msg_hashes.append(h) # Build tool_call_id → function name mapping from assistant messages _tool_call_names = {} @@ -1421,7 +1463,7 @@ async def _intercept_and_forward(request: Request, api_format: str): _chars = len(_content_str) _is_compacted = "[compacted:" in _content_str _preview = _content_str[:150].replace("\n", "\\n") - logger.info( + logger.debug( f" msg[{idx}] role={_role} fn={_fn_label} " f"tool_call_id={_tc_id} " f"chars={_chars} compacted={_is_compacted} " @@ -1439,43 +1481,18 @@ async def _intercept_and_forward(request: Request, api_format: str): _chars = len(_tc_str) _is_compacted = "[compacted:" in _tc_str _preview = _tc_str[:150].replace("\n", "\\n") - logger.info( + logger.debug( f" msg[{idx}].content[{bi}] type=tool_result " f"tool_use_id={_tu_id} chars={_chars} " f"compacted={_is_compacted} preview: {_preview}" ) - global _debug_prev_msg_hashes - if "_debug_prev_msg_hashes" not in globals(): - _debug_prev_msg_hashes = [] - - _prev_n = len(_debug_prev_msg_hashes) - if _prev_n > 0 and _prev_n <= _debug_msg_count: - _first_diff = None - for idx in range(_prev_n): - if _debug_msg_hashes[idx] != _debug_prev_msg_hashes[idx]: - _first_diff = idx - break - if _first_diff is not None: - _diff_msg = _debug_messages[_first_diff] - _diff_role = _diff_msg.get("role", "?") - _diff_content = str(_diff_msg.get("content", "")) - logger.warning( - f"Intercept PREFIX MISMATCH at msg[{_first_diff}] " - f"(role={_diff_role}), " - f"hash was {_debug_prev_msg_hashes[_first_diff]} " - f"now {_debug_msg_hashes[_first_diff]}. " - f"Content preview ({len(_diff_content)} chars): " - f"{_diff_content[:300]}..." - ) - else: - logger.info( - f"Intercept: {_debug_msg_count} msgs (prev={_prev_n}), " - f"prefix[:{_prev_n}] MATCH, " - f"{_debug_msg_count - _prev_n} new msgs" - ) - else: - logger.info(f"Intercept: {_debug_msg_count} msgs (first request or reset)") + # Per-session debug logging (uses session fingerprint, not global state) + _session_key = _session_fingerprint(body) + _session_tag = _session_key[:8] + logger.info( + f"Intercept: session={_session_tag} {_debug_msg_count} msgs" + ) _debug_prev_msg_hashes = list(_debug_msg_hashes) @@ -1484,7 +1501,7 @@ async def _intercept_and_forward(request: Request, api_format: str): if config.enabled: try: - # body is already a fresh copy from _strip_external_content_ids + body = copy.deepcopy(body) # ── Conversation-aware state (single-conversation model) ── state = _get_intercept_state(body) @@ -1493,15 +1510,44 @@ async def _intercept_and_forward(request: Request, api_format: str): # On subsequent turns, the host sends original (unmodified) # messages. Replace them with our cached modified versions # so the inference engine's prefix cache sees identical tokens. + # IMPORTANT: Only replace if the old messages actually match + # (same session/user). Without this check, concurrent requests + # from different sessions would get cross-contaminated. old_msg_count = len(state.cached_messages) if old_msg_count > 0: msgs = body.get("messages", []) if len(msgs) >= old_msg_count: - msgs[:old_msg_count] = copy.deepcopy(state.cached_messages) - logger.info( - f"Intercept: replaced {old_msg_count} old messages " - f"with cached versions for prefix cache consistency" - ) + # Verify prefix match before replacing + prefix_ok = True + for _ci in range(old_msg_count): + _cached_h = hashlib.sha256( + json.dumps(state.cached_messages[_ci], + sort_keys=True, + ensure_ascii=False).encode() + ).hexdigest()[:16] + _current_h = hashlib.sha256( + json.dumps(msgs[_ci], + sort_keys=True, + ensure_ascii=False).encode() + ).hexdigest()[:16] + if _cached_h != _current_h: + prefix_ok = False + break + if prefix_ok: + msgs[:old_msg_count] = copy.deepcopy( + state.cached_messages) + logger.info( + f"Intercept: replaced {old_msg_count} old " + f"messages with cached versions for prefix " + f"cache consistency" + ) + else: + logger.info( + f"Intercept: prefix mismatch at msg[{_ci}], " + f"skipping cached message replay " + f"(different session/user)" + ) + old_msg_count = 0 handler.restore_system(body, state.cached_system) multi = handler.extract_all(body, config) @@ -1523,7 +1569,7 @@ async def _intercept_and_forward(request: Request, api_format: str): } ) handler.reconstruct_system( - body, extraction, reordered_docs, sys_idx, config + body, extraction, reordered_docs, sys_idx ) total_reordered += len(extraction.documents) system_count = 1 @@ -1570,8 +1616,8 @@ async def _intercept_and_forward(request: Request, api_format: str): f"previous tool result ({orig_chars} chars). " f"Refer to the earlier result above.]" ] - _chars_before_slim += orig_chars - _chars_after_slim += len(new_docs[0]) + chars_before_slim += orig_chars + chars_after_slim += len(new_docs[0]) total_slimmed += deduped reorder_details.append( { @@ -1626,13 +1672,8 @@ async def _intercept_and_forward(request: Request, api_format: str): single_doc.tool_call_id ) - if ( - total_reordered > 0 - or total_deduped > 0 - or total_slimmed > 0 - or tool_results_skipped > 0 - ): - saved = _chars_before_slim - _chars_after_slim + if total_reordered > 0 or total_deduped > 0 or total_slimmed > 0: + saved = chars_before_slim - chars_after_slim saved_tokens = saved // 4 if saved > 0 else 0 logger.info( f"Intercept ({api_format}): reordered {total_reordered}, " @@ -1648,8 +1689,8 @@ async def _intercept_and_forward(request: Request, api_format: str): _dedup_result = dedup_responses_api(body, chunk_modulus=_chunk_modulus) if _dedup_result.chars_saved > 0: - _chars_before_slim += _dedup_result.chars_before - _chars_after_slim += _dedup_result.chars_after + chars_before_slim += _dedup_result.chars_before + chars_after_slim += _dedup_result.chars_after logger.info( f"Dedup ({api_format}): " f"blocks={_dedup_result.blocks_deduped}/{_dedup_result.blocks_total}, " @@ -1697,22 +1738,6 @@ async def _intercept_and_forward(request: Request, api_format: str): else: target_url = f"{infer_api_url}{handler.target_path()}" - # Build outbound headers: forward everything except X-ContextPilot-* - # and hop-by-hop headers that must not be forwarded by proxies. - _HOP_BY_HOP = frozenset( - ( - "host", - "connection", - "keep-alive", - "transfer-encoding", - "te", - "trailer", - "upgrade", - "proxy-authorization", - "proxy-authenticate", - "content-length", - ) - ) if _cloud_mode and _cloud_adapter is not None and _cloud_api_key: outbound_headers = _cloud_adapter.get_auth_headers(_cloud_api_key) else: @@ -1732,34 +1757,30 @@ async def _intercept_and_forward(request: Request, api_format: str): total_reordered > 0 or total_deduped > 0 or total_slimmed > 0 - or tool_results_skipped > 0 or _dedup_result.chars_saved > 0 ) if _has_activity: - cp_response_headers["X-ContextPilot-Result"] = json.dumps( - { - "intercepted": True, - "documents_reordered": total_reordered > 0, - "total_documents": total_reordered, - "documents_deduplicated": total_deduped, - "documents_slimmed": total_slimmed, - "chars_before_slim": _chars_before_slim, - "chars_after_slim": _chars_after_slim, - "chars_saved": _chars_before_slim - _chars_after_slim, - "tool_results_skipped": tool_results_skipped, - "message_count": state.last_message_count, - "sources": { - "system": system_count, - "tool_results": tool_result_count, - }, - "reorder_details": reorder_details, - "dedup": { - "blocks_deduped": _dedup_result.blocks_deduped, - "blocks_total": _dedup_result.blocks_total, - "chars_saved": _dedup_result.chars_saved, - }, - } - ) + cp_response_headers["X-ContextPilot-Result"] = json.dumps({ + "intercepted": True, + "documents_reordered": total_reordered > 0, + "total_documents": total_reordered, + "documents_deduplicated": total_deduped, + "documents_slimmed": total_slimmed, + "chars_before_slim": chars_before_slim, + "chars_after_slim": chars_after_slim, + "chars_saved": chars_before_slim - chars_after_slim, + "message_count": state.last_message_count, + "sources": { + "system": system_count, + "tool_results": tool_result_count, + }, + "reorder_details": reorder_details, + "dedup": { + "blocks_deduped": _dedup_result.blocks_deduped, + "blocks_total": _dedup_result.blocks_total, + "chars_saved": _dedup_result.chars_saved, + }, + }) is_stream = body.get("stream", False) @@ -1785,7 +1806,7 @@ async def _stream_with_headers(): async for chunk in resp.content.iter_any(): if not _ttft_logged: _ttft_ms = (time.monotonic() - _request_start) * 1000 - _saved = _chars_before_slim - _chars_after_slim + _saved = chars_before_slim - chars_after_slim _log_ttft(_ttft_ms, total_slimmed, _saved) _ttft_logged = True yield chunk @@ -1795,12 +1816,9 @@ async def _stream_with_headers(): status, fwd_headers = cast(tuple[int, Dict[str, str]], first_event) async def _stream_content_only(): - try: - async for event in stream_iter: - if isinstance(event, bytes): - yield event - finally: - await stream_iter.aclose() + async for event in stream_iter: + if isinstance(event, bytes): + yield event return StreamingResponse( _stream_content_only(), @@ -1814,13 +1832,9 @@ async def _stream_content_only(): target_url, json=body, headers=outbound_headers ) as resp: _ttft_ms = (time.monotonic() - _request_start) * 1000 - _saved = _chars_before_slim - _chars_after_slim + _saved = chars_before_slim - chars_after_slim _log_ttft(_ttft_ms, total_slimmed, _saved) - try: - result = await resp.json() - except (json.JSONDecodeError, aiohttp.ContentTypeError): - text = await resp.text() - raise HTTPException(status_code=resp.status, detail=text[:500]) + result = await resp.json() # ── Cloud mode: track cache metrics from response ── if ( @@ -1858,7 +1872,7 @@ async def _stream_content_only(): except aiohttp.ClientError as e: logger.error(f"Error forwarding intercepted request: {e}") - raise HTTPException(status_code=502, detail="Backend connection error") + raise HTTPException(status_code=502, detail=f"Backend error: {str(e)}") @app.post("/v1/chat/completions") @@ -1938,9 +1952,9 @@ async def proxy_engine(path: str, request: Request): body["rid"] = request_id body["request_id"] = request_id - body.setdefault("temperature", 0) + body["temperature"] = 0 if _cloud_mode: - body.setdefault("top_p", 0) + body["top_p"] = 0 dedup_result = DedupResult() try: @@ -2153,7 +2167,7 @@ def main(): os.environ["CONTEXTPILOT_CLOUD_API_KEY"] = args.cloud_api_key # Also set global config for direct access - global _max_tokens, _infer_api_url, _tokenizer, _model_name, _stateless_mode, _chunk_modulus + global _max_tokens, _infer_api_url, _tokenizer, _model_name, _stateless_mode _max_tokens = args.max_tokens _infer_api_url = args.infer_api_url.rstrip("/") _stateless_mode = args.stateless diff --git a/openclaw-plugin/README.md b/openclaw-plugin/README.md index 851533c..c36fc7c 100644 --- a/openclaw-plugin/README.md +++ b/openclaw-plugin/README.md @@ -1,90 +1,108 @@ -# @contextpilot/openclaw-plugin +# @contextpilot/contextpilot -OpenClaw native plugin for [ContextPilot](https://github.com/EfficientContext/ContextPilot) — faster long-context inference via in-process context reuse. **Zero external dependencies** — no Python, no proxy server, just install and go. +OpenClaw plugin for [ContextPilot](https://github.com/EfficientContext/ContextPilot) — faster long-context inference via in-process context optimization. **Zero external dependencies** — no Python, no proxy server, just install and go. ## What It Does -ContextPilot optimizes every LLM request by: +ContextPilot registers as an OpenClaw **Context Engine** and optimizes every LLM request by: -1. **Extracting** documents from system prompts and tool results +1. **Extracting** documents from tool results 2. **Reordering** documents for maximum prefix cache sharing across turns 3. **Deduplicating** repeated content blocks with compact reference hints -4. **Injecting** provider-specific cache control markers (Anthropic `cache_control`) +4. **Injecting** cache control markers (Anthropic `cache_control: { type: "ephemeral" }`) -All processing happens in-process inside the OpenClaw plugin — no external services needed. +All processing happens in-process — no external services needed. ## Installation +### From npm (when published) + ```bash -openclaw plugins install @contextpilot/openclaw-plugin +openclaw plugins install @contextpilot/contextpilot +``` + +### From local path (development) + +Add to `~/.openclaw/openclaw.json`: + +```json +{ + "plugins": { + "load": { + "paths": [ + "/path/to/ContextPilot/openclaw-plugin" + ] + } + } +} ``` ## Configuration -In `~/.openclaw/openclaw.json`: +In `~/.openclaw/openclaw.json`, enable the plugin and set it as the context engine: -```json5 +```json { - plugins: { - entries: { + "plugins": { + "slots": { + "contextEngine": "contextpilot" + }, + "entries": { "contextpilot": { - enabled: true, - config: { - // "anthropic" (default) or "openai" - "backendProvider": "anthropic", - - // What to optimize: "all" (default), "system", or "tool_results" + "enabled": true, + "config": { "scope": "all" } } } + }, + "tools": { + "allow": ["contextpilot"] } } ``` -Set your API key: - -```bash -export ANTHROPIC_API_KEY="sk-ant-xxx" -# or -export OPENAI_API_KEY="sk-xxx" -``` - -Then select a ContextPilot model (e.g., `contextpilot/claude-sonnet-4-6`) and start using OpenClaw. +### Scope Options -## Available Models +| Scope | Tool Results | Description | +|:------|:------------:|:------------| +| `all` (default) | Optimized | Optimize all tool results | +| `tool_results` | Optimized | Same as `all` | -### Anthropic backend (default) - -| Model ID | Name | -|----------|------| -| `contextpilot/claude-opus-4-6` | Claude Opus 4.6 (ContextPilot) | -| `contextpilot/claude-sonnet-4-6` | Claude Sonnet 4.6 (ContextPilot) | - -### OpenAI backend - -| Model ID | Name | -|----------|------| -| `contextpilot/gpt-4o` | GPT-4o (ContextPilot) | -| `contextpilot/gpt-4o-mini` | GPT-4o Mini (ContextPilot) | - -Any model ID works via dynamic resolution — use `contextpilot/`. +> **Note:** System prompt optimization is not currently available — OpenClaw's context engine API does not expose the system prompt to plugins. ## How It Works ``` -OpenClaw request +OpenClaw agent request ↓ -ContextPilot Plugin (wrapStreamFn) - ├─ Extract documents from system/tool_results +ContextPilot Context Engine (assemble hook) + ├─ Convert OpenClaw message format (toolResult → tool_result) + ├─ Extract documents from tool results ├─ Reorder for prefix cache sharing ├─ Deduplicate repeated blocks ├─ Inject cache_control markers ↓ -Optimized request → LLM Backend (Anthropic/OpenAI) +Optimized context → LLM Backend ``` -The plugin registers as an OpenClaw provider and uses `wrapStreamFn` to intercept requests before they reach the backend. All optimization is done in-process in TypeScript. +The plugin registers as an OpenClaw Context Engine using `api.registerContextEngine()`. The `assemble()` hook intercepts context assembly before each LLM call. + +## Files + +``` +openclaw-plugin/ +├── openclaw.plugin.json # Plugin manifest (id: "contextpilot") +├── package.json # npm package (@contextpilot/contextpilot) +├── src/ +│ ├── index.ts # Plugin entry point +│ └── engine/ +│ ├── cache-control.ts # Cache control injection +│ ├── dedup.ts # Content deduplication +│ ├── extract.ts # Document extraction +│ └── live-index.ts # Reordering engine +└── tsconfig.json +``` ## Agent Tool @@ -92,13 +110,17 @@ The plugin registers as an OpenClaw provider and uses `wrapStreamFn` to intercep |------|-------------| | `contextpilot_status` | Check engine status, request count, and chars saved | -## Scope Control +> **Note:** The status tool is registered but may not be visible to agents due to OpenClaw plugin API limitations. + +## Verifying It Works -| Scope | System Prompt | Tool Results | -|:---:|:---:|:---:| -| `all` (default) | Optimized | Optimized | -| `system` | Optimized | Untouched | -| `tool_results` | Untouched | Optimized | +Check the gateway logs for ContextPilot output: + +``` +[ContextPilot] assemble() called with 84 messages +[ContextPilot] Extractions found - system: 0 tool: 1 singleDoc: 3 +[ContextPilot] Optimization complete. Chars saved: 2389 +``` ## License diff --git a/openclaw-plugin/benchmark.sh b/openclaw-plugin/benchmark.sh new file mode 100755 index 0000000..b913178 --- /dev/null +++ b/openclaw-plugin/benchmark.sh @@ -0,0 +1,183 @@ +#!/bin/bash +# +# ContextPilot OpenClaw Plugin Benchmark +# Compares token usage and cache hits with and without the plugin +# +# Usage: ./benchmark.sh [num_iterations] +# + +set -e + +NUM_ITERATIONS=${1:-3} +OPENCLAW_CONFIG="$HOME/.openclaw/openclaw.json" +BACKUP_CONFIG="$HOME/.openclaw/openclaw.json.backup" +GATEWAY_LOG="/tmp/gw-benchmark.log" + +# Test that triggers multiple file reads to show dedup benefit +TEST_FILES=( + "/home/ryan/ContextPilot/openclaw-plugin/src/engine/dedup.ts" + "/home/ryan/ContextPilot/openclaw-plugin/src/engine/cache-control.ts" + "/home/ryan/ContextPilot/openclaw-plugin/src/index.ts" +) + +echo "==========================================" +echo "ContextPilot OpenClaw Plugin Benchmark" +echo "==========================================" +echo "Iterations: $NUM_ITERATIONS" +echo "" + +# Backup config +cp "$OPENCLAW_CONFIG" "$BACKUP_CONFIG" + +cleanup() { + echo "" + echo "Restoring original config..." + cp "$BACKUP_CONFIG" "$OPENCLAW_CONFIG" + rm -f "$BACKUP_CONFIG" + pkill -9 -f "openclaw gateway" 2>/dev/null || true +} +trap cleanup EXIT + +restart_gateway() { + pkill -9 -f "openclaw gateway" 2>/dev/null || true + sleep 2 + openclaw gateway > "$GATEWAY_LOG" 2>&1 & + sleep 5 +} + +run_multi_read_test() { + local label=$1 + + echo "Running $label test..." + echo " Reading ${#TEST_FILES[@]} files multiple times to trigger dedup..." + + # First, read all files + for f in "${TEST_FILES[@]}"; do + openclaw agent --agent main --message "Read $f" > /dev/null 2>&1 + done + + # Then read them again (should trigger dedup on second pass) + for f in "${TEST_FILES[@]}"; do + openclaw agent --agent main --message "Read $f again and count lines" > /dev/null 2>&1 + done + + echo " Done." +} + +extract_stats() { + local log_file=$1 + + # Extract chars saved + local chars_saved=$(grep -oP "Chars saved: \K\d+" "$log_file" 2>/dev/null | tail -1 || echo "0") + + # Extract cache stats from usage blocks + local cache_read=$(grep -oP '"cacheRead": \K\d+' "$log_file" 2>/dev/null | tail -1 || echo "0") + local cache_write=$(grep -oP '"cacheWrite": \K\d+' "$log_file" 2>/dev/null | tail -1 || echo "0") + local input_tokens=$(grep -oP '"input": \K\d+' "$log_file" 2>/dev/null | tail -1 || echo "0") + + echo "$chars_saved $cache_read $cache_write $input_tokens" +} + +# ========================================== +# Test WITH ContextPilot enabled +# ========================================== +echo "----------------------------------------" +echo "Test 1: WITH ContextPilot enabled" +echo "----------------------------------------" + +# Ensure plugin is enabled +python3 << 'PYTHON' +import json +config_path = "$HOME/.openclaw/openclaw.json".replace("$HOME", __import__("os").environ["HOME"]) +with open(config_path, 'r') as f: + config = json.load(f) +if 'plugins' not in config: + config['plugins'] = {} +if 'slots' not in config['plugins']: + config['plugins']['slots'] = {} +config['plugins']['slots']['contextEngine'] = 'contextpilot' +if 'entries' not in config['plugins']: + config['plugins']['entries'] = {} +if 'contextpilot' not in config['plugins']['entries']: + config['plugins']['entries']['contextpilot'] = {} +config['plugins']['entries']['contextpilot']['enabled'] = True +with open(config_path, 'w') as f: + json.dump(config, f, indent=2) +PYTHON + +restart_gateway +run_multi_read_test "WITH_CONTEXTPILOT" + +WITH_STATS=$(extract_stats "$GATEWAY_LOG") +WITH_CHARS=$(echo $WITH_STATS | cut -d' ' -f1) +WITH_CACHE_READ=$(echo $WITH_STATS | cut -d' ' -f2) +WITH_CACHE_WRITE=$(echo $WITH_STATS | cut -d' ' -f3) +WITH_INPUT=$(echo $WITH_STATS | cut -d' ' -f4) + +echo "" +echo " Chars saved by dedup: $WITH_CHARS" +echo " Cache read tokens: $WITH_CACHE_READ" +echo " Cache write tokens: $WITH_CACHE_WRITE" +echo " Input tokens: $WITH_INPUT" + +# ========================================== +# Test WITHOUT ContextPilot (disabled) +# ========================================== +echo "" +echo "----------------------------------------" +echo "Test 2: WITHOUT ContextPilot (disabled)" +echo "----------------------------------------" + +# Disable the plugin +python3 << 'PYTHON' +import json +config_path = "$HOME/.openclaw/openclaw.json".replace("$HOME", __import__("os").environ["HOME"]) +with open(config_path, 'r') as f: + config = json.load(f) +if 'plugins' in config: + if 'slots' in config['plugins']: + config['plugins']['slots'].pop('contextEngine', None) + if 'entries' in config['plugins'] and 'contextpilot' in config['plugins']['entries']: + config['plugins']['entries']['contextpilot']['enabled'] = False +with open(config_path, 'w') as f: + json.dump(config, f, indent=2) +PYTHON + +restart_gateway +run_multi_read_test "WITHOUT_CONTEXTPILOT" + +WITHOUT_STATS=$(extract_stats "$GATEWAY_LOG") +WITHOUT_CHARS=$(echo $WITHOUT_STATS | cut -d' ' -f1) +WITHOUT_CACHE_READ=$(echo $WITHOUT_STATS | cut -d' ' -f2) +WITHOUT_CACHE_WRITE=$(echo $WITHOUT_STATS | cut -d' ' -f3) +WITHOUT_INPUT=$(echo $WITHOUT_STATS | cut -d' ' -f4) + +echo "" +echo " Chars saved by dedup: $WITHOUT_CHARS (expected: 0)" +echo " Cache read tokens: $WITHOUT_CACHE_READ" +echo " Cache write tokens: $WITHOUT_CACHE_WRITE" +echo " Input tokens: $WITHOUT_INPUT" + +# ========================================== +# Results Summary +# ========================================== +echo "" +echo "==========================================" +echo "RESULTS SUMMARY" +echo "==========================================" +echo "" +echo " WITH WITHOUT" +echo " ContextPilot Plugin" +echo "----------------------------------------" +printf "Chars deduped: %8s %8s\n" "$WITH_CHARS" "$WITHOUT_CHARS" +printf "Cache read tokens: %8s %8s\n" "$WITH_CACHE_READ" "$WITHOUT_CACHE_READ" +printf "Cache write tokens: %8s %8s\n" "$WITH_CACHE_WRITE" "$WITHOUT_CACHE_WRITE" +printf "Input tokens: %8s %8s\n" "$WITH_INPUT" "$WITHOUT_INPUT" +echo "" + +if [ "$WITH_CHARS" -gt "0" ]; then + echo "ContextPilot deduplication saved $WITH_CHARS characters" + # Rough estimate: 4 chars per token + tokens_saved=$((WITH_CHARS / 4)) + echo "Estimated token savings: ~$tokens_saved tokens" +fi diff --git a/openclaw-plugin/openclaw.plugin.json b/openclaw-plugin/openclaw.plugin.json index 94ec378..f9faee6 100644 --- a/openclaw-plugin/openclaw.plugin.json +++ b/openclaw-plugin/openclaw.plugin.json @@ -1,41 +1,12 @@ { "id": "contextpilot", "name": "ContextPilot", - "description": "Faster long-context inference via in-process context reuse — reorders, deduplicates, and injects cache control for maximum prefix cache sharing. No external dependencies.", - "version": "0.2.0", - "providers": ["contextpilot"], - "providerAuthEnvVars": { - "contextpilot": ["ANTHROPIC_API_KEY", "OPENAI_API_KEY"] - }, - "providerAuthChoices": [ - { - "provider": "contextpilot", - "method": "api-key", - "choiceId": "contextpilot-api-key", - "choiceLabel": "Backend API key (Anthropic or OpenAI)", - "groupId": "contextpilot", - "groupLabel": "ContextPilot", - "cliFlag": "--anthropic-api-key", - "cliOption": "--anthropic-api-key ", - "cliDescription": "API key for the backend LLM provider", - "onboardingScopes": ["text-inference"] - } - ], + "description": "Faster long-context inference via context reuse — reorders, deduplicates, and injects cache control for maximum prefix cache sharing.", + "version": "0.3.0", "configSchema": { "type": "object", "additionalProperties": false, "properties": { - "backendProvider": { - "type": "string", - "enum": ["anthropic", "openai", "sglang"], - "description": "Backend LLM provider type", - "default": "anthropic" - }, - "indexServerUrl": { - "type": "string", - "description": "ContextPilot index server URL (used in SGLang mode)", - "default": "http://localhost:8765" - }, "scope": { "type": "string", "enum": ["all", "system", "tool_results"], diff --git a/openclaw-plugin/package-lock.json b/openclaw-plugin/package-lock.json new file mode 100644 index 0000000..aeda12e --- /dev/null +++ b/openclaw-plugin/package-lock.json @@ -0,0 +1,22 @@ +{ + "name": "@contextpilot/openclaw-plugin", + "version": "0.2.0", + "lockfileVersion": 3, + "requires": true, + "packages": { + "": { + "name": "@contextpilot/openclaw-plugin", + "version": "0.2.0", + "license": "Apache-2.0", + "dependencies": { + "@sinclair/typebox": "^0.34.49" + } + }, + "node_modules/@sinclair/typebox": { + "version": "0.34.49", + "resolved": "https://registry.npmjs.org/@sinclair/typebox/-/typebox-0.34.49.tgz", + "integrity": "sha512-brySQQs7Jtn0joV8Xh9ZV/hZb9Ozb0pmazDIASBkYKCjXrXU3mpcFahmK/z4YDhGkQvP9mWJbVyahdtU5wQA+A==", + "license": "MIT" + } + } +} diff --git a/openclaw-plugin/package.json b/openclaw-plugin/package.json index 58defc7..7f03fc8 100644 --- a/openclaw-plugin/package.json +++ b/openclaw-plugin/package.json @@ -1,5 +1,5 @@ { - "name": "@contextpilot/openclaw-plugin", + "name": "@contextpilot/contextpilot", "version": "0.2.0", "description": "ContextPilot plugin for OpenClaw — faster long-context inference via in-process context reuse. Zero external dependencies.", "type": "module", @@ -21,11 +21,16 @@ "llm" ], "openclaw": { - "extensions": ["./src/index.ts"] + "extensions": [ + "./src/index.ts" + ] }, "files": [ "src/", "openclaw.plugin.json", "README.md" - ] + ], + "dependencies": { + "@sinclair/typebox": "^0.34.49" + } } diff --git a/openclaw-plugin/src/engine/cache-control.ts b/openclaw-plugin/src/engine/cache-control.ts index 53d48e7..6ab3901 100644 --- a/openclaw-plugin/src/engine/cache-control.ts +++ b/openclaw-plugin/src/engine/cache-control.ts @@ -102,6 +102,30 @@ function injectToolResultCacheControl( } const message = msg as MessageBlock; + + // Handle OpenClaw's toolResult role (content is the tool result itself) + if (message.role === 'toolResult') { + const toolResultContent = message.content ?? ''; + let totalChars = 0; + + if (typeof toolResultContent === 'string') { + totalChars = toolResultContent.length; + } else if (Array.isArray(toolResultContent)) { + totalChars = toolResultContent.reduce((sum, inner) => { + if (isRecord(inner) && inner.type === 'text') { + return sum + (typeof inner.text === 'string' ? inner.text.length : 0); + } + return sum; + }, 0); + } + + if (totalChars >= MIN_CONTENT_LENGTH_FOR_CACHE) { + (message as any).cache_control = cc; + } + continue; + } + + // Handle Anthropic's user message with tool_result blocks if (message.role !== 'user' || !Array.isArray(message.content)) { continue; } @@ -121,6 +145,9 @@ function injectToolResultCacheControl( } export function injectAnthropicCacheControl(body: Record): Record { + if (!body || typeof body !== 'object') { + return body ?? {}; + } const copiedBody = structuredClone(body); injectSystemCacheControl(copiedBody, CACHE_CONTROL_EPHEMERAL); injectToolResultCacheControl(copiedBody, CACHE_CONTROL_EPHEMERAL); diff --git a/openclaw-plugin/src/engine/dedup.ts b/openclaw-plugin/src/engine/dedup.ts index 14d79f4..eb3acb0 100644 --- a/openclaw-plugin/src/engine/dedup.ts +++ b/openclaw-plugin/src/engine/dedup.ts @@ -172,11 +172,22 @@ export function dedupChatCompletions(body: ChatCompletionsBody, opts: DedupOptio for (let idx = 0; idx < messages.length; idx++) { const msg = messages[idx]; - if (!msg || typeof msg !== 'object' || msg.role !== 'tool') { + if (!msg || typeof msg !== 'object') { + continue; + } + // Support both OpenAI 'tool' role and OpenClaw 'toolResult' role + if (msg.role !== 'tool' && msg.role !== 'toolResult') { continue; } - const content = msg.content || ''; + // For toolResult role, content might be an array of {type: "text", text: "..."} blocks + let content = msg.content || ''; + if (Array.isArray(content)) { + content = content + .filter((b: any) => b?.type === 'text') + .map((b: any) => b.text || '') + .join('\n'); + } if (typeof content !== 'string' || content.length < minContentChars) { continue; } @@ -234,7 +245,18 @@ export function dedupChatCompletions(body: ChatCompletionsBody, opts: DedupOptio if (dedupedInThis > 0) { const originalLen = content.length; const newContent = newBlocks.join('\n\n'); - msg.content = newContent; + + // Preserve original content format + if (Array.isArray(msg.content)) { + // For array content, update the first text block + const textBlockIdx = msg.content.findIndex((b: any) => b?.type === 'text'); + if (textBlockIdx >= 0) { + (msg.content as any[])[textBlockIdx].text = newContent; + } + } else { + msg.content = newContent; + } + const newLen = newContent.length; result.charsBefore += originalLen; result.charsAfter += newLen; diff --git a/openclaw-plugin/src/engine/tree-nodes.ts b/openclaw-plugin/src/engine/tree-nodes.ts index 3f9b380..e7b3c7a 100644 --- a/openclaw-plugin/src/engine/tree-nodes.ts +++ b/openclaw-plugin/src/engine/tree-nodes.ts @@ -30,7 +30,7 @@ export class ClusterNode { } get isLeaf(): boolean { - return this.children.length === 0; + return !Array.isArray(this.children) || this.children.length === 0; } get isRoot(): boolean { @@ -50,6 +50,10 @@ export class ClusterNode { } addChild(childId: number): void { + // Defensive: ensure children is an array + if (!Array.isArray(this.children)) { + this.children = []; + } if (!this.children.includes(childId) && childId !== this.nodeId) { this.children.push(childId); } diff --git a/openclaw-plugin/src/index.ts b/openclaw-plugin/src/index.ts index fbd1384..e3736ff 100644 --- a/openclaw-plugin/src/index.ts +++ b/openclaw-plugin/src/index.ts @@ -1,47 +1,21 @@ import { Type } from "@sinclair/typebox"; -import { - definePluginEntry, - type ProviderResolveDynamicModelContext, - type ProviderWrapStreamFnContext, -} from "openclaw/plugin-sdk/plugin-entry"; -import { createProviderApiKeyAuthMethod } from "openclaw/plugin-sdk/provider-auth"; +import { definePluginEntry } from "openclaw/plugin-sdk/plugin-entry"; +import { delegateCompactionToRuntime } from "openclaw/plugin-sdk/core"; import { injectCacheControl } from "./engine/cache-control.js"; -import { dedupChatCompletions, dedupResponsesApi } from "./engine/dedup.js"; +import { dedupChatCompletions } from "./engine/dedup.js"; import { getFormatHandler, type InterceptConfig } from "./engine/extract.js"; -import { ContextPilotIndexClient } from "./engine/http-client.js"; import { ContextPilot } from "./engine/live-index.js"; -const PROVIDER_ID = "contextpilot"; -type BackendProvider = "anthropic" | "openai" | "sglang"; +type Scope = "all" | "system" | "tool_results"; -function parseBackendProvider(value: unknown): BackendProvider { - if (value === "openai" || value === "sglang") { - return value; - } - return "anthropic"; -} - -function parseScope(value: unknown): "all" | "system" | "tool_results" { +function parseScope(value: unknown): Scope { if (value === "system" || value === "tool_results" || value === "all") { return value; } return "all"; } -function detectApiFormat( - body: Record, - backendProvider: BackendProvider, -): "openai_chat" | "anthropic_messages" { - if (backendProvider === "anthropic") { - return "anthropic_messages"; - } - if (backendProvider === "openai") { - return "openai_chat"; - } - return "system" in body ? "anthropic_messages" : "openai_chat"; -} - function reorderWithEngine(engine: ContextPilot, docs: string[]): string[] { const [reordered] = engine.reorder(docs); if (!Array.isArray(reordered) || !Array.isArray(reordered[0])) { @@ -54,243 +28,182 @@ function reorderWithEngine(engine: ContextPilot, docs: string[]): string[] { return candidate as string[]; } -async function reorderWithClient( - client: ContextPilotIndexClient, - docs: string[], -): Promise { - const encodedDocs = docs.map((doc) => Array.from(doc, (ch) => ch.charCodeAt(0))); - const result = await client.reorder(encodedDocs, 0.001, false, "average"); - - if (result === null) { - return docs; - } - - const [, originalIndices] = result; - if (!Array.isArray(originalIndices) || originalIndices.length !== docs.length) { - return docs; - } - - const reordered = originalIndices.map((index) => { - if (typeof index !== "number" || index < 0 || index >= docs.length) { - return null; - } - return docs[index]; - }); - - return reordered.includes(null) ? docs : (reordered as string[]); -} - -function formatJson(value: unknown): string { - return value === null || value === undefined ? "unavailable" : JSON.stringify(value); +interface Message { + role: string; + content: unknown; } export default definePluginEntry({ id: "contextpilot", name: "ContextPilot", - description: "Optimizes LLM requests in-process via extraction, dedup, caching, and reordering.", + description: "Optimizes context via reordering, deduplication, and cache control injection.", register: (api) => { const config = { - backendProvider: parseBackendProvider(api.pluginConfig?.backendProvider), scope: parseScope(api.pluginConfig?.scope), - indexServerUrl: String(api.pluginConfig?.indexServerUrl || "http://localhost:8765"), }; - const isSglang = config.backendProvider === "sglang"; - const engine = isSglang ? null : new ContextPilot(0.001, false, "average"); - const client = isSglang ? new ContextPilotIndexClient(config.indexServerUrl) : null; + // Initialize the ContextPilot engine for reordering + const engine = new ContextPilot(0.001, false, "average"); - let requestCount = 0; + let assembleCount = 0; let totalCharsSaved = 0; - api.registerProvider({ - id: PROVIDER_ID, - label: "ContextPilot", - docsPath: "/providers/contextpilot", - envVars: isSglang - ? [] - : [config.backendProvider === "anthropic" ? "ANTHROPIC_API_KEY" : "OPENAI_API_KEY"], - auth: isSglang - ? [] - : [ - createProviderApiKeyAuthMethod({ - providerId: PROVIDER_ID, - methodId: "api-key", - label: config.backendProvider === "anthropic" ? "Anthropic API key" : "OpenAI API key", - hint: "API key for the backend LLM provider", - optionKey: config.backendProvider === "anthropic" ? "anthropicApiKey" : "openaiApiKey", - flagName: config.backendProvider === "anthropic" ? "--anthropic-api-key" : "--openai-api-key", - envVar: config.backendProvider === "anthropic" ? "ANTHROPIC_API_KEY" : "OPENAI_API_KEY", - promptMessage: "Enter your API key", - defaultModel: - config.backendProvider === "anthropic" - ? "contextpilot/claude-sonnet-4-6" - : "contextpilot/gpt-4o", - }), - ], - resolveDynamicModel: (ctx: ProviderResolveDynamicModelContext) => { - if (config.backendProvider === "sglang") { - return { - id: ctx.modelId, - name: ctx.modelId, - provider: PROVIDER_ID, - baseUrl: config.indexServerUrl, - api: "openai-completions", - reasoning: false, - input: ["text", "image"] as Array<"text" | "image">, - cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, - contextWindow: 200000, - maxTokens: 16384, - }; - } + // Register as a Context Engine - this intercepts context assembly + api.registerContextEngine("contextpilot", () => ({ + info: { + id: "contextpilot", + name: "ContextPilot", + ownsCompaction: false, + }, - const isAnthropic = config.backendProvider === "anthropic"; - return { - id: ctx.modelId, - name: ctx.modelId, - provider: PROVIDER_ID, - baseUrl: isAnthropic ? "https://api.anthropic.com/v1" : "https://api.openai.com/v1", - api: isAnthropic ? "anthropic-messages" : "openai-completions", - reasoning: false, - input: ["text", "image"] as Array<"text" | "image">, - cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, - contextWindow: 200000, - maxTokens: 16384, - }; + async ingest() { + return { ingested: true }; }, - wrapStreamFn: (ctx: ProviderWrapStreamFnContext) => { - const originalStreamFn = ctx.streamFn; - if (!originalStreamFn) return undefined; - return async (params) => { - const request = params as { body?: unknown }; - if (!request.body) { - return originalStreamFn(params); - } + async assemble({ messages, system }: { messages: Message[]; system?: string }) { + const interceptConfig: InterceptConfig = { + enabled: true, + mode: "auto", + tag: "document", + separator: "---", + alpha: 0.001, + linkageMethod: "average", + scope: config.scope, + }; - const body = structuredClone(request.body) as Record; - const apiFormat = detectApiFormat(body, config.backendProvider); + // OpenClaw uses role: "toolResult" instead of Anthropic's user+tool_result blocks + // Convert to Anthropic format for our extractors + const convertedMessages = messages.map((msg, idx) => { + if (msg.role === "toolResult") { + const content = typeof msg.content === "string" + ? msg.content + : Array.isArray(msg.content) + ? (msg.content as any[]).map(b => b?.text || "").join("\n") + : ""; + return { + role: "user", + content: [{ + type: "tool_result", + tool_use_id: (msg as any).tool_use_id || (msg as any).toolUseId || `tool_${idx}`, + content: content, + }], + }; + } + return msg; + }); - const interceptConfig: InterceptConfig = { - enabled: true, - mode: "auto", - tag: "document", - separator: "---", - alpha: 0.001, - linkageMethod: "average", - scope: config.scope, - }; + const convertedBody: Record = { + messages: convertedMessages, + system: system, + }; - const handler = getFormatHandler(apiFormat); - const multi = handler.extractAll(body, interceptConfig); + const handler = getFormatHandler("anthropic_messages"); + const multi = handler.extractAll(convertedBody, interceptConfig); - const reorderDocs = async (docs: string[]): Promise => { - if (docs.length < 2) { - return docs; - } - if (client) { - return reorderWithClient(client, docs); - } - if (engine) { - return reorderWithEngine(engine, docs); - } + const reorderDocs = (docs: string[]): string[] => { + if (docs.length < 2) { return docs; - }; - - if (multi.systemExtraction) { - const [extraction, sysIdx] = multi.systemExtraction; - if (extraction.documents.length >= 2) { - const reordered = await reorderDocs(extraction.documents); - handler.reconstructSystem(body, extraction, reordered, sysIdx); - } } + return reorderWithEngine(engine, docs); + }; - for (const [extraction, location] of multi.toolExtractions) { - if (extraction.documents.length >= 2) { - const reordered = await reorderDocs(extraction.documents); - handler.reconstructToolResult(body, extraction, reordered, location); - } + // Reorder documents in system prompt + if (multi.systemExtraction) { + const [extraction, sysIdx] = multi.systemExtraction; + if (extraction.documents.length >= 2) { + const reordered = reorderDocs(extraction.documents); + handler.reconstructSystem(convertedBody, extraction, reordered, sysIdx); } + } - if (apiFormat === "openai_chat") { - const dedupResult = dedupChatCompletions(body); - totalCharsSaved += dedupResult.charsSaved; + // Reorder documents in tool results + for (const [extraction, location] of multi.toolExtractions) { + if (extraction.documents.length >= 2) { + const reordered = reorderDocs(extraction.documents); + handler.reconstructToolResult(convertedBody, extraction, reordered, location); } - if (body.input && Array.isArray(body.input)) { - const dedupResult = dedupResponsesApi(body); - totalCharsSaved += dedupResult.charsSaved; + } + + // Map converted messages back to original format (toolResult role) + const finalMessages = (convertedBody.messages as any[]).map((msg, idx) => { + const original = messages[idx]; + if (original?.role === "toolResult") { + const block = Array.isArray(msg.content) ? msg.content[0] : null; + const extractedContent = block?.content; + + if (Array.isArray(original.content)) { + const newContentArray = (original.content as any[]).map(b => { + if (b?.type === "text" && typeof extractedContent === "string") { + return { ...b, text: extractedContent }; + } + return b; + }); + return { ...original, content: newContentArray }; + } else if (typeof extractedContent === "string") { + return { ...original, content: extractedContent }; + } + return original; } + return msg; + }); + + // Build final body with potentially reordered messages + const finalBody: Record = { + messages: finalMessages, + system: system, + }; + + // Deduplicate repeated content + const dedupResult = dedupChatCompletions(finalBody); + totalCharsSaved += dedupResult.charsSaved; - const optimizedBody = isSglang - ? body - : injectCacheControl(body, config.backendProvider === "anthropic" ? "anthropic" : "openai"); + // Inject cache control markers + const optimizedBody = injectCacheControl(finalBody, "anthropic"); - requestCount++; + assembleCount++; - return originalStreamFn({ - ...params, - body: optimizedBody, - }); + // Log savings periodically (every 5 requests or when significant savings) + if (dedupResult.charsSaved > 0 || assembleCount % 5 === 0) { + const estimatedTokensSaved = Math.round(totalCharsSaved / 4); + const estimatedCostSaved = (estimatedTokensSaved * 0.003 / 1000).toFixed(4); // $3/MTok input + console.error(`[ContextPilot] Stats: ${assembleCount} requests, ${totalCharsSaved.toLocaleString()} chars saved (~${estimatedTokensSaved.toLocaleString()} tokens, ~$${estimatedCostSaved})`); + } + + // Return optimized messages + return { + messages: (optimizedBody.messages as Message[]) || messages, + system: optimizedBody.system as string | undefined, + estimatedTokens: 0, }; }, - augmentModelCatalog: () => { - if (config.backendProvider === "sglang") { - return [ - { id: "default", name: "SGLang Default (ContextPilot)", provider: PROVIDER_ID }, - ]; - } - const isAnthropic = config.backendProvider === "anthropic"; - if (isAnthropic) { - return [ - { id: "claude-opus-4-6", name: "Claude Opus 4.6 (ContextPilot)", provider: PROVIDER_ID }, - { - id: "claude-sonnet-4-6", - name: "Claude Sonnet 4.6 (ContextPilot)", - provider: PROVIDER_ID, - }, - ]; - } - return [ - { id: "gpt-4o", name: "GPT-4o (ContextPilot)", provider: PROVIDER_ID }, - { id: "gpt-4o-mini", name: "GPT-4o Mini (ContextPilot)", provider: PROVIDER_ID }, - ]; + async compact(params) { + return await delegateCompactionToRuntime(params); }, - }); + })); + // Register status tool api.registerTool({ name: "contextpilot_status", description: "Report ContextPilot engine state", parameters: Type.Object({}), async execute(_toolCallId: string, _params: unknown) { + const stats = engine.getStats(); const lines = [ "ContextPilot Engine Status:", - ` Backend: ${config.backendProvider}`, ` Scope: ${config.scope}`, - ` Requests optimized: ${requestCount}`, + ` Contexts assembled: ${assembleCount}`, ` Total chars saved: ${totalCharsSaved.toLocaleString()}`, + ` Live index: ${engine.isLive ? "active" : "warming"}`, + ` Nodes: ${Number(stats.num_nodes ?? 0)}`, + ` Active nodes: ${Number(stats.active_nodes ?? 0)}`, + ` Requests tracked: ${Number(stats.num_requests ?? 0)}`, + ` Total searches: ${Number(stats.total_searches ?? 0)}`, + ` Total insertions: ${Number(stats.total_insertions ?? 0)}`, + ` Total removals: ${Number(stats.total_removals ?? 0)}`, + ` Avg search time (us): ${Number(stats.avg_search_time_us ?? 0).toFixed(2)}`, ]; - if (engine) { - const stats = engine.getStats(); - lines.push(" Mode: cloud-api (in-process ContextPilot engine)"); - lines.push(` Live index: ${engine.isLive ? "active" : "warming"}`); - lines.push(` Nodes: ${Number(stats.num_nodes ?? 0)}`); - lines.push(` Active nodes: ${Number(stats.active_nodes ?? 0)}`); - lines.push(` Requests tracked: ${Number(stats.num_requests ?? 0)}`); - lines.push(` Total searches: ${Number(stats.total_searches ?? 0)}`); - lines.push(` Total insertions: ${Number(stats.total_insertions ?? 0)}`); - lines.push(` Total removals: ${Number(stats.total_removals ?? 0)}`); - lines.push(` Avg search time (us): ${Number(stats.avg_search_time_us ?? 0).toFixed(2)}`); - } - - if (client) { - const [health, remoteStats] = await Promise.all([client.health(), client.getStats()]); - lines.push(" Mode: sglang (remote ContextPilot index)"); - lines.push(` Index server URL: ${config.indexServerUrl}`); - lines.push(` Index server health: ${formatJson(health)}`); - lines.push(` Index server stats: ${formatJson(remoteStats)}`); - } - return { content: [ { diff --git a/openclaw-plugin/test-e2e.ts b/openclaw-plugin/test-e2e.ts new file mode 100644 index 0000000..6b2f6f6 --- /dev/null +++ b/openclaw-plugin/test-e2e.ts @@ -0,0 +1,188 @@ +#!/usr/bin/env npx tsx +/** + * E2E test for ContextPilot plugin + * + * Run: npx tsx test-e2e.ts + * Requires: ANTHROPIC_API_KEY in environment + */ + +import { ContextPilot } from './src/engine/live-index.js'; +import { getFormatHandler, type InterceptConfig } from './src/engine/extract.js'; +import { injectCacheControl } from './src/engine/cache-control.js'; +import { dedupChatCompletions } from './src/engine/dedup.js'; + +const ANTHROPIC_API_KEY = process.env.ANTHROPIC_API_KEY; +if (!ANTHROPIC_API_KEY) { + console.error('Error: ANTHROPIC_API_KEY not set'); + process.exit(1); +} + +// Simulated system prompt with multiple documents (like Claude Code's context) +const systemPromptWithDocs = `You are a helpful coding assistant. + + + +# File: src/index.ts +export function main() { + console.log("Hello world"); + const result = processData(getData()); + return result; +} + +function getData() { + return { items: [1, 2, 3, 4, 5] }; +} + +function processData(data: { items: number[] }) { + return data.items.map(x => x * 2); +} + + +# File: src/utils.ts +export function formatOutput(data: number[]): string { + return data.join(', '); +} + +export function validateInput(input: unknown): boolean { + return Array.isArray(input) && input.every(x => typeof x === 'number'); +} + +export function calculateSum(numbers: number[]): number { + return numbers.reduce((a, b) => a + b, 0); +} + + +# File: README.md +# My Project + +This is a sample project demonstrating the ContextPilot optimization. + +## Installation +npm install + +## Usage +npm start + +## Features +- Data processing +- Input validation +- Output formatting + + + +Answer questions about the code above.`; + +// Build Anthropic Messages API request body +const requestBody = { + model: 'claude-sonnet-4-6', + max_tokens: 256, + system: systemPromptWithDocs, + messages: [ + { + role: 'user', + content: 'What does the main function do? Be brief.' + } + ] +}; + +async function runTest() { + console.log('=== ContextPilot E2E Test ===\n'); + + // 1. Initialize engine + const engine = new ContextPilot(0.001, false, 'average'); + console.log('1. Engine initialized'); + + // 2. Extract documents + const interceptConfig: InterceptConfig = { + enabled: true, + mode: 'auto', + tag: 'document', + separator: '---', + alpha: 0.001, + linkageMethod: 'average', + scope: 'all' + }; + + const body = structuredClone(requestBody); + const handler = getFormatHandler('anthropic_messages'); + const multi = handler.extractAll(body, interceptConfig); + + console.log(`2. Extracted ${multi.totalDocuments} documents from system prompt`); + + // 3. Reorder documents + if (multi.systemExtraction) { + const [extraction, sysIdx] = multi.systemExtraction; + console.log(` Original order: ${extraction.documents.map((_, i) => i).join(', ')}`); + + if (extraction.documents.length >= 2) { + const [reordered] = engine.reorder(extraction.documents); + if (Array.isArray(reordered) && Array.isArray(reordered[0])) { + const reorderedDocs = reordered[0] as string[]; + handler.reconstructSystem(body, extraction, reorderedDocs, sysIdx); + console.log('3. Documents reordered for prefix cache optimization'); + } + } + } + + // 4. Inject cache control + const optimizedBody = injectCacheControl(body, 'anthropic'); + console.log('4. Cache control markers injected'); + + // 5. Show optimization summary + const originalLen = JSON.stringify(requestBody).length; + const optimizedLen = JSON.stringify(optimizedBody).length; + console.log(`\n=== Optimization Summary ===`); + console.log(`Original request size: ${originalLen} chars`); + console.log(`Optimized request size: ${optimizedLen} chars`); + console.log(`Cache control added: ${JSON.stringify(optimizedBody).includes('cache_control')}`); + + // 6. Make real API call + console.log('\n=== Making API Call ==='); + console.log('Calling Anthropic API with optimized request...\n'); + + const response = await fetch('https://api.anthropic.com/v1/messages', { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + 'x-api-key': ANTHROPIC_API_KEY, + 'anthropic-version': '2023-06-01', + 'anthropic-beta': 'prompt-caching-2024-07-31' + }, + body: JSON.stringify(optimizedBody) + }); + + const result = await response.json(); + + if (result.error) { + console.error('API Error:', result.error); + process.exit(1); + } + + console.log('=== Response ==='); + console.log('Model:', result.model); + console.log('Stop reason:', result.stop_reason); + console.log('\nAssistant:', result.content?.[0]?.text || '(no text)'); + + console.log('\n=== Usage ==='); + console.log('Input tokens:', result.usage?.input_tokens); + console.log('Output tokens:', result.usage?.output_tokens); + if (result.usage?.cache_creation_input_tokens) { + console.log('Cache creation tokens:', result.usage.cache_creation_input_tokens); + } + if (result.usage?.cache_read_input_tokens) { + console.log('Cache read tokens:', result.usage.cache_read_input_tokens); + } + + console.log('\n=== Engine Stats ==='); + const stats = engine.getStats(); + console.log('Nodes:', stats.num_nodes); + console.log('Active nodes:', stats.active_nodes); + console.log('Total insertions:', stats.total_insertions); + + console.log('\n✓ E2E test complete'); +} + +runTest().catch(err => { + console.error('Test failed:', err); + process.exit(1); +}); From 5b408db634b7f947a35602383006c4f3dcc40dc8 Mon Sep 17 00:00:00 2001 From: dalongbao Date: Fri, 3 Apr 2026 16:10:51 +0100 Subject: [PATCH 7/8] openclaw plugin bench --- openclaw-plugin/README.md | 18 ++- openclaw-plugin/benchmark.sh | 242 +++++++++++++++++------------------ 2 files changed, 131 insertions(+), 129 deletions(-) diff --git a/openclaw-plugin/README.md b/openclaw-plugin/README.md index c36fc7c..edd6dbd 100644 --- a/openclaw-plugin/README.md +++ b/openclaw-plugin/README.md @@ -114,14 +114,24 @@ openclaw-plugin/ ## Verifying It Works -Check the gateway logs for ContextPilot output: +Check the gateway logs: ``` -[ContextPilot] assemble() called with 84 messages -[ContextPilot] Extractions found - system: 0 tool: 1 singleDoc: 3 -[ContextPilot] Optimization complete. Chars saved: 2389 +[ContextPilot] Stats: 5 requests, 28,356 chars saved (~7,089 tokens, ~$0.0213) ``` +## Expected Savings + +Savings depend on conversation length and repeated content: + +| Scenario | Chars Saved | Token Reduction | +|:---------|------------:|----------------:| +| Short session (few tool calls) | 0-5K | ~0-5% | +| Medium session (10+ file reads) | 20-50K | ~10-20% | +| Long session (repeated large files) | 100K+ | ~30-50% | + +Run `./benchmark.sh` to measure with/without comparison on your workload. + ## License Apache-2.0 diff --git a/openclaw-plugin/benchmark.sh b/openclaw-plugin/benchmark.sh index b913178..02dd583 100755 --- a/openclaw-plugin/benchmark.sh +++ b/openclaw-plugin/benchmark.sh @@ -1,183 +1,175 @@ #!/bin/bash # -# ContextPilot OpenClaw Plugin Benchmark -# Compares token usage and cache hits with and without the plugin -# -# Usage: ./benchmark.sh [num_iterations] +# ContextPilot Token Usage Benchmark +# Compares prefill/input tokens with and without the plugin # set -e -NUM_ITERATIONS=${1:-3} OPENCLAW_CONFIG="$HOME/.openclaw/openclaw.json" -BACKUP_CONFIG="$HOME/.openclaw/openclaw.json.backup" -GATEWAY_LOG="/tmp/gw-benchmark.log" +BACKUP_CONFIG="$HOME/.openclaw/openclaw.json.bak" +LOG_WITH="/tmp/gw-with-cp.log" +LOG_WITHOUT="/tmp/gw-without-cp.log" -# Test that triggers multiple file reads to show dedup benefit -TEST_FILES=( - "/home/ryan/ContextPilot/openclaw-plugin/src/engine/dedup.ts" - "/home/ryan/ContextPilot/openclaw-plugin/src/engine/cache-control.ts" - "/home/ryan/ContextPilot/openclaw-plugin/src/index.ts" -) +TEST_FILE="/home/ryan/ContextPilot/openclaw-plugin/src/engine/dedup.ts" echo "==========================================" -echo "ContextPilot OpenClaw Plugin Benchmark" +echo "ContextPilot Token Usage Benchmark" echo "==========================================" -echo "Iterations: $NUM_ITERATIONS" -echo "" # Backup config cp "$OPENCLAW_CONFIG" "$BACKUP_CONFIG" cleanup() { echo "" - echo "Restoring original config..." + echo "Restoring config..." cp "$BACKUP_CONFIG" "$OPENCLAW_CONFIG" rm -f "$BACKUP_CONFIG" - pkill -9 -f "openclaw gateway" 2>/dev/null || true + openclaw gateway stop 2>/dev/null || pkill -9 -f "openclaw" 2>/dev/null || true } trap cleanup EXIT -restart_gateway() { - pkill -9 -f "openclaw gateway" 2>/dev/null || true - sleep 2 - openclaw gateway > "$GATEWAY_LOG" 2>&1 & - sleep 5 +enable_contextpilot() { + python3 << 'PYTHON' +import json, os +path = os.path.expanduser("~/.openclaw/openclaw.json") +with open(path) as f: c = json.load(f) +c.setdefault('plugins', {}).setdefault('slots', {})['contextEngine'] = 'contextpilot' +c['plugins'].setdefault('entries', {}).setdefault('contextpilot', {})['enabled'] = True +with open(path, 'w') as f: json.dump(c, f, indent=2) +PYTHON } -run_multi_read_test() { - local label=$1 - - echo "Running $label test..." - echo " Reading ${#TEST_FILES[@]} files multiple times to trigger dedup..." - - # First, read all files - for f in "${TEST_FILES[@]}"; do - openclaw agent --agent main --message "Read $f" > /dev/null 2>&1 - done +disable_contextpilot() { + python3 << 'PYTHON' +import json, os +path = os.path.expanduser("~/.openclaw/openclaw.json") +with open(path) as f: c = json.load(f) +if 'plugins' in c: + c['plugins'].get('slots', {}).pop('contextEngine', None) + if 'contextpilot' in c['plugins'].get('entries', {}): + c['plugins']['entries']['contextpilot']['enabled'] = False +with open(path, 'w') as f: json.dump(c, f, indent=2) +PYTHON +} - # Then read them again (should trigger dedup on second pass) - for f in "${TEST_FILES[@]}"; do - openclaw agent --agent main --message "Read $f again and count lines" > /dev/null 2>&1 - done +restart_gateway() { + local logfile=$1 + echo " Stopping gateway..." + openclaw gateway stop 2>/dev/null || true + pkill -9 -f "openclaw" 2>/dev/null || true + sleep 3 + echo " Starting gateway..." + openclaw gateway > "$logfile" 2>&1 & + sleep 6 + if ! pgrep -f "openclaw" > /dev/null; then + echo " ERROR: Gateway failed to start" + cat "$logfile" | tail -10 + exit 1 + fi + echo " Gateway running." +} +run_test_sequence() { + echo " Reading file 3 times to build up context..." + timeout 60 openclaw agent --agent main --message "Read $TEST_FILE and count functions" > /dev/null 2>&1 || true + timeout 60 openclaw agent --agent main --message "Read $TEST_FILE again" > /dev/null 2>&1 || true + timeout 60 openclaw agent --agent main --message "Read $TEST_FILE one more time and summarize" > /dev/null 2>&1 || true echo " Done." } -extract_stats() { - local log_file=$1 - - # Extract chars saved - local chars_saved=$(grep -oP "Chars saved: \K\d+" "$log_file" 2>/dev/null | tail -1 || echo "0") - - # Extract cache stats from usage blocks - local cache_read=$(grep -oP '"cacheRead": \K\d+' "$log_file" 2>/dev/null | tail -1 || echo "0") - local cache_write=$(grep -oP '"cacheWrite": \K\d+' "$log_file" 2>/dev/null | tail -1 || echo "0") - local input_tokens=$(grep -oP '"input": \K\d+' "$log_file" 2>/dev/null | tail -1 || echo "0") +extract_last_usage() { + local logfile=$1 + # Find the last complete usage block and extract values + local input=$(grep '"input":' "$logfile" 2>/dev/null | tail -1 | grep -oP '\d+' || echo "0") + local cache_read=$(grep '"cacheRead":' "$logfile" 2>/dev/null | tail -1 | grep -oP '\d+' || echo "0") + local cache_write=$(grep '"cacheWrite":' "$logfile" 2>/dev/null | tail -1 | grep -oP '\d+' || echo "0") + echo "$input $cache_read $cache_write" +} - echo "$chars_saved $cache_read $cache_write $input_tokens" +extract_chars_saved() { + local logfile=$1 + # Look for ContextPilot stats line + grep "Stats:" "$logfile" 2>/dev/null | tail -1 | grep -oP '\d+(?= chars saved)' || echo "0" } # ========================================== -# Test WITH ContextPilot enabled +# Test WITH ContextPilot # ========================================== -echo "----------------------------------------" +echo "" echo "Test 1: WITH ContextPilot enabled" echo "----------------------------------------" +enable_contextpilot +restart_gateway "$LOG_WITH" +run_test_sequence -# Ensure plugin is enabled -python3 << 'PYTHON' -import json -config_path = "$HOME/.openclaw/openclaw.json".replace("$HOME", __import__("os").environ["HOME"]) -with open(config_path, 'r') as f: - config = json.load(f) -if 'plugins' not in config: - config['plugins'] = {} -if 'slots' not in config['plugins']: - config['plugins']['slots'] = {} -config['plugins']['slots']['contextEngine'] = 'contextpilot' -if 'entries' not in config['plugins']: - config['plugins']['entries'] = {} -if 'contextpilot' not in config['plugins']['entries']: - config['plugins']['entries']['contextpilot'] = {} -config['plugins']['entries']['contextpilot']['enabled'] = True -with open(config_path, 'w') as f: - json.dump(config, f, indent=2) -PYTHON - -restart_gateway -run_multi_read_test "WITH_CONTEXTPILOT" - -WITH_STATS=$(extract_stats "$GATEWAY_LOG") -WITH_CHARS=$(echo $WITH_STATS | cut -d' ' -f1) -WITH_CACHE_READ=$(echo $WITH_STATS | cut -d' ' -f2) -WITH_CACHE_WRITE=$(echo $WITH_STATS | cut -d' ' -f3) -WITH_INPUT=$(echo $WITH_STATS | cut -d' ' -f4) +WITH_USAGE=$(extract_last_usage "$LOG_WITH") +WITH_INPUT=$(echo $WITH_USAGE | cut -d' ' -f1) +WITH_CACHE_READ=$(echo $WITH_USAGE | cut -d' ' -f2) +WITH_CACHE_WRITE=$(echo $WITH_USAGE | cut -d' ' -f3) +WITH_CHARS=$(extract_chars_saved "$LOG_WITH") echo "" -echo " Chars saved by dedup: $WITH_CHARS" -echo " Cache read tokens: $WITH_CACHE_READ" -echo " Cache write tokens: $WITH_CACHE_WRITE" -echo " Input tokens: $WITH_INPUT" +echo " Results:" +echo " Input tokens: $WITH_INPUT" +echo " Cache read: $WITH_CACHE_READ" +echo " Cache write: $WITH_CACHE_WRITE" +echo " Chars deduped: $WITH_CHARS" # ========================================== -# Test WITHOUT ContextPilot (disabled) +# Test WITHOUT ContextPilot # ========================================== echo "" +echo "Test 2: WITHOUT ContextPilot" echo "----------------------------------------" -echo "Test 2: WITHOUT ContextPilot (disabled)" -echo "----------------------------------------" +disable_contextpilot +restart_gateway "$LOG_WITHOUT" +run_test_sequence -# Disable the plugin -python3 << 'PYTHON' -import json -config_path = "$HOME/.openclaw/openclaw.json".replace("$HOME", __import__("os").environ["HOME"]) -with open(config_path, 'r') as f: - config = json.load(f) -if 'plugins' in config: - if 'slots' in config['plugins']: - config['plugins']['slots'].pop('contextEngine', None) - if 'entries' in config['plugins'] and 'contextpilot' in config['plugins']['entries']: - config['plugins']['entries']['contextpilot']['enabled'] = False -with open(config_path, 'w') as f: - json.dump(config, f, indent=2) -PYTHON - -restart_gateway -run_multi_read_test "WITHOUT_CONTEXTPILOT" - -WITHOUT_STATS=$(extract_stats "$GATEWAY_LOG") -WITHOUT_CHARS=$(echo $WITHOUT_STATS | cut -d' ' -f1) -WITHOUT_CACHE_READ=$(echo $WITHOUT_STATS | cut -d' ' -f2) -WITHOUT_CACHE_WRITE=$(echo $WITHOUT_STATS | cut -d' ' -f3) -WITHOUT_INPUT=$(echo $WITHOUT_STATS | cut -d' ' -f4) +WITHOUT_USAGE=$(extract_last_usage "$LOG_WITHOUT") +WITHOUT_INPUT=$(echo $WITHOUT_USAGE | cut -d' ' -f1) +WITHOUT_CACHE_READ=$(echo $WITHOUT_USAGE | cut -d' ' -f2) +WITHOUT_CACHE_WRITE=$(echo $WITHOUT_USAGE | cut -d' ' -f3) echo "" -echo " Chars saved by dedup: $WITHOUT_CHARS (expected: 0)" -echo " Cache read tokens: $WITHOUT_CACHE_READ" -echo " Cache write tokens: $WITHOUT_CACHE_WRITE" -echo " Input tokens: $WITHOUT_INPUT" +echo " Results:" +echo " Input tokens: $WITHOUT_INPUT" +echo " Cache read: $WITHOUT_CACHE_READ" +echo " Cache write: $WITHOUT_CACHE_WRITE" +echo " Chars deduped: 0 (plugin disabled)" # ========================================== -# Results Summary +# Summary # ========================================== echo "" echo "==========================================" -echo "RESULTS SUMMARY" +echo "COMPARISON" echo "==========================================" echo "" -echo " WITH WITHOUT" -echo " ContextPilot Plugin" -echo "----------------------------------------" -printf "Chars deduped: %8s %8s\n" "$WITH_CHARS" "$WITHOUT_CHARS" -printf "Cache read tokens: %8s %8s\n" "$WITH_CACHE_READ" "$WITHOUT_CACHE_READ" -printf "Cache write tokens: %8s %8s\n" "$WITH_CACHE_WRITE" "$WITHOUT_CACHE_WRITE" -printf "Input tokens: %8s %8s\n" "$WITH_INPUT" "$WITHOUT_INPUT" +printf "%-20s %12s %12s\n" "" "WITH CP" "WITHOUT CP" +printf "%-20s %12s %12s\n" "--------------------" "------------" "------------" +printf "%-20s %12s %12s\n" "Input tokens" "$WITH_INPUT" "$WITHOUT_INPUT" +printf "%-20s %12s %12s\n" "Cache read" "$WITH_CACHE_READ" "$WITHOUT_CACHE_READ" +printf "%-20s %12s %12s\n" "Cache write" "$WITH_CACHE_WRITE" "$WITHOUT_CACHE_WRITE" +printf "%-20s %12s %12s\n" "Chars deduped" "$WITH_CHARS" "0" echo "" -if [ "$WITH_CHARS" -gt "0" ]; then - echo "ContextPilot deduplication saved $WITH_CHARS characters" - # Rough estimate: 4 chars per token +# Calculate differences +if [ "$WITH_INPUT" -gt 0 ] && [ "$WITHOUT_INPUT" -gt 0 ]; then + if [ "$WITH_INPUT" -lt "$WITHOUT_INPUT" ]; then + diff=$((WITHOUT_INPUT - WITH_INPUT)) + pct=$((diff * 100 / WITHOUT_INPUT)) + echo ">>> ContextPilot reduced input tokens by $diff ($pct% savings)" + elif [ "$WITH_INPUT" -gt "$WITHOUT_INPUT" ]; then + diff=$((WITH_INPUT - WITHOUT_INPUT)) + pct=$((diff * 100 / WITHOUT_INPUT)) + echo ">>> ContextPilot added $diff tokens ($pct% overhead)" + else + echo ">>> No difference in input tokens" + fi +fi + +if [ "$WITH_CHARS" -gt 0 ]; then tokens_saved=$((WITH_CHARS / 4)) - echo "Estimated token savings: ~$tokens_saved tokens" + echo ">>> Deduplication removed ~$tokens_saved tokens worth of repeated content" fi From a388ae59c80cede82d939d3b2a53b0794c0edfb3 Mon Sep 17 00:00:00 2001 From: tsuiusi Date: Tue, 7 Apr 2026 00:09:39 +0200 Subject: [PATCH 8/8] fix for tests --- contextpilot/server/http_server.py | 109 ++++++++------ openclaw-plugin/benchmark.sh | 27 ++-- openclaw-plugin/openclaw.plugin.json | 4 +- .../src/engine/compute-distance.ts | 10 -- .../src/engine/conversation-tracker.ts | 12 +- openclaw-plugin/src/engine/dedup.ts | 3 +- openclaw-plugin/src/engine/eviction-heap.ts | 4 +- openclaw-plugin/src/engine/inter-scheduler.ts | 6 +- openclaw-plugin/src/engine/live-index.ts | 138 +++++++++--------- tests/test_http_intercept.py | 11 +- 10 files changed, 169 insertions(+), 155 deletions(-) diff --git a/contextpilot/server/http_server.py b/contextpilot/server/http_server.py index 82ecadd..38f9b1e 100644 --- a/contextpilot/server/http_server.py +++ b/contextpilot/server/http_server.py @@ -129,9 +129,10 @@ class _InterceptConvState: last_message_count: int = 0 -# Per-session state dict keyed by session fingerprint (hash of first user msg). +# Per-session state dict keyed by session fingerprint (system prompt + first user msg). # This allows concurrent multi-user sessions to each maintain their own state. _intercept_states: dict[str, _InterceptConvState] = {} +_intercept_states_lock = asyncio.Lock() _MAX_TRACKED_SESSIONS = 64 # LRU eviction threshold # TTFT tracking for averages across a session @@ -1181,32 +1182,47 @@ def _hash_text(text: str) -> str: def _session_fingerprint(body: Dict[str, Any]) -> str: - """Derive a session fingerprint from the first user message. + """Derive a session fingerprint from the system prompt + first user message. - In a multi-turn conversation, messages grow but the first user message - stays constant. Hashing it gives a stable per-session key that lets - concurrent users each maintain their own intercept state. + In a multi-turn conversation, messages grow but the system prompt and + first user message stay constant. Hashing both gives a stable per-session + key that lets concurrent users each maintain their own intercept state, + even if different users share the same first user message. """ msgs = body.get("messages") or [] + parts_to_hash: list[str] = [] + + # Include system prompt for differentiation between sessions + system = body.get("system") + if system: + parts_to_hash.append(str(system)[:500]) + # Find the first user message (usually msg[0] or msg[1] after system) - for msg in msgs[:3]: - if isinstance(msg, dict) and msg.get("role") == "user": + for msg in msgs[:5]: + if isinstance(msg, dict) and msg.get("role") == "system": + parts_to_hash.append(str(msg.get("content", ""))[:500]) + elif isinstance(msg, dict) and msg.get("role") == "user": content = msg.get("content", "") if isinstance(content, list): # OpenAI format: [{type: text, text: "..."}] - parts = [p.get("text", "") for p in content - if isinstance(p, dict)] - content = "".join(parts) - return _hash_text(str(content)) - # Fallback: hash all messages (shouldn't happen in practice) - return _hash_text(json.dumps(msgs[:2], sort_keys=True)) + text_parts = [p.get("text", "") for p in content + if isinstance(p, dict)] + content = "".join(text_parts) + parts_to_hash.append(str(content)) + break + + if not parts_to_hash: + # Fallback: hash first two messages + return _hash_text(json.dumps(msgs[:2], sort_keys=True)) + + return _hash_text("\x00".join(parts_to_hash)) -def _get_intercept_state(body: Dict[str, Any]) -> _InterceptConvState: +async def _get_intercept_state(body: Dict[str, Any]) -> _InterceptConvState: """Return per-session intercept state, creating or resetting as needed. - Uses the first user message as a session fingerprint so concurrent - multi-user sessions each get their own state. + Uses the system prompt + first user message as a session fingerprint so + concurrent multi-user sessions each get their own state. Detection: in a multi-turn agent conversation the messages array only grows. If the count drops, either a new session started or the host @@ -1218,31 +1234,32 @@ def _get_intercept_state(body: Dict[str, Any]) -> _InterceptConvState: session_key = _session_fingerprint(body) msg_count = len(body.get("messages") or []) - state = _intercept_states.get(session_key) + async with _intercept_states_lock: + state = _intercept_states.get(session_key) - if state is None: - # New session - state = _InterceptConvState() - state.system_processed = True - logger.info( - f"Intercept: new session {session_key[:8]}… " - f"({msg_count} msgs, {len(_intercept_states)} active sessions)" - ) - # Evict oldest sessions if over limit - if len(_intercept_states) >= _MAX_TRACKED_SESSIONS: - oldest_key = next(iter(_intercept_states)) - del _intercept_states[oldest_key] - logger.info(f"Intercept: evicted session {oldest_key[:8]}…") - _intercept_states[session_key] = state - elif msg_count < state.last_message_count: - logger.info( - f"Intercept: session {session_key[:8]}… message count dropped " - f"({msg_count} < {state.last_message_count}), " - f"resetting state (compaction or restart)" - ) - state = _InterceptConvState() - state.system_processed = True - _intercept_states[session_key] = state + if state is None: + # New session + state = _InterceptConvState() + state.system_processed = True + logger.info( + f"Intercept: new session {session_key[:8]}… " + f"({msg_count} msgs, {len(_intercept_states)} active sessions)" + ) + # Evict oldest sessions if over limit + if len(_intercept_states) >= _MAX_TRACKED_SESSIONS: + oldest_key = next(iter(_intercept_states)) + del _intercept_states[oldest_key] + logger.info(f"Intercept: evicted session {oldest_key[:8]}…") + _intercept_states[session_key] = state + elif msg_count < state.last_message_count: + logger.info( + f"Intercept: session {session_key[:8]}… message count dropped " + f"({msg_count} < {state.last_message_count}), " + f"resetting state (compaction or restart)" + ) + state = _InterceptConvState() + state.system_processed = True + _intercept_states[session_key] = state state.last_message_count = msg_count return state @@ -1303,8 +1320,6 @@ def _strip_external_content_ids(body: Any) -> Any: "proxy-authenticate", "content-length", )) -# Previous message hashes for prefix divergence detection. -_debug_prev_msg_hashes: List[str] = [] def _doc_preview(doc: str, max_len: int = 60) -> str: @@ -1494,8 +1509,6 @@ async def _intercept_and_forward(request: Request, api_format: str): f"Intercept: session={_session_tag} {_debug_msg_count} msgs" ) - _debug_prev_msg_hashes = list(_debug_msg_hashes) - # ── Format handler (strategy pattern) ──────────────────────────── handler = get_format_handler(api_format) @@ -1504,7 +1517,7 @@ async def _intercept_and_forward(request: Request, api_format: str): body = copy.deepcopy(body) # ── Conversation-aware state (single-conversation model) ── - state = _get_intercept_state(body) + state = await _get_intercept_state(body) # ── Replace old messages with cached (modified) versions ── # On subsequent turns, the host sends original (unmodified) @@ -1952,9 +1965,9 @@ async def proxy_engine(path: str, request: Request): body["rid"] = request_id body["request_id"] = request_id - body["temperature"] = 0 + body.setdefault("temperature", 0) if _cloud_mode: - body["top_p"] = 0 + body.setdefault("top_p", 0) dedup_result = DedupResult() try: @@ -2167,7 +2180,7 @@ def main(): os.environ["CONTEXTPILOT_CLOUD_API_KEY"] = args.cloud_api_key # Also set global config for direct access - global _max_tokens, _infer_api_url, _tokenizer, _model_name, _stateless_mode + global _max_tokens, _infer_api_url, _tokenizer, _model_name, _stateless_mode, _chunk_modulus _max_tokens = args.max_tokens _infer_api_url = args.infer_api_url.rstrip("/") _stateless_mode = args.stateless diff --git a/openclaw-plugin/benchmark.sh b/openclaw-plugin/benchmark.sh index 02dd583..a703866 100755 --- a/openclaw-plugin/benchmark.sh +++ b/openclaw-plugin/benchmark.sh @@ -11,7 +11,8 @@ BACKUP_CONFIG="$HOME/.openclaw/openclaw.json.bak" LOG_WITH="/tmp/gw-with-cp.log" LOG_WITHOUT="/tmp/gw-without-cp.log" -TEST_FILE="/home/ryan/ContextPilot/openclaw-plugin/src/engine/dedup.ts" +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +TEST_FILE="${SCRIPT_DIR}/src/engine/dedup.ts" echo "==========================================" echo "ContextPilot Token Usage Benchmark" @@ -25,7 +26,7 @@ cleanup() { echo "Restoring config..." cp "$BACKUP_CONFIG" "$OPENCLAW_CONFIG" rm -f "$BACKUP_CONFIG" - openclaw gateway stop 2>/dev/null || pkill -9 -f "openclaw" 2>/dev/null || true + openclaw gateway stop 2>/dev/null || pkill -f "openclaw gateway" 2>/dev/null || true } trap cleanup EXIT @@ -57,7 +58,7 @@ restart_gateway() { local logfile=$1 echo " Stopping gateway..." openclaw gateway stop 2>/dev/null || true - pkill -9 -f "openclaw" 2>/dev/null || true + pkill -f "openclaw gateway" 2>/dev/null || true sleep 3 echo " Starting gateway..." openclaw gateway > "$logfile" 2>&1 & @@ -81,16 +82,16 @@ run_test_sequence() { extract_last_usage() { local logfile=$1 # Find the last complete usage block and extract values - local input=$(grep '"input":' "$logfile" 2>/dev/null | tail -1 | grep -oP '\d+' || echo "0") - local cache_read=$(grep '"cacheRead":' "$logfile" 2>/dev/null | tail -1 | grep -oP '\d+' || echo "0") - local cache_write=$(grep '"cacheWrite":' "$logfile" 2>/dev/null | tail -1 | grep -oP '\d+' || echo "0") + local input=$(grep '"input":' "$logfile" 2>/dev/null | tail -1 | sed 's/[^0-9]//g' || echo "0") + local cache_read=$(grep '"cacheRead":' "$logfile" 2>/dev/null | tail -1 | sed 's/[^0-9]//g' || echo "0") + local cache_write=$(grep '"cacheWrite":' "$logfile" 2>/dev/null | tail -1 | sed 's/[^0-9]//g' || echo "0") echo "$input $cache_read $cache_write" } extract_chars_saved() { local logfile=$1 # Look for ContextPilot stats line - grep "Stats:" "$logfile" 2>/dev/null | tail -1 | grep -oP '\d+(?= chars saved)' || echo "0" + grep "Stats:" "$logfile" 2>/dev/null | tail -1 | sed -n 's/.*\([0-9][0-9,]*\) chars saved.*/\1/p' | tr -d ',' || echo "0" } # ========================================== @@ -104,9 +105,9 @@ restart_gateway "$LOG_WITH" run_test_sequence WITH_USAGE=$(extract_last_usage "$LOG_WITH") -WITH_INPUT=$(echo $WITH_USAGE | cut -d' ' -f1) -WITH_CACHE_READ=$(echo $WITH_USAGE | cut -d' ' -f2) -WITH_CACHE_WRITE=$(echo $WITH_USAGE | cut -d' ' -f3) +WITH_INPUT=$(echo "$WITH_USAGE" | cut -d' ' -f1) +WITH_CACHE_READ=$(echo "$WITH_USAGE" | cut -d' ' -f2) +WITH_CACHE_WRITE=$(echo "$WITH_USAGE" | cut -d' ' -f3) WITH_CHARS=$(extract_chars_saved "$LOG_WITH") echo "" @@ -127,9 +128,9 @@ restart_gateway "$LOG_WITHOUT" run_test_sequence WITHOUT_USAGE=$(extract_last_usage "$LOG_WITHOUT") -WITHOUT_INPUT=$(echo $WITHOUT_USAGE | cut -d' ' -f1) -WITHOUT_CACHE_READ=$(echo $WITHOUT_USAGE | cut -d' ' -f2) -WITHOUT_CACHE_WRITE=$(echo $WITHOUT_USAGE | cut -d' ' -f3) +WITHOUT_INPUT=$(echo "$WITHOUT_USAGE" | cut -d' ' -f1) +WITHOUT_CACHE_READ=$(echo "$WITHOUT_USAGE" | cut -d' ' -f2) +WITHOUT_CACHE_WRITE=$(echo "$WITHOUT_USAGE" | cut -d' ' -f3) echo "" echo " Results:" diff --git a/openclaw-plugin/openclaw.plugin.json b/openclaw-plugin/openclaw.plugin.json index f9faee6..a8c336e 100644 --- a/openclaw-plugin/openclaw.plugin.json +++ b/openclaw-plugin/openclaw.plugin.json @@ -2,14 +2,14 @@ "id": "contextpilot", "name": "ContextPilot", "description": "Faster long-context inference via context reuse — reorders, deduplicates, and injects cache control for maximum prefix cache sharing.", - "version": "0.3.0", + "version": "0.2.0", "configSchema": { "type": "object", "additionalProperties": false, "properties": { "scope": { "type": "string", - "enum": ["all", "system", "tool_results"], + "enum": ["all", "tool_results"], "description": "Which messages ContextPilot optimizes", "default": "all" } diff --git a/openclaw-plugin/src/engine/compute-distance.ts b/openclaw-plugin/src/engine/compute-distance.ts index 8aad2d5..5ae024f 100644 --- a/openclaw-plugin/src/engine/compute-distance.ts +++ b/openclaw-plugin/src/engine/compute-distance.ts @@ -60,21 +60,11 @@ export function computeDistancesBatch( return Array.from({ length: nQueries }, () => new Array(nTargets).fill(0)); } - const totalPairs = nQueries * nTargets; const distances: number[][] = Array.from( { length: nQueries }, () => new Array(nTargets).fill(1.0) ); - if (totalPairs < 1000) { - for (let i = 0; i < nQueries; i += 1) { - for (let j = 0; j < nTargets; j += 1) { - distances[i][j] = computeDistanceSingle(queries[i], targets[j], alpha); - } - } - return distances; - } - for (let i = 0; i < nQueries; i += 1) { for (let j = 0; j < nTargets; j += 1) { distances[i][j] = computeDistanceSingle(queries[i], targets[j], alpha); diff --git a/openclaw-plugin/src/engine/conversation-tracker.ts b/openclaw-plugin/src/engine/conversation-tracker.ts index 845ee68..5e56f39 100644 --- a/openclaw-plugin/src/engine/conversation-tracker.ts +++ b/openclaw-plugin/src/engine/conversation-tracker.ts @@ -26,16 +26,18 @@ export interface ConversationTrackerStats { export class ConversationTracker { private _requests: Map; private _hintTemplate: string; + private _maxTrackedRequests: number; private _stats: { totalRequests: number; totalDedupCalls: number; totalDocsDeduplicated: number; }; - constructor(hintTemplate?: string) { + constructor(hintTemplate?: string, maxTrackedRequests: number = 256) { this._requests = new Map(); this._hintTemplate = hintTemplate ?? "Please refer to [Doc {doc_id}] from the previous conversation turn."; + this._maxTrackedRequests = maxTrackedRequests; this._stats = { totalRequests: 0, totalDedupCalls: 0, @@ -63,6 +65,14 @@ export class ConversationTracker { this._requests.set(requestId, history); this._stats.totalRequests += 1; + // LRU eviction: remove oldest entries when over limit + if (this._requests.size > this._maxTrackedRequests) { + const oldest = this._requests.keys().next().value; + if (oldest !== undefined) { + this._requests.delete(oldest); + } + } + return history; } diff --git a/openclaw-plugin/src/engine/dedup.ts b/openclaw-plugin/src/engine/dedup.ts index eb3acb0..bb173f1 100644 --- a/openclaw-plugin/src/engine/dedup.ts +++ b/openclaw-plugin/src/engine/dedup.ts @@ -75,7 +75,8 @@ function emptyDedupResult(): DedupResult { export function hashString(str: string): number { let h = 5381; for (let i = 0; i < str.length; i++) { - h = ((h << 5) + h + str.charCodeAt(i)) & 0xFFFFFFFF; + // Use Math.imul for safe 32-bit multiplication to avoid float overflow + h = (Math.imul(h, 33) + str.charCodeAt(i)) | 0; } return h >>> 0; } diff --git a/openclaw-plugin/src/engine/eviction-heap.ts b/openclaw-plugin/src/engine/eviction-heap.ts index 69de6ff..2c61c43 100644 --- a/openclaw-plugin/src/engine/eviction-heap.ts +++ b/openclaw-plugin/src/engine/eviction-heap.ts @@ -215,9 +215,7 @@ export class EvictionHeap { this._metadata.delete(nodeId); } - if (this._inHeap.has(nodeId)) { - this._inHeap.set(nodeId, false); - } + this._inHeap.delete(nodeId); } getNodeByRequestId(requestId: string): NodeMetadata | null { diff --git a/openclaw-plugin/src/engine/inter-scheduler.ts b/openclaw-plugin/src/engine/inter-scheduler.ts index 702eebc..88ad6e3 100644 --- a/openclaw-plugin/src/engine/inter-scheduler.ts +++ b/openclaw-plugin/src/engine/inter-scheduler.ts @@ -15,7 +15,7 @@ export class InterContextScheduler { const searchPaths = clusteringResult.searchPaths; const groupsByRoot = this._groupByRootPrefix(searchPaths); - const sortedGroups = this._sortGroupsByPathLength(groupsByRoot, searchPaths, reorderedContexts); + const sortedGroups = this._sortGroupsByPathLength(groupsByRoot, searchPaths); const allGroupsWithInfo: Array<[number, number[]]> = []; for (const groupIndices of sortedGroups) { @@ -61,10 +61,8 @@ export class InterContextScheduler { _sortGroupsByPathLength( groupsByRoot: Map, - searchPaths: number[][], - contexts: number[][] + searchPaths: number[][] ): number[][] { - void contexts; const sortedGroups: number[][] = []; for (const groupIndices of groupsByRoot.values()) { diff --git a/openclaw-plugin/src/engine/live-index.ts b/openclaw-plugin/src/engine/live-index.ts index 777b1e8..29ad83a 100644 --- a/openclaw-plugin/src/engine/live-index.ts +++ b/openclaw-plugin/src/engine/live-index.ts @@ -6,7 +6,7 @@ import { IntraContextOrderer } from './intra-ordering.js'; import { computeDistanceSingle, computeDistancesBatch } from './compute-distance.js'; import { ConversationTracker, type DeduplicationResult } from './conversation-tracker.js'; import { EvictionHeap } from './eviction-heap.js'; -import crypto from 'crypto'; +import * as crypto from 'node:crypto'; export function computePrefixLength(list1: number[], list2: number[]): number { let length = 0; @@ -50,8 +50,8 @@ export class ContextPilot extends ContextIndex { static readonly _DEFAULT_CONVERSATION = "_default"; - constructor(alpha: number = 0.001, useGpu: boolean = false, linkageMethod: string = "average", batchSize: number = 10000) { - super(alpha, useGpu, linkageMethod, batchSize); + constructor(alpha: number = 0.001, useGpu: boolean = false, linkageMethod: "single" | "complete" | "average" = "average", batchSize: number = 10000) { + super({ alpha, useGpu, linkageMethod, batchSize }); } getAllRequestIds(): Set { @@ -448,14 +448,15 @@ export class ContextPilot extends ContextIndex { const newNodeId = this.nextNodeId++; const content = sourceNode.docIds ? [...sourceNode.docIds] : (sourceNode.content ? [...sourceNode.content] : []); - const originalIndices = sourceNode.originalIndices ? new Set(sourceNode.originalIndices) : new Set(); - + const originalIndices: Set = sourceNode.originalIndices ? new Set(sourceNode.originalIndices) : new Set(); + const newNode = new ClusterNode( newNodeId, - content, + new Set(content), + originalIndices, + 0.0, [], - parentId, - originalIndices + parentId ); if (sourceNode.docIds) { @@ -475,15 +476,14 @@ export class ContextPilot extends ContextIndex { const parentTokens = this.metadata.has(parentId) ? this.metadata.get(parentId)!.totalTokens : 0; - const metadata = new NodeMetadata( - newNodeId, - isLeaf ? initialTokens : 0, - isLeaf ? Math.max(0, initialTokens - parentTokens) : 0, + const metadata = new NodeMetadata(newNodeId, { + totalTokens: isLeaf ? initialTokens : 0, + extraTokens: isLeaf ? Math.max(0, initialTokens - parentTokens) : 0, searchPath, - sourceNode.docIds ? [...sourceNode.docIds] : null, + docIds: sourceNode.docIds ? [...sourceNode.docIds] : null, isLeaf, - requestId - ); + requestId, + }); this.metadata.set(newNodeId, metadata); @@ -629,15 +629,14 @@ export class ContextPilot extends ContextIndex { leafDocIds = node.docIds || node.doc_ids; } - const metadata = new NodeMetadata( - nodeId, + const metadata = new NodeMetadata(nodeId, { totalTokens, extraTokens, searchPath, - leafDocIds, + docIds: leafDocIds, isLeaf, - requestId - ); + requestId, + }); this.metadata.set(nodeId, metadata); @@ -767,7 +766,8 @@ export class ContextPilot extends ContextIndex { if (!currentNode || currentNode.isLeaf || !currentNode.children || currentNode.children.length === 0) { const docs = this._getNodeDocs(currentId); if (docs && currentId !== this.rootId) { - const overlap = Array.from(contextSet).filter(x => new Set(docs).has(x)).length; + const docsSet = new Set(docs); + const overlap = Array.from(contextSet).filter(x => docsSet.has(x)).length; const hasPrefix = overlap > 0 ? contextSet.has(docs[0]) : false; return [currentPath, currentId, overlap, hasPrefix]; } @@ -798,7 +798,8 @@ export class ContextPilot extends ContextIndex { for (let j = 0; j < childIds.length; j++) { const docs = childDocsList[j]; - const overlap = Array.from(contextSet).filter(x => new Set(docs).has(x)).length; + const docsSet = new Set(docs); + const overlap = Array.from(contextSet).filter(x => docsSet.has(x)).length; if (overlap === 0) continue; const dist = Array.isArray(distances[0]) ? distances[0][j] : distances[j]; @@ -814,7 +815,8 @@ export class ContextPilot extends ContextIndex { if (currentId !== this.rootId) { const docs = this._getNodeDocs(currentId); if (docs) { - const overlap = Array.from(contextSet).filter(x => new Set(docs).has(x)).length; + const docsSet2 = new Set(docs); + const overlap = Array.from(contextSet).filter(x => docsSet2.has(x)).length; return [currentPath, currentId, overlap, true]; } } @@ -930,10 +932,11 @@ export class ContextPilot extends ContextIndex { const newNodeId = this.nextNodeId++; const newNode = new ClusterNode( newNodeId, - context, + new Set(context), + new Set([newNodeId]), + 0.0, [], - parentNode.nodeId, - new Set([newNodeId]) + parentNode.nodeId ); this.nodes.set(newNodeId, newNode); @@ -942,15 +945,14 @@ export class ContextPilot extends ContextIndex { const parentTokens = this.metadata.has(parentNode.nodeId) ? this.metadata.get(parentNode.nodeId)!.totalTokens : 0; const newSearchPath = [...searchPath, parentNode.children.length - 1]; - const metadata = new NodeMetadata( - newNodeId, + const metadata = new NodeMetadata(newNodeId, { totalTokens, - Math.max(0, totalTokens - parentTokens), - newSearchPath, - context, - true, - requestId - ); + extraTokens: Math.max(0, totalTokens - parentTokens), + searchPath: newSearchPath, + docIds: context, + isLeaf: true, + requestId, + }); this.metadata.set(newNodeId, metadata); this._requestToNode.set(requestId, newNodeId); @@ -975,10 +977,11 @@ export class ContextPilot extends ContextIndex { const newLeafId = this.nextNodeId++; const newLeaf = new ClusterNode( newLeafId, - context, + new Set(context), + new Set([newLeafId]), + 0.0, [], - parentNode.nodeId, - new Set([newLeafId]) + parentNode.nodeId ); this.nodes.set(newLeafId, newLeaf); @@ -987,15 +990,14 @@ export class ContextPilot extends ContextIndex { const newSearchPath = [...parentSearchPath, parentNode.children.length - 1]; const parentTokens = this.metadata.has(parentNode.nodeId) ? this.metadata.get(parentNode.nodeId)!.totalTokens : 0; - const newMetadata = new NodeMetadata( - newLeafId, + const newMetadata = new NodeMetadata(newLeafId, { totalTokens, - Math.max(0, totalTokens - parentTokens), - newSearchPath, - context, - true, - requestId - ); + extraTokens: Math.max(0, totalTokens - parentTokens), + searchPath: newSearchPath, + docIds: context, + isLeaf: true, + requestId, + }); this.metadata.set(newLeafId, newMetadata); this._requestToNode.set(requestId, newLeafId); @@ -1042,10 +1044,11 @@ export class ContextPilot extends ContextIndex { const newInternal = new ClusterNode( newInternalId, - Array.from(allContent), + allContent, + new Set(), + 0.0, [leafNode.nodeId], - parentId, - new Set() + parentId ); newInternal.docIds = [...sharedPrefix]; @@ -1066,15 +1069,14 @@ export class ContextPilot extends ContextIndex { const internalPath = [...parentSearchPath, leafChildIdx]; - const internalMeta = new NodeMetadata( - newInternalId, - internalTokens, - Math.max(0, internalTokens - parentTokens), - internalPath, - [...sharedPrefix], - false, - null - ); + const internalMeta = new NodeMetadata(newInternalId, { + totalTokens: internalTokens, + extraTokens: Math.max(0, internalTokens - parentTokens), + searchPath: internalPath, + docIds: [...sharedPrefix], + isLeaf: false, + requestId: null, + }); this.metadata.set(newInternalId, internalMeta); if (leafMeta) { @@ -1087,10 +1089,11 @@ export class ContextPilot extends ContextIndex { const newLeaf = new ClusterNode( newLeafId, - context, + new Set(context), + new Set([newLeafId]), + 0.0, [], - newInternalId, - new Set([newLeafId]) + newInternalId ); newLeaf.docIds = [...context]; @@ -1099,15 +1102,14 @@ export class ContextPilot extends ContextIndex { const newLeafPath = [...internalPath, 1]; - const newLeafMeta = new NodeMetadata( - newLeafId, + const newLeafMeta = new NodeMetadata(newLeafId, { totalTokens, - Math.max(0, totalTokens - internalTokens), - newLeafPath, - [...context], - true, - requestId - ); + extraTokens: Math.max(0, totalTokens - internalTokens), + searchPath: newLeafPath, + docIds: [...context], + isLeaf: true, + requestId, + }); this.metadata.set(newLeafId, newLeafMeta); this._requestToNode.set(requestId, newLeafId); diff --git a/tests/test_http_intercept.py b/tests/test_http_intercept.py index d746595..1b4be7b 100644 --- a/tests/test_http_intercept.py +++ b/tests/test_http_intercept.py @@ -118,18 +118,19 @@ def client(mock_session): original_session = http_mod._aiohttp_session original_url = http_mod._infer_api_url original_intercept_index = http_mod._intercept_index - original_state = http_mod._intercept_state + original_states = http_mod._intercept_states.copy() http_mod._aiohttp_session = mock_session http_mod._infer_api_url = "http://mock-backend:30000" http_mod._intercept_index = None # reset so each test starts fresh - http_mod._intercept_state = http_mod._InterceptConvState() + http_mod._intercept_states.clear() try: yield TestClient(app, raise_server_exceptions=False) finally: http_mod._aiohttp_session = original_session http_mod._infer_api_url = original_url http_mod._intercept_index = original_intercept_index - http_mod._intercept_state = original_state + http_mod._intercept_states.clear() + http_mod._intercept_states.update(original_states) # ============================================================================ @@ -146,7 +147,7 @@ def _warmup(client, path, body): resp = client.post(path, json=body) assert resp.status_code == 200 # Keep _intercept_index primed, but reset conversation tracking. - http_mod._intercept_state = http_mod._InterceptConvState() + http_mod._intercept_states.clear() return resp @@ -1005,7 +1006,7 @@ def _make_body(marker_id): content1 = mock_session._last_json["messages"][3]["content"] # Reset intercept state for clean comparison - http_mod._intercept_state = http_mod._InterceptConvState() + http_mod._intercept_states.clear() # Request 2 with different id "bbbb" resp2 = client.post("/v1/chat/completions", json=_make_body("cccc2222dddd3333"))