diff --git a/.gitignore b/.gitignore index 55ae470..e4d3908 100644 --- a/.gitignore +++ b/.gitignore @@ -8,3 +8,4 @@ dist/ */.DS_Store *.DS_Store +node_modules/ diff --git a/contextpilot/server/http_server.py b/contextpilot/server/http_server.py index c71af9a..38f9b1e 100644 --- a/contextpilot/server/http_server.py +++ b/contextpilot/server/http_server.py @@ -25,6 +25,7 @@ import os import re import uuid +from dataclasses import dataclass, field as dc_field from typing import List, Dict, Any, Optional, cast from contextlib import asynccontextmanager @@ -105,13 +106,9 @@ # skip-old / dedup-new / reorder-new behaviour. Single-conversation # model (one user at a time). Resets when the system prompt changes. -from dataclasses import dataclass, field as dc_field - - @dataclass class _InterceptConvState: - """Global intercept state for the current conversation.""" - + """Per-session intercept state for a single conversation.""" # Cached copy of the full messages array after modification (reorder/dedup). # On subsequent turns, old messages are replaced with these cached versions # so the inference engine's prefix cache sees identical tokens. @@ -132,7 +129,11 @@ class _InterceptConvState: last_message_count: int = 0 -_intercept_state = _InterceptConvState() +# Per-session state dict keyed by session fingerprint (system prompt + first user msg). +# This allows concurrent multi-user sessions to each maintain their own state. +_intercept_states: dict[str, _InterceptConvState] = {} +_intercept_states_lock = asyncio.Lock() +_MAX_TRACKED_SESSIONS = 64 # LRU eviction threshold # TTFT tracking for averages across a session _ttft_history: List[float] = [] @@ -876,19 +877,13 @@ async def reset_index(): After reset, you must call /reorder again before other operations. 
""" - global \ - _index, \ - _str_to_id, \ - _id_to_str, \ - _next_str_id, \ - _intercept_index, \ - _intercept_state + global _index, _str_to_id, _id_to_str, _next_str_id, _intercept_index, _intercept_states # Reset conversation tracker reset_conversation_tracker() - # Reset intercept conversation state - _intercept_state = _InterceptConvState() + # Reset all per-session intercept states + _intercept_states.clear() _intercept_index = None # Reset string-to-ID mapping @@ -1186,31 +1181,88 @@ def _hash_text(text: str) -> str: return hashlib.sha256(text.encode("utf-8", errors="replace")).hexdigest()[:16] -def _get_intercept_state(body: Dict[str, Any]) -> _InterceptConvState: - """Return the global intercept state, resetting if the conversation changed. +def _session_fingerprint(body: Dict[str, Any]) -> str: + """Derive a session fingerprint from the system prompt + first user message. + + In a multi-turn conversation, messages grow but the system prompt and + first user message stay constant. Hashing both gives a stable per-session + key that lets concurrent users each maintain their own intercept state, + even if different users share the same first user message. 
+ """ + msgs = body.get("messages") or [] + parts_to_hash: list[str] = [] + + # Include system prompt for differentiation between sessions + system = body.get("system") + if system: + parts_to_hash.append(str(system)[:500]) + + # Find the first user message (usually msg[0] or msg[1] after system) + for msg in msgs[:5]: + if isinstance(msg, dict) and msg.get("role") == "system": + parts_to_hash.append(str(msg.get("content", ""))[:500]) + elif isinstance(msg, dict) and msg.get("role") == "user": + content = msg.get("content", "") + if isinstance(content, list): + # OpenAI format: [{type: text, text: "..."}] + text_parts = [p.get("text", "") for p in content + if isinstance(p, dict)] + content = "".join(text_parts) + parts_to_hash.append(str(content)) + break + + if not parts_to_hash: + # Fallback: hash first two messages + return _hash_text(json.dumps(msgs[:2], sort_keys=True)) + + return _hash_text("\x00".join(parts_to_hash)) + + +async def _get_intercept_state(body: Dict[str, Any]) -> _InterceptConvState: + """Return per-session intercept state, creating or resetting as needed. + + Uses the system prompt + first user message as a session fingerprint so + concurrent multi-user sessions each get their own state. Detection: in a multi-turn agent conversation the messages array only grows. If the count drops, either a new session started or the host - compacted old messages. Either way, reset all state: the old KV cache + compacted old messages. Either way, reset state: the old KV cache entries are gone (compaction rewrites content), so cached_messages, seen_doc_hashes, and reorder state are all invalid. 
""" - global _intercept_state + global _intercept_states + session_key = _session_fingerprint(body) msg_count = len(body.get("messages") or []) - if msg_count < _intercept_state.last_message_count: - logger.info( - f"Intercept: message count dropped " - f"({msg_count} < {_intercept_state.last_message_count}), " - f"resetting all state (compaction or new session)" - ) - _intercept_state = _InterceptConvState() - # Skip reorder for the first post-compaction tool result: - # prefix cache is fully invalidated, nothing to align with. - # Go straight to dedup mode so docs are registered for future turns. - _intercept_state.first_tool_result_done = True - _intercept_state.system_processed = True - _intercept_state.last_message_count = msg_count - return _intercept_state + + async with _intercept_states_lock: + state = _intercept_states.get(session_key) + + if state is None: + # New session + state = _InterceptConvState() + state.system_processed = True + logger.info( + f"Intercept: new session {session_key[:8]}… " + f"({msg_count} msgs, {len(_intercept_states)} active sessions)" + ) + # Evict oldest sessions if over limit + if len(_intercept_states) >= _MAX_TRACKED_SESSIONS: + oldest_key = next(iter(_intercept_states)) + del _intercept_states[oldest_key] + logger.info(f"Intercept: evicted session {oldest_key[:8]}…") + _intercept_states[session_key] = state + elif msg_count < state.last_message_count: + logger.info( + f"Intercept: session {session_key[:8]}… message count dropped " + f"({msg_count} < {state.last_message_count}), " + f"resetting state (compaction or restart)" + ) + state = _InterceptConvState() + state.system_processed = True + _intercept_states[session_key] = state + + state.last_message_count = msg_count + return state def _deduplicate_docs(docs: List[str], state: _InterceptConvState) -> tuple: @@ -1261,6 +1313,14 @@ def _strip_external_content_ids(body: Any) -> Any: _OPENAI_CHAT = "openai_chat" _ANTHROPIC_MESSAGES = "anthropic_messages" +# Hop-by-hop 
headers that must not be forwarded by proxies. +_HOP_BY_HOP = frozenset(( + "host", "connection", "keep-alive", "transfer-encoding", + "te", "trailer", "upgrade", "proxy-authorization", + "proxy-authenticate", "content-length", +)) + + def _doc_preview(doc: str, max_len: int = 60) -> str: """Truncate a document string for log preview.""" @@ -1368,14 +1428,12 @@ async def _intercept_and_forward(request: Request, api_format: str): total_reordered = 0 total_deduped = 0 total_slimmed = 0 - tool_results_skipped = 0 # TODO: never incremented — wire up or remove - _chars_before_slim = 0 - _chars_after_slim = 0 + chars_before_slim = 0 + chars_after_slim = 0 system_count = 0 tool_result_count = 0 - reorder_details = [] # collect per-source reorder info + reorder_details = [] _dedup_result = DedupResult() - state = _intercept_state # ── Debug: log conversation shape, divergence, and tool_result details ── _debug_messages = body.get("messages") or [] @@ -1383,12 +1441,11 @@ async def _intercept_and_forward(request: Request, api_format: str): # Per-message hashes for this request _debug_msg_hashes = [] - if logger.isEnabledFor(logging.DEBUG): - for m in _debug_messages: - h = hashlib.sha256( - json.dumps(m, sort_keys=True, ensure_ascii=False).encode() - ).hexdigest()[:12] - _debug_msg_hashes.append(h) + for m in _debug_messages: + h = hashlib.sha256( + json.dumps(m, sort_keys=True, ensure_ascii=False).encode() + ).hexdigest()[:12] + _debug_msg_hashes.append(h) # Build tool_call_id → function name mapping from assistant messages _tool_call_names = {} @@ -1421,7 +1478,7 @@ async def _intercept_and_forward(request: Request, api_format: str): _chars = len(_content_str) _is_compacted = "[compacted:" in _content_str _preview = _content_str[:150].replace("\n", "\\n") - logger.info( + logger.debug( f" msg[{idx}] role={_role} fn={_fn_label} " f"tool_call_id={_tc_id} " f"chars={_chars} compacted={_is_compacted} " @@ -1439,69 +1496,71 @@ async def _intercept_and_forward(request: Request, 
api_format: str): _chars = len(_tc_str) _is_compacted = "[compacted:" in _tc_str _preview = _tc_str[:150].replace("\n", "\\n") - logger.info( + logger.debug( f" msg[{idx}].content[{bi}] type=tool_result " f"tool_use_id={_tu_id} chars={_chars} " f"compacted={_is_compacted} preview: {_preview}" ) - global _debug_prev_msg_hashes - if "_debug_prev_msg_hashes" not in globals(): - _debug_prev_msg_hashes = [] - - _prev_n = len(_debug_prev_msg_hashes) - if _prev_n > 0 and _prev_n <= _debug_msg_count: - _first_diff = None - for idx in range(_prev_n): - if _debug_msg_hashes[idx] != _debug_prev_msg_hashes[idx]: - _first_diff = idx - break - if _first_diff is not None: - _diff_msg = _debug_messages[_first_diff] - _diff_role = _diff_msg.get("role", "?") - _diff_content = str(_diff_msg.get("content", "")) - logger.warning( - f"Intercept PREFIX MISMATCH at msg[{_first_diff}] " - f"(role={_diff_role}), " - f"hash was {_debug_prev_msg_hashes[_first_diff]} " - f"now {_debug_msg_hashes[_first_diff]}. " - f"Content preview ({len(_diff_content)} chars): " - f"{_diff_content[:300]}..." 
- ) - else: - logger.info( - f"Intercept: {_debug_msg_count} msgs (prev={_prev_n}), " - f"prefix[:{_prev_n}] MATCH, " - f"{_debug_msg_count - _prev_n} new msgs" - ) - else: - logger.info(f"Intercept: {_debug_msg_count} msgs (first request or reset)") - - _debug_prev_msg_hashes = list(_debug_msg_hashes) + # Per-session debug logging (uses session fingerprint, not global state) + _session_key = _session_fingerprint(body) + _session_tag = _session_key[:8] + logger.info( + f"Intercept: session={_session_tag} {_debug_msg_count} msgs" + ) # ── Format handler (strategy pattern) ──────────────────────────── handler = get_format_handler(api_format) if config.enabled: try: - # body is already a fresh copy from _strip_external_content_ids + body = copy.deepcopy(body) # ── Conversation-aware state (single-conversation model) ── - state = _get_intercept_state(body) + state = await _get_intercept_state(body) # ── Replace old messages with cached (modified) versions ── # On subsequent turns, the host sends original (unmodified) # messages. Replace them with our cached modified versions # so the inference engine's prefix cache sees identical tokens. + # IMPORTANT: Only replace if the old messages actually match + # (same session/user). Without this check, concurrent requests + # from different sessions would get cross-contaminated. 
old_msg_count = len(state.cached_messages) if old_msg_count > 0: msgs = body.get("messages", []) if len(msgs) >= old_msg_count: - msgs[:old_msg_count] = copy.deepcopy(state.cached_messages) - logger.info( - f"Intercept: replaced {old_msg_count} old messages " - f"with cached versions for prefix cache consistency" - ) + # Verify prefix match before replacing + prefix_ok = True + for _ci in range(old_msg_count): + _cached_h = hashlib.sha256( + json.dumps(state.cached_messages[_ci], + sort_keys=True, + ensure_ascii=False).encode() + ).hexdigest()[:16] + _current_h = hashlib.sha256( + json.dumps(msgs[_ci], + sort_keys=True, + ensure_ascii=False).encode() + ).hexdigest()[:16] + if _cached_h != _current_h: + prefix_ok = False + break + if prefix_ok: + msgs[:old_msg_count] = copy.deepcopy( + state.cached_messages) + logger.info( + f"Intercept: replaced {old_msg_count} old " + f"messages with cached versions for prefix " + f"cache consistency" + ) + else: + logger.info( + f"Intercept: prefix mismatch at msg[{_ci}], " + f"skipping cached message replay " + f"(different session/user)" + ) + old_msg_count = 0 handler.restore_system(body, state.cached_system) multi = handler.extract_all(body, config) @@ -1523,7 +1582,7 @@ async def _intercept_and_forward(request: Request, api_format: str): } ) handler.reconstruct_system( - body, extraction, reordered_docs, sys_idx, config + body, extraction, reordered_docs, sys_idx ) total_reordered += len(extraction.documents) system_count = 1 @@ -1570,8 +1629,8 @@ async def _intercept_and_forward(request: Request, api_format: str): f"previous tool result ({orig_chars} chars). 
" f"Refer to the earlier result above.]" ] - _chars_before_slim += orig_chars - _chars_after_slim += len(new_docs[0]) + chars_before_slim += orig_chars + chars_after_slim += len(new_docs[0]) total_slimmed += deduped reorder_details.append( { @@ -1626,13 +1685,8 @@ async def _intercept_and_forward(request: Request, api_format: str): single_doc.tool_call_id ) - if ( - total_reordered > 0 - or total_deduped > 0 - or total_slimmed > 0 - or tool_results_skipped > 0 - ): - saved = _chars_before_slim - _chars_after_slim + if total_reordered > 0 or total_deduped > 0 or total_slimmed > 0: + saved = chars_before_slim - chars_after_slim saved_tokens = saved // 4 if saved > 0 else 0 logger.info( f"Intercept ({api_format}): reordered {total_reordered}, " @@ -1648,8 +1702,8 @@ async def _intercept_and_forward(request: Request, api_format: str): _dedup_result = dedup_responses_api(body, chunk_modulus=_chunk_modulus) if _dedup_result.chars_saved > 0: - _chars_before_slim += _dedup_result.chars_before - _chars_after_slim += _dedup_result.chars_after + chars_before_slim += _dedup_result.chars_before + chars_after_slim += _dedup_result.chars_after logger.info( f"Dedup ({api_format}): " f"blocks={_dedup_result.blocks_deduped}/{_dedup_result.blocks_total}, " @@ -1697,22 +1751,6 @@ async def _intercept_and_forward(request: Request, api_format: str): else: target_url = f"{infer_api_url}{handler.target_path()}" - # Build outbound headers: forward everything except X-ContextPilot-* - # and hop-by-hop headers that must not be forwarded by proxies. 
- _HOP_BY_HOP = frozenset( - ( - "host", - "connection", - "keep-alive", - "transfer-encoding", - "te", - "trailer", - "upgrade", - "proxy-authorization", - "proxy-authenticate", - "content-length", - ) - ) if _cloud_mode and _cloud_adapter is not None and _cloud_api_key: outbound_headers = _cloud_adapter.get_auth_headers(_cloud_api_key) else: @@ -1732,34 +1770,30 @@ async def _intercept_and_forward(request: Request, api_format: str): total_reordered > 0 or total_deduped > 0 or total_slimmed > 0 - or tool_results_skipped > 0 or _dedup_result.chars_saved > 0 ) if _has_activity: - cp_response_headers["X-ContextPilot-Result"] = json.dumps( - { - "intercepted": True, - "documents_reordered": total_reordered > 0, - "total_documents": total_reordered, - "documents_deduplicated": total_deduped, - "documents_slimmed": total_slimmed, - "chars_before_slim": _chars_before_slim, - "chars_after_slim": _chars_after_slim, - "chars_saved": _chars_before_slim - _chars_after_slim, - "tool_results_skipped": tool_results_skipped, - "message_count": state.last_message_count, - "sources": { - "system": system_count, - "tool_results": tool_result_count, - }, - "reorder_details": reorder_details, - "dedup": { - "blocks_deduped": _dedup_result.blocks_deduped, - "blocks_total": _dedup_result.blocks_total, - "chars_saved": _dedup_result.chars_saved, - }, - } - ) + cp_response_headers["X-ContextPilot-Result"] = json.dumps({ + "intercepted": True, + "documents_reordered": total_reordered > 0, + "total_documents": total_reordered, + "documents_deduplicated": total_deduped, + "documents_slimmed": total_slimmed, + "chars_before_slim": chars_before_slim, + "chars_after_slim": chars_after_slim, + "chars_saved": chars_before_slim - chars_after_slim, + "message_count": state.last_message_count, + "sources": { + "system": system_count, + "tool_results": tool_result_count, + }, + "reorder_details": reorder_details, + "dedup": { + "blocks_deduped": _dedup_result.blocks_deduped, + "blocks_total": 
_dedup_result.blocks_total, + "chars_saved": _dedup_result.chars_saved, + }, + }) is_stream = body.get("stream", False) @@ -1785,7 +1819,7 @@ async def _stream_with_headers(): async for chunk in resp.content.iter_any(): if not _ttft_logged: _ttft_ms = (time.monotonic() - _request_start) * 1000 - _saved = _chars_before_slim - _chars_after_slim + _saved = chars_before_slim - chars_after_slim _log_ttft(_ttft_ms, total_slimmed, _saved) _ttft_logged = True yield chunk @@ -1795,12 +1829,9 @@ async def _stream_with_headers(): status, fwd_headers = cast(tuple[int, Dict[str, str]], first_event) async def _stream_content_only(): - try: - async for event in stream_iter: - if isinstance(event, bytes): - yield event - finally: - await stream_iter.aclose() + async for event in stream_iter: + if isinstance(event, bytes): + yield event return StreamingResponse( _stream_content_only(), @@ -1814,13 +1845,9 @@ async def _stream_content_only(): target_url, json=body, headers=outbound_headers ) as resp: _ttft_ms = (time.monotonic() - _request_start) * 1000 - _saved = _chars_before_slim - _chars_after_slim + _saved = chars_before_slim - chars_after_slim _log_ttft(_ttft_ms, total_slimmed, _saved) - try: - result = await resp.json() - except (json.JSONDecodeError, aiohttp.ContentTypeError): - text = await resp.text() - raise HTTPException(status_code=resp.status, detail=text[:500]) + result = await resp.json() # ── Cloud mode: track cache metrics from response ── if ( @@ -1858,7 +1885,7 @@ async def _stream_content_only(): except aiohttp.ClientError as e: logger.error(f"Error forwarding intercepted request: {e}") - raise HTTPException(status_code=502, detail="Backend connection error") + raise HTTPException(status_code=502, detail=f"Backend error: {str(e)}") @app.post("/v1/chat/completions") diff --git a/openclaw-plugin/README.md b/openclaw-plugin/README.md new file mode 100644 index 0000000..edd6dbd --- /dev/null +++ b/openclaw-plugin/README.md @@ -0,0 +1,137 @@ +# 
@contextpilot/contextpilot + +OpenClaw plugin for [ContextPilot](https://github.com/EfficientContext/ContextPilot) — faster long-context inference via in-process context optimization. **Zero external dependencies** — no Python, no proxy server, just install and go. + +## What It Does + +ContextPilot registers as an OpenClaw **Context Engine** and optimizes every LLM request by: + +1. **Extracting** documents from tool results +2. **Reordering** documents for maximum prefix cache sharing across turns +3. **Deduplicating** repeated content blocks with compact reference hints +4. **Injecting** cache control markers (Anthropic `cache_control: { type: "ephemeral" }`) + +All processing happens in-process — no external services needed. + +## Installation + +### From npm (when published) + +```bash +openclaw plugins install @contextpilot/contextpilot +``` + +### From local path (development) + +Add to `~/.openclaw/openclaw.json`: + +```json +{ + "plugins": { + "load": { + "paths": [ + "/path/to/ContextPilot/openclaw-plugin" + ] + } + } +} +``` + +## Configuration + +In `~/.openclaw/openclaw.json`, enable the plugin and set it as the context engine: + +```json +{ + "plugins": { + "slots": { + "contextEngine": "contextpilot" + }, + "entries": { + "contextpilot": { + "enabled": true, + "config": { + "scope": "all" + } + } + } + }, + "tools": { + "allow": ["contextpilot"] + } +} +``` + +### Scope Options + +| Scope | Tool Results | Description | +|:------|:------------:|:------------| +| `all` (default) | Optimized | Optimize all tool results | +| `tool_results` | Optimized | Same as `all` | + +> **Note:** System prompt optimization is not currently available — OpenClaw's context engine API does not expose the system prompt to plugins. 
+ +## How It Works + +``` +OpenClaw agent request + ↓ +ContextPilot Context Engine (assemble hook) + ├─ Convert OpenClaw message format (toolResult → tool_result) + ├─ Extract documents from tool results + ├─ Reorder for prefix cache sharing + ├─ Deduplicate repeated blocks + ├─ Inject cache_control markers + ↓ +Optimized context → LLM Backend +``` + +The plugin registers as an OpenClaw Context Engine using `api.registerContextEngine()`. The `assemble()` hook intercepts context assembly before each LLM call. + +## Files + +``` +openclaw-plugin/ +├── openclaw.plugin.json # Plugin manifest (id: "contextpilot") +├── package.json # npm package (@contextpilot/contextpilot) +├── src/ +│ ├── index.ts # Plugin entry point +│ └── engine/ +│ ├── cache-control.ts # Cache control injection +│ ├── dedup.ts # Content deduplication +│ ├── extract.ts # Document extraction +│ └── live-index.ts # Reordering engine +└── tsconfig.json +``` + +## Agent Tool + +| Tool | Description | +|------|-------------| +| `contextpilot_status` | Check engine status, request count, and chars saved | + +> **Note:** The status tool is registered but may not be visible to agents due to OpenClaw plugin API limitations. + +## Verifying It Works + +Check the gateway logs: + +``` +[ContextPilot] Stats: 5 requests, 28,356 chars saved (~7,089 tokens, ~$0.0213) +``` + +## Expected Savings + +Savings depend on conversation length and repeated content: + +| Scenario | Chars Saved | Token Reduction | +|:---------|------------:|----------------:| +| Short session (few tool calls) | 0-5K | ~0-5% | +| Medium session (10+ file reads) | 20-50K | ~10-20% | +| Long session (repeated large files) | 100K+ | ~30-50% | + +Run `./benchmark.sh` to measure with/without comparison on your workload. 
+ +## License + +Apache-2.0 diff --git a/openclaw-plugin/benchmark.sh b/openclaw-plugin/benchmark.sh new file mode 100755 index 0000000..a703866 --- /dev/null +++ b/openclaw-plugin/benchmark.sh @@ -0,0 +1,176 @@ +#!/bin/bash +# +# ContextPilot Token Usage Benchmark +# Compares prefill/input tokens with and without the plugin +# + +set -e + +OPENCLAW_CONFIG="$HOME/.openclaw/openclaw.json" +BACKUP_CONFIG="$HOME/.openclaw/openclaw.json.bak" +LOG_WITH="/tmp/gw-with-cp.log" +LOG_WITHOUT="/tmp/gw-without-cp.log" + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +TEST_FILE="${SCRIPT_DIR}/src/engine/dedup.ts" + +echo "==========================================" +echo "ContextPilot Token Usage Benchmark" +echo "==========================================" + +# Backup config +cp "$OPENCLAW_CONFIG" "$BACKUP_CONFIG" + +cleanup() { + echo "" + echo "Restoring config..." + cp "$BACKUP_CONFIG" "$OPENCLAW_CONFIG" + rm -f "$BACKUP_CONFIG" + openclaw gateway stop 2>/dev/null || pkill -f "openclaw gateway" 2>/dev/null || true +} +trap cleanup EXIT + +enable_contextpilot() { + python3 << 'PYTHON' +import json, os +path = os.path.expanduser("~/.openclaw/openclaw.json") +with open(path) as f: c = json.load(f) +c.setdefault('plugins', {}).setdefault('slots', {})['contextEngine'] = 'contextpilot' +c['plugins'].setdefault('entries', {}).setdefault('contextpilot', {})['enabled'] = True +with open(path, 'w') as f: json.dump(c, f, indent=2) +PYTHON +} + +disable_contextpilot() { + python3 << 'PYTHON' +import json, os +path = os.path.expanduser("~/.openclaw/openclaw.json") +with open(path) as f: c = json.load(f) +if 'plugins' in c: + c['plugins'].get('slots', {}).pop('contextEngine', None) + if 'contextpilot' in c['plugins'].get('entries', {}): + c['plugins']['entries']['contextpilot']['enabled'] = False +with open(path, 'w') as f: json.dump(c, f, indent=2) +PYTHON +} + +restart_gateway() { + local logfile=$1 + echo " Stopping gateway..." 
+ openclaw gateway stop 2>/dev/null || true + pkill -f "openclaw gateway" 2>/dev/null || true + sleep 3 + echo " Starting gateway..." + openclaw gateway > "$logfile" 2>&1 & + sleep 6 + if ! pgrep -f "openclaw" > /dev/null; then + echo " ERROR: Gateway failed to start" + cat "$logfile" | tail -10 + exit 1 + fi + echo " Gateway running." +} + +run_test_sequence() { + echo " Reading file 3 times to build up context..." + timeout 60 openclaw agent --agent main --message "Read $TEST_FILE and count functions" > /dev/null 2>&1 || true + timeout 60 openclaw agent --agent main --message "Read $TEST_FILE again" > /dev/null 2>&1 || true + timeout 60 openclaw agent --agent main --message "Read $TEST_FILE one more time and summarize" > /dev/null 2>&1 || true + echo " Done." +} + +extract_last_usage() { + local logfile=$1 + # Find the last complete usage block and extract values + local input=$(grep '"input":' "$logfile" 2>/dev/null | tail -1 | sed 's/[^0-9]//g' || echo "0") + local cache_read=$(grep '"cacheRead":' "$logfile" 2>/dev/null | tail -1 | sed 's/[^0-9]//g' || echo "0") + local cache_write=$(grep '"cacheWrite":' "$logfile" 2>/dev/null | tail -1 | sed 's/[^0-9]//g' || echo "0") + echo "$input $cache_read $cache_write" +} + +extract_chars_saved() { + local logfile=$1 + # Look for ContextPilot stats line + grep "Stats:" "$logfile" 2>/dev/null | tail -1 | sed -n 's/.*\([0-9][0-9,]*\) chars saved.*/\1/p' | tr -d ',' || echo "0" +} + +# ========================================== +# Test WITH ContextPilot +# ========================================== +echo "" +echo "Test 1: WITH ContextPilot enabled" +echo "----------------------------------------" +enable_contextpilot +restart_gateway "$LOG_WITH" +run_test_sequence + +WITH_USAGE=$(extract_last_usage "$LOG_WITH") +WITH_INPUT=$(echo "$WITH_USAGE" | cut -d' ' -f1) +WITH_CACHE_READ=$(echo "$WITH_USAGE" | cut -d' ' -f2) +WITH_CACHE_WRITE=$(echo "$WITH_USAGE" | cut -d' ' -f3) +WITH_CHARS=$(extract_chars_saved "$LOG_WITH") + +echo 
"" +echo " Results:" +echo " Input tokens: $WITH_INPUT" +echo " Cache read: $WITH_CACHE_READ" +echo " Cache write: $WITH_CACHE_WRITE" +echo " Chars deduped: $WITH_CHARS" + +# ========================================== +# Test WITHOUT ContextPilot +# ========================================== +echo "" +echo "Test 2: WITHOUT ContextPilot" +echo "----------------------------------------" +disable_contextpilot +restart_gateway "$LOG_WITHOUT" +run_test_sequence + +WITHOUT_USAGE=$(extract_last_usage "$LOG_WITHOUT") +WITHOUT_INPUT=$(echo "$WITHOUT_USAGE" | cut -d' ' -f1) +WITHOUT_CACHE_READ=$(echo "$WITHOUT_USAGE" | cut -d' ' -f2) +WITHOUT_CACHE_WRITE=$(echo "$WITHOUT_USAGE" | cut -d' ' -f3) + +echo "" +echo " Results:" +echo " Input tokens: $WITHOUT_INPUT" +echo " Cache read: $WITHOUT_CACHE_READ" +echo " Cache write: $WITHOUT_CACHE_WRITE" +echo " Chars deduped: 0 (plugin disabled)" + +# ========================================== +# Summary +# ========================================== +echo "" +echo "==========================================" +echo "COMPARISON" +echo "==========================================" +echo "" +printf "%-20s %12s %12s\n" "" "WITH CP" "WITHOUT CP" +printf "%-20s %12s %12s\n" "--------------------" "------------" "------------" +printf "%-20s %12s %12s\n" "Input tokens" "$WITH_INPUT" "$WITHOUT_INPUT" +printf "%-20s %12s %12s\n" "Cache read" "$WITH_CACHE_READ" "$WITHOUT_CACHE_READ" +printf "%-20s %12s %12s\n" "Cache write" "$WITH_CACHE_WRITE" "$WITHOUT_CACHE_WRITE" +printf "%-20s %12s %12s\n" "Chars deduped" "$WITH_CHARS" "0" +echo "" + +# Calculate differences +if [ "$WITH_INPUT" -gt 0 ] && [ "$WITHOUT_INPUT" -gt 0 ]; then + if [ "$WITH_INPUT" -lt "$WITHOUT_INPUT" ]; then + diff=$((WITHOUT_INPUT - WITH_INPUT)) + pct=$((diff * 100 / WITHOUT_INPUT)) + echo ">>> ContextPilot reduced input tokens by $diff ($pct% savings)" + elif [ "$WITH_INPUT" -gt "$WITHOUT_INPUT" ]; then + diff=$((WITH_INPUT - WITHOUT_INPUT)) + pct=$((diff * 100 / WITHOUT_INPUT)) 
+ echo ">>> ContextPilot added $diff tokens ($pct% overhead)" + else + echo ">>> No difference in input tokens" + fi +fi + +if [ "$WITH_CHARS" -gt 0 ]; then + tokens_saved=$((WITH_CHARS / 4)) + echo ">>> Deduplication removed ~$tokens_saved tokens worth of repeated content" +fi diff --git a/openclaw-plugin/openclaw.plugin.json b/openclaw-plugin/openclaw.plugin.json new file mode 100644 index 0000000..a8c336e --- /dev/null +++ b/openclaw-plugin/openclaw.plugin.json @@ -0,0 +1,18 @@ +{ + "id": "contextpilot", + "name": "ContextPilot", + "description": "Faster long-context inference via context reuse — reorders, deduplicates, and injects cache control for maximum prefix cache sharing.", + "version": "0.2.0", + "configSchema": { + "type": "object", + "additionalProperties": false, + "properties": { + "scope": { + "type": "string", + "enum": ["all", "tool_results"], + "description": "Which messages ContextPilot optimizes", + "default": "all" + } + } + } +} diff --git a/openclaw-plugin/package-lock.json b/openclaw-plugin/package-lock.json new file mode 100644 index 0000000..aeda12e --- /dev/null +++ b/openclaw-plugin/package-lock.json @@ -0,0 +1,22 @@ +{ + "name": "@contextpilot/openclaw-plugin", + "version": "0.2.0", + "lockfileVersion": 3, + "requires": true, + "packages": { + "": { + "name": "@contextpilot/openclaw-plugin", + "version": "0.2.0", + "license": "Apache-2.0", + "dependencies": { + "@sinclair/typebox": "^0.34.49" + } + }, + "node_modules/@sinclair/typebox": { + "version": "0.34.49", + "resolved": "https://registry.npmjs.org/@sinclair/typebox/-/typebox-0.34.49.tgz", + "integrity": "sha512-brySQQs7Jtn0joV8Xh9ZV/hZb9Ozb0pmazDIASBkYKCjXrXU3mpcFahmK/z4YDhGkQvP9mWJbVyahdtU5wQA+A==", + "license": "MIT" + } + } +} diff --git a/openclaw-plugin/package.json b/openclaw-plugin/package.json new file mode 100644 index 0000000..7f03fc8 --- /dev/null +++ b/openclaw-plugin/package.json @@ -0,0 +1,36 @@ +{ + "name": "@contextpilot/contextpilot", + "version": "0.2.0", + 
"description": "ContextPilot plugin for OpenClaw — faster long-context inference via in-process context reuse. Zero external dependencies.", + "type": "module", + "license": "Apache-2.0", + "author": "ContextPilot Contributors", + "repository": { + "type": "git", + "url": "https://github.com/EfficientContext/ContextPilot.git", + "directory": "openclaw-plugin" + }, + "keywords": [ + "openclaw", + "openclaw-plugin", + "contextpilot", + "kv-cache", + "context-reuse", + "prompt-cache", + "dedup", + "llm" + ], + "openclaw": { + "extensions": [ + "./src/index.ts" + ] + }, + "files": [ + "src/", + "openclaw.plugin.json", + "README.md" + ], + "dependencies": { + "@sinclair/typebox": "^0.34.49" + } +} diff --git a/openclaw-plugin/src/engine/cache-control.ts b/openclaw-plugin/src/engine/cache-control.ts new file mode 100644 index 0000000..6ab3901 --- /dev/null +++ b/openclaw-plugin/src/engine/cache-control.ts @@ -0,0 +1,171 @@ +export const MIN_CONTENT_LENGTH_FOR_CACHE = 1024; +export const CACHE_CONTROL_EPHEMERAL = { type: 'ephemeral' } as const; + +type CacheControl = typeof CACHE_CONTROL_EPHEMERAL; + +interface TextBlock extends Record { + type?: unknown; + text?: unknown; + cache_control?: CacheControl; +} + +interface ToolResultBlock extends Record { + type?: unknown; + content?: unknown; + cache_control?: CacheControl; +} + +interface MessageBlock extends Record { + role?: unknown; + content?: unknown; +} + +function isRecord(value: unknown): value is Record { + return typeof value === 'object' && value !== null; +} + +function injectSystemCacheControl( + body: Record, + cc: CacheControl +): Record { + const system = body.system; + if (system === undefined || system === null) { + return body; + } + + if (typeof system === 'string') { + body.system = [{ type: 'text', text: system, cache_control: cc }]; + return body; + } + + if (Array.isArray(system) && system.length > 0) { + const lastBlock = system[system.length - 1]; + if (isRecord(lastBlock)) { + 
lastBlock.cache_control = cc; + } + } + + return body; +} + +function maybeAddCacheControlToToolResult(block: ToolResultBlock, cc: CacheControl): void { + const toolResultContent = block.content ?? ''; + + if (typeof toolResultContent === 'string') { + if (toolResultContent.length >= MIN_CONTENT_LENGTH_FOR_CACHE) { + block.cache_control = cc; + } + return; + } + + if (!Array.isArray(toolResultContent)) { + return; + } + + const totalChars = toolResultContent.reduce((sum, inner) => { + if (!isRecord(inner) || inner.type !== 'text') { + return sum; + } + return sum + (typeof inner.text === 'string' ? inner.text.length : 0); + }, 0); + + if (totalChars < MIN_CONTENT_LENGTH_FOR_CACHE || toolResultContent.length === 0) { + return; + } + + let lastTextBlock: TextBlock | null = null; + for (let i = toolResultContent.length - 1; i >= 0; i -= 1) { + const inner = toolResultContent[i]; + if (isRecord(inner) && inner.type === 'text') { + lastTextBlock = inner as TextBlock; + break; + } + } + + if (lastTextBlock !== null) { + lastTextBlock.cache_control = cc; + } +} + +function injectToolResultCacheControl( + body: Record, + cc: CacheControl +): Record { + const messages = body.messages; + if (!Array.isArray(messages) || messages.length === 0) { + return body; + } + + for (const msg of messages) { + if (!isRecord(msg)) { + continue; + } + + const message = msg as MessageBlock; + + // Handle OpenClaw's toolResult role (content is the tool result itself) + if (message.role === 'toolResult') { + const toolResultContent = message.content ?? ''; + let totalChars = 0; + + if (typeof toolResultContent === 'string') { + totalChars = toolResultContent.length; + } else if (Array.isArray(toolResultContent)) { + totalChars = toolResultContent.reduce((sum, inner) => { + if (isRecord(inner) && inner.type === 'text') { + return sum + (typeof inner.text === 'string' ? 
inner.text.length : 0); + } + return sum; + }, 0); + } + + if (totalChars >= MIN_CONTENT_LENGTH_FOR_CACHE) { + (message as any).cache_control = cc; + } + continue; + } + + // Handle Anthropic's user message with tool_result blocks + if (message.role !== 'user' || !Array.isArray(message.content)) { + continue; + } + + for (const block of message.content) { + if (!isRecord(block)) { + continue; + } + if (block.type !== 'tool_result' && block.type !== 'toolResult') { + continue; + } + maybeAddCacheControlToToolResult(block as ToolResultBlock, cc); + } + } + + return body; +} + +export function injectAnthropicCacheControl(body: Record): Record { + if (!body || typeof body !== 'object') { + return body ?? {}; + } + const copiedBody = structuredClone(body); + injectSystemCacheControl(copiedBody, CACHE_CONTROL_EPHEMERAL); + injectToolResultCacheControl(copiedBody, CACHE_CONTROL_EPHEMERAL); + return copiedBody; +} + +export function injectOpenAICacheControl(body: Record): Record { + // OpenAI prompt caching is automatic and prefix-based, so no explicit + // cache_control block injection is required at request construction time. 
+ return body; +} + +export function injectCacheControl( + body: Record, + provider: 'anthropic' | 'openai' +): Record { + if (provider === 'anthropic') { + return injectAnthropicCacheControl(body); + } + return injectOpenAICacheControl(body); +} diff --git a/openclaw-plugin/src/engine/compute-distance.ts b/openclaw-plugin/src/engine/compute-distance.ts new file mode 100644 index 0000000..5ae024f --- /dev/null +++ b/openclaw-plugin/src/engine/compute-distance.ts @@ -0,0 +1,214 @@ +export interface PreparedContextsCpu { + chunkIds: number[]; + originalPositions: number[]; + lengths: number[]; + offsets: number[]; +} + +export function computeDistanceSingle( + contextA: number[], + contextB: number[], + alpha: number = 0.001 +): number { + if (contextA.length === 0 || contextB.length === 0) { + return 1.0; + } + + const posA = new Map(); + const posB = new Map(); + + for (let pos = 0; pos < contextA.length; pos += 1) { + posA.set(contextA[pos], pos); + } + for (let pos = 0; pos < contextB.length; pos += 1) { + posB.set(contextB[pos], pos); + } + + let intersectionSize = 0; + let positionDiffSum = 0; + + for (const [docId, aPos] of posA) { + const bPos = posB.get(docId); + if (bPos === undefined) { + continue; + } + + intersectionSize += 1; + positionDiffSum += Math.abs(aPos - bPos); + } + + if (intersectionSize === 0) { + return 1.0; + } + + const maxSize = Math.max(contextA.length, contextB.length); + const overlapTerm = 1.0 - intersectionSize / maxSize; + const positionTerm = alpha * (positionDiffSum / intersectionSize); + + return overlapTerm + positionTerm; +} + +export function computeDistancesBatch( + queries: number[][], + targets: number[][], + alpha: number = 0.001 +): number[][] { + const nQueries = queries.length; + const nTargets = targets.length; + + if (nQueries === 0 || nTargets === 0) { + return Array.from({ length: nQueries }, () => new Array(nTargets).fill(0)); + } + + const distances: number[][] = Array.from( + { length: nQueries }, + () => new 
Array(nTargets).fill(1.0) + ); + + for (let i = 0; i < nQueries; i += 1) { + for (let j = 0; j < nTargets; j += 1) { + distances[i][j] = computeDistanceSingle(queries[i], targets[j], alpha); + } + } + + return distances; +} + +export function prepareContextsForCpu(contexts: number[][]): PreparedContextsCpu { + const n = contexts.length; + const sortedData: Array> = new Array(n); + const lengths: number[] = new Array(n).fill(0); + + for (let idx = 0; idx < n; idx += 1) { + const ctx = contexts[idx]; + if (ctx.length === 0) { + sortedData[idx] = []; + lengths[idx] = 0; + continue; + } + + const pairs: Array<[number, number]> = new Array(ctx.length); + for (let origPos = 0; origPos < ctx.length; origPos += 1) { + pairs[origPos] = [ctx[origPos], origPos]; + } + pairs.sort((a, b) => a[0] - b[0]); + + sortedData[idx] = pairs; + lengths[idx] = pairs.length; + } + + const offsets: number[] = new Array(n + 1).fill(0); + for (let i = 0; i < n; i += 1) { + offsets[i + 1] = offsets[i] + lengths[i]; + } + + const totalElements = offsets[n]; + const chunkIds: number[] = new Array(totalElements).fill(0); + const originalPositions: number[] = new Array(totalElements).fill(0); + + for (let i = 0; i < n; i += 1) { + const pairs = sortedData[i]; + const start = offsets[i]; + for (let j = 0; j < pairs.length; j += 1) { + const [chunkId, origPos] = pairs[j]; + chunkIds[start + j] = chunkId; + originalPositions[start + j] = origPos; + } + } + + return { + chunkIds, + originalPositions, + lengths, + offsets + }; +} + +export function computeDistanceOptimized( + chunkIds: number[], + originalPositions: number[], + lengths: number[], + offsets: number[], + i: number, + j: number, + alpha: number +): number { + const lenI = lengths[i]; + const lenJ = lengths[j]; + + if (lenI === 0 || lenJ === 0) { + return 1.0; + } + + const offsetI = offsets[i]; + const offsetJ = offsets[j]; + const endI = offsetI + lenI; + const endJ = offsetJ + lenJ; + + let intersectionSize = 0; + let positionDiffSum = 
0; + + let pi = offsetI; + let pj = offsetJ; + + while (pi < endI && pj < endJ) { + const chunkI = chunkIds[pi]; + const chunkJ = chunkIds[pj]; + + if (chunkI === chunkJ) { + intersectionSize += 1; + positionDiffSum += Math.abs(originalPositions[pi] - originalPositions[pj]); + pi += 1; + pj += 1; + } else if (chunkI < chunkJ) { + pi += 1; + } else { + pj += 1; + } + } + + const maxSize = Math.max(lenI, lenJ); + const overlapTerm = 1.0 - intersectionSize / maxSize; + + let positionTerm = 0.0; + if (intersectionSize !== 0) { + const avgPosDiff = positionDiffSum / intersectionSize; + positionTerm = alpha * avgPosDiff; + } + + return overlapTerm + positionTerm; +} + +export function computeDistanceMatrixCpu( + contexts: number[][], + alpha: number = 0.001 +): Float64Array { + const n = contexts.length; + const numPairs = (n * (n - 1)) / 2; + + if (numPairs === 0) { + return new Float64Array(0); + } + + const { chunkIds, originalPositions, lengths, offsets } = prepareContextsForCpu(contexts); + const condensedDistances = new Float64Array(numPairs); + + for (let i = 0; i < n; i += 1) { + for (let j = i + 1; j < n; j += 1) { + const dist = computeDistanceOptimized( + chunkIds, + originalPositions, + lengths, + offsets, + i, + j, + alpha + ); + + const condensedIdx = n * i - (i * (i + 1)) / 2 + j - i - 1; + condensedDistances[condensedIdx] = dist; + } + } + + return condensedDistances; +} diff --git a/openclaw-plugin/src/engine/conversation-tracker.ts b/openclaw-plugin/src/engine/conversation-tracker.ts new file mode 100644 index 0000000..5e56f39 --- /dev/null +++ b/openclaw-plugin/src/engine/conversation-tracker.ts @@ -0,0 +1,251 @@ +export interface DeduplicationResult { + originalDocs: number[]; + overlappingDocs: number[]; + newDocs: number[]; + referenceHints: string[]; + deduplicatedDocs: number[]; + docSourceTurns: Map; + isNewConversation: boolean; +} + +export interface RequestHistory { + requestId: string; + docs: number[]; + parentRequestId: string | null; + 
turnNumber: number; + timestamp: number; +} + +export interface ConversationTrackerStats { + totalRequests: number; + totalDedupCalls: number; + totalDocsDeduplicated: number; + activeRequests: number; +} + +export class ConversationTracker { + private _requests: Map; + private _hintTemplate: string; + private _maxTrackedRequests: number; + private _stats: { + totalRequests: number; + totalDedupCalls: number; + totalDocsDeduplicated: number; + }; + + constructor(hintTemplate?: string, maxTrackedRequests: number = 256) { + this._requests = new Map(); + this._hintTemplate = + hintTemplate ?? "Please refer to [Doc {doc_id}] from the previous conversation turn."; + this._maxTrackedRequests = maxTrackedRequests; + this._stats = { + totalRequests: 0, + totalDedupCalls: 0, + totalDocsDeduplicated: 0 + }; + } + + registerRequest(requestId: string, docs: number[], parentRequestId?: string | null): RequestHistory { + let turnNumber = 1; + if (parentRequestId && this._requests.has(parentRequestId)) { + const parent = this._requests.get(parentRequestId); + if (parent) { + turnNumber = parent.turnNumber + 1; + } + } + + const history: RequestHistory = { + requestId, + docs: [...docs], + parentRequestId: parentRequestId ?? 
null, + turnNumber, + timestamp: Date.now() / 1000 + }; + + this._requests.set(requestId, history); + this._stats.totalRequests += 1; + + // LRU eviction: remove oldest entries when over limit + if (this._requests.size > this._maxTrackedRequests) { + const oldest = this._requests.keys().next().value; + if (oldest !== undefined) { + this._requests.delete(oldest); + } + } + + return history; + } + + getConversationChain(requestId: string): RequestHistory[] { + const chain: RequestHistory[] = []; + let currentId: string | null = requestId; + + while (currentId && this._requests.has(currentId)) { + const history = this._requests.get(currentId); + if (!history) { + break; + } + + chain.push(history); + currentId = history.parentRequestId; + } + + chain.reverse(); + return chain; + } + + getAllPreviousDocs(parentRequestId: string): [Set, Map] { + const allDocs = new Set(); + const docSources = new Map(); + + const chain = this.getConversationChain(parentRequestId); + + for (const history of chain) { + for (const docId of history.docs) { + if (!allDocs.has(docId)) { + allDocs.add(docId); + docSources.set(docId, history.requestId); + } + } + } + + return [allDocs, docSources]; + } + + deduplicate( + requestId: string, + docs: number[], + parentRequestId?: string | null, + hintTemplate?: string + ): DeduplicationResult { + this._stats.totalDedupCalls += 1; + + if (!parentRequestId || !this._requests.has(parentRequestId)) { + this.registerRequest(requestId, docs, null); + + return { + originalDocs: docs, + overlappingDocs: [], + newDocs: docs, + referenceHints: [], + deduplicatedDocs: docs, + docSourceTurns: new Map(), + isNewConversation: true + }; + } + + const [previousDocs, docSources] = this.getAllPreviousDocs(parentRequestId); + + const overlappingDocs: number[] = []; + const newDocs: number[] = []; + const docSourceTurns = new Map(); + + for (const docId of docs) { + if (previousDocs.has(docId)) { + overlappingDocs.push(docId); + const sourceRequestId = 
docSources.get(docId); + if (sourceRequestId !== undefined) { + docSourceTurns.set(docId, sourceRequestId); + } + } else { + newDocs.push(docId); + } + } + + const template = hintTemplate ?? this._hintTemplate; + const referenceHints: string[] = []; + + for (const docId of overlappingDocs) { + const sourceRequest = docSources.get(docId); + const sourceHistory = sourceRequest ? this._requests.get(sourceRequest) : undefined; + const turnNumber = sourceHistory ? String(sourceHistory.turnNumber) : "previous"; + + const hint = template + .replaceAll("{doc_id}", String(docId)) + .replaceAll("{turn_number}", turnNumber) + .replaceAll("{source_request}", sourceRequest ?? "previous"); + + referenceHints.push(hint); + } + + this.registerRequest(requestId, docs, parentRequestId); + this._stats.totalDocsDeduplicated += overlappingDocs.length; + + return { + originalDocs: docs, + overlappingDocs, + newDocs, + referenceHints, + deduplicatedDocs: newDocs, + docSourceTurns, + isNewConversation: false + }; + } + + deduplicateBatch( + requestIds: string[], + docsList: number[][], + parentRequestIds?: Array, + hintTemplate?: string + ): DeduplicationResult[] { + const effectiveParentRequestIds = + parentRequestIds ?? 
new Array(requestIds.length).fill(null); + + const results: DeduplicationResult[] = []; + const n = Math.min(requestIds.length, docsList.length, effectiveParentRequestIds.length); + + for (let i = 0; i < n; i += 1) { + const result = this.deduplicate( + requestIds[i], + docsList[i], + effectiveParentRequestIds[i], + hintTemplate + ); + results.push(result); + } + + return results; + } + + removeRequest(requestId: string): boolean { + if (this._requests.has(requestId)) { + this._requests.delete(requestId); + return true; + } + + return false; + } + + clearConversation(requestId: string): number { + const chain = this.getConversationChain(requestId); + let count = 0; + + for (const history of chain) { + if (this.removeRequest(history.requestId)) { + count += 1; + } + } + + return count; + } + + reset(): void { + this._requests.clear(); + this._stats = { + totalRequests: 0, + totalDedupCalls: 0, + totalDocsDeduplicated: 0 + }; + } + + getStats(): ConversationTrackerStats { + return { + ...this._stats, + activeRequests: this._requests.size + }; + } + + getRequestHistory(requestId: string): RequestHistory | null { + return this._requests.get(requestId) ?? 
null; + } +} diff --git a/openclaw-plugin/src/engine/dedup.ts b/openclaw-plugin/src/engine/dedup.ts new file mode 100644 index 0000000..bb173f1 --- /dev/null +++ b/openclaw-plugin/src/engine/dedup.ts @@ -0,0 +1,378 @@ +import * as crypto from 'node:crypto'; + +export const MIN_BLOCK_CHARS = 80; +export const MIN_CONTENT_CHARS = 500; + +export const CHUNK_MODULUS = 13; +export const CHUNK_MIN_LINES = 5; +export const CHUNK_MAX_LINES = 40; + +export interface DedupResult { + blocksDeduped: number; + blocksTotal: number; + charsBefore: number; + charsAfter: number; + charsSaved: number; +} + +export interface DedupOptions { + minBlockChars?: number; + minContentChars?: number; + chunkModulus?: number; +} + +type SeenBlock = [number, string, number]; + +interface OpenAIToolCall { + id?: string; + function?: { + name?: string; + }; +} + +interface OpenAIAssistantMessage { + role?: string; + tool_calls?: OpenAIToolCall[]; +} + +interface OpenAIToolMessage { + role?: string; + content?: string; + tool_call_id?: string; + name?: string; +} + +interface ChatCompletionsBody { + messages?: OpenAIToolMessage[]; +} + +interface ResponsesFunctionCallItem { + type?: string; + call_id?: string; + name?: string; +} + +interface ResponsesFunctionCallOutputItem { + type?: string; + call_id?: string; + output?: string; +} + +interface ResponsesApiBody { + input?: ResponsesFunctionCallOutputItem[]; +} + +function emptyDedupResult(): DedupResult { + return { + blocksDeduped: 0, + blocksTotal: 0, + charsBefore: 0, + charsAfter: 0, + charsSaved: 0 + }; +} + +export function hashString(str: string): number { + let h = 5381; + for (let i = 0; i < str.length; i++) { + // Use Math.imul for safe 32-bit multiplication to avoid float overflow + h = (Math.imul(h, 33) + str.charCodeAt(i)) | 0; + } + return h >>> 0; +} + +export function buildToolNameMapOpenai(messages: OpenAIAssistantMessage[]): Record { + const mapping: Record = {}; + for (const msg of messages) { + if (!msg || typeof msg !== 
'object' || msg.role !== 'assistant') { + continue; + } + + for (const tc of msg.tool_calls || []) { + if (!tc || typeof tc !== 'object') { + continue; + } + const tcId = tc.id || ''; + const fn = tc.function; + if (fn && typeof fn === 'object' && fn.name) { + mapping[tcId] = fn.name; + } + } + } + return mapping; +} + +export function buildToolNameMapResponses(items: ResponsesFunctionCallItem[]): Record { + const mapping: Record = {}; + for (const item of items) { + if (item && typeof item === 'object' && item.type === 'function_call') { + const callId = item.call_id || ''; + const name = item.name || ''; + if (callId && name) { + mapping[callId] = name; + } + } + } + return mapping; +} + +export function contentDefinedChunking( + text: string, + chunkModulus: number = CHUNK_MODULUS +): string[] { + const lines = text.split('\n'); + if (lines.length <= CHUNK_MIN_LINES) { + return [text]; + } + + const blocks: string[] = []; + let current: string[] = []; + + for (const line of lines) { + current.push(line); + const lineHash = hashString(line.trim()) & 0xFFFFFFFF; + const isBoundary = ( + lineHash % chunkModulus === 0 && current.length >= CHUNK_MIN_LINES + ) || current.length >= CHUNK_MAX_LINES; + + if (isBoundary) { + blocks.push(current.join('\n')); + current = []; + } + } + + if (current.length > 0) { + if (blocks.length > 0 && current.length < CHUNK_MIN_LINES) { + blocks[blocks.length - 1] += `\n${current.join('\n')}`; + } else { + blocks.push(current.join('\n')); + } + } + + return blocks; +} + +export function hashBlock(block: string): string { + const normalized = block.trim(); + return crypto.createHash('sha256').update(normalized, 'utf8').digest('hex').slice(0, 20); +} + +export function dedupChatCompletions(body: ChatCompletionsBody, opts: DedupOptions = {}): DedupResult { + const minBlockChars = opts.minBlockChars ?? MIN_BLOCK_CHARS; + const minContentChars = opts.minContentChars ?? MIN_CONTENT_CHARS; + const chunkModulus = opts.chunkModulus ?? 
CHUNK_MODULUS; + + const messages = body?.messages; + if (!Array.isArray(messages) || messages.length === 0) { + return emptyDedupResult(); + } + + const toolNames = buildToolNameMapOpenai(messages); + const seenBlocks = new Map(); + const result = emptyDedupResult(); + + for (let idx = 0; idx < messages.length; idx++) { + const msg = messages[idx]; + if (!msg || typeof msg !== 'object') { + continue; + } + // Support both OpenAI 'tool' role and OpenClaw 'toolResult' role + if (msg.role !== 'tool' && msg.role !== 'toolResult') { + continue; + } + + // For toolResult role, content might be an array of {type: "text", text: "..."} blocks + let content = msg.content || ''; + if (Array.isArray(content)) { + content = content + .filter((b: any) => b?.type === 'text') + .map((b: any) => b.text || '') + .join('\n'); + } + if (typeof content !== 'string' || content.length < minContentChars) { + continue; + } + + const toolCallId = msg.tool_call_id || ''; + const fnName = toolNames[toolCallId] || msg.name || 'tool'; + + const blocks = contentDefinedChunking(content, chunkModulus); + if (blocks.length < 2) { + for (const block of blocks) { + if (block.trim().length >= minBlockChars) { + const h = hashBlock(block); + if (!seenBlocks.has(h)) { + seenBlocks.set(h, [idx, fnName, 0]); + } + } + } + continue; + } + + const newBlocks: string[] = []; + let dedupedInThis = 0; + + for (let blockIdx = 0; blockIdx < blocks.length; blockIdx++) { + const block = blocks[blockIdx]; + if (block.trim().length < minBlockChars) { + newBlocks.push(block); + continue; + } + + const h = hashBlock(block); + result.blocksTotal += 1; + + const seen = seenBlocks.get(h); + if (seen && seen[0] !== idx) { + const origFn = seen[1]; + const firstLine = block.trim().split('\n')[0].slice(0, 80); + const ref = `[... 
"${firstLine}" — identical to earlier ${origFn} result, see above ...]`; + const charsSaved = block.length - ref.length; + if (charsSaved > 0) { + newBlocks.push(ref); + dedupedInThis += 1; + result.blocksDeduped += 1; + } else { + newBlocks.push(block); + } + } else { + if (!seen) { + seenBlocks.set(h, [idx, fnName, blockIdx]); + } + newBlocks.push(block); + } + } + + if (dedupedInThis > 0) { + const originalLen = content.length; + const newContent = newBlocks.join('\n\n'); + + // Preserve original content format + if (Array.isArray(msg.content)) { + // For array content, update the first text block + const textBlockIdx = msg.content.findIndex((b: any) => b?.type === 'text'); + if (textBlockIdx >= 0) { + (msg.content as any[])[textBlockIdx].text = newContent; + } + } else { + msg.content = newContent; + } + + const newLen = newContent.length; + result.charsBefore += originalLen; + result.charsAfter += newLen; + result.charsSaved += (originalLen - newLen); + } else { + for (let blockIdx = 0; blockIdx < blocks.length; blockIdx++) { + const block = blocks[blockIdx]; + if (block.trim().length >= minBlockChars) { + const h = hashBlock(block); + if (!seenBlocks.has(h)) { + seenBlocks.set(h, [idx, fnName, blockIdx]); + } + } + } + } + } + + return result; +} + +export function dedupResponsesApi(body: ResponsesApiBody, opts: DedupOptions = {}): DedupResult { + const minBlockChars = opts.minBlockChars ?? MIN_BLOCK_CHARS; + const minContentChars = opts.minContentChars ?? MIN_CONTENT_CHARS; + const chunkModulus = opts.chunkModulus ?? 
CHUNK_MODULUS; + + const inputItems = body?.input; + if (!Array.isArray(inputItems) || inputItems.length === 0) { + return emptyDedupResult(); + } + + const fnNames = buildToolNameMapResponses(inputItems); + const seenBlocks = new Map(); + const result = emptyDedupResult(); + + for (let idx = 0; idx < inputItems.length; idx++) { + const item = inputItems[idx]; + if (!item || typeof item !== 'object' || item.type !== 'function_call_output') { + continue; + } + + const output = item.output || ''; + if (typeof output !== 'string' || output.length < minContentChars) { + continue; + } + + const callId = item.call_id || ''; + const fnName = fnNames[callId] || callId || 'tool'; + + const blocks = contentDefinedChunking(output, chunkModulus); + if (blocks.length < 2) { + for (const block of blocks) { + if (block.trim().length >= minBlockChars) { + const h = hashBlock(block); + if (!seenBlocks.has(h)) { + seenBlocks.set(h, [idx, fnName, 0]); + } + } + } + continue; + } + + const newBlocks: string[] = []; + let dedupedInThis = 0; + + for (let blockIdx = 0; blockIdx < blocks.length; blockIdx++) { + const block = blocks[blockIdx]; + if (block.trim().length < minBlockChars) { + newBlocks.push(block); + continue; + } + + const h = hashBlock(block); + result.blocksTotal += 1; + + const seen = seenBlocks.get(h); + if (seen && seen[0] !== idx) { + const origFn = seen[1]; + const firstLine = block.trim().split('\n')[0].slice(0, 80); + const ref = `[... 
"${firstLine}" — identical to earlier ${origFn} result, see above ...]`; + const charsSaved = block.length - ref.length; + if (charsSaved > 0) { + newBlocks.push(ref); + dedupedInThis += 1; + result.blocksDeduped += 1; + } else { + newBlocks.push(block); + } + } else { + if (!seen) { + seenBlocks.set(h, [idx, fnName, blockIdx]); + } + newBlocks.push(block); + } + } + + if (dedupedInThis > 0) { + const originalLen = output.length; + const newOutput = newBlocks.join('\n\n'); + item.output = newOutput; + const newLen = newOutput.length; + result.charsBefore += originalLen; + result.charsAfter += newLen; + result.charsSaved += (originalLen - newLen); + } else { + for (let blockIdx = 0; blockIdx < blocks.length; blockIdx++) { + const block = blocks[blockIdx]; + if (block.trim().length >= minBlockChars) { + const h = hashBlock(block); + if (!seenBlocks.has(h)) { + seenBlocks.set(h, [idx, fnName, blockIdx]); + } + } + } + } + } + + return result; +} diff --git a/openclaw-plugin/src/engine/engine.test.ts b/openclaw-plugin/src/engine/engine.test.ts new file mode 100644 index 0000000..dcf0cab --- /dev/null +++ b/openclaw-plugin/src/engine/engine.test.ts @@ -0,0 +1,696 @@ +import { describe, expect, it } from "vitest"; +import { + injectAnthropicCacheControl, + injectCacheControl, + injectOpenAICacheControl, +} from "./cache-control.js"; +import { + buildToolNameMapOpenai, + contentDefinedChunking, + dedupChatCompletions, + dedupResponsesApi, + hashBlock, +} from "./dedup.js"; +import { + extractAllOpenai, + extractDocuments, + extractFromAnthropicMessages, + extractFromAnthropicToolResults, + extractFromOpenaiChat, + extractFromOpenaiToolResults, + extractSingleDocsFromOpenaiToolResults, + getFormatHandler, + parseInterceptHeaders, + reconstructAnthropicToolResult, + reconstructContent, + reconstructOpenaiToolResult, +} from "./extract.js"; +import { ReorderState, reorderDocuments } from "./reorder.js"; + +const DEFAULT_CONFIG = parseInterceptHeaders({}); + +const 
OPENAI_CHAT_BODY = { + model: "claude-sonnet-4-6", + messages: [ + { + role: "system", + content: + "Doc A content hereDoc B content hereDoc C content here", + }, + { role: "user", content: "What do these docs say?" }, + ], +}; + +const ANTHROPIC_MESSAGES_BODY = { + model: "claude-sonnet-4-6", + system: + "Doc A content hereDoc B content here", + messages: [{ role: "user", content: "Summarize the documents." }], +}; + +const LARGE_CONTENT = "x".repeat(600) + "\n".repeat(20) + "y".repeat(600); + +const DEDUP_BODY = { + messages: [ + { + role: "assistant", + content: "", + tool_calls: [ + { id: "call_1", function: { name: "read_file", arguments: "{}" } }, + { id: "call_2", function: { name: "read_file", arguments: "{}" } }, + ], + }, + { role: "tool", tool_call_id: "call_1", content: LARGE_CONTENT }, + { role: "tool", tool_call_id: "call_2", content: LARGE_CONTENT }, + ], +}; + +function makeLargeContent(prefix: string): string { + return Array.from( + { length: 20 }, + (_, i) => `${prefix} line ${i} ${"z".repeat(60)}`, + ).join("\n"); +} + +describe("extract", () => { + it("parseInterceptHeaders parses X-ContextPilot-* headers and defaults", () => { + const parsed = parseInterceptHeaders({ + "X-ContextPilot-Enabled": "0", + "x-contextpilot-mode": "xml_tag", + "x-contextpilot-tag": "context", + "x-contextpilot-separator": "===", + "x-contextpilot-alpha": "0.05", + "x-contextpilot-linkage": "single", + "x-contextpilot-scope": "invalid", + }); + + expect(parsed).toEqual({ + enabled: false, + mode: "xml_tag", + tag: "context", + separator: "===", + alpha: 0.05, + linkageMethod: "single", + scope: "all", + }); + + const defaults = parseInterceptHeaders({}); + expect(defaults.enabled).toBe(true); + expect(defaults.mode).toBe("auto"); + expect(defaults.tag).toBe("document"); + expect(defaults.separator).toBe("---"); + expect(defaults.alpha).toBe(0.001); + expect(defaults.linkageMethod).toBe("average"); + expect(defaults.scope).toBe("all"); + }); + + it("extractDocuments 
extracts XML-tagged documents", () => { + const text = + "AB"; + const extraction = extractDocuments(text, DEFAULT_CONFIG); + expect(extraction).not.toBeNull(); + expect(extraction?.mode).toBe("xml_tag"); + expect(extraction?.documents).toEqual(["A", "B"]); + expect(extraction?.wrapperTag).toBe("documents"); + expect(extraction?.itemTag).toBe("document"); + }); + + it("extractDocuments extracts numbered documents", () => { + const extraction = extractDocuments( + "[1] First doc\n[2] Second doc", + parseInterceptHeaders({ "x-contextpilot-mode": "numbered" }), + ); + expect(extraction).not.toBeNull(); + expect(extraction?.mode).toBe("numbered"); + expect(extraction?.documents).toEqual(["First doc", "Second doc"]); + }); + + it("extractDocuments extracts JSON results documents", () => { + const extraction = extractDocuments( + JSON.stringify({ results: [{ url: "a.com" }, { url: "b.com" }] }), + parseInterceptHeaders({ "x-contextpilot-mode": "json_results" }), + ); + expect(extraction).not.toBeNull(); + expect(extraction?.mode).toBe("json_results"); + expect(extraction?.documents).toEqual(["a.com", "b.com"]); + }); + + it("extractDocuments auto mode resolves XML > numbered > JSON", () => { + const xml = extractDocuments( + "[1] one[2] two", + DEFAULT_CONFIG, + ); + expect(xml?.mode).toBe("xml_tag"); + + const numbered = extractDocuments("[1] one\n[2] two", DEFAULT_CONFIG); + expect(numbered?.mode).toBe("numbered"); + + const json = extractDocuments( + JSON.stringify({ results: [{ url: "one" }, { url: "two" }] }), + DEFAULT_CONFIG, + ); + expect(json?.mode).toBe("json_results"); + }); + + it("extractDocuments returns null for fewer than two docs", () => { + const numberedSingle = extractDocuments( + "[1] Only one", + parseInterceptHeaders({ "x-contextpilot-mode": "numbered" }), + ); + expect(numberedSingle).toBeNull(); + + const jsonSingle = extractDocuments( + JSON.stringify({ results: [{ url: "only-one" }] }), + parseInterceptHeaders({ "x-contextpilot-mode": 
"json_results" }), + ); + expect(jsonSingle).toBeNull(); + }); + + it("reconstructContent rebuilds XML while preserving tags", () => { + const extraction = extractDocuments( + "prefixABsuffix", + DEFAULT_CONFIG, + ); + expect(extraction).not.toBeNull(); + if (!extraction) { + throw new Error("expected extraction"); + } + + const rebuilt = reconstructContent(extraction, ["B", "A"]); + expect(rebuilt).toContain("prefix"); + expect(rebuilt).toContain("suffix"); + expect(rebuilt).toContain(""); + expect(rebuilt).toContain("B"); + expect(rebuilt).toContain("A"); + }); + + it("reconstructContent rebuilds numbered format", () => { + const extraction = extractDocuments( + "Lead\n[1] First\n[2] Second", + parseInterceptHeaders({ "x-contextpilot-mode": "numbered" }), + ); + expect(extraction).not.toBeNull(); + if (!extraction) { + throw new Error("expected extraction"); + } + + const rebuilt = reconstructContent(extraction, ["Second", "First"]); + expect(rebuilt).toContain("Lead"); + expect(rebuilt).toContain("[1] Second"); + expect(rebuilt).toContain("[2] First"); + }); + + it("extractFromOpenaiChat extracts from system message", () => { + const extraction = extractFromOpenaiChat(OPENAI_CHAT_BODY, DEFAULT_CONFIG); + expect(extraction).not.toBeNull(); + expect(extraction?.[1]).toBe(0); + expect(extraction?.[0].documents).toEqual([ + "Doc A content here", + "Doc B content here", + "Doc C content here", + ]); + }); + + it("extractFromAnthropicMessages extracts from system string", () => { + const extraction = extractFromAnthropicMessages( + ANTHROPIC_MESSAGES_BODY, + DEFAULT_CONFIG, + ); + expect(extraction).not.toBeNull(); + expect(extraction?.documents).toEqual([ + "Doc A content here", + "Doc B content here", + ]); + }); + + it("extractFromOpenaiToolResults extracts tool-result documents", () => { + const body = { + messages: [ + { role: "tool", content: "AB" }, + ], + }; + const extractions = extractFromOpenaiToolResults(body, DEFAULT_CONFIG); + 
expect(extractions).toHaveLength(1); + expect(extractions[0]?.[0].documents).toEqual(["A", "B"]); + expect(extractions[0]?.[1]).toEqual({ + msgIndex: 0, + blockIndex: -1, + innerBlockIndex: -1, + }); + }); + + it("extractFromAnthropicToolResults extracts tool_result blocks", () => { + const body = { + messages: [ + { + role: "user", + content: [ + { + type: "tool_result", + content: + "AB", + }, + ], + }, + ], + }; + const extractions = extractFromAnthropicToolResults(body, DEFAULT_CONFIG); + expect(extractions).toHaveLength(1); + expect(extractions[0]?.[0].documents).toEqual(["A", "B"]); + expect(extractions[0]?.[1]).toEqual({ + msgIndex: 0, + blockIndex: 0, + innerBlockIndex: -1, + }); + }); + + it("FormatHandler OpenAI returns a working handler", () => { + const handler = getFormatHandler("openai_chat"); + expect(handler.targetPath()).toBe("/v1/chat/completions"); + + const body = structuredClone(OPENAI_CHAT_BODY); + const all = handler.extractAll(body, DEFAULT_CONFIG); + expect(all.systemExtraction).not.toBeNull(); + expect(all.hasExtractions).toBe(true); + + if (!all.systemExtraction) { + throw new Error("expected system extraction"); + } + + handler.reconstructSystem( + body, + all.systemExtraction[0], + ["Doc C content here", "Doc B content here", "Doc A content here"], + all.systemExtraction[1], + ); + expect(body.messages[0]?.content).toContain("Doc C content here"); + }); + + it("FormatHandler Anthropic returns a working handler", () => { + const handler = getFormatHandler("anthropic_messages"); + expect(handler.targetPath()).toBe("/v1/messages"); + + const body = structuredClone(ANTHROPIC_MESSAGES_BODY); + const all = handler.extractAll(body, DEFAULT_CONFIG); + expect(all.systemExtraction).not.toBeNull(); + expect(all.hasExtractions).toBe(true); + + if (!all.systemExtraction) { + throw new Error("expected system extraction"); + } + + handler.reconstructSystem( + body, + all.systemExtraction[0], + ["Doc B content here", "Doc A content here"], + 
all.systemExtraction[1], + ); + expect(body.system).toContain("Doc B content here"); + }); + + it("extractAllOpenai extracts from both system and tool results", () => { + const body = { + messages: [ + { + role: "system", + content: + "Sys ASys B", + }, + { + role: "tool", + content: + "Tool ATool B", + }, + ], + }; + + const all = extractAllOpenai(body, DEFAULT_CONFIG); + expect(all.systemExtraction).not.toBeNull(); + expect(all.toolExtractions).toHaveLength(1); + expect(all.totalDocuments).toBe(4); + }); + + it("extractSingleDocsFromOpenaiToolResults extracts single long docs", () => { + const body = { + messages: [ + { + role: "tool", + tool_call_id: "call_99", + content: `Result:\n${"r".repeat(240)}`, + }, + ], + }; + + const extracted = extractSingleDocsFromOpenaiToolResults(body, DEFAULT_CONFIG); + expect(extracted).toHaveLength(1); + expect(extracted[0]?.[0].toolCallId).toBe("call_99"); + expect(extracted[0]?.[0].content.length).toBeGreaterThanOrEqual(200); + expect(extracted[0]?.[0].contentHash).toMatch(/^[0-9a-f]{64}$/); + }); + + it("reconstructOpenaiToolResult reconstructs a tool result in-place", () => { + const body = { + messages: [ + { + role: "tool", + content: + "AB", + }, + ], + }; + + const extractions = extractFromOpenaiToolResults(body, DEFAULT_CONFIG); + expect(extractions).toHaveLength(1); + const first = extractions[0]; + if (!first) { + throw new Error("expected extraction"); + } + + reconstructOpenaiToolResult(body, first[0], ["B", "A"], first[1]); + expect(body.messages[0]?.content).toContain("B"); + expect(body.messages[0]?.content).toContain("A"); + }); + + it("reconstructAnthropicToolResult reconstructs a tool result in-place", () => { + const body = { + messages: [ + { + role: "user", + content: [ + { + type: "tool_result", + content: + "AB", + }, + ], + }, + ], + }; + + const extractions = extractFromAnthropicToolResults(body, DEFAULT_CONFIG); + expect(extractions).toHaveLength(1); + const first = extractions[0]; + if (!first) { + 
throw new Error("expected extraction"); + } + + reconstructAnthropicToolResult(body, first[0], ["B", "A"], first[1]); + expect(body.messages[0]?.content[0]?.content).toContain("B"); + expect(body.messages[0]?.content[0]?.content).toContain("A"); + }); +}); + +describe("dedup", () => { + it("contentDefinedChunking splits text into multiple blocks at boundaries", () => { + const text = Array.from({ length: 12 }, (_, i) => `line-${i}`).join("\n"); + const blocks = contentDefinedChunking(text, 1); + expect(blocks).toHaveLength(2); + expect(blocks[0]?.split("\n")).toHaveLength(5); + expect(blocks[1]?.split("\n")).toHaveLength(7); + }); + + it("contentDefinedChunking returns one block for short text", () => { + const short = "a\nb\nc\nd\ne"; + const blocks = contentDefinedChunking(short); + expect(blocks).toEqual([short]); + }); + + it("hashBlock is consistent and returns 20-char hex", () => { + const h1 = hashBlock(" abc\n"); + const h2 = hashBlock("abc"); + expect(h1).toBe(h2); + expect(h1).toMatch(/^[0-9a-f]{20}$/); + }); + + it("dedupChatCompletions returns zero savings with no duplicates", () => { + const body = { + messages: [ + { + role: "assistant", + tool_calls: [ + { id: "a", function: { name: "read_file" } }, + { id: "b", function: { name: "read_file" } }, + ], + }, + { role: "tool", tool_call_id: "a", content: makeLargeContent("first") }, + { role: "tool", tool_call_id: "b", content: makeLargeContent("second") }, + ], + }; + + const before = body.messages[2]?.content; + const result = dedupChatCompletions(body, { chunkModulus: 1 }); + expect(result.blocksDeduped).toBe(0); + expect(result.charsSaved).toBe(0); + expect(body.messages[2]?.content).toBe(before); + }); + + it("dedupChatCompletions dedups duplicate blocks and inserts references", () => { + const body = structuredClone(DEDUP_BODY); + const result = dedupChatCompletions(body, { chunkModulus: 1 }); + expect(result.blocksDeduped).toBeGreaterThan(0); + expect(result.charsSaved).toBeGreaterThan(0); + 
expect(body.messages[2]?.content).toContain( + "identical to earlier read_file result", + ); + }); + + it("dedupChatCompletions skips short content", () => { + const short = "s".repeat(300); + const body = { + messages: [ + { + role: "assistant", + tool_calls: [ + { id: "a", function: { name: "search" } }, + { id: "b", function: { name: "search" } }, + ], + }, + { role: "tool", tool_call_id: "a", content: short }, + { role: "tool", tool_call_id: "b", content: short }, + ], + }; + + const result = dedupChatCompletions(body); + expect(result.blocksTotal).toBe(0); + expect(result.blocksDeduped).toBe(0); + expect(result.charsSaved).toBe(0); + expect(body.messages[2]?.content).toBe(short); + }); + + it("dedupResponsesApi dedups duplicate function_call_output content", () => { + const body = { + input: [ + { type: "function_call", call_id: "r1", name: "search" }, + { type: "function_call", call_id: "r2", name: "search" }, + { type: "function_call_output", call_id: "r1", output: LARGE_CONTENT }, + { type: "function_call_output", call_id: "r2", output: LARGE_CONTENT }, + ], + }; + + const result = dedupResponsesApi(body, { chunkModulus: 1 }); + expect(result.blocksDeduped).toBeGreaterThan(0); + expect(result.charsSaved).toBeGreaterThan(0); + expect(body.input[3]?.output).toContain("identical to earlier search result"); + }); + + it("buildToolNameMapOpenai maps tool_call_id to function name", () => { + const mapping = buildToolNameMapOpenai([ + { + role: "assistant", + tool_calls: [ + { id: "id_1", function: { name: "read_file" } }, + { id: "id_2", function: { name: "search" } }, + ], + }, + { role: "user" }, + ]); + + expect(mapping).toEqual({ id_1: "read_file", id_2: "search" }); + }); +}); + +describe("cache-control", () => { + it("injectAnthropicCacheControl converts string system into array with cache_control", () => { + const body: Record = { system: "system text", messages: [] }; + const result = injectAnthropicCacheControl(body); + + const system = result.system as 
Array<{ + type?: string; + text?: string; + cache_control?: { type: string }; + }>; + expect(Array.isArray(system)).toBe(true); + expect(system[0]).toEqual({ + type: "text", + text: "system text", + cache_control: { type: "ephemeral" }, + }); + }); + + it("injectAnthropicCacheControl adds cache_control to last system block", () => { + const body: Record = { + system: [ + { type: "text", text: "first" }, + { type: "text", text: "last" }, + ], + messages: [], + }; + const result = injectAnthropicCacheControl(body); + const system = result.system as Array<{ + type?: string; + text?: string; + cache_control?: { type: string }; + }>; + + expect(system[0]?.cache_control).toBeUndefined(); + expect(system[1]?.cache_control).toEqual({ type: "ephemeral" }); + }); + + it("injectAnthropicCacheControl adds cache_control to large tool_result blocks", () => { + const body: Record = { + messages: [ + { + role: "user", + content: [ + { type: "tool_result", content: "x".repeat(1200) }, + { + type: "tool_result", + content: [ + { type: "text", text: "a".repeat(800) }, + { type: "text", text: "b".repeat(300) }, + ], + }, + ], + }, + ], + }; + + const result = injectAnthropicCacheControl(body); + const messages = result.messages as Array<{ + role?: string; + content?: Array<{ + type?: string; + content?: string | Array<{ type?: string; text?: string; cache_control?: { type: string } }>; + cache_control?: { type: string }; + }>; + }>; + + const firstToolResult = messages[0]?.content?.[0]; + const secondToolResult = messages[0]?.content?.[1]; + const secondInner = secondToolResult?.content as Array<{ + type?: string; + text?: string; + cache_control?: { type: string }; + }>; + + expect(firstToolResult?.cache_control).toEqual({ type: "ephemeral" }); + expect(secondInner[0]?.cache_control).toBeUndefined(); + expect(secondInner[1]?.cache_control).toEqual({ type: "ephemeral" }); + }); + + it("injectAnthropicCacheControl does not mutate original body", () => { + const body: Record = { + 
system: "immutable", + messages: [{ role: "user", content: [] }], + }; + const snapshot = structuredClone(body); + const result = injectAnthropicCacheControl(body); + + expect(body).toEqual(snapshot); + expect(result).not.toBe(body); + }); + + it("injectOpenAICacheControl is a no-op", () => { + const body: Record = { + messages: [{ role: "system", content: "keep" }], + }; + const result = injectOpenAICacheControl(body); + expect(result).toBe(body); + }); + + it("injectCacheControl dispatches by provider", () => { + const anthropicBody: Record = { system: "hello", messages: [] }; + const openaiBody: Record = { messages: [] }; + + const anthropicResult = injectCacheControl(anthropicBody, "anthropic"); + const openaiResult = injectCacheControl(openaiBody, "openai"); + + expect(anthropicResult).not.toBe(anthropicBody); + expect(Array.isArray(anthropicResult.system)).toBe(true); + expect(openaiResult).toBe(openaiBody); + }); +}); + +describe("reorder", () => { + it("ReorderState first call matches deterministic hash sort", () => { + const docs = ["Doc C", "Doc A", "Doc B"]; + const state = new ReorderState(); + const [stateOrder] = state.reorder(docs); + const [statelessOrder] = reorderDocuments(docs); + expect(stateOrder).toEqual(statelessOrder); + }); + + it("ReorderState second call keeps known order and appends new docs", () => { + const state = new ReorderState(); + const [first] = state.reorder(["alpha", "beta", "gamma"]); + const [second] = state.reorder(["gamma", "alpha", "delta"]); + + const knownOrder = first.filter((doc) => doc === "gamma" || doc === "alpha"); + expect(second.slice(0, knownOrder.length)).toEqual(knownOrder); + expect(second[second.length - 1]).toBe("delta"); + }); + + it("ReorderState reset restores first-call behavior", () => { + const docs = ["alpha", "beta", "gamma"]; + const state = new ReorderState(); + + state.reorder(docs); + state.reorder(["gamma", "alpha", "delta"]); + state.reset(); + + const [afterReset] = state.reorder(docs); + 
const [expected] = reorderDocuments(docs); + expect(afterReset).toEqual(expected); + }); + + it("reorderDocuments is deterministic and stateless", () => { + const docs = ["one", "two", "three", "four"]; + const first = reorderDocuments(docs); + const second = reorderDocuments(docs); + expect(first).toEqual(second); + }); + + it("reorderDocuments returns correct originalOrder and newOrder mappings", () => { + const docs = ["one", "two", "three", "four"]; + const [reordered, originalOrder, newOrder] = reorderDocuments(docs); + + expect(originalOrder).toHaveLength(docs.length); + expect(newOrder).toHaveLength(docs.length); + + for (let newIndex = 0; newIndex < reordered.length; newIndex += 1) { + const originalIndex = originalOrder[newIndex]; + expect(reordered[newIndex]).toBe(docs[originalIndex]); + } + + for (let originalIndex = 0; originalIndex < docs.length; originalIndex += 1) { + const mappedNewIndex = newOrder[originalIndex]; + expect(reordered[mappedNewIndex]).toBe(docs[originalIndex]); + } + }); + + it("ReorderState preserves known-doc prefix stability across calls", () => { + const state = new ReorderState(); + const knownDocs = ["alpha", "beta", "gamma"]; + + const [first] = state.reorder(knownDocs); + const [second] = state.reorder(["gamma", "beta", "alpha", "delta"]); + const [third] = state.reorder(["alpha", "epsilon", "gamma", "beta", "zeta"]); + + const knownPrefix = first.filter((doc) => + knownDocs.includes(doc), + ); + + expect(second.slice(0, knownPrefix.length)).toEqual(knownPrefix); + expect(third.slice(0, knownPrefix.length)).toEqual(knownPrefix); + }); +}); diff --git a/openclaw-plugin/src/engine/eviction-heap.ts b/openclaw-plugin/src/engine/eviction-heap.ts new file mode 100644 index 0000000..2c61c43 --- /dev/null +++ b/openclaw-plugin/src/engine/eviction-heap.ts @@ -0,0 +1,315 @@ +import type { NodeMetadata } from "./metadata.js"; + +type HeapEntry = [number, number]; + +export interface EvictionHeapStats { + size: number; + total_tokens: 
number; + max_tokens: number | null; + utilization_pct: number; + avg_tokens_per_node: number; + oldest_access_time: number | null; + newest_access_time: number | null; + num_requests: number; +} + +export class EvictionHeap { + private _heap: HeapEntry[]; + private _metadata: Map; + private _requestToNode: Map; + private _inHeap: Map; + private _maxTokens: number | null; + private _totalTokens: number; + + constructor(maxTokens?: number | null) { + this._heap = []; + this._metadata = new Map(); + this._requestToNode = new Map(); + this._inHeap = new Map(); + this._maxTokens = maxTokens ?? null; + this._totalTokens = 0; + } + + get maxTokens(): number | null { + return this._maxTokens; + } + + set maxTokens(value: number | null) { + this._maxTokens = value; + } + + private _compare(a: HeapEntry, b: HeapEntry): number { + if (a[0] !== b[0]) { + return a[0] - b[0]; + } + return a[1] - b[1]; + } + + private _swap(i: number, j: number): void { + const tmp = this._heap[i]; + this._heap[i] = this._heap[j]; + this._heap[j] = tmp; + } + + private _siftUp(index: number): void { + let current = index; + + while (current > 0) { + const parent = Math.floor((current - 1) / 2); + if (this._compare(this._heap[current], this._heap[parent]) >= 0) { + break; + } + + this._swap(current, parent); + current = parent; + } + } + + private _siftDown(index: number): void { + const n = this._heap.length; + let current = index; + + while (true) { + const left = 2 * current + 1; + const right = 2 * current + 2; + let smallest = current; + + if (left < n && this._compare(this._heap[left], this._heap[smallest]) < 0) { + smallest = left; + } + + if (right < n && this._compare(this._heap[right], this._heap[smallest]) < 0) { + smallest = right; + } + + if (smallest === current) { + break; + } + + this._swap(current, smallest); + current = smallest; + } + } + + private _heapPush(entry: HeapEntry): void { + this._heap.push(entry); + this._siftUp(this._heap.length - 1); + } + + private _heapPop(): 
HeapEntry | null { + if (this._heap.length === 0) { + return null; + } + + if (this._heap.length === 1) { + return this._heap.pop() ?? null; + } + + const min = this._heap[0]; + const last = this._heap.pop(); + if (last !== undefined) { + this._heap[0] = last; + this._siftDown(0); + } + return min; + } + + push(metadata: NodeMetadata): void { + const nodeId = metadata.nodeId; + + if (this._inHeap.get(nodeId) === true) { + const oldMetadata = this._metadata.get(nodeId); + if (oldMetadata) { + this._totalTokens += metadata.extraTokens - oldMetadata.extraTokens; + } + this._metadata.set(nodeId, metadata); + this.updateAccessTime(nodeId, metadata.lastAccessTime); + return; + } + + this._heapPush([metadata.lastAccessTime, nodeId]); + this._metadata.set(nodeId, metadata); + this._inHeap.set(nodeId, true); + this._totalTokens += metadata.extraTokens; + + if (metadata.requestId) { + this._requestToNode.set(metadata.requestId, nodeId); + } + } + + pop(): NodeMetadata | null { + while (this._heap.length > 0) { + const entry = this._heapPop(); + if (entry === null) { + return null; + } + + const [accessTime, nodeId] = entry; + + if (!this._metadata.has(nodeId)) { + continue; + } + + const metadata = this._metadata.get(nodeId); + if (!metadata) { + continue; + } + + if (metadata.lastAccessTime === accessTime) { + this._inHeap.set(nodeId, false); + this._totalTokens -= metadata.extraTokens; + return metadata; + } + } + + return null; + } + + peek(): NodeMetadata | null { + while (this._heap.length > 0) { + const [accessTime, nodeId] = this._heap[0]; + + if (!this._metadata.has(nodeId)) { + this._heapPop(); + continue; + } + + const metadata = this._metadata.get(nodeId); + if (!metadata) { + this._heapPop(); + continue; + } + + if (metadata.lastAccessTime === accessTime) { + return metadata; + } + + this._heapPop(); + } + + return null; + } + + updateAccessTime(nodeId: number, newTime?: number): void { + const metadata = this._metadata.get(nodeId); + if (!metadata) { + return; + 
} + + const effectiveTime = newTime ?? Date.now() / 1000; + metadata.lastAccessTime = effectiveTime; + + this._heapPush([effectiveTime, nodeId]); + } + + remove(nodeId: number): void { + const metadata = this._metadata.get(nodeId); + + if (metadata) { + this._totalTokens -= metadata.extraTokens; + + if (metadata.requestId) { + this._requestToNode.delete(metadata.requestId); + } + + this._metadata.delete(nodeId); + } + + this._inHeap.delete(nodeId); + } + + getNodeByRequestId(requestId: string): NodeMetadata | null { + const nodeId = this._requestToNode.get(requestId); + if (nodeId !== undefined) { + return this._metadata.get(nodeId) ?? null; + } + return null; + } + + updateTokensForRequest(requestId: string, inputTokens: number, outputTokens: number): boolean { + const metadata = this.getNodeByRequestId(requestId); + if (metadata === null) { + return false; + } + + const oldTokens = metadata.totalTokens; + const totalNew = inputTokens + outputTokens; + const delta = totalNew - oldTokens; + + metadata.totalTokens = totalNew; + metadata.extraTokens = Math.max(0, metadata.extraTokens + delta); + metadata.updateAccessTime(); + + this._totalTokens += delta; + this._heapPush([metadata.lastAccessTime, metadata.nodeId]); + + return true; + } + + needsEviction(): boolean { + if (this._maxTokens === null) { + return false; + } + return this._totalTokens > this._maxTokens; + } + + tokensToEvict(): number { + if (this._maxTokens === null || this._totalTokens <= this._maxTokens) { + return 0; + } + return this._totalTokens - this._maxTokens; + } + + getMetadata(nodeId: number): NodeMetadata | null { + return this._metadata.get(nodeId) ?? 
null; + } + + isEmpty(): boolean { + return this.peek() === null; + } + + size(): number { + return this._metadata.size; + } + + totalTokens(): number { + return this._totalTokens; + } + + getAllRequestIds(): Set { + return new Set(this._requestToNode.keys()); + } + + getStats(): EvictionHeapStats { + if (this._metadata.size === 0) { + return { + size: 0, + total_tokens: 0, + max_tokens: this._maxTokens, + utilization_pct: 0, + avg_tokens_per_node: 0, + oldest_access_time: null, + newest_access_time: null, + num_requests: 0 + }; + } + + const accessTimes = Array.from(this._metadata.values(), (m) => m.lastAccessTime); + const utilization = this._maxTokens ? (this._totalTokens / this._maxTokens) * 100 : 0; + + return { + size: this._metadata.size, + total_tokens: this._totalTokens, + max_tokens: this._maxTokens, + utilization_pct: utilization, + avg_tokens_per_node: this._totalTokens / this._metadata.size, + oldest_access_time: Math.min(...accessTimes), + newest_access_time: Math.max(...accessTimes), + num_requests: this._requestToNode.size + }; + } + + toString(): string { + return `EvictionHeap(size=${this._metadata.size}, total_tokens=${this._totalTokens}, max_tokens=${this._maxTokens})`; + } +} diff --git a/openclaw-plugin/src/engine/extract.ts b/openclaw-plugin/src/engine/extract.ts new file mode 100644 index 0000000..10b8c67 --- /dev/null +++ b/openclaw-plugin/src/engine/extract.ts @@ -0,0 +1,969 @@ +import * as crypto from 'crypto'; + +/** + * HTTP Intercept Parser for ContextPilot + * + * Pure parsing/extraction/reconstruction logic for intercepting LLM API requests. + * Extracts documents from system messages, supports reordering, and reconstructs + * the request body with reordered documents. + * + * No server dependencies — independently testable. 
+ */ + +const _KNOWN_WRAPPER_TAGS = new Set(["documents", "contexts", "docs", "passages", "references", "files"]); +const _KNOWN_ITEM_TAGS = new Set(["document", "context", "doc", "passage", "reference", "file"]); + +const _NUMBERED_RE = /\[(\d+)\]\s*/; +const _SEPARATOR_PATTERNS = ["---", "==="]; +const _SINGLE_DOC_MIN_CHARS = 200; + +export interface InterceptConfig { + enabled: boolean; + mode: string; + tag: string; + separator: string; + alpha: number; + linkageMethod: string; + scope: string; +} + +export interface ExtractionResult { + documents: string[]; + prefix: string; + suffix: string; + mode: string; + wrapperTag: string; + itemTag: string; + separatorChar: string; + originalContent: string; + jsonItems: any[] | null; +} + +export interface ToolResultLocation { + msgIndex: number; + blockIndex: number; // -1 = content is string + innerBlockIndex: number; // For Anthropic nested content blocks +} + +export interface SingleDocExtraction { + content: string; + contentHash: string; + toolCallId: string; +} + +export class MultiExtractionResult { + systemExtraction: [ExtractionResult, number] | null = null; + toolExtractions: [ExtractionResult, ToolResultLocation][] = []; + singleDocExtractions: [SingleDocExtraction, ToolResultLocation][] = []; + + get hasExtractions(): boolean { + return ( + this.systemExtraction !== null || + this.toolExtractions.length > 0 || + this.singleDocExtractions.length > 0 + ); + } + + get totalDocuments(): number { + let total = this.singleDocExtractions.length; + if (this.systemExtraction) { + total += this.systemExtraction[0].documents.length; + } + for (const [ext, _] of this.toolExtractions) { + total += ext.documents.length; + } + return total; + } +} + +/** + * Parse X-ContextPilot-* headers into an InterceptConfig. 
+ */ +export function parseInterceptHeaders(headers: Record): InterceptConfig { + const get = (name: string, def: string = ""): string => { + const key = `x-contextpilot-${name}`; + for (const [k, v] of Object.entries(headers)) { + if (k.toLowerCase() === key) { + return v; + } + } + return def; + }; + + const enabledStr = get("enabled", "true").toLowerCase(); + const enabled = !["false", "0", "no"].includes(enabledStr); + + let scope = get("scope", "all").toLowerCase(); + if (!["system", "tool_results", "all"].includes(scope)) { + scope = "all"; + } + + return { + enabled, + mode: get("mode", "auto").toLowerCase(), + tag: get("tag", "document").toLowerCase(), + separator: get("separator", "---"), + alpha: parseFloat(get("alpha", "0.001")) || 0.001, + linkageMethod: get("linkage", "average"), + scope + }; +} + +// ── Document extraction ───────────────────────────────────────────────────── + +function _escapeRegExp(string: string): string { + return string.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); // $& means the whole matched string +} + +export function extractXmlTags(text: string, config: InterceptConfig): ExtractionResult | null { + let itemTagsToTry: string[] = []; + let wrapperTagsToTry: string[] = []; + + if (config.mode === "xml_tag") { + itemTagsToTry.push(config.tag); + wrapperTagsToTry.push(config.tag + "s"); + for (const t of _KNOWN_ITEM_TAGS) { + if (t !== config.tag) itemTagsToTry.push(t); + } + for (const t of _KNOWN_WRAPPER_TAGS) { + if (t !== config.tag + "s") wrapperTagsToTry.push(t); + } + } else { + itemTagsToTry = Array.from(_KNOWN_ITEM_TAGS); + wrapperTagsToTry = Array.from(_KNOWN_WRAPPER_TAGS); + } + + for (const wrapperTag of wrapperTagsToTry) { + const wrapperPattern = new RegExp(`(<${wrapperTag}(?:\\s[^>]*)?>)(.*?)()`, "s"); + const wrapperMatch = wrapperPattern.exec(text); + if (!wrapperMatch) continue; + + const innerText = wrapperMatch[2]; + const prefix = text.substring(0, wrapperMatch.index); + const suffix = 
text.substring(wrapperMatch.index + wrapperMatch[0].length); + + for (const itemTag of itemTagsToTry) { + const itemPattern = new RegExp(`(<${itemTag}(?:\\s[^>]*)?>)(.*?)()`, "gs"); + let items: string[] = []; + while (true) { + const itemMatch = itemPattern.exec(innerText); + if (itemMatch === null) break; + items.push(itemMatch[2].trim()); + } + if (items.length > 0) { + return { + documents: items, + prefix, + suffix, + mode: "xml_tag", + wrapperTag, + itemTag, + separatorChar: "", + originalContent: text, + jsonItems: null + }; + } + } + } + + for (const itemTag of itemTagsToTry) { + const itemPattern = new RegExp(`(<${itemTag}(?:\\s[^>]*)?>)(.*?)()`, "gs"); + const items: RegExpExecArray[] = []; + while (true) { + const match = itemPattern.exec(text); + if (match === null) break; + items.push(match); + } + + if (items.length >= 2) { + const firstStart = items[0].index; + const lastEnd = items[items.length - 1].index + items[items.length - 1][0].length; + return { + documents: items.map(m => m[2].trim()), + prefix: text.substring(0, firstStart), + suffix: text.substring(lastEnd), + mode: "xml_tag", + wrapperTag: "", + itemTag, + separatorChar: "", + originalContent: text, + jsonItems: null + }; + } + } + + return null; +} + +export function extractNumbered(text: string, config: InterceptConfig): ExtractionResult | null { + const splits = text.split(_NUMBERED_RE); + if (splits.length < 4) { + return null; + } + + const prefix = splits[0]; + const documents: string[] = []; + let i = 1; + while (i + 1 < splits.length) { + const docText = splits[i + 1].trim(); + if (docText) { + documents.push(docText); + } + i += 2; + } + + if (documents.length < 2) return null; + + return { + documents, + prefix, + suffix: "", + mode: "numbered", + wrapperTag: "", + itemTag: "", + separatorChar: "", + originalContent: text, + jsonItems: null + }; +} + +export function extractSeparator(text: string, config: InterceptConfig): ExtractionResult | null { + let sep = config.separator; 
+ let parts: string[] = []; + let documents: string[] = []; + + if (config.mode === "auto") { + let found = false; + for (const candidate of _SEPARATOR_PATTERNS) { + const regex = new RegExp(`\\n${_escapeRegExp(candidate)}\\n`); + parts = text.split(regex); + if (parts.length >= 3) { + sep = candidate; + found = true; + break; + } + } + if (!found) return null; + documents = parts.map(p => p.trim()).filter(p => p); + } else { + const regex = new RegExp(`\\n${_escapeRegExp(sep)}\\n`); + parts = text.split(regex); + documents = parts.map(p => p.trim()).filter(p => p); + } + + if (documents.length < 2) return null; + + return { + documents, + prefix: "", + suffix: "", + mode: "separator", + wrapperTag: "", + itemTag: "", + separatorChar: sep, + originalContent: text, + jsonItems: null + }; +} + +export function extractMarkdownHeaders(text: string, config: InterceptConfig): ExtractionResult | null { + const parts = text.split(/(?=^#{1,2}\s)/m); + if (!parts || parts.length === 0) return null; + + let prefix = ""; + const sections: string[] = []; + + for (const part of parts) { + const stripped = part.trim(); + if (!stripped) continue; + + if (/^#{1,2}\s/.test(stripped)) { + sections.push(stripped); + } else { + prefix = part; + } + } + + if (sections.length < 2) return null; + + return { + documents: sections, + prefix, + suffix: "", + mode: "markdown_header", + wrapperTag: "", + itemTag: "", + separatorChar: "", + originalContent: text, + jsonItems: null + }; +} + +const _JSON_ID_KEYS = ["url", "path", "file", "filename", "uri", "href"]; + +function _extractJsonId(item: any): string | null { + for (const key of _JSON_ID_KEYS) { + if (item && typeof item === "object" && key in item) { + const val = item[key]; + if (typeof val === "string" && val.trim()) { + return val.trim(); + } + } + } + return null; +} + +export function extractJsonResults(text: string, config: InterceptConfig): ExtractionResult | null { + const stripped = text.trim(); + if 
(!stripped.startsWith("{")) return null; + + let obj: any; + try { + obj = JSON.parse(stripped); + } catch (e) { + return null; + } + + if (typeof obj !== "object" || obj === null) return null; + + const results = obj.results; + if (!Array.isArray(results) || results.length < 2) return null; + + const documents: string[] = []; + for (const item of results) { + if (typeof item === "object" && item !== null) { + const docId = _extractJsonId(item); + if (docId !== null) { + documents.push(docId); + } else { + documents.push(JSON.stringify(item)); + } + } else { + documents.push(JSON.stringify(item)); + } + } + + if (documents.length < 2) return null; + + return { + documents, + prefix: "", + suffix: "", + mode: "json_results", + wrapperTag: "", + itemTag: "", + separatorChar: "", + originalContent: text, + jsonItems: results + }; +} + +export function extractDocuments(text: string, config: InterceptConfig): ExtractionResult | null { + if (config.mode === "xml_tag") { + return extractXmlTags(text, config); + } else if (config.mode === "numbered") { + return extractNumbered(text, config); + } else if (config.mode === "json_results") { + return extractJsonResults(text, config); + } else if (config.mode === "separator") { + return extractSeparator(text, config); + } else if (config.mode === "markdown_header") { + return extractMarkdownHeaders(text, config); + } else { + let result = extractXmlTags(text, config); + if (result) return result; + result = extractNumbered(text, config); + if (result) return result; + result = extractJsonResults(text, config); + if (result) return result; + return null; + } +} + +// ── Reconstruction ─────────────────────────────────────────────────────────── + +export function reconstructContent(extraction: ExtractionResult, reorderedDocs: string[]): string { + if (extraction.mode === "xml_tag") { + return reconstructXml(extraction, reorderedDocs); + } else if (extraction.mode === "numbered") { + return reconstructNumbered(extraction, 
reorderedDocs); + } else if (extraction.mode === "json_results") { + return reconstructJsonResults(extraction, reorderedDocs); + } else if (extraction.mode === "separator") { + return reconstructSeparator(extraction, reorderedDocs); + } else if (extraction.mode === "markdown_header") { + return reconstructMarkdownHeaders(extraction, reorderedDocs); + } else { + return extraction.originalContent; + } +} + +export function reconstructXml(extraction: ExtractionResult, reorderedDocs: string[]): string { + const itemTag = extraction.itemTag; + const items = reorderedDocs.map(doc => `<${itemTag}>${doc}`).join("\n"); + + let block: string; + if (extraction.wrapperTag) { + const wrapper = extraction.wrapperTag; + block = `<${wrapper}>\n${items}\n`; + } else { + block = items; + } + + return extraction.prefix + block + extraction.suffix; +} + +export function reconstructNumbered(extraction: ExtractionResult, reorderedDocs: string[]): string { + const parts = extraction.prefix ? [extraction.prefix] : []; + for (let i = 0; i < reorderedDocs.length; i++) { + parts.push(`[${i + 1}] ${reorderedDocs[i]}`); + } + let result = parts.length > 0 ? 
parts.join("\n") : ""; + if (extraction.suffix) { + result += extraction.suffix; + } + return result; +} + +export function reconstructJsonResults(extraction: ExtractionResult, reorderedDocs: string[]): string { + const obj = JSON.parse(extraction.originalContent); + if (extraction.jsonItems !== null) { + const origDocs = extraction.documents; + const docToIndices: Record = {}; + for (let i = 0; i < origDocs.length; i++) { + if (!docToIndices[origDocs[i]]) { + docToIndices[origDocs[i]] = []; + } + docToIndices[origDocs[i]].push(i); + } + + const used = new Set(); + const reorderedItems: any[] = []; + for (const doc of reorderedDocs) { + const indices = docToIndices[doc] || []; + for (const idx of indices) { + if (!used.has(idx)) { + reorderedItems.push(extraction.jsonItems[idx]); + used.add(idx); + break; + } + } + } + obj.results = reorderedItems; + } else { + obj.results = reorderedDocs.map(doc => JSON.parse(doc)); + } + return JSON.stringify(obj, null, 2); +} + +export function reconstructSeparator(extraction: ExtractionResult, reorderedDocs: string[]): string { + const sep = extraction.separatorChar || "---"; + return reorderedDocs.join(`\n${sep}\n`); +} + +export function reconstructMarkdownHeaders(extraction: ExtractionResult, reorderedDocs: string[]): string { + const parts: string[] = []; + if (extraction.prefix.trim()) { + parts.push(extraction.prefix.trimEnd()); + } + parts.push(...reorderedDocs); + return parts.join("\n\n"); +} + +// ── OpenAI Chat format ────────────────────────────────────────────────────── + +export function extractFromOpenaiChat(body: any, config: InterceptConfig): [ExtractionResult, number] | null { + const messages = body?.messages; + if (!messages || !Array.isArray(messages)) return null; + + for (let i = 0; i < messages.length; i++) { + const msg = messages[i]; + if (msg?.role !== "system") continue; + + const content = msg.content || ""; + if (typeof content === "string") { + const result = extractDocuments(content, config); + 
if (result) return [result, i]; + } else if (Array.isArray(content)) { + for (const block of content) { + if (block && typeof block === "object" && block.type === "text") { + const result = extractDocuments(block.text || "", config); + if (result) return [result, i]; + } + } + } + } + return null; +} + +export function reconstructOpenaiChat( + body: any, + extraction: ExtractionResult, + reorderedDocs: string[], + systemMsgIndex: number +): any { + const newBody = structuredClone(body); + const newContent = reconstructContent(extraction, reorderedDocs); + const msg = newBody.messages[systemMsgIndex]; + + if (typeof msg.content === "string") { + msg.content = newContent; + } else if (Array.isArray(msg.content)) { + for (const block of msg.content) { + if (block && typeof block === "object" && block.type === "text") { + // Using dummy config since we just check if it was the block with documents + if (extractDocuments(block.text || "", parseInterceptHeaders({}))) { + block.text = newContent; + break; + } + } + } + } + return newBody; +} + +// ── Anthropic Messages format ─────────────────────────────────────────────── + +export function extractFromAnthropicMessages(body: any, config: InterceptConfig): ExtractionResult | null { + const system = body?.system; + if (system === undefined || system === null) return null; + + if (typeof system === "string") { + return extractDocuments(system, config); + } else if (Array.isArray(system)) { + for (const block of system) { + if (block && typeof block === "object" && block.type === "text") { + const result = extractDocuments(block.text || "", config); + if (result) return result; + } + } + } + return null; +} + +export function reconstructAnthropicMessages( + body: any, + extraction: ExtractionResult, + reorderedDocs: string[] +): any { + const newBody = structuredClone(body); + const newContent = reconstructContent(extraction, reorderedDocs); + + if (typeof newBody.system === "string") { + newBody.system = newContent; + } 
else if (Array.isArray(newBody.system)) { + for (const block of newBody.system) { + if (block && typeof block === "object" && block.type === "text") { + if (extractDocuments(block.text || "", parseInterceptHeaders({}))) { + block.text = newContent; + break; + } + } + } + } + return newBody; +} + +// ── Tool result extraction ───────────────────────────────────────────────── + +export function extractFromOpenaiToolResults(body: any, config: InterceptConfig): [ExtractionResult, ToolResultLocation][] { + const messages = body?.messages; + if (!messages || !Array.isArray(messages)) return []; + + const results: [ExtractionResult, ToolResultLocation][] = []; + for (let i = 0; i < messages.length; i++) { + const msg = messages[i]; + if (msg?.role !== "tool" && msg?.role !== "toolResult") continue; + + const content = msg.content || ""; + if (typeof content === "string") { + const extraction = extractDocuments(content, config); + if (extraction && extraction.documents.length >= 2) { + results.push([extraction, { msgIndex: i, blockIndex: -1, innerBlockIndex: -1 }]); + } + } else if (Array.isArray(content)) { + for (let j = 0; j < content.length; j++) { + const block = content[j]; + if (block && typeof block === "object" && block.type === "text") { + const extraction = extractDocuments(block.text || "", config); + if (extraction && extraction.documents.length >= 2) { + results.push([extraction, { msgIndex: i, blockIndex: j, innerBlockIndex: -1 }]); + } + } + } + } + } + return results; +} + +export function extractFromAnthropicToolResults(body: any, config: InterceptConfig): [ExtractionResult, ToolResultLocation][] { + const messages = body?.messages; + if (!messages || !Array.isArray(messages)) return []; + + const results: [ExtractionResult, ToolResultLocation][] = []; + for (let i = 0; i < messages.length; i++) { + const msg = messages[i]; + if (msg?.role !== "user") continue; + + const content = msg.content; + if (!Array.isArray(content)) continue; + + for (let j = 0; j 
< content.length; j++) { + const block = content[j]; + if (!block || typeof block !== "object" || (block.type !== "tool_result" && block.type !== "toolResult")) continue; + + const trContent = block.content || ""; + if (typeof trContent === "string") { + const extraction = extractDocuments(trContent, config); + if (extraction && extraction.documents.length >= 2) { + results.push([extraction, { msgIndex: i, blockIndex: j, innerBlockIndex: -1 }]); + } + } else if (Array.isArray(trContent)) { + for (let k = 0; k < trContent.length; k++) { + const inner = trContent[k]; + if (inner && typeof inner === "object" && inner.type === "text") { + const extraction = extractDocuments(inner.text || "", config); + if (extraction && extraction.documents.length >= 2) { + results.push([extraction, { msgIndex: i, blockIndex: j, innerBlockIndex: k }]); + } + } + } + } + } + } + return results; +} + +// ── Tool result reconstruction ───────────────────────────────────────────── + +export function reconstructOpenaiToolResult( + body: any, + extraction: ExtractionResult, + reorderedDocs: string[], + location: ToolResultLocation +): void { + const newContent = reconstructContent(extraction, reorderedDocs); + const msg = body.messages[location.msgIndex]; + if (location.blockIndex === -1) { + msg.content = newContent; + } else { + msg.content[location.blockIndex].text = newContent; + } +} + +export function reconstructAnthropicToolResult( + body: any, + extraction: ExtractionResult, + reorderedDocs: string[], + location: ToolResultLocation +): void { + const newContent = reconstructContent(extraction, reorderedDocs); + const msg = body.messages[location.msgIndex]; + const block = msg.content[location.blockIndex]; + if (location.innerBlockIndex === -1) { + block.content = newContent; + } else { + block.content[location.innerBlockIndex].text = newContent; + } +} + +// ── Aggregate extraction ─────────────────────────────────────────────────── + +export function extractAllOpenai(body: any, 
config: InterceptConfig): MultiExtractionResult { + const result = new MultiExtractionResult(); + if (["system", "all"].includes(config.scope)) { + const sysResult = extractFromOpenaiChat(body, config); + if (sysResult) { + result.systemExtraction = sysResult; + } + } + if (["tool_results", "all"].includes(config.scope)) { + result.toolExtractions = extractFromOpenaiToolResults(body, config); + result.singleDocExtractions = extractSingleDocsFromOpenaiToolResults(body, config); + } + return result; +} + +export function extractAllAnthropic(body: any, config: InterceptConfig): MultiExtractionResult { + const result = new MultiExtractionResult(); + if (["system", "all"].includes(config.scope)) { + const sysExtraction = extractFromAnthropicMessages(body, config); + if (sysExtraction && sysExtraction.documents.length >= 2) { + result.systemExtraction = [sysExtraction, -1]; + } + } + if (["tool_results", "all"].includes(config.scope)) { + result.toolExtractions = extractFromAnthropicToolResults(body, config); + result.singleDocExtractions = extractSingleDocsFromAnthropicToolResults(body, config); + } + return result; +} + +// ── Single-document extraction (for cross-turn dedup) ───────────────────── + +function _makeSingleDoc(content: string, toolCallId: string = ""): SingleDocExtraction { + const stripped = content.trim(); + const contentHash = crypto.createHash("sha256").update(stripped).digest("hex"); + return { + content: stripped, + contentHash, + toolCallId + }; +} + +export function extractSingleDocsFromOpenaiToolResults( + body: any, config: InterceptConfig +): [SingleDocExtraction, ToolResultLocation][] { + const messages = body?.messages; + if (!messages || !Array.isArray(messages)) return []; + + const results: [SingleDocExtraction, ToolResultLocation][] = []; + for (let i = 0; i < messages.length; i++) { + const msg = messages[i]; + if (msg?.role !== "tool" && msg?.role !== "toolResult") continue; + + const toolCallId = msg.tool_call_id || ""; + const content 
= msg.content || ""; + + if (typeof content === "string") { + const extraction = extractDocuments(content, config); + if (extraction && extraction.documents.length >= 2) continue; + + if (content.trim().length >= _SINGLE_DOC_MIN_CHARS) { + results.push([ + _makeSingleDoc(content, toolCallId), + { msgIndex: i, blockIndex: -1, innerBlockIndex: -1 } + ]); + } + } else if (Array.isArray(content)) { + for (let j = 0; j < content.length; j++) { + const block = content[j]; + if (!block || typeof block !== "object" || block.type !== "text") continue; + + const text = block.text || ""; + const extraction = extractDocuments(text, config); + if (extraction && extraction.documents.length >= 2) continue; + + if (text.trim().length >= _SINGLE_DOC_MIN_CHARS) { + results.push([ + _makeSingleDoc(text, toolCallId), + { msgIndex: i, blockIndex: j, innerBlockIndex: -1 } + ]); + } + } + } + } + return results; +} + +export function extractSingleDocsFromAnthropicToolResults( + body: any, config: InterceptConfig +): [SingleDocExtraction, ToolResultLocation][] { + const messages = body?.messages; + if (!messages || !Array.isArray(messages)) return []; + + const results: [SingleDocExtraction, ToolResultLocation][] = []; + for (let i = 0; i < messages.length; i++) { + const msg = messages[i]; + if (msg?.role !== "user") continue; + + const content = msg.content; + if (!Array.isArray(content)) continue; + + for (let j = 0; j < content.length; j++) { + const block = content[j]; + if (!block || typeof block !== "object") continue; + if (block.type !== "tool_result" && block.type !== "toolResult") continue; + + const toolUseId = block.tool_use_id || ""; + const trContent = block.content || ""; + + if (typeof trContent === "string") { + const extraction = extractDocuments(trContent, config); + if (extraction && extraction.documents.length >= 2) continue; + + if (trContent.trim().length >= _SINGLE_DOC_MIN_CHARS) { + results.push([ + _makeSingleDoc(trContent, toolUseId), + { msgIndex: i, 
blockIndex: j, innerBlockIndex: -1 } + ]); + } + } else if (Array.isArray(trContent)) { + for (let k = 0; k < trContent.length; k++) { + const inner = trContent[k]; + if (!inner || typeof inner !== "object" || inner.type !== "text") continue; + + const text = inner.text || ""; + const extraction = extractDocuments(text, config); + if (extraction && extraction.documents.length >= 2) continue; + + if (text.trim().length >= _SINGLE_DOC_MIN_CHARS) { + results.push([ + _makeSingleDoc(text, toolUseId), + { msgIndex: i, blockIndex: j, innerBlockIndex: k } + ]); + } + } + } + } + } + return results; +} + +// ── Single-document hint replacement ────────────────────────────────────── + +export function replaceSingleDocOpenai( + body: any, location: ToolResultLocation, hint: string +): void { + const msg = body.messages[location.msgIndex]; + if (location.blockIndex === -1) { + msg.content = hint; + } else { + msg.content[location.blockIndex].text = hint; + } +} + +export function replaceSingleDocAnthropic( + body: any, location: ToolResultLocation, hint: string +): void { + const msg = body.messages[location.msgIndex]; + const block = msg.content[location.blockIndex]; + if (location.innerBlockIndex === -1) { + block.content = hint; + } else { + block.content[location.innerBlockIndex].text = hint; + } +} + +// ── Format handler abstraction ───────────────────────────────────────────── + +export interface FormatHandler { + extractAll(body: any, config: InterceptConfig): MultiExtractionResult; + reconstructSystem(body: any, extraction: ExtractionResult, docs: string[], sysIdx: number): void; + reconstructToolResult(body: any, extraction: ExtractionResult, docs: string[], location: ToolResultLocation): void; + replaceSingleDoc(body: any, location: ToolResultLocation, hint: string): void; + toolCallPresent(body: any, toolCallId: string): boolean; + targetPath(): string; + cacheSystem(body: any): any; + restoreSystem(body: any, cached: any): void; +} + +export class 
OpenAIChatHandler implements FormatHandler { + extractAll(body: any, config: InterceptConfig): MultiExtractionResult { + return extractAllOpenai(body, config); + } + + reconstructSystem(body: any, extraction: ExtractionResult, docs: string[], sysIdx: number): void { + const newContent = reconstructContent(extraction, docs); + const msg = body.messages[sysIdx]; + if (typeof msg.content === "string") { + msg.content = newContent; + } else if (Array.isArray(msg.content)) { + for (const block of msg.content) { + if (block && typeof block === "object" && block.type === "text") { + if (extractDocuments(block.text || "", parseInterceptHeaders({}))) { + block.text = newContent; + break; + } + } + } + } + } + + reconstructToolResult(body: any, extraction: ExtractionResult, docs: string[], location: ToolResultLocation): void { + reconstructOpenaiToolResult(body, extraction, docs, location); + } + + replaceSingleDoc(body: any, location: ToolResultLocation, hint: string): void { + replaceSingleDocOpenai(body, location, hint); + } + + toolCallPresent(body: any, toolCallId: string): boolean { + for (const msg of (body.messages || [])) { + if (msg.role === "tool" || msg.role === "toolResult") { + if (msg.tool_call_id === toolCallId) return true; + } + } + return false; + } + + targetPath(): string { + return "/v1/chat/completions"; + } + + cacheSystem(body: any): any { + return null; // System prompt is inside messages array + } + + restoreSystem(body: any, cached: any): void { + // No-op + } +} + +export class AnthropicMessagesHandler implements FormatHandler { + extractAll(body: any, config: InterceptConfig): MultiExtractionResult { + return extractAllAnthropic(body, config); + } + + reconstructSystem(body: any, extraction: ExtractionResult, docs: string[], sysIdx: number): void { + const newContent = reconstructContent(extraction, docs); + if (typeof body.system === "string") { + body.system = newContent; + } else if (Array.isArray(body.system)) { + for (const block of 
body.system) { + if (block && typeof block === "object" && block.type === "text") { + if (extractDocuments(block.text || "", parseInterceptHeaders({}))) { + block.text = newContent; + break; + } + } + } + } + } + + reconstructToolResult(body: any, extraction: ExtractionResult, docs: string[], location: ToolResultLocation): void { + reconstructAnthropicToolResult(body, extraction, docs, location); + } + + replaceSingleDoc(body: any, location: ToolResultLocation, hint: string): void { + replaceSingleDocAnthropic(body, location, hint); + } + + toolCallPresent(body: any, toolCallId: string): boolean { + for (const msg of (body.messages || [])) { + if (msg.role === "user" && Array.isArray(msg.content)) { + for (const block of msg.content) { + if (block && typeof block === "object" && + (block.type === "tool_result" || block.type === "toolResult") && + block.tool_use_id === toolCallId) { + return true; + } + } + } + } + return false; + } + + targetPath(): string { + return "/v1/messages"; + } + + cacheSystem(body: any): any { + return structuredClone(body.system); + } + + restoreSystem(body: any, cached: any): void { + if (cached !== null && cached !== undefined) { + body.system = structuredClone(cached); + } + } +} + +const _FORMAT_HANDLERS: Record = { + "openai_chat": new OpenAIChatHandler(), + "anthropic_messages": new AnthropicMessagesHandler() +}; + +export function getFormatHandler(apiFormat: string): FormatHandler { + return _FORMAT_HANDLERS[apiFormat] || _FORMAT_HANDLERS["openai_chat"]; +} diff --git a/openclaw-plugin/src/engine/http-client.ts b/openclaw-plugin/src/engine/http-client.ts new file mode 100644 index 0000000..1166785 --- /dev/null +++ b/openclaw-plugin/src/engine/http-client.ts @@ -0,0 +1,267 @@ +type JsonObject = Record; + +function isJsonObject(value: unknown): value is JsonObject { + return typeof value === "object" && value !== null && !Array.isArray(value); +} + +async function fetchJson( + url: string, + init: RequestInit, + timeoutMs: number, 
+): Promise { + try { + const response = await fetch(url, { + ...init, + signal: AbortSignal.timeout(timeoutMs), + }); + + if (!response.ok) { + return null; + } + + const data: unknown = await response.json(); + return isJsonObject(data) ? data : null; + } catch { + return null; + } +} + +export class ContextPilotIndexClient { + private readonly baseUrl: string; + + private readonly timeout: number; + + private readonly retryOnFailure: boolean; + + constructor( + baseUrl: string = "http://localhost:8765", + timeout: number = 1000, + retryOnFailure: boolean = false, + ) { + this.baseUrl = baseUrl.replace(/\/+$/, ""); + this.timeout = timeout; + this.retryOnFailure = retryOnFailure; + } + + private async _post(endpoint: string, jsonData: JsonObject): Promise { + const url = `${this.baseUrl}${endpoint}`; + const attempt = () => + fetchJson( + url, + { + method: "POST", + headers: { + "Content-Type": "application/json", + }, + body: JSON.stringify(jsonData), + }, + this.timeout, + ); + + const result = await attempt(); + if (result !== null || !this.retryOnFailure) { + return result; + } + + return attempt(); + } + + private async _get(endpoint: string): Promise { + const url = `${this.baseUrl}${endpoint}`; + const attempt = () => + fetchJson( + url, + { + method: "GET", + }, + this.timeout, + ); + + const result = await attempt(); + if (result !== null || !this.retryOnFailure) { + return result; + } + + return attempt(); + } + + async evict(requestIds: string[]): Promise { + return this._post("/evict", { request_ids: requestIds }); + } + + async search(context: number[], updateAccess: boolean = true): Promise { + return this._post("/search", { + context, + update_access: updateAccess, + }); + } + + async updateNode(searchPath: number[], tokenDelta: number): Promise { + return this._post("/update", { + search_path: searchPath, + token_delta: tokenDelta, + }); + } + + async insert( + context: number[], + searchPath: number[], + totalTokens: number = 0, + ): Promise { + 
return this._post("/insert", { + context, + search_path: searchPath, + total_tokens: totalTokens, + }); + } + + async reorder( + contexts: Array>, + alpha: number = 0.001, + useGpu: boolean = false, + linkageMethod: string = "average", + initialTokensPerContext: number = 0, + deduplicate: boolean = false, + parentRequestIds?: Array, + hintTemplate?: string, + ): Promise<[Array>, number[]] | null> { + const result = await this.reorderRaw( + contexts, + alpha, + useGpu, + linkageMethod, + initialTokensPerContext, + deduplicate, + parentRequestIds, + hintTemplate, + ); + + if (result === null) { + return null; + } + + const reorderedContexts = result.reordered_contexts; + const originalIndices = result.original_indices; + + if (!Array.isArray(reorderedContexts) || !Array.isArray(originalIndices)) { + return null; + } + + if (!originalIndices.every((index) => typeof index === "number")) { + return null; + } + + return [reorderedContexts as Array>, originalIndices as number[]]; + } + + async reorderRaw( + contexts: Array>, + alpha: number = 0.001, + useGpu: boolean = false, + linkageMethod: string = "average", + initialTokensPerContext: number = 0, + deduplicate: boolean = false, + parentRequestIds?: Array, + hintTemplate?: string, + ): Promise { + const payload: JsonObject = { + contexts, + alpha, + use_gpu: useGpu, + linkage_method: linkageMethod, + initial_tokens_per_context: initialTokensPerContext, + deduplicate, + }; + + if (parentRequestIds !== undefined) { + payload.parent_request_ids = parentRequestIds; + } + + if (hintTemplate !== undefined) { + payload.hint_template = hintTemplate; + } + + return this._post("/reorder", payload); + } + + async deduplicate( + contexts: number[][], + parentRequestIds: Array, + hintTemplate?: string, + ): Promise { + const payload: JsonObject = { + contexts, + parent_request_ids: parentRequestIds, + }; + + if (hintTemplate !== undefined) { + payload.hint_template = hintTemplate; + } + + return this._post("/deduplicate", payload); 
+ } + + async reset(): Promise { + return this._post("/reset", {}); + } + + async getRequests(): Promise { + return this._get("/requests"); + } + + async getStats(): Promise { + return this._get("/stats"); + } + + async health(): Promise { + return this._get("/health"); + } + + async isReady(): Promise { + const health = await this.health(); + return health !== null && health.status === "ready"; + } +} + +export async function evictRequests( + requestIds: string[], + serverUrl: string = "http://localhost:8765", +): Promise { + return fetchJson( + `${serverUrl.replace(/\/+$/, "")}/evict`, + { + method: "POST", + headers: { + "Content-Type": "application/json", + }, + body: JSON.stringify({ request_ids: requestIds }), + }, + 1000, + ); +} + +export async function scheduleBatch( + contexts: number[][], + serverUrl: string = "http://localhost:8765", + alpha: number = 0.001, + useGpu: boolean = false, + linkageMethod: string = "average", + timeout: number = 30000, +): Promise { + return fetchJson( + `${serverUrl.replace(/\/+$/, "")}/reorder`, + { + method: "POST", + headers: { + "Content-Type": "application/json", + }, + body: JSON.stringify({ + contexts, + alpha, + use_gpu: useGpu, + linkage_method: linkageMethod, + }), + }, + timeout, + ); +} diff --git a/openclaw-plugin/src/engine/index-construction.ts b/openclaw-plugin/src/engine/index-construction.ts new file mode 100644 index 0000000..a0f28d5 --- /dev/null +++ b/openclaw-plugin/src/engine/index-construction.ts @@ -0,0 +1,348 @@ +import { ClusterNode, NodeManager, NodeStats } from './tree-nodes.js'; +import { IntraContextOrderer } from './intra-ordering.js'; +import { computeDistanceMatrixCpu } from './compute-distance.js'; + +export function linkage( + condensedDistances: Float64Array, + n: number, + method: "single" | "complete" | "average" = "average" +): number[][] { + const dist: number[][] = Array.from({length: n}, () => new Array(n).fill(Infinity)); + for (let i = 0; i < n; i++) dist[i][i] = 0; + for (let i 
= 0; i < n; i++) { + for (let j = i + 1; j < n; j++) { + const idx = n * i - (i * (i + 1)) / 2 + j - i - 1; + dist[i][j] = condensedDistances[idx]; + dist[j][i] = condensedDistances[idx]; + } + } + + const active = new Set(Array.from({length: n}, (_, i) => i)); + const sizes = new Array(2 * n - 1).fill(1); + const result: number[][] = []; + + for (let step = 0; step < n - 1; step++) { + let minDist = Infinity; + let minI = -1, minJ = -1; + + for (const i of active) { + for (const j of active) { + if (j <= i) continue; + if (dist[i][j] < minDist) { + minDist = dist[i][j]; + minI = i; + minJ = j; + } + } + } + + const newClusterId = n + step; + const sizeNew = sizes[minI] + sizes[minJ]; + sizes[newClusterId] = sizeNew; + + result.push([minI, minJ, minDist, sizeNew]); + + while (dist.length <= newClusterId) { + dist.push(new Array(dist[0]?.length ?? 0).fill(Infinity)); + } + for (const row of dist) { + while (row.length <= newClusterId) row.push(Infinity); + } + dist[newClusterId][newClusterId] = 0; + + for (const k of active) { + if (k === minI || k === minJ) continue; + let newDist: number; + if (method === "single") { + newDist = Math.min(dist[minI][k], dist[minJ][k]); + } else if (method === "complete") { + newDist = Math.max(dist[minI][k], dist[minJ][k]); + } else { // average (UPGMA) + newDist = (dist[minI][k] * sizes[minI] + dist[minJ][k] * sizes[minJ]) / sizeNew; + } + dist[newClusterId][k] = newDist; + dist[k][newClusterId] = newDist; + } + + active.delete(minI); + active.delete(minJ); + active.add(newClusterId); + } + + return result; +} + +export class IndexResult { + linkageMatrix: number[][]; + clusterNodes: Map; + uniqueNodes: Map; + reorderedContexts: (number[] | string[])[]; + originalContexts: (number[] | string[])[]; + stats: NodeStats; + searchPaths: number[][] | null; + + // Legacy attributes for backward compatibility + reorderedPrompts: (number[] | string[])[]; + originalPrompts: (number[] | string[])[]; + + constructor( + linkageMatrix: 
number[][], + clusterNodes: Map, + uniqueNodes: Map, + reorderedContexts: (number[] | string[])[], + originalContexts: (number[] | string[])[], + stats: NodeStats, + searchPaths: number[][] | null = null + ) { + this.linkageMatrix = linkageMatrix; + this.clusterNodes = clusterNodes; + this.uniqueNodes = uniqueNodes; + this.reorderedContexts = reorderedContexts; + this.originalContexts = originalContexts; + this.stats = stats; + this.searchPaths = searchPaths; + + this.reorderedPrompts = this.reorderedContexts; + this.originalPrompts = this.originalContexts; + } + + printTree(): void { + console.log("\n--- Unique Cluster Tree Nodes ---"); + const sortedKeys = Array.from(this.uniqueNodes.keys()).sort((a, b) => a - b); + for (const nodeId of sortedKeys) { + const node = this.uniqueNodes.get(nodeId); + if (!node) continue; + console.log(`ClusterNode ${nodeId}`); + console.log(` Content: [${node.docIds.join(', ')}]`); + console.log(` Original indices: [${Array.from(node.originalIndices).sort((a, b) => a - b).join(', ')}]`); + if (node.searchPath && node.searchPath.length > 0) { + const pathStr = "[" + node.searchPath.join("][") + "]"; + console.log(` Search path (child indices from root): ${pathStr}`); + } else { + console.log(` Search path: (root node)`); + } + if (!node.isLeaf) { + console.log(` Children: [${node.children.join(', ')}]`); + console.log(` Merge distance: ${node.mergeDistance.toFixed(4)}`); + } + console.log("-".repeat(40)); + } + } +} + +export interface ContextIndexOptions { + linkageMethod?: "single" | "complete" | "average"; + useGpu?: boolean; + alpha?: number; + numWorkers?: number | null; + batchSize?: number; +} + +export class ContextIndex { + linkageMethod: "single" | "complete" | "average"; + useGpu: boolean; + alpha: number; + numWorkers: number | null; + batchSize: number; + + nodeManager: NodeManager; + contextOrderer: IntraContextOrderer; + + _strToId: Map; + _idToStr: Map; + _nextStrId: number; + _isStringInput: boolean; + + 
constructor(options: ContextIndexOptions = {}) { + this.linkageMethod = options.linkageMethod || "average"; + this.useGpu = false; + this.alpha = options.alpha !== undefined ? options.alpha : 0.001; + this.numWorkers = options.numWorkers || null; + this.batchSize = options.batchSize || 1000; + + this.nodeManager = new NodeManager(); + this.contextOrderer = new IntraContextOrderer(); + + this._strToId = new Map(); + this._idToStr = new Map(); + this._nextStrId = 0; + this._isStringInput = false; + } + + _convertToInt(contexts: (number[] | string[])[]): number[][] { + if (!contexts || contexts.length === 0 || !contexts[0] || contexts[0].length === 0) { + return contexts as number[][]; + } + if (typeof contexts[0][0] === "string") { + this._isStringInput = true; + const converted: number[][] = []; + for (const ctx of contexts as string[][]) { + const convertedCtx: number[] = []; + for (const item of ctx) { + let sid = this._strToId.get(item); + if (sid === undefined) { + sid = this._nextStrId; + this._strToId.set(item, sid); + this._idToStr.set(sid, item); + this._nextStrId += 1; + } + convertedCtx.push(sid); + } + converted.push(convertedCtx); + } + return converted; + } + return contexts as number[][]; + } + + _convertToStr(contexts: number[][]): string[][] { + if (!this._isStringInput || !contexts || contexts.length === 0) { + return contexts as any; + } + if (contexts[0] && typeof contexts[0][0] === "string") { + return contexts as any; + } + const result: string[][] = []; + for (const ctx of contexts) { + const strCtx: string[] = []; + for (const i of ctx) { + strCtx.push(this._idToStr.get(i) as string); + } + result.push(strCtx); + } + return result; + } + + fitTransform(contexts: (number[] | string[])[]): IndexResult { + const intContexts = this._convertToInt(contexts); + const n = intContexts.length; + + if (n < 2) { + return this._handleSinglePrompt(intContexts); + } + + const condensedDistances = this._computeDistanceMatrix(intContexts); + const 
linkageMatrix = linkage(condensedDistances, n, this.linkageMethod); + + this._buildTree(intContexts, linkageMatrix); + + this.nodeManager.cleanupEmptyNodes(); + this.nodeManager.updateSearchPaths(); + + const reorderedContexts = this.contextOrderer.reorderContexts( + intContexts, + this.nodeManager.uniqueNodes + ); + + const searchPaths = this.contextOrderer.extractSearchPaths( + this.nodeManager.uniqueNodes, + intContexts.length + ); + + const stats = this.nodeManager.getNodeStats(); + + return new IndexResult( + linkageMatrix, + this.nodeManager.clusterNodes, + this.nodeManager.uniqueNodes, + reorderedContexts, + intContexts, + stats, + searchPaths + ); + } + + _computeDistanceMatrix(contexts: number[][]): Float64Array { + return computeDistanceMatrixCpu(contexts, this.alpha); + } + + _handleSinglePrompt(contexts: number[][]): IndexResult { + for (let i = 0; i < contexts.length; i++) { + const prompt = contexts[i]; + const node = this.nodeManager.createLeafNode(i, prompt); + node.docIds = [...prompt]; + } + + const leafIds = Array.from(this.nodeManager.uniqueNodes.keys()); + const virtualRootId = leafIds.length > 0 ? 
Math.max(...leafIds) + 1 : 0; + + let freqSum = 0; + for (const nid of leafIds) { + const n = this.nodeManager.uniqueNodes.get(nid); + if (n) freqSum += n.frequency; + } + + const virtualRoot = new ClusterNode( + virtualRootId, + new Set(), + new Set(), + 0.0, + leafIds, + null, + freqSum + ); + this.nodeManager.uniqueNodes.set(virtualRootId, virtualRoot); + + for (const nid of leafIds) { + const n = this.nodeManager.uniqueNodes.get(nid); + if (n) { + n.parent = virtualRootId; + } + } + + this.nodeManager.updateSearchPaths(); + + const searchPaths = this.contextOrderer.extractSearchPaths( + this.nodeManager.uniqueNodes, + contexts.length + ); + + const reorderedContexts = contexts.map(c => [...c]); + + return new IndexResult( + [], + this.nodeManager.clusterNodes, + this.nodeManager.uniqueNodes, + reorderedContexts, + contexts, + this.nodeManager.getNodeStats(), + searchPaths + ); + } + + _buildTree(contexts: number[][], linkageMatrix: number[][]): void { + const n = contexts.length; + + for (let i = 0; i < n; i++) { + this.nodeManager.createLeafNode(i, contexts[i]); + } + + for (let i = 0; i < linkageMatrix.length; i++) { + const [idx1, idx2, distance] = linkageMatrix[i]; + const newNodeId = n + i; + this.nodeManager.createInternalNode( + newNodeId, + Math.floor(idx1), + Math.floor(idx2), + distance + ); + } + } +} + +export function buildContextIndex( + contexts: (number[] | string[])[], + options: ContextIndexOptions = {} +): IndexResult { + const indexer = new ContextIndex(options); + const result = indexer.fitTransform(contexts); + + if (indexer._isStringInput) { + result.reorderedContexts = indexer._convertToStr(result.reorderedContexts as number[][]); + result.originalContexts = indexer._convertToStr(result.originalContexts as number[][]); + result.reorderedPrompts = result.reorderedContexts; + result.originalPrompts = result.originalContexts; + } + + return result; +} diff --git a/openclaw-plugin/src/engine/integration.test.ts 
b/openclaw-plugin/src/engine/integration.test.ts new file mode 100644 index 0000000..74328a6 --- /dev/null +++ b/openclaw-plugin/src/engine/integration.test.ts @@ -0,0 +1,362 @@ +import { describe, it, expect } from "vitest"; +import { getFormatHandler, type InterceptConfig } from "./extract.js"; +import { dedupChatCompletions, dedupResponsesApi } from "./dedup.js"; +import { injectCacheControl } from "./cache-control.js"; +import { ReorderState } from "./reorder.js"; + +function runPipeline( + body: Record, + opts: { + provider?: "anthropic" | "openai"; + scope?: string; + reorderState?: ReorderState; + } = {} +): Record { + const provider = opts.provider ?? "anthropic"; + const scope = opts.scope ?? "all"; + const reorderState = opts.reorderState ?? new ReorderState(); + + const clonedBody = structuredClone(body); + const apiFormat = provider === "anthropic" ? "anthropic_messages" : "openai_chat"; + + const interceptConfig: InterceptConfig = { + enabled: true, + mode: "auto", + tag: "document", + separator: "---", + alpha: 0.001, + linkageMethod: "average", + scope, + }; + + const handler = getFormatHandler(apiFormat); + const multi = handler.extractAll(clonedBody, interceptConfig); + + if (multi.systemExtraction) { + const [extraction, sysIdx] = multi.systemExtraction; + if (extraction.documents.length >= 2) { + const [reordered] = reorderState.reorder(extraction.documents); + handler.reconstructSystem(clonedBody, extraction, reordered, sysIdx); + } + } + + for (const [extraction, location] of multi.toolExtractions) { + if (extraction.documents.length >= 2) { + const [reordered] = reorderState.reorder(extraction.documents); + handler.reconstructToolResult(clonedBody, extraction, reordered, location); + } + } + + if (apiFormat === "openai_chat") { + dedupChatCompletions(clonedBody as any); + } + if (clonedBody.input && Array.isArray(clonedBody.input)) { + dedupResponsesApi(clonedBody as any); + } + + return injectCacheControl(clonedBody, provider); +} + 
+describe("full pipeline — Anthropic", () => { + it("system prompt with XML documents gets reordered and cache-controlled", () => { + const body = { + model: "claude-sonnet-4-6", + system: `You are a helpful assistant.\n\n\nFirst document about TypeScript.\nIt has multiple lines.\n\n\nSecond document about Python.\nAlso multi-line.\n\n\nThird document about Rust.\nYet another multi-line doc.\n\n\nPlease answer based on the above.`, + messages: [{ role: "user", content: "Summarize the documents." }], + }; + + const reorderState = new ReorderState(); + const result = runPipeline(body, { provider: "anthropic", reorderState }); + + expect(Array.isArray(result.system)).toBe(true); + const systemArray = result.system as any[]; + + const lastBlock = systemArray[systemArray.length - 1]; + expect(lastBlock.cache_control).toEqual({ type: "ephemeral" }); + + const textContent = systemArray.map(b => b.text).join(""); + expect(textContent).toContain("You are a helpful assistant."); + expect(textContent).toContain("Please answer based on the above."); + + expect(textContent).toContain("First document about TypeScript."); + expect(textContent).toContain("Second document about Python."); + expect(textContent).toContain("Third document about Rust."); + }); + + it("Anthropic tool_result with large content gets cache_control", () => { + const body = { + model: "claude-sonnet-4-6", + system: "You are helpful.", + messages: [ + { + role: "user", + content: [ + { type: "tool_result", tool_use_id: "tu_1", content: "A".repeat(2000) }, + ], + }, + ], + }; + + const result = runPipeline(body, { provider: "anthropic" }); + const messages = result.messages as any[]; + const content = messages[0].content as any[]; + expect(content[0].cache_control).toEqual({ type: "ephemeral" }); + }); + + it("Anthropic scope=\"system\" only processes system, not tool results", () => { + const docText = `\nFirst document about TypeScript.\nIt has multiple lines.\n\nSecond document about Python.\nAlso 
multi-line.\n`; + const body = { + model: "claude-sonnet-4-6", + system: `You are helpful.\n${docText}`, + messages: [ + { + role: "user", + content: [ + { type: "tool_result", tool_use_id: "tu_1", content: docText }, + ], + }, + ], + }; + + const reorderState = new ReorderState(); + // Reorder stability means it will process it + const result = runPipeline(body, { provider: "anthropic", scope: "system", reorderState }); + + // System should have its format modified to array due to reconstruction/cache control + expect(Array.isArray(result.system)).toBe(true); + + const messages = result.messages as any[]; + const content = messages[0].content as any[]; + // Tool result shouldn't have been reconstructed into blocks of its internal documents + expect(content[0].content).toBe(docText); + }); + + it("Anthropic scope=\"tool_results\" only processes tools, not system", () => { + const docText = `\nFirst document about TypeScript.\nIt has multiple lines.\n\nSecond document about Python.\nAlso multi-line.\n`; + const body = { + model: "claude-sonnet-4-6", + system: `You are helpful.\n${docText}`, + messages: [ + { + role: "user", + content: [ + { type: "tool_result", tool_use_id: "tu_1", content: docText }, + ], + }, + ], + }; + + const reorderState = new ReorderState(); + const result = runPipeline(body, { provider: "anthropic", scope: "tool_results", reorderState }); + + // System should not be processed for documents (though it may be arrayified for cache control) + // Cache control injects string to array conversion for Anthropic system if needed + if (Array.isArray(result.system)) { + const textContent = (result.system as any[]).map(b => b.text).join(""); + expect(textContent).toBe(`You are helpful.\n${docText}`); + } else { + expect(result.system).toBe(`You are helpful.\n${docText}`); + } + + // Tool results should be reconstructed/reordered + const messages = result.messages as any[]; + const content = messages[0].content as any[]; + expect(typeof 
content[0].content).toBe("string"); + expect(content[0].content).toContain("First document about TypeScript."); + expect(content[0].content).toContain("Second document about Python."); + }); +}); + +describe("full pipeline — OpenAI", () => { + it("OpenAI chat system message with XML documents gets reordered", () => { + const body = { + model: "gpt-4o", + messages: [ + { role: "system", content: "Doc A contentDoc B contentDoc C content" }, + { role: "user", content: "Hello" } + ] + }; + + const result = runPipeline(body, { provider: "openai" }); + const msgs = result.messages as any[]; + const sysMsg = msgs[0].content; + expect(sysMsg).toContain("Doc A content"); + expect(sysMsg).toContain("Doc B content"); + expect(sysMsg).toContain("Doc C content"); + }); + + it("OpenAI chat with duplicate tool results gets deduped", () => { + const sharedContent = Array.from({length: 30}, (_, i) => `Line ${i}: ${"x".repeat(50)}`).join("\n"); + const body = { + model: "gpt-4o", + messages: [ + { role: "assistant", content: null, tool_calls: [ + { id: "call_1", type: "function", function: { name: "read_file", arguments: "{}" } }, + { id: "call_2", type: "function", function: { name: "read_file", arguments: "{}" } } + ]}, + { role: "tool", tool_call_id: "call_1", content: sharedContent }, + { role: "tool", tool_call_id: "call_2", content: sharedContent } + ] + }; + + const result = runPipeline(body, { provider: "openai" }); + const msgs = result.messages as any[]; + + expect(msgs[1].content).toBe(sharedContent); + expect(msgs[2].content).not.toBe(sharedContent); + expect(msgs[2].content).toContain("identical to earlier read_file result"); + }); + + it("OpenAI body with no extractable docs passes through unchanged", () => { + const body = { + model: "gpt-4o", + messages: [ + { role: "system", content: "You are helpful." 
}, + { role: "user", content: "Hi" } + ] + }; + + const result = runPipeline(body, { provider: "openai" }); + expect(result).toEqual(body); + }); + + it("OpenAI responses API format gets deduped", () => { + const sharedContent = Array.from({length: 30}, (_, i) => `Line ${i}: ${"x".repeat(50)}`).join("\n"); + const body = { + input: [ + { type: "function_call_output", call_id: "c1", output: sharedContent }, + { type: "function_call_output", call_id: "c2", output: sharedContent } + ] + }; + + const result = runPipeline(body, { provider: "openai" }); + const input = result.input as any[]; + + expect(input[0].output).toBe(sharedContent); + expect(input[1].output).not.toBe(sharedContent); + expect(input[1].output).toContain("identical"); + }); +}); + +describe("multi-turn state — reorder stability", () => { + it("reorder state preserves doc order across turns", () => { + const reorderState = new ReorderState(); + + const bodyTurn1 = { + model: "gpt-4o", + messages: [ + { role: "system", content: "Doc A contentDoc B contentDoc C content" } + ] + }; + + runPipeline(bodyTurn1, { provider: "openai", reorderState }); + + const bodyTurn2 = { + model: "gpt-4o", + messages: [ + { role: "system", content: "Doc A contentDoc B contentDoc C contentDoc D content" } + ] + }; + + const res2 = runPipeline(bodyTurn2, { provider: "openai", reorderState }); + const sysMsg2 = (res2.messages as any[])[0].content; + + // In multi-turn, ReorderState should put the new item (D) at top, and preserve relative ordering of A, B, C. + // We just verify all are present and stable. 
+ expect(sysMsg2).toContain("Doc A content"); + expect(sysMsg2).toContain("Doc B content"); + expect(sysMsg2).toContain("Doc C content"); + expect(sysMsg2).toContain("Doc D content"); + }); + + it("reorder state reset clears history", () => { + const reorderState = new ReorderState(); + const body = { + model: "gpt-4o", + messages: [ + { role: "system", content: "Doc A contentDoc B content" } + ] + }; + + runPipeline(body, { provider: "openai", reorderState }); + + reorderState.reset(); + + const res2 = runPipeline(body, { provider: "openai", reorderState }); + const sysMsg2 = (res2.messages as any[])[0].content; + + expect(sysMsg2).toContain("Doc A content"); + expect(sysMsg2).toContain("Doc B content"); + }); +}); + +describe("edge cases", () => { + it("empty body passes through", () => { + const result = runPipeline({}, { provider: "anthropic" }); + expect(result).toEqual({}); + }); + + it("body with no messages passes through", () => { + const body = { model: "gpt-4o" }; + const result = runPipeline(body, { provider: "openai" }); + expect(result).toEqual(body); + }); + + it("body with single document doesn't get reordered", () => { + const body = { + model: "gpt-4o", + messages: [ + { role: "system", content: "Only Doc" } + ] + }; + const result = runPipeline(body, { provider: "openai" }); + // It should be unchanged + expect(result).toEqual(body); + }); + + it("very short tool result content not deduped", () => { + const shortContent = "Too short for dedup."; + const body = { + model: "gpt-4o", + messages: [ + { role: "assistant", content: null, tool_calls: [ + { id: "call_1", type: "function", function: { name: "read_file", arguments: "{}" } }, + { id: "call_2", type: "function", function: { name: "read_file", arguments: "{}" } } + ]}, + { role: "tool", tool_call_id: "call_1", content: shortContent }, + { role: "tool", tool_call_id: "call_2", content: shortContent } + ] + }; + + const result = runPipeline(body, { provider: "openai" }); + const msgs = 
result.messages as any[]; + expect(msgs[1].content).toBe(shortContent); + expect(msgs[2].content).toBe(shortContent); + }); + + it("null/undefined messages gracefully handled", () => { + const body = { model: "gpt-4o", messages: null }; + const result = runPipeline(body, { provider: "openai" }); + expect(result).toEqual(body); + }); + + it("Anthropic body with system as content block array", () => { + const body = { + model: "claude-sonnet-4-6", + system: [ + { type: "text", text: "AB" } + ], + messages: [{ role: "user", content: "hi" }] + }; + + const result = runPipeline(body, { provider: "anthropic" }); + const sys = result.system as any[]; + expect(Array.isArray(sys)).toBe(true); + // Last block should have cache_control + expect(sys[sys.length - 1].cache_control).toEqual({ type: "ephemeral" }); + + const fullText = sys.map(b => b.text).join(""); + expect(fullText).toContain("A"); + expect(fullText).toContain("B"); + }); +}); diff --git a/openclaw-plugin/src/engine/inter-scheduler.ts b/openclaw-plugin/src/engine/inter-scheduler.ts new file mode 100644 index 0000000..88ad6e3 --- /dev/null +++ b/openclaw-plugin/src/engine/inter-scheduler.ts @@ -0,0 +1,114 @@ +import type { ClusterNode } from './tree-nodes.js'; + +export interface ClusteringResult { + reorderedPrompts: number[][]; + originalPrompts: number[][]; + searchPaths: number[][]; +} + +export class InterContextScheduler { + scheduleContexts( + clusteringResult: ClusteringResult + ): [number[][], number[][], number[], Array<[number, number[]]>] { + const reorderedContexts = clusteringResult.reorderedPrompts; + const originalContexts = clusteringResult.originalPrompts; + const searchPaths = clusteringResult.searchPaths; + + const groupsByRoot = this._groupByRootPrefix(searchPaths); + const sortedGroups = this._sortGroupsByPathLength(groupsByRoot, searchPaths); + + const allGroupsWithInfo: Array<[number, number[]]> = []; + for (const groupIndices of sortedGroups) { + allGroupsWithInfo.push([0, groupIndices]); 
+ } + + allGroupsWithInfo.sort((a, b) => { + const sizeDiff = b[1].length - a[1].length; + if (sizeDiff !== 0) { + return sizeDiff; + } + + const aFirst = a[1].length > 0 ? a[1][0] : Number.POSITIVE_INFINITY; + const bFirst = b[1].length > 0 ? b[1][0] : Number.POSITIVE_INFINITY; + return aFirst - bFirst; + }); + + const finalIndexMapping = allGroupsWithInfo.flatMap(([, group]) => group); + + const scheduledReordered = finalIndexMapping.map((idx) => reorderedContexts[idx]); + const scheduledOriginals = finalIndexMapping.map((idx) => originalContexts[idx]); + + return [scheduledReordered, scheduledOriginals, finalIndexMapping, allGroupsWithInfo]; + } + + _groupByRootPrefix(searchPaths: number[][]): Map { + const groups = new Map(); + + for (let contextIdx = 0; contextIdx < searchPaths.length; contextIdx += 1) { + const path = searchPaths[contextIdx]; + const groupKey = path.length >= 1 ? path[0] : -1; + + const existing = groups.get(groupKey); + if (existing) { + existing.push(contextIdx); + } else { + groups.set(groupKey, [contextIdx]); + } + } + + return groups; + } + + _sortGroupsByPathLength( + groupsByRoot: Map, + searchPaths: number[][] + ): number[][] { + const sortedGroups: number[][] = []; + + for (const groupIndices of groupsByRoot.values()) { + const sortedGroup = [...groupIndices].sort((a, b) => { + const lengthDiff = searchPaths[b].length - searchPaths[a].length; + if (lengthDiff !== 0) { + return lengthDiff; + } + + const lexCompare = this._compareNumberArrays(searchPaths[a], searchPaths[b]); + if (lexCompare !== 0) { + return lexCompare; + } + + return a - b; + }); + + sortedGroups.push(sortedGroup); + } + + return sortedGroups; + } + + reorderPrompts( + clusteringResult: ClusteringResult + ): [number[][], number[][], number[], Array<[number, number[]]>] { + return this.scheduleContexts(clusteringResult); + } + + _reorderSinglePrompt( + promptIndex: number, + originalPrompt: number[], + uniqueNodes: Map + ): number[] { + void promptIndex; + void 
uniqueNodes; + return [...originalPrompt]; + } + + private _compareNumberArrays(a: number[], b: number[]): number { + const minLength = Math.min(a.length, b.length); + for (let i = 0; i < minLength; i += 1) { + if (a[i] !== b[i]) { + return a[i] - b[i]; + } + } + return a.length - b.length; + } +} diff --git a/openclaw-plugin/src/engine/intra-ordering.ts b/openclaw-plugin/src/engine/intra-ordering.ts new file mode 100644 index 0000000..0d2eed0 --- /dev/null +++ b/openclaw-plugin/src/engine/intra-ordering.ts @@ -0,0 +1,349 @@ +import type { ClusterNode } from './tree-nodes.js'; + +export class IntraContextOrderer { + reorderContexts(originalContexts: number[][], uniqueNodes: Map): number[][] { + let rootNode: ClusterNode | null = null; + for (const node of uniqueNodes.values()) { + if (node.isRoot) { + rootNode = node; + break; + } + } + + if (!rootNode) { + return originalContexts; + } + + for (const node of uniqueNodes.values()) { + if (node.isLeaf && node.originalIndices.size > 0) { + const firstIdx = Math.min(...node.originalIndices); + if (firstIdx < originalContexts.length) { + this._setNodeDocs(node, [...originalContexts[firstIdx]]); + } + } + } + + const queue: number[] = [rootNode.nodeId]; + const visited = new Set(); + + while (queue.length > 0) { + const nodeId = queue.shift(); + if (nodeId === undefined || visited.has(nodeId) || !uniqueNodes.has(nodeId)) { + continue; + } + + visited.add(nodeId); + const node = uniqueNodes.get(nodeId); + if (!node) { + continue; + } + + if (!node.isRoot && node.parent !== null) { + const parentNode = uniqueNodes.get(node.parent); + if (parentNode) { + const parentDocs = this._getNodeDocs(parentNode); + const nodeDocs = this._getNodeDocs(node); + if (parentDocs.length > 0 && nodeDocs.length > 0) { + this._setNodeDocs(node, this._reorderWithParentPrefix(nodeDocs, parentDocs)); + } + } + } + + for (const childId of node.children) { + if (uniqueNodes.has(childId)) { + queue.push(childId); + } + } + } + + const 
reorderedContexts: number[][] = []; + for (let i = 0; i < originalContexts.length; i += 1) { + const leafNode = this._findLeafNode(i, uniqueNodes); + if (leafNode) { + const leafDocs = this._getNodeDocs(leafNode); + if (leafDocs.length > 0) { + reorderedContexts.push(leafDocs); + continue; + } + } + + reorderedContexts.push([...originalContexts[i]]); + } + + return reorderedContexts; + } + + _updateTreeAndReorderNodes(uniqueNodes: Map, reorderedContexts: number[][]): void { + let rootNode: ClusterNode | null = null; + for (const node of uniqueNodes.values()) { + if (node.isRoot) { + rootNode = node; + break; + } + } + + for (const node of uniqueNodes.values()) { + if (node.isLeaf && node.originalIndices.size > 0) { + const firstIdx = Math.min(...node.originalIndices); + if (firstIdx < reorderedContexts.length) { + this._setNodeDocs(node, [...reorderedContexts[firstIdx]]); + } + } + } + + if (!rootNode) { + return; + } + + const queue: Array<[number, boolean]> = []; + for (const childId of rootNode.children) { + if (uniqueNodes.has(childId)) { + queue.push([childId, true]); + } + } + + while (queue.length > 0) { + const item = queue.shift(); + if (!item) { + continue; + } + + const [nodeId, isChildOfRoot] = item; + const node = uniqueNodes.get(nodeId); + if (!node) { + continue; + } + + if (!isChildOfRoot && node.parent !== null) { + const parentNode = uniqueNodes.get(node.parent); + if (parentNode) { + const parentDocs = this._getNodeDocs(parentNode); + const nodeDocs = this._getNodeDocs(node); + if (parentDocs.length > 0 && nodeDocs.length > 0) { + this._setNodeDocs(node, this._reorderWithParentPrefix(nodeDocs, parentDocs)); + } + } + } + + for (const childId of node.children) { + if (uniqueNodes.has(childId)) { + queue.push([childId, false]); + } + } + } + } + + _reorderWithParentPrefix(nodeDocs: number[], parentDocs: number[]): number[] { + if (parentDocs.length === 0) { + return nodeDocs; + } + + const result = [...parentDocs]; + const parentSet = new 
Set(parentDocs); + + for (const doc of nodeDocs) { + if (!parentSet.has(doc)) { + result.push(doc); + } + } + + return result; + } + + _reorderContextWithTreePrefix( + contextIndex: number, + originalContext: number[], + uniqueNodes: Map + ): number[] { + const leafNode = this._findLeafNode(contextIndex, uniqueNodes); + if (!leafNode) { + return [...originalContext]; + } + + const prefixDocs: number[] = []; + const visited = new Set(); + let currentNode: ClusterNode | undefined = leafNode; + + const ancestors: ClusterNode[] = []; + while (currentNode && !currentNode.isRoot) { + if (visited.has(currentNode.nodeId)) { + break; + } + + visited.add(currentNode.nodeId); + ancestors.push(currentNode); + + if (currentNode.parent !== null && uniqueNodes.has(currentNode.parent)) { + currentNode = uniqueNodes.get(currentNode.parent); + } else { + break; + } + } + + ancestors.reverse(); + + const seenDocs = new Set(); + for (const ancestor of ancestors) { + const ancestorDocs = this._getNodeDocs(ancestor); + for (const doc of ancestorDocs) { + if (!seenDocs.has(doc)) { + prefixDocs.push(doc); + seenDocs.add(doc); + } + } + } + + const result = [...prefixDocs]; + for (const doc of originalContext) { + if (!seenDocs.has(doc)) { + result.push(doc); + seenDocs.add(doc); + } + } + + return result; + } + + extractSearchPaths(uniqueNodes: Map, numContexts: number): number[][] { + const searchPaths: number[][] = Array.from({ length: numContexts }, () => []); + + const contextToLeaf = new Map(); + for (const [nodeId, node] of uniqueNodes.entries()) { + if (!node.isLeaf) { + continue; + } + + for (const origIdx of node.originalIndices) { + contextToLeaf.set(origIdx, nodeId); + } + } + + for (let contextIdx = 0; contextIdx < numContexts; contextIdx += 1) { + const leafId = contextToLeaf.get(contextIdx); + if (leafId === undefined) { + searchPaths[contextIdx] = []; + continue; + } + + const childIndices: number[] = []; + let currentId: number | null = leafId; + const visited = new Set(); 
+ + while (currentId !== null) { + if (visited.has(currentId)) { + break; + } + visited.add(currentId); + + const currentNode = uniqueNodes.get(currentId); + if (!currentNode) { + break; + } + + if (currentNode.parent !== null) { + const parentNode = uniqueNodes.get(currentNode.parent); + if (parentNode) { + const childIndex = parentNode.children.indexOf(currentId); + if (childIndex !== -1) { + childIndices.push(childIndex); + } + } + } + + currentId = currentNode.parent; + } + + searchPaths[contextIdx] = [...childIndices].reverse(); + } + + return searchPaths; + } + + _reorderSingleContext( + contextIndex: number, + originalContext: number[], + uniqueNodes: Map + ): number[] { + const originalSet = new Set(originalContext); + + const leafNode = this._findLeafNode(contextIndex, uniqueNodes); + if (!leafNode) { + return [...originalContext]; + } + + if (leafNode.isRoot) { + return Array.from(leafNode.content).sort((a, b) => a - b); + } + + if (leafNode.frequency > 1) { + const prefixContent = leafNode.content; + const prefixList = Array.from(prefixContent).sort((a, b) => a - b); + const remainingList = Array.from(originalSet) + .filter((value) => !prefixContent.has(value)) + .sort((a, b) => a - b); + return [...prefixList, ...remainingList]; + } + + const bestNode = this._findBestAncestor(leafNode, uniqueNodes); + if (!bestNode) { + return [...originalContext]; + } + + const prefixContent = bestNode.content; + const prefixList = Array.from(prefixContent).sort((a, b) => a - b); + const remainingList = Array.from(originalSet) + .filter((value) => !prefixContent.has(value)) + .sort((a, b) => a - b); + return [...prefixList, ...remainingList]; + } + + _findLeafNode(contextIndex: number, uniqueNodes: Map): ClusterNode | null { + for (const node of uniqueNodes.values()) { + if (node.isLeaf && node.originalIndices.has(contextIndex)) { + return node; + } + } + + return null; + } + + _findBestAncestor(startNode: ClusterNode, uniqueNodes: Map): ClusterNode | null { + let 
currentNode: ClusterNode = startNode; + + while (currentNode.parent !== null) { + const parentId = currentNode.parent; + const parentNode = uniqueNodes.get(parentId); + if (!parentNode) { + return null; + } + + if (parentNode.frequency > 1 && !parentNode.isEmpty) { + return parentNode; + } + + currentNode = parentNode; + } + + return null; + } + + reorderPrompts(originalPrompts: number[][], uniqueNodes: Map): number[][] { + return this.reorderContexts(originalPrompts, uniqueNodes); + } + + _reorderSinglePrompt( + promptIndex: number, + originalPrompt: number[], + uniqueNodes: Map + ): number[] { + return this._reorderSingleContext(promptIndex, originalPrompt, uniqueNodes); + } + + private _getNodeDocs(node: ClusterNode): number[] { + return Array.from(node.content); + } + + private _setNodeDocs(node: ClusterNode, docs: number[]): void { + node.content = new Set(docs); + } +} diff --git a/openclaw-plugin/src/engine/live-index.ts b/openclaw-plugin/src/engine/live-index.ts new file mode 100644 index 0000000..29ad83a --- /dev/null +++ b/openclaw-plugin/src/engine/live-index.ts @@ -0,0 +1,1234 @@ +import { ContextIndex, IndexResult } from './index-construction.js'; +import { ClusterNode, NodeManager } from './tree-nodes.js'; +import { NodeMetadata } from './metadata.js'; +import { InterContextScheduler } from './inter-scheduler.js'; +import { IntraContextOrderer } from './intra-ordering.js'; +import { computeDistanceSingle, computeDistancesBatch } from './compute-distance.js'; +import { ConversationTracker, type DeduplicationResult } from './conversation-tracker.js'; +import { EvictionHeap } from './eviction-heap.js'; +import * as crypto from 'node:crypto'; + +export function computePrefixLength(list1: number[], list2: number[]): number { + let length = 0; + const minLen = Math.min(list1.length, list2.length); + for (let i = 0; i < minLen; i++) { + if (list1[i] === list2[i]) { + length++; + } else { + break; + } + } + return length; +} + +export class ContextPilot 
extends ContextIndex { + metadata: Map = new Map(); + interScheduler = new InterContextScheduler(); + + protected _requestToNode: Map = new Map(); + protected _nextRequestCounter: number = 0; + + protected _conversations: Map; turnCount: number }> = new Map(); + protected _hasExplicitConversation: boolean = false; + + isLive: boolean = false; + initialResult: any = null; + scheduledResult: any = null; + + nodes: Map = new Map(); + rootId: number | null = null; + nextNodeId: number = 0; + + liveStats = { + totalSearches: 0, + totalInsertions: 0, + totalEvictions: 0, + totalSearchTimeUs: 0, + totalTraversalTimeUs: 0, + totalRemovals: 0 + }; + + static readonly _DEFAULT_CONVERSATION = "_default"; + + constructor(alpha: number = 0.001, useGpu: boolean = false, linkageMethod: "single" | "complete" | "average" = "average", batchSize: number = 10000) { + super({ alpha, useGpu, linkageMethod, batchSize }); + } + + getAllRequestIds(): Set { + return new Set(this._requestToNode.keys()); + } + + reset(): void { + this.metadata.clear(); + this._requestToNode.clear(); + this._nextRequestCounter = 0; + this.isLive = false; + this.initialResult = null; + this.scheduledResult = null; + this.nodes.clear(); + this.rootId = null; + this.nextNodeId = 0; + this.liveStats = { + totalSearches: 0, + totalInsertions: 0, + totalEvictions: 0, + totalSearchTimeUs: 0, + totalTraversalTimeUs: 0, + totalRemovals: 0 + }; + } + + buildAndSchedule(contexts: number[][], initialTokensPerContext: number = 0): any { + this.initialResult = this.fitTransform(contexts); + + const [scheduledReordered, scheduledOriginals, finalMapping, groups] = + this.interScheduler.scheduleContexts(this.initialResult); + + this.scheduledResult = { + reordered_contexts: scheduledReordered, + original_indices: finalMapping, + scheduled_originals: scheduledOriginals, + groups: groups, + clustering_result: this.initialResult + }; + + const [requestIdMapping, requestIdsOrdered] = this._initializeLiveMetadata( + 
initialTokensPerContext, + contexts.length + ); + + this.scheduledResult['request_id_mapping'] = requestIdMapping; + this.scheduledResult['request_ids'] = requestIdsOrdered; + + this.isLive = true; + + return this.scheduledResult; + } + + reorder(contexts: any, initialTokensPerContext: number = 0, conversationId?: string): [any[], number[]] { + if (contexts && !Array.isArray(contexts[0])) { + contexts = [contexts]; + } + + const result = this.buildIncremental(contexts, initialTokensPerContext); + const reordered = result.reordered_contexts; + + const cid = conversationId || ContextPilot._DEFAULT_CONVERSATION; + if (conversationId !== undefined && conversationId !== null) { + this._hasExplicitConversation = true; + } + + let conv = this._conversations.get(cid); + if (!conv) { + conv = { seenDocs: new Set(), turnCount: 0 }; + this._conversations.set(cid, conv); + } + + for (const ctx of reordered) { + for (const doc of ctx) { + conv.seenDocs.add(doc); + } + } + conv.turnCount += 1; + + return [reordered, result.original_indices]; + } + + optimize(docs: string[], query: string, conversationId?: string, systemInstruction?: string): any[] { + const [reordered, _indices] = this.reorder(docs, 0, conversationId); + const reorderedDocs = reordered[0]; + + const systemContent = [systemInstruction, ...reorderedDocs].filter(Boolean).join("\n\n"); + + return [ + { role: "system", content: systemContent }, + { role: "user", content: query } + ]; + } + + optimizeBatch(allDocs: string[][], allQueries: string[], systemInstruction?: string): [any[][], number[]] { + if (allDocs.length !== allQueries.length) { + throw new Error(`all_docs (${allDocs.length}) and all_queries (${allQueries.length}) must have the same length.`); + } + + const [reorderedContexts, order] = this.reorder(allDocs); + const messagesBatch: any[][] = []; + + for (let i = 0; i < reorderedContexts.length; i++) { + const ctx = reorderedContexts[i]; + const origIdx = order[i]; + + const systemContent = 
[systemInstruction, ...ctx].filter(Boolean).join("\n\n"); + messagesBatch.push([ + { role: "system", content: systemContent }, + { role: "user", content: allQueries[origIdx] } + ]); + } + + return [messagesBatch, order]; + } + + deduplicate(contexts: any[][], conversationId: string, hintTemplate?: string): any[] { + if (!conversationId) { + throw new Error("conversation_id is required for .deduplicate()."); + } + + const template = hintTemplate || "Please refer to [Doc {doc_id}] from the previous conversation."; + + if (!this._conversations.has(conversationId)) { + throw new Error(`No prior .reorder() call found for conversation_id='${conversationId}'.`); + } + + const conv = this._conversations.get(conversationId)!; + const seen = conv.seenDocs; + const results: any[] = []; + + for (const ctx of contexts) { + const overlapping = ctx.filter(d => seen.has(d)); + const newDocs = ctx.filter(d => !seen.has(d)); + const hints = overlapping.map(d => template.replace("{doc_id}", String(d))); + + results.push({ + new_docs: newDocs, + overlapping_docs: overlapping, + reference_hints: hints, + deduplicated_docs: newDocs + }); + + for (const d of ctx) { + seen.add(d); + } + } + + conv.turnCount += 1; + return results; + } + + buildIncremental(contexts: any[][], initialTokensPerContext: number = 0): any { + // @ts-ignore - Assuming inherited from ContextIndex + const convertedContexts = this._convertToInt ? this._convertToInt(contexts) : contexts; + + if (!this.isLive) { + const result = this.buildAndSchedule(convertedContexts, initialTokensPerContext); + const reordered = result.reordered_contexts || convertedContexts; + // @ts-ignore + const stringReordered = this._convertToStr ? 
this._convertToStr(reordered) : reordered; + + return { + request_ids: result.request_ids || [], + reordered_contexts: stringReordered, + matched_count: 0, + inserted_count: convertedContexts.length, + merged_count: 0, + original_indices: result.original_indices || Array.from({ length: convertedContexts.length }, (_, i) => i), + groups: result.groups || [] + }; + } + + const matchedContexts: any[] = []; + const unmatchedContexts: any[] = []; + + const searchResults = this.searchBatch(convertedContexts); + + for (let i = 0; i < convertedContexts.length; i++) { + const context = convertedContexts[i]; + let [searchPath, matchedNodeId, overlapCount, hasPrefix] = searchResults[i]; + + if (overlapCount > 0 && matchedNodeId >= 0 && matchedNodeId !== this.rootId) { + const matchedNode = this.nodes.get(matchedNodeId); + let nodeDocs: number[] | null = null; + + if (this.metadata.has(matchedNodeId) && this.metadata.get(matchedNodeId)!.docIds) { + nodeDocs = this.metadata.get(matchedNodeId)!.docIds as number[]; + } else if (matchedNode && matchedNode.docIds) { + nodeDocs = matchedNode.docIds as number[]; + } + + let reordered = context; + if (nodeDocs) { + reordered = this._reorderWithPrefix(context, nodeDocs); + } else { + hasPrefix = true; + } + + matchedContexts.push([i, reordered, searchPath, hasPrefix]); + } else { + unmatchedContexts.push([i, context]); + } + } + + const requestIds: (string | null)[] = new Array(convertedContexts.length).fill(null); + const reorderedContexts: any[] = new Array(convertedContexts.length).fill(null); + const contextInfo: any[] = []; + + for (const [origIdx, reordered, searchPath, hasPrefix] of matchedContexts) { + const matchedNode = this.traverse(searchPath); + let newNodeId: number, newSearchPath: number[], requestId: string; + + if (hasPrefix && matchedNode && matchedNode.isLeaf) { + [newNodeId, newSearchPath, requestId] = this._splitLeafAndInsert( + reordered, matchedNode, searchPath, initialTokensPerContext + ); + } else if 
(hasPrefix) { + [newNodeId, newSearchPath, requestId] = this.insert( + reordered, searchPath, initialTokensPerContext + ); + } else { + const insertPath = searchPath.length > 0 ? searchPath.slice(0, -1) : searchPath; + [newNodeId, newSearchPath, requestId] = this.insert( + reordered, insertPath, initialTokensPerContext + ); + } + + requestIds[origIdx] = requestId; + reorderedContexts[origIdx] = reordered; + contextInfo.push([origIdx, requestId, newSearchPath]); + } + + let mergedCount = 0; + if (unmatchedContexts.length > 0) { + const unmatchedOnly = unmatchedContexts.map(x => x[1]); + + const tempIndex = new ContextPilot( + this.alpha, + // @ts-ignore + this.useGpu, + // @ts-ignore + this.linkageMethod, + // @ts-ignore + this.batchSize + ); + + const tempResult = tempIndex.fitTransform(unmatchedOnly); + + const [mergedRequestIds, mergedSearchPaths] = this._mergeIndex( + tempResult, + unmatchedContexts, + initialTokensPerContext + ); + + for (let i = 0; i < unmatchedContexts.length; i++) { + const [origIdx, origContext] = unmatchedContexts[i]; + requestIds[origIdx] = mergedRequestIds[i]; + + if (tempResult.reordered_contexts && i < tempResult.reordered_contexts.length) { + reorderedContexts[origIdx] = tempResult.reordered_contexts[i]; + } else { + reorderedContexts[origIdx] = origContext; + } + + contextInfo.push([origIdx, mergedRequestIds[i], mergedSearchPaths[i]]); + } + + mergedCount = unmatchedContexts.length; + } + + const scheduledOrder = this._scheduleIncremental(contextInfo); + const groups = this._groupByPathPrefix(contextInfo); + + // @ts-ignore + const finalReorderedStr = this._convertToStr ? 
this._convertToStr(reorderedContexts) : reorderedContexts;

    return {
      request_ids: requestIds,
      reordered_contexts: finalReorderedStr,
      matched_count: matchedContexts.length,
      inserted_count: convertedContexts.length,
      merged_count: mergedCount,
      original_indices: scheduledOrder,
      groups: groups
    };
  }

  /**
   * Reorder `context` so that elements also present in `prefix` come first,
   * in prefix order (deduplicated), followed by the remaining elements of
   * `context` in their original relative order. Pure function; no tree state.
   */
  _reorderWithPrefix(context: number[], prefix: number[]): number[] {
    const contextSet = new Set(context);
    const result: number[] = [];
    const prefixUsed = new Set();

    // Pass 1: prefix elements that actually occur in the context, deduped.
    for (const elem of prefix) {
      if (contextSet.has(elem) && !prefixUsed.has(elem)) {
        result.push(elem);
        prefixUsed.add(elem);
      }
    }

    // Pass 2: everything not already emitted, preserving context order.
    for (const elem of context) {
      if (!prefixUsed.has(elem)) {
        result.push(elem);
      }
    }

    return result;
  }

  /**
   * Graft the tree produced by a temporary fit (over the unmatched contexts)
   * under the live root, then resolve a request id + search path for each
   * unmatched context — reusing ids from copied leaves where possible and
   * falling back to a plain insert() otherwise.
   *
   * @param tempResult    fit result holding the temporary tree (snake_case or
   *                      camelCase keys are both accepted).
   * @param unmatchedInfo pairs of [originalIndex, context].
   * @param initialTokens token budget assigned to each copied/inserted leaf.
   * @returns [requestIds, searchPaths], parallel to `unmatchedInfo`.
   */
  _mergeIndex(tempResult: any, unmatchedInfo: any[], initialTokens: number): [string[], number[][]] {
    const requestIds: string[] = [];
    const searchPaths: number[][] = [];

    const uniqueNodes = tempResult.unique_nodes || tempResult.uniqueNodes;
    let tempRoot: any = null;

    if (uniqueNodes) {
      for (const node of uniqueNodes.values()) {
        if (node.isRoot) {
          tempRoot = node;
          break;
        }
      }
    }

    // Fallback: insert every unmatched context individually at the root.
    const fallbackInsert = () => {
      for (const [origIdx, context] of unmatchedInfo) {
        const [newNodeId, newPath, reqId] = this.insert(context, [], initialTokens);
        requestIds.push(reqId);
        searchPaths.push(newPath);
      }
    };

    if (!tempRoot || this.rootId === null) {
      fallbackInsert();
      return [requestIds, searchPaths];
    }

    const globalRoot = this.nodes.get(this.rootId);
    if (!globalRoot) {
      fallbackInsert();
      return [requestIds, searchPaths];
    }

    // Copy each child subtree of the temporary root under the live root,
    // appending after the root's existing children so prior paths stay valid.
    const nodeIdMap = new Map();
    const baseChildIdx = globalRoot.children.length;

    for (let childIdx = 0; childIdx < tempRoot.children.length; childIdx++) {
      const tempChildId = tempRoot.children[childIdx];
      const newChildIdx = baseChildIdx + childIdx;
      this._copySubtree(
        uniqueNodes,
        tempChildId,
        this.rootId,
        nodeIdMap,
        initialTokens,
        [newChildIdx]
      );
    }

    for (let i = 0; i < unmatchedInfo.length; i++) {
      const [origIdx, context] = unmatchedInfo[i];
      let tempLeafId: number | null = null;

      // NOTE(review): matches originalIndices against the batch-local index i,
      // which assumes the temp fit indexed its inputs 0..n-1 in unmatchedInfo
      // order — confirm against the fitTransform caller.
      for (const [nodeId, node] of uniqueNodes.entries()) {
        if (node.isLeaf && node.originalIndices && node.originalIndices.has(i)) {
          tempLeafId = nodeId;
          break;
        }
      }

      if (tempLeafId !== null && nodeIdMap.has(tempLeafId)) {
        const newNodeId = nodeIdMap.get(tempLeafId)!;
        if (this.metadata.has(newNodeId)) {
          const meta = this.metadata.get(newNodeId)!;
          requestIds.push(meta.requestId!);
          searchPaths.push(meta.searchPath);
          continue;
        }
      }

      // Leaf not found in the copied subtree: insert the context directly.
      const [newNodeId, newPath, reqId] = this.insert(context, [], initialTokens);
      requestIds.push(reqId);
      searchPaths.push(newPath);
    }

    return [requestIds, searchPaths];
  }

  /**
   * Recursively copy a subtree from `sourceNodes` (a temporary tree) into the
   * live tree under `parentId`, minting fresh node ids, leaf request ids, and
   * NodeMetadata. `nodeIdMap` records temp-id -> new-id for later lookups.
   */
  _copySubtree(sourceNodes: Map, sourceNodeId: number, parentId: number,
    nodeIdMap: Map, initialTokens: number, searchPath: number[]): void {
    const sourceNode = sourceNodes.get(sourceNodeId);
    if (!sourceNode) return;

    const newNodeId = this.nextNodeId++;
    // Prefer explicit docIds; fall back to the node's content set.
    const content = sourceNode.docIds ? [...sourceNode.docIds] : (sourceNode.content ? [...sourceNode.content] : []);
    const originalIndices: Set = sourceNode.originalIndices ? new Set(sourceNode.originalIndices) : new Set();

    const newNode = new ClusterNode(
      newNodeId,
      new Set(content),
      originalIndices,
      0.0,
      [],
      parentId
    );

    if (sourceNode.docIds) {
      newNode.docIds = [...sourceNode.docIds];
    }

    this.nodes.set(newNodeId, newNode);
    nodeIdMap.set(sourceNodeId, newNodeId);

    const parentNode = this.nodes.get(parentId);
    if (parentNode) {
      parentNode.addChild(newNodeId);
    }

    // Only leaves carry a request id and a token budget.
    const isLeaf = sourceNode.isLeaf || sourceNode.is_leaf;
    const requestId = isLeaf ? `req-${crypto.randomUUID().replace(/-/g, '').substring(0, 12)}` : null;

    const parentTokens = this.metadata.has(parentId) ? this.metadata.get(parentId)!.totalTokens : 0;

    const metadata = new NodeMetadata(newNodeId, {
      totalTokens: isLeaf ? initialTokens : 0,
      extraTokens: isLeaf ? Math.max(0, initialTokens - parentTokens) : 0,
      searchPath,
      docIds: sourceNode.docIds ? [...sourceNode.docIds] : null,
      isLeaf,
      requestId,
    });

    this.metadata.set(newNodeId, metadata);

    if (isLeaf && requestId) {
      this._requestToNode.set(requestId, newNodeId);
    }

    // Recurse into children, extending the search path by the child index.
    if (sourceNode.children) {
      for (let childIdx = 0; childIdx < sourceNode.children.length; childIdx++) {
        const childId = sourceNode.children[childIdx];
        const childSearchPath = [...searchPath, childIdx];
        this._copySubtree(
          sourceNodes, childId, newNodeId,
          nodeIdMap, initialTokens, childSearchPath
        );
      }
    }
  }

  /**
   * Schedule incrementally-added contexts: group by the first element of each
   * search path (-1 for empty paths), visit groups by ascending key, and within
   * a group emit deeper (longer-path) contexts first.
   *
   * @param contextInfo triples of [ctxIdx, reqId, path].
   * @returns context indices in scheduled order.
   */
  _scheduleIncremental(contextInfo: any[]): number[] {
    const groups = new Map();

    for (const [ctxIdx, reqId, path] of contextInfo) {
      const groupKey = path && path.length > 0 ? path[0] : -1;
      if (!groups.has(groupKey)) {
        groups.set(groupKey, []);
      }
      groups.get(groupKey)!.push({ ctxIdx, len: path ? path.length : 0 });
    }

    const scheduled: number[] = [];
    const sortedKeys = Array.from(groups.keys()).sort((a, b) => a - b);

    for (const groupKey of sortedKeys) {
      const items = groups.get(groupKey)!;
      items.sort((a, b) => b.len - a.len); // deepest paths first within a group
      scheduled.push(...items.map(item => item.ctxIdx));
    }

    return scheduled;
  }

  /**
   * Group context indices by the first element of their search path
   * (-1 for empty paths); returns [groupSize, indices] pairs sorted by
   * descending group size.
   */
  _groupByPathPrefix(contextInfo: any[]): [number, number[]][] {
    const groups = new Map();

    for (const [ctxIdx, reqId, path] of contextInfo) {
      const groupKey = path && path.length > 0 ?
path[0] : -1; + if (!groups.has(groupKey)) { + groups.set(groupKey, []); + } + groups.get(groupKey)!.push(ctxIdx); + } + + const result: [number, number[]][] = []; + for (const [groupKey, indices] of groups.entries()) { + result.push([indices.length, indices]); + } + + result.sort((a, b) => b[0] - a[0]); + return result; + } + + scheduleOnly(contexts: number[][]): any { + const result = this.fitTransform(contexts); + + const [scheduledReordered, scheduledOriginals, finalMapping, groups] = + this.interScheduler.scheduleContexts(result); + + return { + reordered_contexts: scheduledReordered, + original_indices: finalMapping, + scheduled_originals: scheduledOriginals, + groups: groups, + stats: { + total_nodes: result.stats?.total_nodes || result.stats?.totalNodes, + leaf_nodes: result.stats?.leaf_nodes || result.stats?.leafNodes, + num_contexts: contexts.length, + num_groups: groups.length + } + }; + } + + _initializeLiveMetadata(initialTokensPerContext: number, numInputContexts?: number): [Record, (string | null)[]] { + if (!this.initialResult) { + throw new Error("Must call fitTransform() before initializing metadata"); + } + + const uniqueNodes = this.initialResult.unique_nodes || this.initialResult.uniqueNodes; + const reorderedContexts = this.initialResult.reordered_contexts || this.initialResult.reorderedContexts; + const requestIdMapping: Record = {}; + + this.nodes = uniqueNodes; + + for (const [nodeId, node] of uniqueNodes.entries()) { + if (node.isRoot || node.is_root) { + this.rootId = nodeId; + break; + } + } + + this.nextNodeId = uniqueNodes.size > 0 ? 
Math.max(...Array.from(uniqueNodes.keys())) + 1 : 0; + let leafCounter = 0; + const originalIndexToRequestId = new Map(); + + for (const [nodeId, node] of uniqueNodes.entries()) { + const searchPath = this._computeSearchPath(nodeId); + const isLeaf = node.isLeaf || node.is_leaf; + + let totalTokens = 0; + let requestId: string | null = null; + + if (isLeaf) { + totalTokens = initialTokensPerContext; + requestId = `req-${crypto.randomUUID().replace(/-/g, '').substring(0, 12)}`; + leafCounter++; + + if (node.originalIndices || node.original_indices) { + const indices = node.originalIndices || node.original_indices; + for (const origIdx of indices) { + originalIndexToRequestId.set(origIdx, requestId); + } + } + } + + let parentTokens = 0; + if (node.parent !== null && this.metadata.has(node.parent)) { + parentTokens = this.metadata.get(node.parent)!.totalTokens; + } + const extraTokens = Math.max(0, totalTokens - parentTokens); + + let leafDocIds: number[] | null = null; + if (isLeaf && (node.originalIndices || node.original_indices)) { + const indices = Array.from((node.originalIndices || node.original_indices) as Set); + if (indices.length > 0) { + const firstOrigIdx = Math.min(...indices); + if (reorderedContexts && firstOrigIdx < reorderedContexts.length) { + leafDocIds = reorderedContexts[firstOrigIdx]; + } else { + leafDocIds = node.docIds || node.doc_ids; + } + } + } else { + leafDocIds = node.docIds || node.doc_ids; + } + + const metadata = new NodeMetadata(nodeId, { + totalTokens, + extraTokens, + searchPath, + docIds: leafDocIds, + isLeaf, + requestId, + }); + + this.metadata.set(nodeId, metadata); + + if (isLeaf && requestId) { + this._requestToNode.set(requestId, nodeId); + requestIdMapping[requestId] = nodeId; + } + } + + this.nextNodeId = this.nodes.size > 0 ? Math.max(...Array.from(this.nodes.keys())) + 1 : 0; + this._nextRequestCounter = leafCounter; + + const numContexts = numInputContexts !== undefined ? 
numInputContexts : originalIndexToRequestId.size; + const requestIdsOrdered: (string | null)[] = []; + + for (let i = 0; i < numContexts; i++) { + requestIdsOrdered.push(originalIndexToRequestId.get(i) || null); + } + + return [requestIdMapping, requestIdsOrdered]; + } + + trackRequest(requestId: string): void { + if (!this._requestToNode.has(requestId)) { + this._requestToNode.set(requestId, null); + } + } + + removeRequests(requestIds: Set): any { + const evictedNodes: number[] = []; + const notFound: string[] = []; + + for (const requestId of requestIds) { + if (!this._requestToNode.has(requestId)) { + notFound.push(requestId); + continue; + } + + const nodeId = this._requestToNode.get(requestId); + this._requestToNode.delete(requestId); + + if (nodeId !== null && nodeId !== undefined) { + evictedNodes.push(nodeId); + this._removeNodeAndPrune(nodeId); + } + } + + this.liveStats.totalEvictions += evictedNodes.length; + + const arrayReqs = Array.from(requestIds); + return { + removed_count: evictedNodes.length, + evicted_node_ids: evictedNodes, + evicted_request_ids: arrayReqs.filter(id => !notFound.includes(id)), + not_found: notFound, + nodes_remaining: this.nodes.size, + requests_remaining: this._requestToNode.size + }; + } + + removeRequestById(requestId: string): boolean { + const result = this.removeRequests(new Set([requestId])); + return result.evicted_node_ids.length > 0; + } + + getRequestNode(requestId: string): number | null { + return this._requestToNode.get(requestId) ?? 
null;
  }

  /**
   * Breadth-first walk of the live tree collecting every node that has doc
   * ids (metadata docIds preferred over the node's own).
   *
   * @returns [nodeIds, parallel doc lists, nodeId -> search path].
   */
  _collectAllNodeDocs(): [number[], number[][], Record] {
    const nodeIds: number[] = [];
    const nodeDocsList: number[][] = [];
    const nodeIdToPath: Record = {};

    if (this.rootId === null) return [nodeIds, nodeDocsList, nodeIdToPath];

    const queue: [number, number[]][] = [[this.rootId, []]];

    // NOTE(review): queue.shift() is O(n) per pop — fine for modest trees.
    while (queue.length > 0) {
      const [nodeId, path] = queue.shift()!;

      if (!this.nodes.has(nodeId)) continue;

      const node = this.nodes.get(nodeId)!;
      const nodeMeta = this.metadata.get(nodeId);

      let docs: number[] | null = null;
      if (nodeMeta && nodeMeta.docIds) {
        docs = nodeMeta.docIds;
      } else if (node.docIds) {
        docs = node.docIds;
      }

      if (docs) {
        nodeIds.push(nodeId);
        nodeDocsList.push(docs);
        nodeIdToPath[nodeId] = path;
      }

      if (!node.isLeaf && node.children) {
        for (let idx = 0; idx < node.children.length; idx++) {
          queue.push([node.children[idx], [...path, idx]]);
        }
      }
    }

    return [nodeIds, nodeDocsList, nodeIdToPath];
  }

  /** Doc ids for a node: metadata docIds first, then node docIds, else null. */
  _getNodeDocs(nodeId: number): number[] | null {
    const meta = this.metadata.get(nodeId);
    if (meta && meta.docIds) return meta.docIds;
    const node = this.nodes.get(nodeId);
    if (node && node.docIds) return node.docIds;
    return null;
  }

  /**
   * Greedy top-down search for the node best matching `context`.
   * At each level the child with the smallest distance (among children with
   * non-zero overlap) is chosen; descent continues only while the child's
   * first doc id is present in the context (prefix match holds).
   *
   * @returns [searchPath, nodeId, overlapCount, hasPrefix]; [[], -1, 0, false]
   *          when nothing matches.
   */
  _searchSingleHierarchical(context: number[]): [number[], number, number, boolean] {
    const contextSet = new Set(context);
    let currentId = this.rootId;
    let currentPath: number[] = [];

    while (true) {
      if (currentId === null) return [[], -1, 0, false];
      const currentNode = this.nodes.get(currentId);

      // Dead end (missing node, leaf, or no children): report the current
      // node if it has docs and is not the root.
      if (!currentNode || currentNode.isLeaf || !currentNode.children || currentNode.children.length === 0) {
        const docs = this._getNodeDocs(currentId);
        if (docs && currentId !== this.rootId) {
          const docsSet = new Set(docs);
          const overlap = Array.from(contextSet).filter(x => docsSet.has(x)).length;
          const hasPrefix = overlap > 0 ? contextSet.has(docs[0]) : false;
          return [currentPath, currentId, overlap, hasPrefix];
        }
        return [[], -1, 0, false];
      }

      // Gather children that carry doc lists, remembering their child index.
      const childIds: number[] = [];
      const childDocsList: number[][] = [];
      const childIndices: number[] = [];

      for (let idx = 0; idx < currentNode.children.length; idx++) {
        const childId = currentNode.children[idx];
        const docs = this._getNodeDocs(childId);
        if (docs) {
          childIds.push(childId);
          childDocsList.push(docs);
          childIndices.push(idx);
        }
      }

      if (childIds.length === 0) return [[], -1, 0, false];

      const distances = computeDistancesBatch([context], childDocsList, this.alpha);

      // Pick the closest child that shares at least one doc with the context.
      let bestJ = -1;
      let bestDistance = Infinity;
      let bestOverlap = 0;

      for (let j = 0; j < childIds.length; j++) {
        const docs = childDocsList[j];
        const docsSet = new Set(docs);
        const overlap = Array.from(contextSet).filter(x => docsSet.has(x)).length;
        if (overlap === 0) continue;

        // distances may be a flat array or a [1][n] matrix.
        const dist = Array.isArray(distances[0]) ? distances[0][j] : distances[j];

        if (dist < bestDistance) {
          bestDistance = dist;
          bestOverlap = overlap;
          bestJ = j;
        }
      }

      if (bestJ < 0) {
        // No overlapping child: fall back to the current (non-root) node.
        if (currentId !== this.rootId) {
          const docs = this._getNodeDocs(currentId);
          if (docs) {
            const docsSet2 = new Set(docs);
            const overlap = Array.from(contextSet).filter(x => docsSet2.has(x)).length;
            return [currentPath, currentId, overlap, true];
          }
        }
        return [[], -1, 0, false];
      }

      const bestChildId = childIds[bestJ];
      const bestChildIdx = childIndices[bestJ];
      const bestDocs = childDocsList[bestJ];
      const childPath = [...currentPath, bestChildIdx];

      if (contextSet.has(bestDocs[0])) {
        // Prefix holds: descend into internal nodes, stop at leaves.
        const bestChildNode = this.nodes.get(bestChildId);
        if (bestChildNode && !bestChildNode.isLeaf && bestChildNode.children && bestChildNode.children.length > 0) {
          currentId = bestChildId;
          currentPath = childPath;
          continue;
        } else {
          return [childPath, bestChildId, bestOverlap, true];
        }
      } else {
        return [childPath, bestChildId, bestOverlap, false];
      }
    }
  }

  /**
   * Search many contexts at once; records count and wall time in liveStats.
   * Returns per-context [searchPath, nodeId, overlap, hasPrefix].
   */
  searchBatch(contexts: number[][]): [number[], number, number, boolean][] {
    const startTime = globalThis.performance ? globalThis.performance.now() : Date.now();

    if (this.rootId === null || contexts.length === 0) {
      return contexts.map(() => [[], -1, 0, false]);
    }

    const results = contexts.map(ctx => this._searchSingleHierarchical(ctx));

    const endTime = globalThis.performance ? globalThis.performance.now() : Date.now();
    const elapsedUs = (endTime - startTime) * 1000;

    this.liveStats.totalSearches += contexts.length;
    this.liveStats.totalSearchTimeUs += elapsedUs;

    return results;
  }

  /**
   * Single-context search; optionally touches the matched node's access time
   * (LRU bookkeeping in NodeMetadata).
   */
  search(context: number[], updateAccess: boolean = true): [number[], number, number, boolean] {
    const results = this.searchBatch([context]);
    const [searchPath, nodeId, overlap, hasPrefix] = results[0];

    if (updateAccess && nodeId >= 0 && this.metadata.has(nodeId)) {
      this.metadata.get(nodeId)!.updateAccessTime();
    }

    return [searchPath, nodeId, overlap, hasPrefix];
  }

  /**
   * Follow a search path (child indices) from the root; null when the path
   * runs off the tree. Traversal time is accumulated in liveStats.
   */
  traverse(searchPath: number[]): ClusterNode | null {
    const startTime = globalThis.performance ? globalThis.performance.now() : Date.now();

    if (this.rootId === null) return null;

    let currentId = this.rootId;

    for (const childIdx of searchPath) {
      if (!this.nodes.has(currentId)) return null;

      const currentNode = this.nodes.get(currentId)!;

      if (!currentNode.children || childIdx >= currentNode.children.length) {
        return null;
      }

      currentId = currentNode.children[childIdx];
    }

    const endTime = globalThis.performance ? globalThis.performance.now() : Date.now();
    const elapsedUs = (endTime - startTime) * 1000;
    this.liveStats.totalTraversalTimeUs += elapsedUs;

    return this.nodes.get(currentId) || null;
  }

  /**
   * Insert a context at the node addressed by `searchPath` (root if the path
   * is invalid), delegating to the leaf/internal helper as appropriate.
   *
   * @returns [newNodeId, newSearchPath, requestId].
   */
  insert(context: number[], searchPath: number[], totalTokens: number = 0): [number, number[], string] {
    const startTime = globalThis.performance ?
globalThis.performance.now() : Date.now();

    let matchedNode = this.traverse(searchPath);

    // Invalid path: anchor the insert at the root.
    if (!matchedNode) {
      matchedNode = this.nodes.get(this.rootId!)!;
      searchPath = [];
    }

    let newNodeId: number, newSearchPath: number[], requestId: string;

    if (matchedNode.isLeaf) {
      [newNodeId, newSearchPath, requestId] = this._insertAtLeaf(
        context, matchedNode, searchPath, totalTokens
      );
    } else {
      [newNodeId, newSearchPath, requestId] = this._insertAtInternal(
        context, matchedNode, searchPath, totalTokens
      );
    }

    // NOTE(review): endTime is computed but the elapsed time is never
    // recorded in liveStats (unlike searchBatch/traverse) — dead timing code.
    const endTime = globalThis.performance ? globalThis.performance.now() : Date.now();
    this.liveStats.totalInsertions += 1;

    return [newNodeId, newSearchPath, requestId];
  }

  /**
   * Append `context` as a new leaf under an internal node. Mints a request id,
   * registers the node + metadata (extraTokens = tokens beyond the parent's),
   * and returns [newNodeId, newSearchPath, requestId].
   */
  _insertAtInternal(context: number[], parentNode: ClusterNode, searchPath: number[], totalTokens: number): [number, number[], string] {
    const requestId = `req-${crypto.randomUUID().replace(/-/g, '').substring(0, 12)}`;

    const newNodeId = this.nextNodeId++;
    const newNode = new ClusterNode(
      newNodeId,
      new Set(context),
      new Set([newNodeId]),
      0.0,
      [],
      parentNode.nodeId
    );

    this.nodes.set(newNodeId, newNode);
    parentNode.addChild(newNodeId);

    const parentTokens = this.metadata.has(parentNode.nodeId) ? this.metadata.get(parentNode.nodeId)!.totalTokens : 0;
    // New child sits at the last child slot just appended.
    const newSearchPath = [...searchPath, parentNode.children.length - 1];

    const metadata = new NodeMetadata(newNodeId, {
      totalTokens,
      extraTokens: Math.max(0, totalTokens - parentTokens),
      searchPath: newSearchPath,
      docIds: context,
      isLeaf: true,
      requestId,
    });

    this.metadata.set(newNodeId, metadata);
    this._requestToNode.set(requestId, newNodeId);

    return [newNodeId, newSearchPath, requestId];
  }

  /**
   * Insert `context` as a sibling of an existing leaf: the new leaf is
   * attached to the matched leaf's parent (root if the leaf is parentless).
   * Returns [newLeafId, newSearchPath, requestId].
   */
  _insertAtLeaf(context: number[], leafNode: ClusterNode, searchPath: number[], totalTokens: number): [number, number[], string] {
    const requestId = `req-${crypto.randomUUID().replace(/-/g, '').substring(0, 12)}`;

    let parentNode: ClusterNode;
    let parentSearchPath: number[];

    if (leafNode.parent === null) {
      parentNode = this.nodes.get(this.rootId!)!;
      parentSearchPath = [];
    } else {
      parentNode = this.nodes.get(leafNode.parent)!;
      // Parent path = leaf path minus its last step.
      parentSearchPath = searchPath.length > 0 ? searchPath.slice(0, -1) : [];
    }

    const newLeafId = this.nextNodeId++;
    const newLeaf = new ClusterNode(
      newLeafId,
      new Set(context),
      new Set([newLeafId]),
      0.0,
      [],
      parentNode.nodeId
    );

    this.nodes.set(newLeafId, newLeaf);
    parentNode.addChild(newLeafId);

    const newSearchPath = [...parentSearchPath, parentNode.children.length - 1];
    const parentTokens = this.metadata.has(parentNode.nodeId) ? this.metadata.get(parentNode.nodeId)!.totalTokens : 0;

    const newMetadata = new NodeMetadata(newLeafId, {
      totalTokens,
      extraTokens: Math.max(0, totalTokens - parentTokens),
      searchPath: newSearchPath,
      docIds: context,
      isLeaf: true,
      requestId,
    });

    this.metadata.set(newLeafId, newMetadata);
    this._requestToNode.set(requestId, newLeafId);

    return [newLeafId, newSearchPath, requestId];
  }

  /**
   * Insert `context` next to an existing leaf by splitting on their shared
   * doc-id prefix: a new internal node takes the prefix, the old leaf and the
   * new leaf become its children. Falls back to _insertAtLeaf when there is
   * no usable prefix (or the doc sets are identical).
   */
  _splitLeafAndInsert(context: number[], leafNode: ClusterNode, searchPath: number[], totalTokens: number): [number, number[], string] {
    const matchedDocs = this._getNodeDocs(leafNode.nodeId);

    if (!matchedDocs) {
      return this._insertAtLeaf(context, leafNode, searchPath, totalTokens);
    }

    // Longest common positional prefix of the two doc lists.
    const sharedPrefix: number[] = [];
    for (let i = 0; i < Math.min(matchedDocs.length, context.length); i++) {
      if (matchedDocs[i] === context[i]) {
        sharedPrefix.push(matchedDocs[i]);
      } else {
        break;
      }
    }

    if (sharedPrefix.length === 0) {
      return this._insertAtLeaf(context, leafNode, searchPath, totalTokens);
    }

    // Same doc set (prefix covers the leaf and the sets are equal): no split.
    if (sharedPrefix.length === matchedDocs.length && new Set(matchedDocs).size === new Set(context).size &&
      [...new Set(matchedDocs)].every(d => new Set(context).has(d))) {
      return this._insertAtLeaf(context, leafNode, searchPath, totalTokens);
    }

    let parentId = leafNode.parent;
    if (parentId === null) {
      parentId = this.rootId!;
    }
    const parentNode = this.nodes.get(parentId)!;
    const parentSearchPath = searchPath.length > 0 ?
searchPath.slice(0, -1) : [];

    const leafChildIdx = parentNode.children.indexOf(leafNode.nodeId);

    // New internal node takes the old leaf's slot; its docs are the prefix,
    // its content the union of both doc sets.
    const newInternalId = this.nextNodeId++;
    const allContent = new Set([...leafNode.content, ...context]);

    const newInternal = new ClusterNode(
      newInternalId,
      allContent,
      new Set(),
      0.0,
      [leafNode.nodeId],
      parentId
    );
    newInternal.docIds = [...sharedPrefix];

    this.nodes.set(newInternalId, newInternal);

    parentNode.children[leafChildIdx] = newInternalId;
    leafNode.parent = newInternalId;

    const parentTokens = this.metadata.has(parentId) ? this.metadata.get(parentId)!.totalTokens : 0;
    const leafMeta = this.metadata.get(leafNode.nodeId);
    const leafTotal = leafMeta ? leafMeta.totalTokens : 0;

    // Prorate the internal node's tokens by the prefix share of the old
    // leaf's docs.
    let internalTokens = parentTokens;
    if (matchedDocs && matchedDocs.length > 0) {
      const prefixRatio = sharedPrefix.length / matchedDocs.length;
      internalTokens = Math.floor(parentTokens + (leafTotal - parentTokens) * prefixRatio);
    }

    const internalPath = [...parentSearchPath, leafChildIdx];

    const internalMeta = new NodeMetadata(newInternalId, {
      totalTokens: internalTokens,
      extraTokens: Math.max(0, internalTokens - parentTokens),
      searchPath: internalPath,
      docIds: [...sharedPrefix],
      isLeaf: false,
      requestId: null,
    });
    this.metadata.set(newInternalId, internalMeta);

    // Old leaf becomes child 0 of the new internal node; its extra tokens
    // shrink by what the internal node now accounts for.
    if (leafMeta) {
      leafMeta.extraTokens = Math.max(0, leafTotal - internalTokens);
      leafMeta.searchPath = [...internalPath, 0];
    }

    const requestId = `req-${crypto.randomUUID().replace(/-/g, '').substring(0, 12)}`;
    const newLeafId = this.nextNodeId++;

    const newLeaf = new ClusterNode(
      newLeafId,
      new Set(context),
      new Set([newLeafId]),
      0.0,
      [],
      newInternalId
    );
    newLeaf.docIds = [...context];

    this.nodes.set(newLeafId, newLeaf);
    newInternal.addChild(newLeafId);

    // New leaf is child 1 of the internal node.
    const newLeafPath = [...internalPath, 1];

    const newLeafMeta = new NodeMetadata(newLeafId, {
      totalTokens,
      extraTokens: Math.max(0, totalTokens - internalTokens),
      searchPath: newLeafPath,
      docIds: [...context],
      isLeaf: true,
      requestId,
    });

    this.metadata.set(newLeafId, newLeafMeta);
    this._requestToNode.set(requestId, newLeafId);

    return [newLeafId, newLeafPath, requestId];
  }

  /**
   * Adjust the token count of the node at `searchPath` by `tokenDelta`
   * (positive adds, negative removes). False when the path resolves to no
   * node or the node has no metadata.
   */
  updateNode(searchPath: number[], tokenDelta: number): boolean {
    const node = this.traverse(searchPath);

    if (!node || !this.metadata.has(node.nodeId)) {
      return false;
    }

    const metadata = this.metadata.get(node.nodeId)!;

    if (tokenDelta > 0) {
      metadata.addTokens(tokenDelta);
    } else {
      metadata.removeTokens(Math.abs(tokenDelta));
    }

    return true;
  }

  /** Legacy wrapper around _removeNodeAndPrune (return value discarded). */
  _removeNode(nodeId: number): void {
    this._removeNodeAndPrune(nodeId);
  }

  /**
   * Remove a node (and its metadata), detaching it from its parent; parents
   * left childless are pruned recursively up to (but excluding) the root.
   *
   * @returns number of *ancestor* nodes pruned (the node itself not counted).
   */
  _removeNodeAndPrune(nodeId: number): number {
    if (!this.nodes.has(nodeId)) {
      return 0;
    }

    let nodesPruned = 0;
    const node = this.nodes.get(nodeId)!;
    const parentId = node.parent;

    if (parentId !== null && this.nodes.has(parentId)) {
      const parent = this.nodes.get(parentId)!;
      const idx = parent.children.indexOf(nodeId);
      if (idx > -1) {
        parent.children.splice(idx, 1);
      }

      // Cascade upward when the parent just lost its last child.
      if (parent.children.length === 0 && !parent.isRoot) {
        nodesPruned += 1;
        nodesPruned += this._removeNodeAndPrune(parentId);
      }
    }

    this.nodes.delete(nodeId);

    if (this.metadata.has(nodeId)) {
      this.metadata.delete(nodeId);
    }

    return nodesPruned;
  }

  /**
   * Reconstruct the child-index path from the root to `nodeId` by walking
   * parent links; cycle-guarded, and truncated at any broken link.
   */
  _computeSearchPath(nodeId: number): number[] {
    if (nodeId === this.rootId) return [];

    const path: number[] = [];
    let currentId: number | null = nodeId;
    const visited = new Set();

    while (currentId !== this.rootId && currentId !== null) {
      if (visited.has(currentId)) break; // cycle guard

      visited.add(currentId);

      const node = this.nodes.get(currentId);
      if (!node || node.parent === null) break;

      const parent = this.nodes.get(node.parent);
      if (!parent) break;

      const childIdx = parent.children.indexOf(currentId);
      if (childIdx === -1) break;

      path.push(childIdx);
      currentId = node.parent;
    }

    // Path was collected leaf-to-root; flip it to root-to-leaf order.
    return path.reverse();
  }

  /** Longest common positional prefix of two id lists. */
  _findCommonPrefix(list1: number[], list2: number[]): number[] {
    const prefix: number[] = [];
    const minLen = Math.min(list1.length, list2.length);
    for (let i = 0; i < minLen; i++) {
      if (list1[i] === list2[i]) {
        prefix.push(list1[i]);
      } else {
        break;
      }
    }
    return prefix;
  }

  /**
   * Snapshot of live counters. `total_tokens` sums per-node *extra* tokens
   * (each node's increment over its parent), so shared prefixes count once.
   */
  getStats(): any {
    const avgSearchTime = this.liveStats.totalSearches > 0
      ? this.liveStats.totalSearchTimeUs / this.liveStats.totalSearches
      : 0;

    let totalTokens = 0;
    for (const meta of this.metadata.values()) {
      totalTokens += meta.extraTokens;
    }

    return {
      num_nodes: this.nodes.size,
      active_nodes: this.metadata.size,
      total_tokens: totalTokens,
      num_requests: this._requestToNode.size,
      total_searches: this.liveStats.totalSearches,
      total_insertions: this.liveStats.totalInsertions,
      total_removals: this.liveStats.totalRemovals,
      avg_search_time_us: avgSearchTime
    };
  }
}
diff --git a/openclaw-plugin/src/engine/metadata.ts b/openclaw-plugin/src/engine/metadata.ts
new file mode 100644
index 0000000..d2bf562
--- /dev/null
+++ b/openclaw-plugin/src/engine/metadata.ts
@@ -0,0 +1,82 @@
/** Optional initializer bag for NodeMetadata; every field has a default. */
export interface NodeMetadataInit {
  totalTokens?: number;
  extraTokens?: number;
  lastAccessTime?: number;
  searchPath?: number[];
  isActive?: boolean;
  isLeaf?: boolean;
  docIds?: number[] | null;
  requestId?: string | null;
}

/**
 * Per-node bookkeeping for the live cluster tree: token accounting
 * (totalTokens = cumulative, extraTokens = increment over the parent),
 * last-access time for LRU ordering, the root-to-node search path, the
 * node's doc ids, and — for leaves — the owning request id.
 */
export class NodeMetadata {
  nodeId: number;
  totalTokens: number;
  extraTokens: number;
  lastAccessTime: number;   // seconds since epoch (Date.now() / 1000)
  searchPath: number[];
  isActive: boolean;
  isLeaf: boolean;
  docIds: number[] | null;
  requestId: string | null;

  constructor(nodeId: number, init: NodeMetadataInit = {}) {
    this.nodeId = nodeId;
    this.totalTokens = init.totalTokens ?? 0;
    this.extraTokens = init.extraTokens ?? 0;
    this.lastAccessTime = init.lastAccessTime ?? Date.now() / 1000;
    this.searchPath = init.searchPath ?? [];
    this.isActive = init.isActive ?? true;
    this.isLeaf = init.isLeaf ?? false;
    this.docIds = init.docIds ?? null;
    this.requestId = init.requestId ?? null;
  }

  /** Refresh the LRU timestamp. */
  updateAccessTime(): void {
    this.lastAccessTime = Date.now() / 1000;
  }

  /** Add tokens to both counters and touch the access time. */
  addTokens(delta: number): void {
    this.totalTokens += delta;
    this.extraTokens += delta;
    this.updateAccessTime();
  }

  /**
   * Remove up to `delta` tokens: extraTokens is drained first, then the
   * remainder comes out of totalTokens (never below what it holds).
   *
   * @returns the number of tokens actually removed (0 for delta <= 0).
   */
  removeTokens(delta: number): number {
    if (delta <= 0) {
      return 0;
    }

    let tokensRemoved = Math.min(delta, this.extraTokens);
    this.extraTokens -= tokensRemoved;
    this.totalTokens -= tokensRemoved;

    const remaining = delta - tokensRemoved;
    if (remaining > 0) {
      const actualRemoved = Math.min(remaining, this.totalTokens);
      this.totalTokens -= actualRemoved;
      tokensRemoved += actualRemoved;
    }

    return tokensRemoved;
  }

  /** True when no tokens remain. */
  isEmpty(): boolean {
    return this.totalTokens <= 0;
  }

  /** LRU comparison: true if this node was accessed before `other`. */
  lessThan(other: NodeMetadata): boolean {
    return this.lastAccessTime < other.lastAccessTime;
  }

  toString(): string {
    const req = this.requestId ? `, request_id=${this.requestId}` : "";
    return (
      `NodeMetadata(id=${this.nodeId}, ` +
      `total_tokens=${this.totalTokens}, ` +
      `extra_tokens=${this.extraTokens}, ` +
      `is_leaf=${this.isLeaf}${req}, ` +
      `active=${this.isActive})`
    );
  }
}
diff --git a/openclaw-plugin/src/engine/reorder.ts b/openclaw-plugin/src/engine/reorder.ts
new file mode 100644
index 0000000..9b4d90f
--- /dev/null
+++ b/openclaw-plugin/src/engine/reorder.ts
@@ -0,0 +1,109 @@
import * as crypto from 'node:crypto';

/** One document with its content hash and position bookkeeping. */
interface IndexedDoc {
  doc: string;
  hash: string;
  originalIndex: number;
  previousPosition: number; // position in the previous ordering; Infinity if new
}

/** 16-hex-char SHA-256 of the trimmed document text. */
function hashDoc(doc: string): string {
  return crypto.createHash('sha256').update(doc.trim()).digest('hex').slice(0, 16);
}

/**
 * Build the two index mappings for a reordering:
 * originalOrder[newIndex] = originalIndex, newOrder[originalIndex] = newIndex.
 */
function buildIndexMappings(entries: IndexedDoc[], total: number): [number[], number[]] {
  const originalOrder = entries.map((entry) => entry.originalIndex);

  const newOrder = new Array(total);
  for (let newIndex = 0; newIndex < entries.length; newIndex += 1) {
newOrder[entries[newIndex].originalIndex] = newIndex;
  }

  return [originalOrder, newOrder];
}

/** Wrap each document with its hash and original index (position unset). */
function indexDocuments(docs: string[]): IndexedDoc[] {
  return docs.map((doc, originalIndex) => ({
    doc,
    hash: hashDoc(doc),
    originalIndex,
    previousPosition: Number.POSITIVE_INFINITY
  }));
}

/**
 * Stateless canonical reorder: sort documents by content hash (ties broken
 * by original index) so identical document sets always produce the same
 * ordering regardless of input order.
 *
 * @returns [reorderedDocs, originalOrder, newOrder] — see buildIndexMappings.
 */
export function reorderDocuments(docs: string[]): [string[], number[], number[]] {
  const indexed = indexDocuments(docs);
  indexed.sort((a, b) => {
    const byHash = a.hash.localeCompare(b.hash);
    if (byHash !== 0) {
      return byHash;
    }
    return a.originalIndex - b.originalIndex;
  });

  const reorderedDocs = indexed.map((entry) => entry.doc);
  const [originalOrder, newOrder] = buildIndexMappings(indexed, docs.length);
  return [reorderedDocs, originalOrder, newOrder];
}

/**
 * Stateful reorder across calls: documents seen in the previous call keep
 * their previous relative order (stable prefix for downstream caching);
 * never-seen documents are appended in canonical hash order.
 */
export class ReorderState {
  // Hashes of the documents in the order emitted by the last reorder() call.
  private previousOrder: string[] = [];

  // (Removed a write-only `hashToDoc` map that was populated on every
  // reorder() and cleared only on reset(), but never read — it grew without
  // bound across calls for no benefit.)

  /**
   * Reorder `docs`, preserving the previous relative order of known
   * documents and appending unknown ones in hash order.
   *
   * @returns [reorderedDocs, originalOrder, newOrder].
   */
  reorder(docs: string[]): [string[], number[], number[]] {
    const indexed = indexDocuments(docs);
    const previousPositions = new Map();

    // First occurrence wins when the previous order contained duplicates.
    for (let i = 0; i < this.previousOrder.length; i += 1) {
      const hash = this.previousOrder[i];
      if (!previousPositions.has(hash)) {
        previousPositions.set(hash, i);
      }
    }

    const known: IndexedDoc[] = [];
    const unknown: IndexedDoc[] = [];

    for (const entry of indexed) {
      const previousPosition = previousPositions.get(entry.hash);
      if (previousPosition === undefined) {
        unknown.push(entry);
        continue;
      }

      known.push({ ...entry, previousPosition });
    }

    // Known docs keep their prior relative order; ties by original index.
    known.sort((a, b) => {
      if (a.previousPosition !== b.previousPosition) {
        return a.previousPosition - b.previousPosition;
      }
      return a.originalIndex - b.originalIndex;
    });

    // New docs go last, in canonical hash order.
    unknown.sort((a, b) => {
      const byHash = a.hash.localeCompare(b.hash);
      if (byHash !== 0) {
        return byHash;
      }
      return a.originalIndex - b.originalIndex;
    });

    const reordered = [...known, ...unknown];

    this.previousOrder = reordered.map((entry) => entry.hash);

    const reorderedDocs = reordered.map((entry) => entry.doc);
    const [originalOrder, newOrder] = buildIndexMappings(reordered, docs.length);
    return [reorderedDocs, originalOrder, newOrder];
  }

  /** Forget the previous ordering; the next reorder() starts fresh. */
  reset(): void {
    this.previousOrder = [];
  }
}
diff --git a/openclaw-plugin/src/engine/tree-nodes.ts b/openclaw-plugin/src/engine/tree-nodes.ts
new file mode 100644
index 0000000..e7b3c7a
--- /dev/null
+++ b/openclaw-plugin/src/engine/tree-nodes.ts
@@ -0,0 +1,338 @@
/**
 * A node in the hierarchical cluster tree: holds a content set of doc ids,
 * the original input indices it covers, merge distance, and parent/child
 * links by node id.
 */
export class ClusterNode {
  nodeId: number;
  content: Set;
  originalIndices: Set;
  distance: number;
  children: number[];
  parent: number | null;
  frequency: number;
  mergeDistance: number;
  searchPath: number[];

  constructor(
    nodeId: number,
    content: Set,
    originalIndices: Set = new Set([nodeId]),
    distance: number = 0.0,
    children: number[] = [],
    parent: number | null = null,
    frequency: number = 1
  ) {
    this.nodeId = nodeId;
    this.content = content instanceof Set ?
new Set(content) : new Set(content);
    this.originalIndices = originalIndices;
    this.distance = distance;
    this.children = children;
    this.parent = parent;
    this.frequency = frequency;
    this.mergeDistance = distance;
    this.searchPath = [];
  }

  /** Leaf = no children (also tolerates a non-array children field). */
  get isLeaf(): boolean {
    return !Array.isArray(this.children) || this.children.length === 0;
  }

  /** Root = no parent link. */
  get isRoot(): boolean {
    return this.parent === null;
  }

  /** True when the content set holds no doc ids. */
  get isEmpty(): boolean {
    return this.content.size === 0;
  }

  /** Content as a numerically sorted array (computed on every access). */
  get docIds(): number[] {
    return Array.from(this.content).sort((a, b) => a - b);
  }

  set docIds(value: number[]) {
    this.content = new Set(value);
  }

  /** Append a child id, skipping duplicates and self-links. */
  addChild(childId: number): void {
    // Defensive: ensure children is an array
    if (!Array.isArray(this.children)) {
      this.children = [];
    }
    if (!this.children.includes(childId) && childId !== this.nodeId) {
      this.children.push(childId);
    }
  }

  /** Detach a child id if present (no-op otherwise). */
  removeChild(childId: number): void {
    const idx = this.children.indexOf(childId);
    if (idx !== -1) {
      this.children.splice(idx, 1);
    }
  }

  updateFrequency(additionalFrequency: number): void {
    this.frequency += additionalFrequency;
  }

  /**
   * Fold another node into this one. Despite the name, content becomes the
   * INTERSECTION of the two content sets (shared docs), while original
   * indices are unioned and frequencies summed.
   */
  mergeWith(otherNode: ClusterNode): void {
    this.content = new Set(Array.from(this.content).filter((v) => otherNode.content.has(v)));
    this.originalIndices = new Set([...this.originalIndices, ...otherNode.originalIndices]);
    this.frequency += otherNode.frequency;
  }

  /** Depth = length of the cached root-to-node search path. */
  getDepth(): number {
    return this.searchPath.length;
  }
}

/** Aggregate node counts reported by NodeManager.getNodeStats(). */
export interface NodeStats {
  totalNodes: number;
  leafNodes: number;
  rootNodes: number;
  internalNodes: number;
}

/**
 * Builds and deduplicates cluster nodes. Nodes with identical content sets
 * are canonicalized: `redirects` maps a requested node id to the canonical
 * id, `clusterNodes` maps every requested id to its (possibly shared) node,
 * and `uniqueNodes` holds only the canonical nodes. `contentToNodeId` keys
 * canonical nodes by their sorted content.
 */
export class NodeManager {
  clusterNodes: Map;
  uniqueNodes: Map;
  redirects: Map;
  contentToNodeId: Map;

  constructor() {
    this.clusterNodes = new Map();
    this.uniqueNodes = new Map();
    this.redirects = new Map();
    this.contentToNodeId = new Map();
  }

  /** Canonical dedup key: sorted content ids joined with commas. */
  private contentKey(content: Set): string {
    return Array.from(content).sort((a, b) => a - b).join(',');
  }

  /**
   * Create (or reuse) a leaf node for `promptContent`. If a canonical node
   * with identical content exists, bump its frequency, record `nodeId` in
   * its original indices, and redirect `nodeId` to it instead of allocating.
   */
  createLeafNode(nodeId: number, promptContent: Iterable): ClusterNode {
    const contentSet = promptContent instanceof Set ? new Set(promptContent) : new Set(promptContent);
    const key = this.contentKey(contentSet);

    const canonicalId = this.contentToNodeId.get(key);
    if (canonicalId !== undefined) {
      const canonicalNode = this.uniqueNodes.get(canonicalId);
      if (!canonicalNode) {
        throw new Error(`Missing canonical leaf node for id ${canonicalId}`);
      }

      canonicalNode.updateFrequency(1);
      canonicalNode.originalIndices.add(nodeId);

      this.redirects.set(nodeId, canonicalId);
      this.clusterNodes.set(nodeId, canonicalNode);
      return canonicalNode;
    }

    const node = new ClusterNode(nodeId, contentSet);
    this.clusterNodes.set(nodeId, node);
    this.uniqueNodes.set(nodeId, node);
    this.contentToNodeId.set(key, nodeId);
    return node;
  }

  /**
   * Create an internal node over two children (resolved through redirects).
   * Its content is the INTERSECTION of the children's contents. If both
   * children canonicalize to the same node, or an existing node already has
   * that intersection content, the new id is redirected to the existing node
   * instead of allocating a duplicate.
   *
   * @throws Error when a redirected/canonical node id cannot be resolved.
   */
  createInternalNode(
    nodeId: number,
    child1Id: number,
    child2Id: number,
    distance: number
  ): ClusterNode {
    const canonicalChild1Id = this.redirects.get(child1Id) ?? child1Id;
    const canonicalChild2Id = this.redirects.get(child2Id) ?? child2Id;

    // Both children are the same canonical node: nothing to combine.
    if (canonicalChild1Id === canonicalChild2Id) {
      this.redirects.set(nodeId, canonicalChild1Id);
      const canonicalNode = this.uniqueNodes.get(canonicalChild1Id);
      if (!canonicalNode) {
        throw new Error(`Missing canonical child node for id ${canonicalChild1Id}`);
      }
      this.clusterNodes.set(nodeId, canonicalNode);
      return canonicalNode;
    }

    const child1 = this.uniqueNodes.get(canonicalChild1Id);
    const child2 = this.uniqueNodes.get(canonicalChild2Id);
    if (!child1 || !child2) {
      throw new Error(
        `Missing child nodes for internal node ${nodeId}: ${canonicalChild1Id}, ${canonicalChild2Id}`
      );
    }

    const intersectionContent = new Set(
      Array.from(child1.content).filter((v) => child2.content.has(v))
    );
    const key = this.contentKey(intersectionContent);

    // Reuse an existing node that already has exactly this content,
    // provided it is not one of the two children themselves.
    const existingId = this.contentToNodeId.get(key);
    if (existingId !== undefined && intersectionContent.size > 0) {
      if (existingId !== canonicalChild1Id && existingId !== canonicalChild2Id) {
        const existingNode = this.uniqueNodes.get(existingId);
        if (!existingNode) {
          throw new Error(`Missing existing node for id ${existingId}`);
        }

        existingNode.addChild(canonicalChild1Id);
        existingNode.addChild(canonicalChild2Id);
        existingNode.frequency = Math.max(
          existingNode.frequency,
          child1.frequency + child2.frequency
        );
        existingNode.originalIndices = new Set([
          ...existingNode.originalIndices,
          ...child1.originalIndices,
          ...child2.originalIndices
        ]);

        child1.parent = existingId;
        child2.parent = existingId;

        this.redirects.set(nodeId, existingId);
        this.clusterNodes.set(nodeId, existingNode);
        return existingNode;
      }
    }

    // Fresh internal node covering both children.
    const combinedIndices = new Set([...child1.originalIndices, ...child2.originalIndices]);
    const node = new ClusterNode(
      nodeId,
      intersectionContent,
      combinedIndices,
      distance,
      [canonicalChild1Id, canonicalChild2Id],
      null,
      child1.frequency + child2.frequency
    );

    this.clusterNodes.set(nodeId, node);
    this.uniqueNodes.set(nodeId, node);

    // Empty intersections are not registered as canonical content.
    if (intersectionContent.size > 0) {
      this.contentToNodeId.set(key, nodeId);
    }

    child1.parent = nodeId;
    child2.parent = nodeId;

    return node;
  }

  /**
   * Remove nodes whose content set is empty, splicing their children up to
   * their parent. Processed in descending id order (children before the
   * parents created later); dangling parent links are nulled afterwards.
   */
  cleanupEmptyNodes(): void {
    const emptyNodeIds = Array.from(this.uniqueNodes.entries())
      .filter(([_, node]) => node.isEmpty)
      .map(([nodeId]) => nodeId);

    if (emptyNodeIds.length === 0) {
      return;
    }

    const sortedEmptyIds = emptyNodeIds.sort((a, b) => b - a);

    for (const emptyId of sortedEmptyIds) {
      const emptyNode = this.uniqueNodes.get(emptyId);
      if (!emptyNode) {
        continue;
      }

      const parentId = emptyNode.parent;
      const childrenIds = [...emptyNode.children];

      // Reparent surviving children onto the removed node's parent.
      if (parentId !== null) {
        const parentNode = this.uniqueNodes.get(parentId);
        if (parentNode) {
          parentNode.removeChild(emptyId);
          for (const childId of childrenIds) {
            if (this.uniqueNodes.has(childId)) {
              parentNode.addChild(childId);
            }
          }
        }
      }

      for (const childId of childrenIds) {
        const childNode = this.uniqueNodes.get(childId);
        if (childNode) {
          childNode.parent = parentId;
        }
      }

      this.uniqueNodes.delete(emptyId);
    }

    // Null out parent links that now point at deleted nodes.
    for (const node of this.uniqueNodes.values()) {
      if (node.parent !== null && !this.uniqueNodes.has(node.parent)) {
        node.parent = null;
      }
    }
  }

  /** Count total/leaf/root/internal nodes among the canonical nodes. */
  getNodeStats(): NodeStats {
    const totalNodes = this.uniqueNodes.size;
    let leafNodes = 0;
    let rootNodes = 0;

    for (const node of this.uniqueNodes.values()) {
      if (node.isLeaf) {
        leafNodes += 1;
      }
      if (node.isRoot) {
        rootNodes += 1;
      }
    }

    return {
      totalNodes,
      leafNodes,
      rootNodes,
      internalNodes: totalNodes - leafNodes
    };
  }

  /**
   * Recompute cached searchPath values for every node. With a single root,
   * paths are propagated from it; the multi-root branch continues below
   * (definition runs past the end of this chunk).
   */
  updateSearchPaths(): void {
    const rootNodes = Array.from(this.uniqueNodes.values()).filter((node) => node.isRoot);

    if (rootNodes.length === 0) {
      return;
    }

    if (rootNodes.length === 1) {
      const root = rootNodes[0];
      root.searchPath = [];
      this._updatePathsFromNode(root);
      return;
    }

    const currentMaxId =
Math.max(...Array.from(this.uniqueNodes.keys())); + const virtualRootId = currentMaxId + 1; + const virtualRoot = new ClusterNode( + virtualRootId, + new Set(), + new Set(), + 0.0, + rootNodes.map((node) => node.nodeId), + null, + rootNodes.reduce((sum, node) => sum + node.frequency, 0) + ); + virtualRoot.searchPath = []; + + this.uniqueNodes.set(virtualRootId, virtualRoot); + + for (const node of rootNodes) { + node.parent = virtualRootId; + } + + this._updatePathsFromNode(virtualRoot); + } + + _updatePathsFromNode(node: ClusterNode): void { + for (let childIndex = 0; childIndex < node.children.length; childIndex += 1) { + const childId = node.children[childIndex]; + const childNode = this.uniqueNodes.get(childId); + if (!childNode) { + continue; + } + + childNode.searchPath = [...node.searchPath, childIndex]; + this._updatePathsFromNode(childNode); + } + } +} diff --git a/openclaw-plugin/src/index.ts b/openclaw-plugin/src/index.ts new file mode 100644 index 0000000..e3736ff --- /dev/null +++ b/openclaw-plugin/src/index.ts @@ -0,0 +1,218 @@ +import { Type } from "@sinclair/typebox"; +import { definePluginEntry } from "openclaw/plugin-sdk/plugin-entry"; +import { delegateCompactionToRuntime } from "openclaw/plugin-sdk/core"; + +import { injectCacheControl } from "./engine/cache-control.js"; +import { dedupChatCompletions } from "./engine/dedup.js"; +import { getFormatHandler, type InterceptConfig } from "./engine/extract.js"; +import { ContextPilot } from "./engine/live-index.js"; + +type Scope = "all" | "system" | "tool_results"; + +function parseScope(value: unknown): Scope { + if (value === "system" || value === "tool_results" || value === "all") { + return value; + } + return "all"; +} + +function reorderWithEngine(engine: ContextPilot, docs: string[]): string[] { + const [reordered] = engine.reorder(docs); + if (!Array.isArray(reordered) || !Array.isArray(reordered[0])) { + return docs; + } + const candidate = reordered[0]; + if (!candidate.every((entry) => 
typeof entry === "string")) { + return docs; + } + return candidate as string[]; +} + +interface Message { + role: string; + content: unknown; +} + +export default definePluginEntry({ + id: "contextpilot", + name: "ContextPilot", + description: "Optimizes context via reordering, deduplication, and cache control injection.", + register: (api) => { + const config = { + scope: parseScope(api.pluginConfig?.scope), + }; + + // Initialize the ContextPilot engine for reordering + const engine = new ContextPilot(0.001, false, "average"); + + let assembleCount = 0; + let totalCharsSaved = 0; + + // Register as a Context Engine - this intercepts context assembly + api.registerContextEngine("contextpilot", () => ({ + info: { + id: "contextpilot", + name: "ContextPilot", + ownsCompaction: false, + }, + + async ingest() { + return { ingested: true }; + }, + + async assemble({ messages, system }: { messages: Message[]; system?: string }) { + const interceptConfig: InterceptConfig = { + enabled: true, + mode: "auto", + tag: "document", + separator: "---", + alpha: 0.001, + linkageMethod: "average", + scope: config.scope, + }; + + // OpenClaw uses role: "toolResult" instead of Anthropic's user+tool_result blocks + // Convert to Anthropic format for our extractors + const convertedMessages = messages.map((msg, idx) => { + if (msg.role === "toolResult") { + const content = typeof msg.content === "string" + ? msg.content + : Array.isArray(msg.content) + ? 
(msg.content as any[]).map(b => b?.text || "").join("\n") + : ""; + return { + role: "user", + content: [{ + type: "tool_result", + tool_use_id: (msg as any).tool_use_id || (msg as any).toolUseId || `tool_${idx}`, + content: content, + }], + }; + } + return msg; + }); + + const convertedBody: Record = { + messages: convertedMessages, + system: system, + }; + + const handler = getFormatHandler("anthropic_messages"); + const multi = handler.extractAll(convertedBody, interceptConfig); + + const reorderDocs = (docs: string[]): string[] => { + if (docs.length < 2) { + return docs; + } + return reorderWithEngine(engine, docs); + }; + + // Reorder documents in system prompt + if (multi.systemExtraction) { + const [extraction, sysIdx] = multi.systemExtraction; + if (extraction.documents.length >= 2) { + const reordered = reorderDocs(extraction.documents); + handler.reconstructSystem(convertedBody, extraction, reordered, sysIdx); + } + } + + // Reorder documents in tool results + for (const [extraction, location] of multi.toolExtractions) { + if (extraction.documents.length >= 2) { + const reordered = reorderDocs(extraction.documents); + handler.reconstructToolResult(convertedBody, extraction, reordered, location); + } + } + + // Map converted messages back to original format (toolResult role) + const finalMessages = (convertedBody.messages as any[]).map((msg, idx) => { + const original = messages[idx]; + if (original?.role === "toolResult") { + const block = Array.isArray(msg.content) ? 
msg.content[0] : null; + const extractedContent = block?.content; + + if (Array.isArray(original.content)) { + const newContentArray = (original.content as any[]).map(b => { + if (b?.type === "text" && typeof extractedContent === "string") { + return { ...b, text: extractedContent }; + } + return b; + }); + return { ...original, content: newContentArray }; + } else if (typeof extractedContent === "string") { + return { ...original, content: extractedContent }; + } + return original; + } + return msg; + }); + + // Build final body with potentially reordered messages + const finalBody: Record = { + messages: finalMessages, + system: system, + }; + + // Deduplicate repeated content + const dedupResult = dedupChatCompletions(finalBody); + totalCharsSaved += dedupResult.charsSaved; + + // Inject cache control markers + const optimizedBody = injectCacheControl(finalBody, "anthropic"); + + assembleCount++; + + // Log savings periodically (every 5 requests or when significant savings) + if (dedupResult.charsSaved > 0 || assembleCount % 5 === 0) { + const estimatedTokensSaved = Math.round(totalCharsSaved / 4); + const estimatedCostSaved = (estimatedTokensSaved * 0.003 / 1000).toFixed(4); // $3/MTok input + console.error(`[ContextPilot] Stats: ${assembleCount} requests, ${totalCharsSaved.toLocaleString()} chars saved (~${estimatedTokensSaved.toLocaleString()} tokens, ~$${estimatedCostSaved})`); + } + + // Return optimized messages + return { + messages: (optimizedBody.messages as Message[]) || messages, + system: optimizedBody.system as string | undefined, + estimatedTokens: 0, + }; + }, + + async compact(params) { + return await delegateCompactionToRuntime(params); + }, + })); + + // Register status tool + api.registerTool({ + name: "contextpilot_status", + description: "Report ContextPilot engine state", + parameters: Type.Object({}), + async execute(_toolCallId: string, _params: unknown) { + const stats = engine.getStats(); + const lines = [ + "ContextPilot Engine 
Status:", + ` Scope: ${config.scope}`, + ` Contexts assembled: ${assembleCount}`, + ` Total chars saved: ${totalCharsSaved.toLocaleString()}`, + ` Live index: ${engine.isLive ? "active" : "warming"}`, + ` Nodes: ${Number(stats.num_nodes ?? 0)}`, + ` Active nodes: ${Number(stats.active_nodes ?? 0)}`, + ` Requests tracked: ${Number(stats.num_requests ?? 0)}`, + ` Total searches: ${Number(stats.total_searches ?? 0)}`, + ` Total insertions: ${Number(stats.total_insertions ?? 0)}`, + ` Total removals: ${Number(stats.total_removals ?? 0)}`, + ` Avg search time (us): ${Number(stats.avg_search_time_us ?? 0).toFixed(2)}`, + ]; + + return { + content: [ + { + type: "text" as const, + text: lines.join("\n"), + }, + ], + }; + }, + }); + }, +}); diff --git a/openclaw-plugin/test-e2e.ts b/openclaw-plugin/test-e2e.ts new file mode 100644 index 0000000..6b2f6f6 --- /dev/null +++ b/openclaw-plugin/test-e2e.ts @@ -0,0 +1,188 @@ +#!/usr/bin/env npx tsx +/** + * E2E test for ContextPilot plugin + * + * Run: npx tsx test-e2e.ts + * Requires: ANTHROPIC_API_KEY in environment + */ + +import { ContextPilot } from './src/engine/live-index.js'; +import { getFormatHandler, type InterceptConfig } from './src/engine/extract.js'; +import { injectCacheControl } from './src/engine/cache-control.js'; +import { dedupChatCompletions } from './src/engine/dedup.js'; + +const ANTHROPIC_API_KEY = process.env.ANTHROPIC_API_KEY; +if (!ANTHROPIC_API_KEY) { + console.error('Error: ANTHROPIC_API_KEY not set'); + process.exit(1); +} + +// Simulated system prompt with multiple documents (like Claude Code's context) +const systemPromptWithDocs = `You are a helpful coding assistant. 
+ + + +# File: src/index.ts +export function main() { + console.log("Hello world"); + const result = processData(getData()); + return result; +} + +function getData() { + return { items: [1, 2, 3, 4, 5] }; +} + +function processData(data: { items: number[] }) { + return data.items.map(x => x * 2); +} + + +# File: src/utils.ts +export function formatOutput(data: number[]): string { + return data.join(', '); +} + +export function validateInput(input: unknown): boolean { + return Array.isArray(input) && input.every(x => typeof x === 'number'); +} + +export function calculateSum(numbers: number[]): number { + return numbers.reduce((a, b) => a + b, 0); +} + + +# File: README.md +# My Project + +This is a sample project demonstrating the ContextPilot optimization. + +## Installation +npm install + +## Usage +npm start + +## Features +- Data processing +- Input validation +- Output formatting + + + +Answer questions about the code above.`; + +// Build Anthropic Messages API request body +const requestBody = { + model: 'claude-sonnet-4-6', + max_tokens: 256, + system: systemPromptWithDocs, + messages: [ + { + role: 'user', + content: 'What does the main function do? Be brief.' + } + ] +}; + +async function runTest() { + console.log('=== ContextPilot E2E Test ===\n'); + + // 1. Initialize engine + const engine = new ContextPilot(0.001, false, 'average'); + console.log('1. Engine initialized'); + + // 2. Extract documents + const interceptConfig: InterceptConfig = { + enabled: true, + mode: 'auto', + tag: 'document', + separator: '---', + alpha: 0.001, + linkageMethod: 'average', + scope: 'all' + }; + + const body = structuredClone(requestBody); + const handler = getFormatHandler('anthropic_messages'); + const multi = handler.extractAll(body, interceptConfig); + + console.log(`2. Extracted ${multi.totalDocuments} documents from system prompt`); + + // 3. 
Reorder documents + if (multi.systemExtraction) { + const [extraction, sysIdx] = multi.systemExtraction; + console.log(` Original order: ${extraction.documents.map((_, i) => i).join(', ')}`); + + if (extraction.documents.length >= 2) { + const [reordered] = engine.reorder(extraction.documents); + if (Array.isArray(reordered) && Array.isArray(reordered[0])) { + const reorderedDocs = reordered[0] as string[]; + handler.reconstructSystem(body, extraction, reorderedDocs, sysIdx); + console.log('3. Documents reordered for prefix cache optimization'); + } + } + } + + // 4. Inject cache control + const optimizedBody = injectCacheControl(body, 'anthropic'); + console.log('4. Cache control markers injected'); + + // 5. Show optimization summary + const originalLen = JSON.stringify(requestBody).length; + const optimizedLen = JSON.stringify(optimizedBody).length; + console.log(`\n=== Optimization Summary ===`); + console.log(`Original request size: ${originalLen} chars`); + console.log(`Optimized request size: ${optimizedLen} chars`); + console.log(`Cache control added: ${JSON.stringify(optimizedBody).includes('cache_control')}`); + + // 6. 
Make real API call + console.log('\n=== Making API Call ==='); + console.log('Calling Anthropic API with optimized request...\n'); + + const response = await fetch('https://api.anthropic.com/v1/messages', { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + 'x-api-key': ANTHROPIC_API_KEY, + 'anthropic-version': '2023-06-01', + 'anthropic-beta': 'prompt-caching-2024-07-31' + }, + body: JSON.stringify(optimizedBody) + }); + + const result = await response.json(); + + if (result.error) { + console.error('API Error:', result.error); + process.exit(1); + } + + console.log('=== Response ==='); + console.log('Model:', result.model); + console.log('Stop reason:', result.stop_reason); + console.log('\nAssistant:', result.content?.[0]?.text || '(no text)'); + + console.log('\n=== Usage ==='); + console.log('Input tokens:', result.usage?.input_tokens); + console.log('Output tokens:', result.usage?.output_tokens); + if (result.usage?.cache_creation_input_tokens) { + console.log('Cache creation tokens:', result.usage.cache_creation_input_tokens); + } + if (result.usage?.cache_read_input_tokens) { + console.log('Cache read tokens:', result.usage.cache_read_input_tokens); + } + + console.log('\n=== Engine Stats ==='); + const stats = engine.getStats(); + console.log('Nodes:', stats.num_nodes); + console.log('Active nodes:', stats.active_nodes); + console.log('Total insertions:', stats.total_insertions); + + console.log('\n✓ E2E test complete'); +} + +runTest().catch(err => { + console.error('Test failed:', err); + process.exit(1); +}); diff --git a/openclaw-plugin/tsconfig.json b/openclaw-plugin/tsconfig.json new file mode 100644 index 0000000..017a5f9 --- /dev/null +++ b/openclaw-plugin/tsconfig.json @@ -0,0 +1,15 @@ +{ + "compilerOptions": { + "target": "ES2022", + "module": "ES2022", + "moduleResolution": "bundler", + "strict": true, + "esModuleInterop": true, + "skipLibCheck": true, + "outDir": "dist", + "declaration": true, + "resolveJsonModule": true + 
}, + "include": ["src/**/*.ts"], + "exclude": ["node_modules", "dist"] +} diff --git a/tests/test_http_intercept.py b/tests/test_http_intercept.py index d746595..1b4be7b 100644 --- a/tests/test_http_intercept.py +++ b/tests/test_http_intercept.py @@ -118,18 +118,19 @@ def client(mock_session): original_session = http_mod._aiohttp_session original_url = http_mod._infer_api_url original_intercept_index = http_mod._intercept_index - original_state = http_mod._intercept_state + original_states = http_mod._intercept_states.copy() http_mod._aiohttp_session = mock_session http_mod._infer_api_url = "http://mock-backend:30000" http_mod._intercept_index = None # reset so each test starts fresh - http_mod._intercept_state = http_mod._InterceptConvState() + http_mod._intercept_states.clear() try: yield TestClient(app, raise_server_exceptions=False) finally: http_mod._aiohttp_session = original_session http_mod._infer_api_url = original_url http_mod._intercept_index = original_intercept_index - http_mod._intercept_state = original_state + http_mod._intercept_states.clear() + http_mod._intercept_states.update(original_states) # ============================================================================ @@ -146,7 +147,7 @@ def _warmup(client, path, body): resp = client.post(path, json=body) assert resp.status_code == 200 # Keep _intercept_index primed, but reset conversation tracking. - http_mod._intercept_state = http_mod._InterceptConvState() + http_mod._intercept_states.clear() return resp @@ -1005,7 +1006,7 @@ def _make_body(marker_id): content1 = mock_session._last_json["messages"][3]["content"] # Reset intercept state for clean comparison - http_mod._intercept_state = http_mod._InterceptConvState() + http_mod._intercept_states.clear() # Request 2 with different id "bbbb" resp2 = client.post("/v1/chat/completions", json=_make_body("cccc2222dddd3333"))