diff --git a/.gitignore b/.gitignore
index b9224d3..1a83c94 100644
--- a/.gitignore
+++ b/.gitignore
@@ -18,6 +18,8 @@ test-cli.ts
 # Ignore all markdown files except README.md and files in doc/docs folders
 *.md
 !README.md
+!README.zh-CN.md
+!ROADMAP.md
 !doc/**/*.md
 !docs/**/*.md
 !tests/**/*.md
diff --git a/README.md b/README.md
index 3754d1e..4f60b0c 100644
--- a/README.md
+++ b/README.md
@@ -1,175 +1,84 @@
 # KODE SDK
 
-> **Stateful Agent Runtime Kernel** - The engine that powers your AI agents with persistence, recovery, and trajectory exploration.
+[English](./README.md) | [中文](./README.zh-CN.md)
 
-```
-                    +------------------+
-                    |   Your App       |  CLI / Desktop / IDE / Server
-                    +--------+---------+
-                             |
-                    +--------v---------+
-                    |    KODE SDK      |  Agent Runtime Kernel
-                    |  +-----------+   |
-                    |  |  Agent    |   |  Lifecycle + State + Events
-                    |  +-----------+   |
-                    |  |  Store    |   |  Persistence (Pluggable)
-                    |  +-----------+   |
-                    |  |  Sandbox  |   |  Execution Isolation
-                    |  +-----------+   |
-                    +------------------+
-```
-
----
-
-## What is KODE SDK?
-
-KODE SDK is an **Agent Runtime Kernel** - think of it like V8 for JavaScript, but for AI agents. It handles the complex lifecycle management so you can focus on building your agent's capabilities.
-
-**Core Capabilities:**
-- **Crash Recovery**: WAL-protected persistence with 7-stage breakpoint recovery
-- **Fork & Resume**: Explore different agent trajectories from any checkpoint
-- **Event Streams**: Progress/Control/Monitor channels for real-time UI updates
-- **Tool Governance**: Permission system, approval workflows, audit trails
-
-**What KODE SDK is NOT:**
-- Not a cloud platform (you deploy it)
-- Not an HTTP server (you add that layer)
-- Not a multi-tenant SaaS framework (you build that on top)
-
----
-
-## When to Use KODE SDK
-
-### Perfect Fit (Use directly)
-
-| Scenario | Why It Works |
-|----------|--------------|
-| **CLI Agent Tools** | Single process, local filesystem, zero config |
-| **Desktop Apps** (Electron/Tauri) | Full system access, long-running process |
-| **IDE Plugins** (VSCode/JetBrains) | Single user, workspace integration |
-| **Local Development** | Fast iteration, instant persistence |
-
-### Good Fit (With architecture)
-
-| Scenario | What You Need |
-|----------|---------------|
-| **Self-hosted Server** | Add HTTP layer (Express/Fastify/Hono) |
-| **Small-scale Backend** (<1K users) | Implement PostgresStore, add user isolation |
-| **Kubernetes Deployment** | Implement distributed Store + locks |
-
-### Needs Custom Architecture
-
-| Scenario | Recommended Approach |
-|----------|---------------------|
-| **Large-scale ToC** (10K+ users) | Worker microservice pattern (see [Architecture Guide](./docs/ARCHITECTURE.md)) |
-| **Serverless** (Vercel/Cloudflare) | API layer on serverless + Worker pool for agents |
-| **Multi-tenant SaaS** | Tenant isolation layer + distributed Store |
+> Event-driven, long-running AI Agent framework with enterprise-grade persistence and multi-agent collaboration.
 
-### Not Designed For
+## Features
 
-| Scenario | Reason |
-|----------|--------|
-| **Pure browser runtime** | No filesystem, no process execution |
-| **Edge functions only** | Agent loops need long-running processes |
-| **Stateless microservices** | Agents are inherently stateful |
+- **Event-Driven Architecture** - Three-channel system (Progress/Control/Monitor) for clean separation of concerns
+- **Long-Running & Resumable** - Seven-stage checkpoints with Safe-Fork-Point for crash recovery
+- **Multi-Agent Collaboration** - AgentPool, Room messaging, and task delegation
+- **Enterprise Persistence** - SQLite/PostgreSQL support with unified WAL
+- **Extensible Ecosystem** - MCP tools, custom Providers, Skills system
 
-> **Rule of Thumb**: If your agents need to run for more than a few seconds, execute tools, and remember state - KODE SDK is for you. If you just need stateless LLM calls, use the provider APIs directly.
+## Quick Start
 
----
-
-## 60-Second Quick Start
+**One-liner setup** (install dependencies and build):
 
 ```bash
-npm install @anthropic/kode-sdk
-
-# Set your API key
-export ANTHROPIC_API_KEY=sk-...
-
-# Run the example
-npx ts-node examples/getting-started.ts
-```
-
-```typescript
-import { Agent, AnthropicProvider, LocalSandbox } from '@anthropic/kode-sdk';
-
-const agent = await Agent.create({
-  agentId: 'my-first-agent',
-  template: { systemPrompt: 'You are a helpful assistant.' },
-  deps: {
-    modelProvider: new AnthropicProvider(process.env.ANTHROPIC_API_KEY!),
-    sandbox: new LocalSandbox({ workDir: './workspace' }),
-  },
-});
-
-// Subscribe to events
-agent.subscribeProgress({ kinds: ['text_chunk'] }, (event) => {
-  process.stdout.write(event.text);
-});
-
-// Chat with the agent
-await agent.chat('Hello! What can you help me with?');
+./quickstart.sh
 ```
 
----
-
-## Core Concepts
+Or install as a dependency:
 
-### 1. Three-Channel Event System
-
-```
-+-------------+     +-------------+     +-------------+
-|  Progress   |     |   Control   |     |   Monitor   |
-+-------------+     +-------------+     +-------------+
-| text_chunk  |     | permission  |     | tool_audit  |
-| tool:start  |     | _required   |     | state_change|
-| tool:complete|    | approval    |     | token_usage |
-| done        |     | _response   |     | error       |
-+-------------+     +-------------+     +-------------+
-      |                   |                   |
-      v                   v                   v
-   Your UI         Approval Service     Observability
+```bash
+npm install @shareai-lab/kode-sdk
 ```
 
-### 2. Crash Recovery & Breakpoints
+Set environment variables:
 
+<!-- tabs:start -->
+#### **Linux / macOS**
+```bash
+export ANTHROPIC_API_KEY=sk-...
+export ANTHROPIC_MODEL_ID=claude-sonnet-4-20250514  # optional, default: claude-sonnet-4-20250514
+export ANTHROPIC_BASE_URL=https://api.anthropic.com  # optional, default: https://api.anthropic.com
 ```
-Agent Execution Flow:
-
-  READY -> PRE_MODEL -> STREAMING -> TOOL_PENDING -> PRE_TOOL -> EXECUTING -> POST_TOOL
-    |         |            |             |              |           |           |
-    +-------- WAL Protected State -------+-- Approval --+---- Tool Execution ---+
 
-On crash: Resume from last safe breakpoint, auto-seal incomplete tool calls
+#### **Windows (PowerShell)**
+```powershell
+$env:ANTHROPIC_API_KEY="sk-..."
+$env:ANTHROPIC_MODEL_ID="claude-sonnet-4-20250514"  # optional, default: claude-sonnet-4-20250514
+$env:ANTHROPIC_BASE_URL="https://api.anthropic.com"  # optional, default: https://api.anthropic.com
 ```
+<!-- tabs:end -->
 
-### 3. Fork & Trajectory Exploration
+Minimal example:
 
 ```typescript
-// Create a snapshot at current state
-const snapshotId = await agent.snapshot('before-decision');
+import { Agent, AnthropicProvider, JSONStore } from '@shareai-lab/kode-sdk';
 
-// Fork to explore different paths
-const explorerA = await agent.fork(snapshotId);
-const explorerB = await agent.fork(snapshotId);
+const provider = new AnthropicProvider(
+  process.env.ANTHROPIC_API_KEY!,
+  process.env.ANTHROPIC_MODEL_ID
+);
 
-await explorerA.chat('Try approach A');
-await explorerB.chat('Try approach B');
-```
+const agent = await Agent.create({
+  provider,
+  store: new JSONStore('./.kode'),
+  systemPrompt: 'You are a helpful assistant.'
+});
 
----
+// Start the turn first (not awaited yet): the loop below only ends on 'done',
+// so waiting on the subscription before anything is sent would block forever.
+const sent = agent.send('Hello!');
+for await (const { event } of agent.subscribe(['progress'])) {
+  if (event.type === 'text_chunk') process.stdout.write(event.delta);
+  if (event.type === 'done') break;
+}
 
-## Examples
+await sent;
+```
 
-| Example | Description | Key Features |
-|---------|-------------|--------------|
-| `npm run example:getting-started` | Minimal chat loop | Progress stream, basic setup |
-| `npm run example:agent-inbox` | Event-driven inbox | Todo management, tool concurrency |
-| `npm run example:approval` | Approval workflow | Control channel, hooks, policies |
-| `npm run example:room` | Multi-agent collaboration | AgentPool, Room, Fork |
-| `npm run example:scheduler` | Long-running with reminders | Scheduler, step triggers |
-| `npm run example:nextjs` | Next.js API integration | Resume-or-create, SSE streaming |
+Run examples:
 
----
+```bash
+npm run example:getting-started    # Minimal chat
+npm run example:agent-inbox        # Event-driven inbox
+npm run example:approval           # Tool approval workflow
+npm run example:room               # Multi-agent collaboration
+```
 
 ## Architecture for Scale
 
@@ -209,76 +118,36 @@ For production deployments serving many users, we recommend the **Worker Microse
 3. **Store is shared** - Single source of truth for agent state
 4. **Queue decouples** - Request handling from agent execution
 
-See [docs/ARCHITECTURE.md](./docs/ARCHITECTURE.md) for detailed deployment guides.
-
----
-
-## Documentation
-
-| Document | Description |
-|----------|-------------|
-| [Architecture Guide](./docs/ARCHITECTURE.md) | Mental model, deployment patterns, scaling strategies |
-| [Quickstart](./docs/quickstart.md) | Step-by-step first agent |
-| [Events System](./docs/events.md) | Three-channel event model |
-| [API Reference](./docs/api.md) | Core types and interfaces |
-| [Playbooks](./docs/playbooks.md) | Common patterns and recipes |
-| [Deployment](./docs/DEPLOYMENT.md) | Production deployment guide |
-| [Roadmap](./docs/ROADMAP.md) | Future development plans |
-
-### Scenario Guides
-
-| Scenario | Guide |
-|----------|-------|
-| CLI Tools | [docs/scenarios/cli-tools.md](./docs/scenarios/cli-tools.md) |
-| Desktop Apps | [docs/scenarios/desktop-apps.md](./docs/scenarios/desktop-apps.md) |
-| IDE Plugins | [docs/scenarios/ide-plugins.md](./docs/scenarios/ide-plugins.md) |
-| Web Backend | [docs/scenarios/web-backend.md](./docs/scenarios/web-backend.md) |
-| Large-scale ToC | [docs/scenarios/large-scale-toc.md](./docs/scenarios/large-scale-toc.md) |
-
----
+See [docs/en/guides/architecture.md](./docs/en/guides/architecture.md) for detailed deployment guides.
 
 ## Supported Providers
 
-| Provider | Streaming | Tool Calling | Thinking/Reasoning |
-|----------|-----------|--------------|-------------------|
-| **Anthropic** | SSE | Native | Extended Thinking |
-| **OpenAI** | SSE | Function Calling | o1/o3 reasoning |
-| **Gemini** | SSE | Function Calling | thinkingLevel |
-| **DeepSeek** | SSE | OpenAI-compatible | reasoning_content |
-| **Qwen** | SSE | OpenAI-compatible | thinking_budget |
-| **Groq/Cerebras** | SSE | OpenAI-compatible | - |
-
----
-
-## Roadmap
+| Provider | Streaming | Tools | Reasoning | Files |
+|----------|-----------|-------|-----------|-------|
+| Anthropic | ✅ | ✅ | ✅ Extended Thinking | ✅ |
+| OpenAI | ✅ | ✅ | ✅ | ✅ |
+| Gemini | ✅ | ✅ | ✅ | ✅ |
 
-### v2.8 - Storage Foundation
-- PostgresStore with connection pooling
-- Distributed locking (Advisory Lock)
-- Graceful shutdown support
+> **Note**: OpenAI-compatible services (DeepSeek, GLM, Qwen, MiniMax, OpenRouter, etc.) can be used via `OpenAIProvider` with a custom `baseURL` configuration. See [Providers Guide](./docs/en/guides/providers.md) for details.
 
-### v3.0 - Performance
-- Incremental message storage (append-only)
-- Copy-on-Write fork optimization
-- Event sampling and aggregation
-
-### v3.5 - Distributed
-- Agent Scheduler with LRU caching
-- Distributed EventBus (Redis Pub/Sub)
-- Worker mode helpers
-
-See [docs/ROADMAP.md](./docs/ROADMAP.md) for the complete roadmap.
-
----
-
-## Contributing
+## Documentation
 
-We welcome contributions! Please see [CONTRIBUTING.md](./CONTRIBUTING.md) for guidelines.
+| Section | Description |
+|---------|-------------|
+| **Getting Started** | |
+| [Installation](./docs/en/getting-started/installation.md) | Setup and configuration |
+| [Quickstart](./docs/en/getting-started/quickstart.md) | Build your first Agent |
+| [Concepts](./docs/en/getting-started/concepts.md) | Core concepts explained |
+| **Guides** | |
+| [Events](./docs/en/guides/events.md) | Three-channel event system |
+| [Tools](./docs/en/guides/tools.md) | Built-in tools & custom tools |
+| [Providers](./docs/en/guides/providers.md) | Model provider configuration |
+| [Database](./docs/en/guides/database.md) | SQLite/PostgreSQL persistence |
+| [Resume & Fork](./docs/en/guides/resume-fork.md) | Crash recovery & branching |
+| **Reference** | |
+| [API Reference](./docs/en/reference/api.md) | Complete API documentation |
+| [Examples](./docs/en/examples/playbooks.md) | All examples explained |
 
 ## License
 
-MIT License - see [LICENSE](./LICENSE) for details.
-
----
-
-**KODE SDK** - *The runtime kernel that lets you build agents that persist, recover, and explore.*
+MIT
diff --git a/README.zh-CN.md b/README.zh-CN.md
new file mode 100644
index 0000000..0a5a870
--- /dev/null
+++ b/README.zh-CN.md
@@ -0,0 +1,113 @@
+# KODE SDK
+
+[English](./README.md) | [中文](./README.zh-CN.md)
+
+> 事件驱动的长时运行 AI Agent 框架，支持企业级持久化和多 Agent 协作。
+
+## 核心特性
+
+- **事件驱动架构** - 三通道系统 (Progress/Control/Monitor) 清晰分离关注点
+- **长时运行与恢复** - 七段断点机制，支持 Safe-Fork-Point 崩溃恢复
+- **多 Agent 协作** - AgentPool、Room 消息、任务委派
+- **企业级持久化** - 支持 SQLite/PostgreSQL，统一 WAL 日志
+- **可扩展生态** - MCP 工具、自定义 Provider、Skills 系统
+
+## 快速开始
+
+**一键启动**（安装依赖并构建）：
+
+```bash
+./quickstart.sh
+```
+
+或作为依赖安装：
+
+```bash
+npm install @shareai-lab/kode-sdk
+```
+
+设置环境变量：
+
+<!-- tabs:start -->
+#### **Linux / macOS**
+```bash
+export ANTHROPIC_API_KEY=sk-...
+export ANTHROPIC_MODEL_ID=claude-sonnet-4-20250514  # 可选，默认: claude-sonnet-4-20250514
+export ANTHROPIC_BASE_URL=https://api.anthropic.com  # 可选，默认: https://api.anthropic.com
+```
+
+#### **Windows (PowerShell)**
+```powershell
+$env:ANTHROPIC_API_KEY="sk-..."
+$env:ANTHROPIC_MODEL_ID="claude-sonnet-4-20250514"  # 可选，默认: claude-sonnet-4-20250514
+$env:ANTHROPIC_BASE_URL="https://api.anthropic.com"  # 可选，默认: https://api.anthropic.com
+```
+<!-- tabs:end -->
+
+最简示例：
+
+```typescript
+import { Agent, AnthropicProvider, JSONStore } from '@shareai-lab/kode-sdk';
+
+const provider = new AnthropicProvider(
+  process.env.ANTHROPIC_API_KEY!,
+  process.env.ANTHROPIC_MODEL_ID
+);
+
+const agent = await Agent.create({
+  provider,
+  store: new JSONStore('./.kode'),
+  systemPrompt: '你是一个乐于助人的助手。'
+});
+
+// 先发送消息（暂不 await）：下面的循环只有收到 'done' 才会结束，
+// 如果在发送之前就等待订阅循环，程序将永远阻塞。
+const sent = agent.send('你好！');
+for await (const { event } of agent.subscribe(['progress'])) {
+  if (event.type === 'text_chunk') process.stdout.write(event.delta);
+  if (event.type === 'done') break;
+}
+
+await sent;
+```
+
+运行示例：
+
+```bash
+npm run example:getting-started    # 最简对话
+npm run example:agent-inbox        # 事件驱动收件箱
+npm run example:approval           # 工具审批流程
+npm run example:room               # 多 Agent 协作
+```
+
+## 支持的 Provider
+
+| Provider | 流式输出 | 工具调用 | 推理 | 文件 |
+|----------|---------|---------|------|------|
+| Anthropic | ✅ | ✅ | ✅ Extended Thinking | ✅ |
+| OpenAI | ✅ | ✅ | ✅ | ✅ |
+| Gemini | ✅ | ✅ | ✅ | ✅ |
+
+> **说明**：OpenAI 兼容的服务（DeepSeek、GLM、Qwen、MiniMax、OpenRouter 等）可以通过 `OpenAIProvider` 配置自定义 `baseURL` 来使用。详见 [Provider 配置指南](./docs/zh-CN/guides/providers.md)。
+
+## 文档
+
+| 章节 | 说明 |
+|------|------|
+| **入门指南** | |
+| [安装配置](./docs/zh-CN/getting-started/installation.md) | 环境配置与安装 |
+| [快速上手](./docs/zh-CN/getting-started/quickstart.md) | 创建第一个 Agent |
+| [核心概念](./docs/zh-CN/getting-started/concepts.md) | 核心概念详解 |
+| **使用指南** | |
+| [事件系统](./docs/zh-CN/guides/events.md) | 三通道事件系统 |
+| [工具系统](./docs/zh-CN/guides/tools.md) | 内置工具与自定义工具 |
+| [Provider 配置](./docs/zh-CN/guides/providers.md) | 模型 Provider 配置 |
+| [数据库存储](./docs/zh-CN/guides/database.md) | SQLite/PostgreSQL 持久化 |
+| [恢复与分叉](./docs/zh-CN/guides/resume-fork.md) | 崩溃恢复与分支 |
+| **参考** | |
+| [API 参考](./docs/zh-CN/reference/api.md) | 完整 API 文档 |
+| [示例集](./docs/zh-CN/examples/playbooks.md) | 所有示例详解 |
+
+## 许可证
+
+MIT
diff --git a/docs/ROADMAP.md b/ROADMAP.md
similarity index 99%
rename from docs/ROADMAP.md
rename to ROADMAP.md
index dd23b70..d726cc6 100644
--- a/docs/ROADMAP.md
+++ b/ROADMAP.md
@@ -177,8 +177,7 @@ const pool = new DistributedPool({
 
 // Agent automatically migrates between workers
 const agent = await pool.acquire(agentId);
-await agent.send(message);
-await agent.complete();
+const result = await agent.complete(message);
 await pool.release(agentId);
 ```
 
diff --git a/docs/ARCHITECTURE.md b/docs/ARCHITECTURE.md
deleted file mode 100644
index b4fd278..0000000
--- a/docs/ARCHITECTURE.md
+++ /dev/null
@@ -1,572 +0,0 @@
-# KODE SDK Architecture Guide
-
-> Deep dive into the mental model, design decisions, and deployment patterns for KODE SDK.
-
----
-
-## Table of Contents
-
-1. [Mental Model](#mental-model)
-2. [Core Architecture](#core-architecture)
-3. [Runtime Characteristics](#runtime-characteristics)
-4. [Deployment Patterns](#deployment-patterns)
-5. [Scaling Strategies](#scaling-strategies)
-6. [Decision Framework](#decision-framework)
-
----
-
-## Mental Model
-
-### What KODE SDK Is
-
-```
-Think of KODE SDK like:
-
-+------------------+     +------------------+     +------------------+
-|       V8         |     |     SQLite       |     |    KODE SDK      |
-|  JS Runtime      |     |  Database Engine |     |  Agent Runtime   |
-+------------------+     +------------------+     +------------------+
-        |                        |                        |
-        v                        v                        v
-+------------------+     +------------------+     +------------------+
-|    Express.js    |     |     Prisma       |     |   Your App       |
-|  Web Framework   |     |       ORM        |     | (CLI/Desktop/Web)|
-+------------------+     +------------------+     +------------------+
-        |                        |                        |
-        v                        v                        v
-+------------------+     +------------------+     +------------------+
-|      Vercel      |     |   PlanetScale    |     |   Your Infra     |
-|  Cloud Platform  |     |  Cloud Database  |     | (K8s/EC2/Local)  |
-+------------------+     +------------------+     +------------------+
-```
-
-**KODE SDK is an engine, not a platform.**
-
-It provides:
-- Agent lifecycle management (create, run, pause, resume, fork)
-- State persistence (via pluggable Store interface)
-- Tool execution and permission governance
-- Event streams for observability
-
-It does NOT provide:
-- HTTP routing or API framework
-- User authentication or authorization
-- Multi-tenancy or resource isolation
-- Horizontal scaling or load balancing
-
-### The Single Responsibility
-
-```
-                     KODE SDK's Job
-                           |
-                           v
-    +----------------------------------------------+
-    |                                              |
-    |   "Keep this agent running, recover from    |
-    |    crashes, let it fork, and tell me        |
-    |    what's happening via events."            |
-    |                                              |
-    +----------------------------------------------+
-                           |
-                           v
-                     Your App's Job
-                           |
-                           v
-    +----------------------------------------------+
-    |                                              |
-    |   "Handle users, route requests, manage     |
-    |    permissions, scale infrastructure,       |
-    |    and integrate with my systems."          |
-    |                                              |
-    +----------------------------------------------+
-```
-
----
-
-## Core Architecture
-
-### Component Overview
-
-```
-+------------------------------------------------------------------+
-|                         Agent Instance                            |
-+------------------------------------------------------------------+
-|                                                                   |
-|  +------------------+  +------------------+  +------------------+ |
-|  |  MessageQueue    |  | ContextManager   |  |   ToolRunner     | |
-|  |  (User inputs)   |  | (Token mgmt)     |  | (Parallel exec)  | |
-|  +--------+---------+  +--------+---------+  +--------+---------+ |
-|           |                     |                     |           |
-|           +---------------------+---------------------+           |
-|                                 |                                 |
-|                    +------------v------------+                    |
-|                    |      BreakpointManager  |                    |
-|                    |   (7-stage state track) |                    |
-|                    +------------+------------+                    |
-|                                 |                                 |
-|  +------------------+  +--------v---------+  +------------------+ |
-|  | PermissionManager|  |     EventBus     |  |   TodoManager    | |
-|  | (Approval flow)  |  | (3-channel emit) |  | (Task tracking)  | |
-|  +------------------+  +------------------+  +------------------+ |
-|                                                                   |
-+----------------------------------+--------------------------------+
-                                   |
-                    +--------------+--------------+
-                    |              |              |
-           +--------v------+ +----v----+ +-------v-------+
-           |     Store     | | Sandbox | | ModelProvider |
-           | (Persistence) | | (Exec)  | | (LLM calls)   |
-           +---------------+ +---------+ +---------------+
-```
-
-### Data Flow
-
-```
-User Message
-     |
-     v
-+----+----+     +-----------+     +------------+
-| Message |---->|  Context  |---->|   Model    |
-|  Queue  |     |  Manager  |     |  Provider  |
-+---------+     +-----------+     +-----+------+
-                                        |
-                              +---------+---------+
-                              |                   |
-                         Text Response      Tool Calls
-                              |                   |
-                              v                   v
-                    +---------+------+    +------+-------+
-                    |    EventBus    |    |  ToolRunner  |
-                    | (text_chunk)   |    | (parallel)   |
-                    +----------------+    +------+-------+
-                                                 |
-                              +------------------+------------------+
-                              |                  |                  |
-                         Permission         Execution          Result
-                           Check              (Sandbox)        Handling
-                              |                  |                  |
-                              v                  v                  v
-                    +--------------------+  +---------+  +------------------+
-                    | PermissionManager  |  | Sandbox |  |    EventBus      |
-                    | (Control channel)  |  | (exec)  |  | (tool:complete)  |
-                    +--------------------+  +---------+  +------------------+
-```
-
-### State Persistence (WAL)
-
-```
-Every State Change
-        |
-        v
-+-------+-------+
-|  Write-Ahead  |
-|     Log       |  <-- Write first (fast, append-only)
-+-------+-------+
-        |
-        v
-+-------+-------+
-|   Main File   |  <-- Then update (can be slow)
-+-------+-------+
-        |
-        v
-+-------+-------+
-|  Delete WAL   |  <-- Finally cleanup
-+-------+-------+
-
-On Crash Recovery:
-1. Scan for WAL files
-2. If WAL exists but main file incomplete -> Restore from WAL
-3. Delete WAL after successful restore
-```
-
----
-
-## Runtime Characteristics
-
-### Memory Model
-
-```
-Agent Memory Footprint (Typical):
-
-+---------------------------+
-|     Agent Instance        |
-+---------------------------+
-| messages[]: 10KB - 2MB    |  <-- Grows with conversation
-| toolRecords: 1KB - 100KB  |  <-- Grows with tool usage
-| eventTimeline: 5KB - 500KB|  <-- Recent events cached
-| mediaCache: 0 - 10MB      |  <-- If images/files involved
-| baseObjects: ~50KB        |  <-- Fixed overhead
-+---------------------------+
-
-Typical range: 100KB - 5MB per agent
-AgentPool (50 agents): 5MB - 250MB
-```
-
-### I/O Patterns
-
-```
-Per Agent Step:
-
-+-------------------+     +-------------------+     +-------------------+
-| persistMessages() |     | persistToolRecs() |     | emitEvents()      |
-| ~20-50ms (SSD)    |     | ~5-10ms           |     | ~1-5ms (buffered) |
-+-------------------+     +-------------------+     +-------------------+
-
-Total per step: 30-70ms I/O overhead
-
-At Scale (100 concurrent agents):
-- Sequential bottleneck in JSONStore
-- Need distributed Store for parallel writes
-```
-
-### Event Loop Impact
-
-```
-Agent Processing:
-
-   +---------+
-   |  IDLE   |  <-- Agent waiting for input
-   +----+----+
-        |
-   +----v----+
-   | PROCESS |  <-- Model call (async, non-blocking)
-   +----+----+
-        |
-   +----v----+
-   |  TOOL   |  <-- Tool execution (may block if sync)
-   +----+----+
-        |
-   +----v----+
-   | PERSIST |  <-- File I/O (async)
-   +----+----+
-        |
-        v
-   +---------+
-   |  IDLE   |
-   +---------+
-
-Key: All heavy operations are async
-Risk: Sync operations in custom tools can block event loop
-```
-
----
-
-## Deployment Patterns
-
-### Pattern 1: Single Process (CLI/Desktop)
-
-```
-+------------------------------------------------------------------+
-|                        Your Application                           |
-+------------------------------------------------------------------+
-|                                                                   |
-|   +------------------+                                            |
-|   |   KODE SDK       |                                            |
-|   |   +----------+   |                                            |
-|   |   | Agent(s) |   |                                            |
-|   |   +----------+   |                                            |
-|   |   | JSONStore|   |  --> Local filesystem                      |
-|   |   +----------+   |                                            |
-|   +------------------+                                            |
-|                                                                   |
-+------------------------------------------------------------------+
-
-Best for: CLI tools, Electron apps, VSCode extensions
-Agents: 1-50 concurrent
-Users: Single user
-Persistence: Local files
-```
-
-### Pattern 2: Single Server (Self-hosted)
-
-```
-+------------------------------------------------------------------+
-|                          Server                                   |
-+------------------------------------------------------------------+
-|                                                                   |
-|   +------------------+     +------------------+                   |
-|   |   HTTP Layer     |     |   KODE SDK       |                   |
-|   |   (Express/etc)  |---->|   AgentPool      |                   |
-|   +------------------+     +------------------+                   |
-|                                   |                               |
-|                            +------v------+                        |
-|                            |  JSONStore  |  --> Local filesystem  |
-|                            +-------------+                        |
-|                                                                   |
-+------------------------------------------------------------------+
-
-Best for: Internal tools, small teams, prototypes
-Agents: 10-100 concurrent
-Users: <100 concurrent
-Persistence: Local files (can use Redis/Postgres)
-```
-
-### Pattern 3: Worker Microservice (Scalable)
-
-```
-+------------------------------------------------------------------+
-|                         Load Balancer                             |
-+----------------------------------+--------------------------------+
-                                   |
-         +-------------------------+-------------------------+
-         |                         |                         |
-+--------v--------+     +----------v--------+     +----------v------+
-|   API Server 1  |     |   API Server 2    |     |   API Server N  |
-|   (Stateless)   |     |   (Stateless)     |     |   (Stateless)   |
-+--------+--------+     +----------+--------+     +----------+------+
-         |                         |                         |
-         +-------------------------+-------------------------+
-                                   |
-                          +--------v--------+
-                          |  Message Queue  |
-                          |  (Redis/SQS)    |
-                          +--------+--------+
-                                   |
-         +-------------------------+-------------------------+
-         |                         |                         |
-+--------v--------+     +----------v--------+     +----------v------+
-|   Worker 1      |     |   Worker 2        |     |   Worker N      |
-|   +----------+  |     |   +----------+    |     |   +----------+  |
-|   | KODE SDK |  |     |   | KODE SDK |    |     |   | KODE SDK |  |
-|   | AgentPool|  |     |   | AgentPool|    |     |   | AgentPool|  |
-|   +----------+  |     |   +----------+    |     |   +----------+  |
-+--------+--------+     +----------+--------+     +----------+------+
-         |                         |                         |
-         +-------------------------+-------------------------+
-                                   |
-                          +--------v--------+
-                          | Distributed     |
-                          | Store           |
-                          | (PostgreSQL)    |
-                          +-----------------+
-
-Best for: Production ToC apps, SaaS platforms
-Agents: 1000+ concurrent
-Users: 10K+ concurrent
-Persistence: PostgreSQL/Redis with distributed locks
-```
-
-### Pattern 4: Hybrid Serverless (API + Workers)
-
-```
-+------------------------------------------------------------------+
-|                    Serverless Platform (Vercel)                   |
-+------------------------------------------------------------------+
-|                                                                   |
-|   +------------------+                                            |
-|   |  /api/chat       |  --> Validate, enqueue, return task ID    |
-|   +------------------+                                            |
-|   |  /api/status     |  --> Check task status from DB            |
-|   +------------------+                                            |
-|   |  /api/stream     |  --> SSE from Redis Pub/Sub               |
-|   +------------------+                                            |
-|                                                                   |
-+----------------------------------+--------------------------------+
-                                   |
-                          +--------v--------+
-                          |  Message Queue  |
-                          |  (Upstash Redis)|
-                          +--------+--------+
-                                   |
-+----------------------------------v--------------------------------+
-|                    Worker Platform (Railway/Render)               |
-+------------------------------------------------------------------+
-|                                                                   |
-|   +------------------+                                            |
-|   |   Worker Pool    |                                            |
-|   |   +----------+   |                                            |
-|   |   | KODE SDK |   |                                            |
-|   |   | Agents   |   |                                            |
-|   |   +----------+   |                                            |
-|   +------------------+                                            |
-|                                                                   |
-+------------------------------------------------------------------+
-
-Best for: Serverless frontend + stateful backend
-API: Serverless (fast, scalable, cheap)
-Agents: Long-running workers (Railway, Render, Fly.io)
-```
-
----
-
-## Scaling Strategies
-
-### Strategy 1: Vertical Scaling (Single Node)
-
-```
-Applicable: Up to ~100 concurrent agents
-
-Optimizations:
-1. Increase AgentPool maxAgents
-2. Use Redis for Store (faster than files)
-3. Add memory (agents are memory-bound)
-4. Use SSD for persistence
-
-const pool = new AgentPool({
-  maxAgents: 100,  // Increase from default 50
-  store: new RedisStore({ ... }),
-});
-```
-
-### Strategy 2: Agent Sharding (Multi-Node)
-
-```
-Applicable: 100-1000 concurrent agents
-
-Architecture:
-- Hash agentId to determine which worker handles it
-- Consistent hashing for minimal reshuffling
-- Each worker owns a shard of agents
-
-                    agentId: "user-123-agent-456"
-                              |
-                              v
-                    hash(agentId) % N = worker_index
-                              |
-              +---------------+---------------+
-              |               |               |
-         Worker 0        Worker 1        Worker 2
-        (agents 0-33)   (agents 34-66)  (agents 67-99)
-```
-
-### Strategy 3: Agent Scheduling (LRU)
-
-```
-Applicable: 1000+ total agents, limited active
-
-Concept:
-- Not all agents are active simultaneously
-- Keep hot agents in memory
-- Hibernate cold agents to storage
-- Resume on demand
-
-class AgentScheduler {
-  private active: LRUCache<string, Agent>;  // In memory
-  private hibernated: Set<string>;           // In storage
-
-  async get(agentId: string): Promise<Agent> {
-    // Check active cache
-    if (this.active.has(agentId)) {
-      return this.active.get(agentId);
-    }
-
-    // Resume from storage
-    const agent = await Agent.resume(agentId, config, deps);
-    this.active.set(agentId, agent);
-
-    // LRU eviction handles hibernation
-    return agent;
-  }
-}
-```
-
-### Strategy 4: Fork Optimization (COW)
-
-```
-Applicable: Heavy fork usage (exploration scenarios)
-
-Current: O(n) deep copy of messages
-Optimized: O(1) copy-on-write
-
-Before:
-  fork() {
-    const forked = JSON.parse(JSON.stringify(messages));  // O(n)
-  }
-
-After:
-  fork() {
-    const forkedHead = currentHead;  // O(1) pointer copy
-    // Messages are immutable, share until modified
-  }
-```
-
----
-
-## Decision Framework
-
-### When to Use KODE SDK Directly
-
-```
-+------------------+
-|  Decision Tree   |
-+------------------+
-         |
-         v
-+------------------+
-| Single user/     |----YES---> Use directly (Pattern 1)
-| local machine?   |
-+--------+---------+
-         | NO
-         v
-+------------------+
-| < 100 concurrent |----YES---> Single server (Pattern 2)
-| users?           |
-+--------+---------+
-         | NO
-         v
-+------------------+
-| Can run long-    |----YES---> Worker microservice (Pattern 3)
-| running processes?|
-+--------+---------+
-         | NO
-         v
-+------------------+
-| Serverless only? |----YES---> Hybrid pattern (Pattern 4)
-+--------+---------+
-         | NO
-         v
-+------------------+
-| Consider other   |
-| solutions        |
-+------------------+
-```
-
-### Platform Compatibility Matrix
-
-| Platform | Compatible | Notes |
-|----------|------------|-------|
-| Node.js | 100% | Primary target |
-| Bun | 95% | Minor adjustments needed |
-| Deno | 80% | Permission flags required |
-| Electron | 90% | Use in main process |
-| VSCode Extension | 85% | workspace.fs integration |
-| Vercel Functions | 20% | API layer only, not agents |
-| Cloudflare Workers | 5% | Not compatible |
-| Browser | 10% | No fs/process, very limited |
-
-### Store Selection Guide
-
-| Store | Use Case | Throughput | Scaling |
-|-------|----------|------------|---------|
-| JSONStore | Development, CLI | Low | Single node |
-| SQLiteStore | Desktop apps | Medium | Single node |
-| RedisStore | Small-medium production | High | Single node |
-| PostgresStore | Production, multi-node | High | Multi-node |
-
----
-
-## Summary
-
-### Core Principles
-
-1. **KODE SDK is a runtime kernel** - It manages agent lifecycle, not application infrastructure
-
-2. **Agents are stateful** - They need persistent storage and long-running processes
-
-3. **Scale through architecture** - Use worker patterns for large-scale deployments
-
-4. **Store is pluggable** - Implement custom Store for your infrastructure
-
-### Quick Reference
-
-| Scenario | Pattern | Store | Scale |
-|----------|---------|-------|-------|
-| CLI tool | Single Process | JSONStore | 1 user |
-| Desktop app | Single Process | SQLiteStore | 1 user |
-| Internal tool | Single Server | RedisStore | ~100 users |
-| SaaS product | Worker Microservice | PostgresStore | 10K+ users |
-| Serverless app | Hybrid | External DB | Varies |
-
----
-
-*Next: See [Deployment Guide](./DEPLOYMENT.md) for implementation details.*
diff --git a/docs/DEPLOYMENT.md b/docs/DEPLOYMENT.md
deleted file mode 100644
index 4a03a1f..0000000
--- a/docs/DEPLOYMENT.md
+++ /dev/null
@@ -1,638 +0,0 @@
-# Deployment Scenarios & Architecture Patterns
-
-This document covers deployment patterns for KODE SDK across different use cases, from CLI tools to production backends.
-
----
-
-## Scenario Overview
-
-| Scenario | Complexity | Store | Scalability | Example |
-|----------|-----------|-------|-------------|---------|
-| CLI Tool | Low | JSONStore | Single user | Claude Code |
-| Desktop App | Low | JSONStore | Single user | ChatGPT Desktop |
-| IDE Plugin | Low | JSONStore | Single user | Cursor |
-| Self-hosted Server | Medium | JSONStore/Custom | ~100 concurrent | Internal tool |
-| Production Backend | High | PostgreSQL/Redis | 1000+ concurrent | SaaS product |
-| Serverless | High | External DB | Auto-scaling | API service |
-
----
-
-## Scenario 1: CLI Tool
-
-**Characteristics:**
-- Single user, single process
-- Local file system available
-- Long-running process
-- No external dependencies needed
-
-**Architecture:**
-```
-┌─────────────────────────────┐
-│         Terminal            │
-│  ┌───────────────────────┐  │
-│  │      CLI App          │  │
-│  │  ┌─────────────────┐  │  │
-│  │  │   KODE SDK      │  │  │
-│  │  │  ┌───────────┐  │  │  │
-│  │  │  │ JSONStore │  │  │  │
-│  │  │  └─────┬─────┘  │  │  │
-│  │  └────────┼────────┘  │  │
-│  └───────────┼───────────┘  │
-└──────────────┼──────────────┘
-               │
-        ┌──────▼──────┐
-        │ Local Files │
-        │ ~/.my-agent │
-        └─────────────┘
-```
-
-**Implementation:**
-```typescript
-import { Agent, AgentPool, JSONStore } from '@shareai-lab/kode-sdk';
-
-const store = new JSONStore(path.join(os.homedir(), '.my-agent'));
-const pool = new AgentPool({
-  dependencies: { store, templateRegistry, sandboxFactory, toolRegistry }
-});
-
-// Resume or create
-const agent = await pool.get('main')
-  ?? await pool.create('main', { templateId: 'cli-assistant' });
-
-// Interactive loop
-const rl = readline.createInterface({ input: stdin, output: stdout });
-for await (const line of rl) {
-  await agent.send(line);
-  for await (const event of agent.subscribeProgress()) {
-    if (event.type === 'text_chunk') process.stdout.write(event.delta);
-  }
-}
-```
-
-**Best for:** Developer tools, automation scripts, personal assistants.
-
----
-
-## Scenario 2: Desktop App (Electron)
-
-**Characteristics:**
-- Single user
-- Full file system access
-- Can run background processes
-- May need multiple agents
-
-**Architecture:**
-```
-┌────────────────────────────────────────────┐
-│              Electron App                  │
-│  ┌──────────────────────────────────────┐  │
-│  │           Renderer Process           │  │
-│  │  ┌──────────────────────────────┐    │  │
-│  │  │            React UI          │    │  │
-│  │  └──────────────┬───────────────┘    │  │
-│  └─────────────────┼────────────────────┘  │
-│                    │ IPC                    │
-│  ┌─────────────────▼────────────────────┐  │
-│  │            Main Process              │  │
-│  │  ┌──────────────────────────────┐    │  │
-│  │  │         AgentPool            │    │  │
-│  │  │  ┌────────┐ ┌────────┐      │    │  │
-│  │  │  │Agent 1 │ │Agent 2 │ ...  │    │  │
-│  │  │  └────────┘ └────────┘      │    │  │
-│  │  └──────────────────────────────┘    │  │
-│  │  ┌──────────────────────────────┐    │  │
-│  │  │         JSONStore            │    │  │
-│  │  └──────────────┬───────────────┘    │  │
-│  └─────────────────┼────────────────────┘  │
-└────────────────────┼────────────────────────┘
-                     │
-              ┌──────▼──────┐
-              │  userData   │
-              │   folder    │
-              └─────────────┘
-```
-
-**Implementation:**
-```typescript
-// main.ts (Main Process)
-import { AgentPool, JSONStore } from '@shareai-lab/kode-sdk';
-import { app, ipcMain } from 'electron';
-
-const store = new JSONStore(path.join(app.getPath('userData'), 'agents'));
-const pool = new AgentPool({ dependencies: { store, ... } });
-
-ipcMain.handle('agent:send', async (event, { agentId, message }) => {
-  const agent = pool.get(agentId) ?? await pool.create(agentId, config);
-  await agent.send(message);
-  return agent.complete();
-});
-
-ipcMain.on('agent:subscribe', (event, { agentId }) => {
-  const agent = pool.get(agentId);
-  if (!agent) return;
-
-  (async () => {
-    for await (const ev of agent.subscribeProgress()) {
-      event.sender.send(`agent:event:${agentId}`, ev);
-    }
-  })();
-});
-```
-
-**Best for:** Chat applications, productivity tools, AI assistants.
-
----
-
-## Scenario 3: Self-hosted Server (Single Node)
-
-**Characteristics:**
-- Multiple users
-- Persistent server process
-- Can use local storage
-- Moderate concurrency (<100 users)
-
-**Architecture:**
-```
-┌──────────────────────────────────────────────────┐
-│                   Node.js Server                 │
-│  ┌────────────────────────────────────────────┐  │
-│  │              Express/Hono                  │  │
-│  │  ┌──────────────────────────────────────┐  │  │
-│  │  │  /api/agents/:id/message  (POST)     │  │  │
-│  │  │  /api/agents/:id/events   (SSE)      │  │  │
-│  │  └──────────────────────────────────────┘  │  │
-│  └────────────────────┬───────────────────────┘  │
-│                       │                          │
-│  ┌────────────────────▼───────────────────────┐  │
-│  │               AgentPool (50)               │  │
-│  │  ┌────┐ ┌────┐ ┌────┐ ┌────┐ ┌────┐      │  │
-│  │  │ A1 │ │ A2 │ │ A3 │ │... │ │A50 │      │  │
-│  │  └────┘ └────┘ └────┘ └────┘ └────┘      │  │
-│  └────────────────────┬───────────────────────┘  │
-│                       │                          │
-│  ┌────────────────────▼───────────────────────┐  │
-│  │              JSONStore                     │  │
-│  └────────────────────┬───────────────────────┘  │
-└───────────────────────┼──────────────────────────┘
-                        │
-                 ┌──────▼──────┐
-                 │  /data/     │
-                 │   agents    │
-                 └─────────────┘
-```
-
-**Implementation:**
-```typescript
-import { Hono } from 'hono';
-import { streamSSE } from 'hono/streaming';
-import { AgentPool, JSONStore } from '@shareai-lab/kode-sdk';
-
-const app = new Hono();
-const store = new JSONStore('/data/agents');
-const pool = new AgentPool({ dependencies: { store, ... }, maxAgents: 50 });
-
-// Send message
-app.post('/api/agents/:id/message', async (c) => {
-  const { id } = c.req.param();
-  const { message } = await c.req.json();
-
-  let agent = pool.get(id);
-  if (!agent) {
-    const exists = await store.exists(id);
-    agent = exists
-      ? await pool.resume(id, getConfig())
-      : await pool.create(id, getConfig());
-  }
-
-  await agent.send(message);
-  const result = await agent.complete();
-  return c.json(result);
-});
-
-// SSE events
-app.get('/api/agents/:id/events', async (c) => {
-  const { id } = c.req.param();
-  const agent = pool.get(id);
-  if (!agent) return c.json({ error: 'Agent not found' }, 404);
-
-  return streamSSE(c, async (stream) => {
-    for await (const event of agent.subscribeProgress()) {
-      await stream.writeSSE({ data: JSON.stringify(event) });
-    }
-  });
-});
-
-export default app;
-```
-
-**Scaling Limit:** ~50-100 concurrent agents per process. Beyond this, consider worker architecture.
-
----
-
-## Scenario 4: Production Backend (Multi-node)
-
-**Characteristics:**
-- High concurrency (1000+ users)
-- Multiple server instances
-- Database-backed persistence
-- Queue-based processing
-
-**Architecture:**
-```
-┌─────────────────────────────────────────────────────────────────┐
-│                        Load Balancer                            │
-└────────────────────────────┬────────────────────────────────────┘
-                             │
-         ┌───────────────────┼───────────────────┐
-         │                   │                   │
-┌────────▼────────┐ ┌────────▼────────┐ ┌────────▼────────┐
-│   API Server 1  │ │   API Server 2  │ │   API Server N  │
-│   (Stateless)   │ │   (Stateless)   │ │   (Stateless)   │
-└────────┬────────┘ └────────┬────────┘ └────────┬────────┘
-         │                   │                   │
-         └───────────────────┼───────────────────┘
-                             │
-                    ┌────────▼────────┐
-                    │   Job Queue     │
-                    │   (BullMQ)      │
-                    └────────┬────────┘
-                             │
-         ┌───────────────────┼───────────────────┐
-         │                   │                   │
-┌────────▼────────┐ ┌────────▼────────┐ ┌────────▼────────┐
-│   Worker 1      │ │   Worker 2      │ │   Worker N      │
-│  AgentPool(50)  │ │  AgentPool(50)  │ │  AgentPool(50)  │
-└────────┬────────┘ └────────┬────────┘ └────────┬────────┘
-         │                   │                   │
-         └───────────────────┼───────────────────┘
-                             │
-              ┌──────────────┼──────────────┐
-              │              │              │
-       ┌──────▼──────┐ ┌─────▼─────┐ ┌─────▼─────┐
-       │  PostgreSQL │ │   Redis   │ │    S3     │
-       │   (Store)   │ │  (Cache)  │ │  (Files)  │
-       └─────────────┘ └───────────┘ └───────────┘
-```
-
-**API Server Implementation:**
-```typescript
-// api/routes/agent.ts
-import { Queue } from 'bullmq';
-
-const queue = new Queue('agent-tasks', { connection: redis });
-
-app.post('/api/agents/:id/message', async (c) => {
-  const { id } = c.req.param();
-  const { message } = await c.req.json();
-
-  // Add job to queue
-  const job = await queue.add('process-message', {
-    agentId: id,
-    message,
-    userId: c.get('userId'),
-  });
-
-  return c.json({ jobId: job.id, status: 'queued' });
-});
-
-app.get('/api/agents/:id/events', async (c) => {
-  const { id } = c.req.param();
-
-  // Subscribe to Redis pub/sub
-  return streamSSE(c, async (stream) => {
-    const sub = redis.duplicate();
-    await sub.subscribe(`agent:${id}:events`);
-
-    sub.on('message', (channel, message) => {
-      stream.writeSSE({ data: message });
-    });
-  });
-});
-```
-
-**Worker Implementation:**
-```typescript
-// worker/index.ts
-import { Worker } from 'bullmq';
-import { AgentPool } from '@shareai-lab/kode-sdk';
-import { PostgresStore } from './postgres-store';
-
-const store = new PostgresStore(pgPool);
-const pool = new AgentPool({ dependencies: { store, ... }, maxAgents: 50 });
-
-const worker = new Worker('agent-tasks', async (job) => {
-  const { agentId, message } = job.data;
-
-  // Get or resume agent
-  let agent = pool.get(agentId);
-  if (!agent) {
-    const exists = await store.exists(agentId);
-    agent = exists
-      ? await pool.resume(agentId, getConfig(job.data))
-      : await pool.create(agentId, getConfig(job.data));
-  }
-
-  // Process
-  await agent.send(message);
-
-  // Stream events to Redis
-  for await (const event of agent.subscribeProgress()) {
-    await redis.publish(`agent:${agentId}:events`, JSON.stringify(event));
-
-    if (event.type === 'done') break;
-  }
-}, { connection: redis });
-
-// Periodic cleanup: hibernate idle agents
-setInterval(async () => {
-  for (const agentId of pool.list()) {
-    const agent = pool.get(agentId);
-    if (agent && agent.idleTime > 60_000) {
-      await agent.persistInfo();
-      pool.delete(agentId);
-    }
-  }
-}, 30_000);
-```
-
-**PostgreSQL Store Implementation:**
-```sql
--- Schema
-CREATE TABLE agents (
-  id TEXT PRIMARY KEY,
-  template_id TEXT NOT NULL,
-  created_at TIMESTAMPTZ DEFAULT NOW(),
-  updated_at TIMESTAMPTZ DEFAULT NOW()
-);
-
-CREATE TABLE agent_messages (
-  agent_id TEXT PRIMARY KEY REFERENCES agents(id),
-  messages JSONB NOT NULL,
-  updated_at TIMESTAMPTZ DEFAULT NOW()
-);
-
-CREATE TABLE agent_tool_records (
-  agent_id TEXT PRIMARY KEY REFERENCES agents(id),
-  records JSONB NOT NULL,
-  updated_at TIMESTAMPTZ DEFAULT NOW()
-);
-
-CREATE TABLE agent_events (
-  id BIGSERIAL PRIMARY KEY,
-  agent_id TEXT REFERENCES agents(id),
-  channel TEXT NOT NULL,
-  event JSONB NOT NULL,
-  created_at TIMESTAMPTZ DEFAULT NOW()
-);
-
-CREATE INDEX idx_agent_events_agent_channel ON agent_events(agent_id, channel, id);
-```
-
----
-
-## Scenario 5: Serverless (Vercel/Lambda)
-
-**Characteristics:**
-- Request-scoped execution
-- Cold starts
-- Execution time limits (10s-300s)
-- No local persistence
-
-**Challenges:**
-1. **No File System**: JSONStore won't work
-2. **Timeout**: Long agent tasks may exceed limits
-3. **Cold Start**: Must load state quickly
-4. **Stateless**: No in-memory agent pool
-
-**Architecture:**
-```
-┌──────────────────────────────────────────────────────────────┐
-│                     Vercel/Lambda                            │
-│  ┌────────────────────────────────────────────────────────┐  │
-│  │                   API Function                         │  │
-│  │                                                        │  │
-│  │  Request → Load Agent → Execute Step → Persist → Response  │
-│  │                                                        │  │
-│  └──────────────────────────┬─────────────────────────────┘  │
-└─────────────────────────────┼────────────────────────────────┘
-                              │
-               ┌──────────────┼──────────────┐
-               │              │              │
-        ┌──────▼──────┐ ┌─────▼─────┐ ┌─────▼─────┐
-        │  Supabase   │ │  Upstash  │ │  Inngest  │
-        │ (Postgres)  │ │  (Redis)  │ │  (Queue)  │
-        └─────────────┘ └───────────┘ └───────────┘
-```
-
-**Implementation:**
-```typescript
-// app/api/agent/[id]/route.ts
-import { Agent, AgentConfig } from '@shareai-lab/kode-sdk';
-import { SupabaseStore } from '@/lib/supabase-store';
-
-const store = new SupabaseStore(supabaseClient);
-
-export async function POST(req: Request, { params }: { params: { id: string } }) {
-  const { message } = await req.json();
-  const agentId = params.id;
-
-  // 1. Load or create agent
-  const exists = await store.exists(agentId);
-  const agent = exists
-    ? await Agent.resume(agentId, config, { store, ... })
-    : await Agent.create({ ...config, agentId }, { store, ... });
-
-  // 2. Send message
-  await agent.send(message);
-
-  // 3. Run with timeout (leave buffer for response)
-  const timeoutMs = 25_000; // Vercel Pro = 30s
-  let result;
-
-  try {
-    result = await Promise.race([
-      agent.complete(),
-      new Promise((_, reject) =>
-        setTimeout(() => reject(new Error('Timeout')), timeoutMs)
-      ),
-    ]);
-  } catch (e) {
-    if (e.message === 'Timeout') {
-      // Agent still processing, queue for background
-      await inngest.send('agent/continue', { agentId });
-      return Response.json({ status: 'processing', agentId });
-    }
-    throw e;
-  }
-
-  return Response.json({ status: 'done', output: result.text });
-}
-```
-
-**For Long-running Tasks:**
-Use a queue service like Inngest:
-
-```typescript
-// inngest/functions/agent-continue.ts
-import { inngest } from '@/lib/inngest';
-
-export const agentContinue = inngest.createFunction(
-  { id: 'agent-continue' },
-  { event: 'agent/continue' },
-  async ({ event, step }) => {
-    const { agentId } = event.data;
-
-    // Resume agent
-    const agent = await Agent.resume(agentId, config, { store, ... });
-
-    // Process until done (Inngest handles timeouts)
-    const result = await step.run('complete', async () => {
-      return agent.complete();
-    });
-
-    // Notify user via webhook/push
-    await step.run('notify', async () => {
-      await notifyUser(agentId, result);
-    });
-
-    return result;
-  }
-);
-```
-
----
-
-## Custom Store Implementations
-
-### PostgreSQL Store (Full Example)
-
-```typescript
-import { Store, Message, ToolCallRecord, Timeline, Bookmark, ... } from '@shareai-lab/kode-sdk';
-import { Pool } from 'pg';
-
-export class PostgresStore implements Store {
-  constructor(private pool: Pool) {}
-
-  // === Runtime State ===
-
-  async saveMessages(agentId: string, messages: Message[]): Promise<void> {
-    await this.pool.query(`
-      INSERT INTO agent_messages (agent_id, messages, updated_at)
-      VALUES ($1, $2, NOW())
-      ON CONFLICT (agent_id) DO UPDATE SET messages = $2, updated_at = NOW()
-    `, [agentId, JSON.stringify(messages)]);
-  }
-
-  async loadMessages(agentId: string): Promise<Message[]> {
-    const { rows } = await this.pool.query(
-      'SELECT messages FROM agent_messages WHERE agent_id = $1',
-      [agentId]
-    );
-    return rows[0]?.messages || [];
-  }
-
-  async saveToolCallRecords(agentId: string, records: ToolCallRecord[]): Promise<void> {
-    await this.pool.query(`
-      INSERT INTO agent_tool_records (agent_id, records, updated_at)
-      VALUES ($1, $2, NOW())
-      ON CONFLICT (agent_id) DO UPDATE SET records = $2, updated_at = NOW()
-    `, [agentId, JSON.stringify(records)]);
-  }
-
-  async loadToolCallRecords(agentId: string): Promise<ToolCallRecord[]> {
-    const { rows } = await this.pool.query(
-      'SELECT records FROM agent_tool_records WHERE agent_id = $1',
-      [agentId]
-    );
-    return rows[0]?.records || [];
-  }
-
-  // === Events ===
-
-  async appendEvent(agentId: string, timeline: Timeline): Promise<void> {
-    await this.pool.query(`
-      INSERT INTO agent_events (agent_id, channel, event, created_at)
-      VALUES ($1, $2, $3, NOW())
-    `, [agentId, timeline.event.channel, JSON.stringify(timeline)]);
-  }
-
-  async *readEvents(agentId: string, opts?: { since?: Bookmark; channel?: string }): AsyncIterable<Timeline> {
-    const conditions = ['agent_id = $1'];
-    const params: any[] = [agentId];
-    let paramIndex = 2;
-
-    if (opts?.since) {
-      conditions.push(`id > $${paramIndex++}`);
-      params.push(opts.since.seq);
-    }
-    if (opts?.channel) {
-      conditions.push(`channel = $${paramIndex++}`);
-      params.push(opts.channel);
-    }
-
-    const { rows } = await this.pool.query(`
-      SELECT event FROM agent_events
-      WHERE ${conditions.join(' AND ')}
-      ORDER BY id ASC
-    `, params);
-
-    for (const row of rows) {
-      yield row.event;
-    }
-  }
-
-  // ... implement remaining methods (history, snapshots, metadata, lifecycle)
-
-  async exists(agentId: string): Promise<boolean> {
-    const { rows } = await this.pool.query(
-      'SELECT 1 FROM agents WHERE id = $1',
-      [agentId]
-    );
-    return rows.length > 0;
-  }
-
-  async delete(agentId: string): Promise<void> {
-    await this.pool.query('DELETE FROM agents WHERE id = $1', [agentId]);
-  }
-
-  async list(prefix?: string): Promise<string[]> {
-    const { rows } = await this.pool.query(
-      prefix
-        ? 'SELECT id FROM agents WHERE id LIKE $1'
-        : 'SELECT id FROM agents',
-      prefix ? [`${prefix}%`] : []
-    );
-    return rows.map(r => r.id);
-  }
-}
-```
-
----
-
-## Capacity Planning
-
-| Deployment | Agents/Process | Memory/Agent | Concurrent Users |
-|------------|----------------|--------------|------------------|
-| CLI | 1 | 10-100 MB | 1 |
-| Desktop | 5-10 | 50-200 MB | 1 |
-| Single Server | 50 | 2-10 MB | 50-100 |
-| Worker Cluster (10 nodes) | 500 | 2-10 MB | 500-1000 |
-| Worker Cluster (50 nodes) | 2500 | 2-10 MB | 2500-5000 |
-
-**Memory Estimation per Agent:**
-- Base object: ~50 KB
-- Message history (100 messages): ~500 KB - 5 MB
-- Tool records: ~50-500 KB
-- Event timeline: ~100 KB - 1 MB
-- **Typical total: 1-10 MB**
-
----
-
-## Summary
-
-1. **CLI/Desktop/IDE**: Use JSONStore, single AgentPool, straightforward
-2. **Single Server**: Add HTTP layer, consider Redis for events
-3. **Multi-node**: Implement custom Store, use queue for job distribution
-4. **Serverless**: Use external DB, handle timeouts, consider background queue
-
-The key insight: **KODE SDK handles the agent lifecycle; you handle the infrastructure.**
diff --git a/docs/ERROR_HANDLING.md b/docs/ERROR_HANDLING.md
deleted file mode 100644
index 843ca8d..0000000
--- a/docs/ERROR_HANDLING.md
+++ /dev/null
@@ -1,389 +0,0 @@
-# 错误处理机制
-
-## 核心原则
-
-1. **模型感知错误并自我调整** - 所有错误信息对模型可见且可操作
-2. **程序永不崩溃** - 多层错误捕获，确保系统稳定运行  
-3. **完整监听记录** - 所有错误触发事件，方便监控和调试
-
-## 错误处理架构
-
-### 错误流转路径
-
-```
-工具执行
-  ├─ 参数验证失败 → {ok: false, error: ..., _validationError: true}
-  ├─ 执行抛异常 → {ok: false, error: ..., _thrownError: true}
-  ├─ 返回 {ok: false} → 保持原样（逻辑错误）
-  └─ 正常返回 → 保持原样
-     ↓
-Agent 处理
-  ├─ 识别错误类型：validation | runtime | logical | aborted | exception
-  ├─ 判断可重试性：validation不可重试，其他可重试
-  ├─ 生成智能建议：基于错误类型和工具名称
-  ├─ 发出 tool:error 事件（ProgressEvent - 用户可见）
-  └─ 发出 error 事件（MonitorEvent - 监控系统）
-     ↓
-返回给模型
-  └─ {
-       ok: false,
-       error: "具体错误信息",
-       errorType: "错误类型",
-       retryable: true/false,
-       recommendations: ["建议1", "建议2", ...]
-     }
-```
-
-## 错误类型分类
-
-| 错误类型 | 标识 | 可重试 | 典型场景 |
-|---------|------|--------|---------|
-| `validation` | `_validationError: true` | ❌ | 参数类型错误、必填参数缺失 |
-| `runtime` | `_thrownError: true` | ✅ | 文件不存在、权限不足、网络错误 |
-| `logical` | 工具返回 `{ok: false}` | ✅ | 文件内容不匹配、命令执行失败 |
-| `aborted` | 超时/中断 | ❌ | 工具执行超时、用户中断 |
-| `exception` | 未预期异常 | ✅ | 系统异常、未知错误 |
-
-## 核心实现
-
-### 1. 工具层统一错误处理
-
-`src/tools/tool.ts`
-
-```typescript
-async exec(args: any, ctx: ToolContext): Promise<any> {
-  try {
-    // 参数验证（safeParse 不抛异常）
-    if (def.parameters) {
-      const parseResult = def.parameters.safeParse(args);
-      if (!parseResult.success) {
-        return {
-          ok: false,
-          error: `Invalid parameters: ${parseResult.error.message}`,
-          _validationError: true
-        };
-      }
-      args = parseResult.data;
-    }
-
-    // 执行工具
-    const result = await def.execute(args, enhancedCtx);
-
-    // 保持工具返回的 {ok: false}
-    if (result && typeof result === 'object' && 'ok' in result && result.ok === false) {
-      return result;
-    }
-
-    return result;
-  } catch (error: any) {
-    // 捕获所有异常，统一返回格式
-    return {
-      ok: false,
-      error: error?.message || String(error),
-      _thrownError: true
-    };
-  }
-}
-```
-
-### 2. Agent层错误识别和处理
-
-`src/core/agent.ts`
-
-```typescript
-// 正确识别工具状态
-const outputOk = output && typeof output === 'object' && 'ok' in output ? output.ok : true;
-let outcome: ToolOutcome = {
-  id: toolUse.id,
-  name: toolUse.name,
-  ok: outputOk !== false,  // 修复了硬编码 ok: true 的问题
-  content: output
-};
-
-// 处理失败情况
-const errorType = errorContent?._validationError ? 'validation' :
-                  errorContent?._thrownError ? 'runtime' : 'logical';
-const isRetryable = errorType !== 'validation';
-
-// 发出进度事件（用户可见）
-this.events.emitProgress({
-  channel: 'progress',
-  type: 'tool:error',
-  call: this.snapshotToolRecord(record.id),
-  error: errorMessage,
-});
-
-// 发出监控事件（系统级）
-this.events.emitMonitor({
-  channel: 'monitor',
-  type: 'error',
-  severity: 'warn',
-  phase: 'tool',
-  message: errorMessage,
-  detail: { ...outcome.content, errorType, retryable: isRetryable },
-});
-
-// 返回给模型
-return this.makeToolResult(toolUse.id, {
-  ok: false,
-  error: errorMessage,
-  errorType,
-  retryable: isRetryable,
-  recommendations: this.getErrorRecommendations(errorType, toolUse.name),
-});
-```
-
-### 3. 智能错误建议
-
-`getErrorRecommendations(errorType, toolName)` 示例：
-
-```typescript
-case 'validation':
-  return [
-    '检查工具参数是否符合schema要求',
-    '确认所有必填参数已提供',
-    '检查参数类型是否正确',
-    '参考工具手册中的参数说明'
-  ];
-
-case 'logical':
-  if (toolName.startsWith('fs_')) {
-    return [
-      '确认文件内容是否符合预期',
-      '检查文件是否被外部修改',
-      '验证路径和模式是否正确',
-      '可以先用 fs_read 确认文件状态'
-    ];
-  }
-  // ... 更多针对性建议
-```
-
-## 模型自我调整示例
-
-### 场景：文件不存在错误
-
-**工具返回：**
-```json
-{
-  "ok": false,
-  "error": "File not found: /src/utils/helper.ts",
-  "errorType": "logical",
-  "retryable": true,
-  "recommendations": [
-    "确认文件内容是否符合预期",
-    "检查文件是否被外部修改",
-    "验证路径和模式是否正确",
-    "可以先用 fs_read 确认文件状态"
-  ]
-}
-```
-
-**模型分析：**
-1. `errorType: "logical"` - 不是参数问题，是文件确实不存在
-2. `retryable: true` - 可以尝试其他方案
-3. 建议提到"验证路径和模式是否正确"
-
-**模型调整策略：**
-```
-1. 使用 fs_glob("src/**/*.ts") 查找所有ts文件
-2. 使用 fs_grep("helper", "src/**/*.ts") 搜索包含helper的文件
-3. 找到正确的文件路径后继续操作
-```
-
-### 场景：参数验证错误
-
-**工具返回：**
-```json
-{
-  "ok": false,
-  "error": "Invalid parameters: path is required",
-  "errorType": "validation",
-  "retryable": false,
-  "recommendations": [
-    "检查工具参数是否符合schema要求",
-    "确认所有必填参数已提供",
-    "检查参数类型是否正确",
-    "参考工具手册中的参数说明"
-  ]
-}
-```
-
-**模型分析：**
-1. `errorType: "validation"` - 参数问题
-2. `retryable: false` - 不应该用相同参数重试
-3. 错误明确指出 "path is required"
-
-**模型调整策略：**
-```
-1. 检查工具调用，发现确实缺少 path 参数
-2. 补充必要的 path 参数
-3. 重新调用工具
-```
-
-## 事件监听
-
-### 监听工具错误（用户层）
-
-```typescript
-// 订阅进度事件
-for await (const envelope of agent.chatStream(input)) {
-  if (envelope.event.type === 'tool:error') {
-    console.log('工具错误:', envelope.event.error);
-    console.log('工具状态:', envelope.event.call.state);
-    // UI 提示用户
-  }
-}
-```
-
-### 监控系统错误（运维层）
-
-```typescript
-// 订阅监控事件
-agent.subscribe(['monitor']).on('error', (event) => {
-  if (event.phase === 'tool') {
-    const { errorType, retryable } = event.detail || {};
-    
-    // 记录到日志系统
-    logger.warn('Tool Error', {
-      message: event.message,
-      errorType,
-      retryable,
-      severity: event.severity,
-      timestamp: Date.now()
-    });
-    
-    // 发送告警
-    if (event.severity === 'error') {
-      alerting.send('Tool execution failed', event);
-    }
-  }
-});
-```
-
-## 稳定性保证
-
-### 多层防护机制
-
-```
-第1层：工具执行层 (tool.ts)
-  └─ try-catch 捕获所有异常 → {ok: false, _thrownError: true}
-
-第2层：Agent调用层 (agent.ts)  
-  └─ try-catch 捕获调用异常 → errorType: 'exception'
-
-第3层：参数验证层
-  └─ safeParse 避免验证异常 → {ok: false, _validationError: true}
-
-第4层：Hook执行层
-  └─ Hook失败不影响主流程 → 记录错误继续执行
-```
-
-### 错误隔离原则
-
-- ✅ 单个工具错误 ≠ Agent崩溃
-- ✅ Agent错误 ≠ 系统崩溃
-- ✅ 工具间完全隔离
-- ✅ 所有错误可追踪
-
-## 最佳实践
-
-### 工具开发者
-
-```typescript
-// ✅ 推荐：使用 {ok: false} 返回预期的业务错误
-if (!fileExists) {
-  return {
-    ok: false,
-    error: 'File not found',
-    recommendations: ['检查文件路径', '使用 fs_glob 搜索文件']
-  };
-}
-
-// ❌ 避免：抛出异常表示业务错误
-throw new Error('File not found');  // 应该只用于意外异常
-```
-
-### 应用开发者
-
-```typescript
-// 监听错误并做UI提示
-agent.subscribe(['progress']).on('tool:error', (event) => {
-  showNotification({
-    type: 'error',
-    message: event.error,
-    action: event.call.state === 'FAILED' ? 'retry' : null
-  });
-});
-
-// 智能重试逻辑
-if (result.status === 'paused' && result.permissionIds?.length) {
-  // 有pending权限，等待用户决策
-} else if (lastError?.retryable && retryCount < 3) {
-  // 可重试错误，自动重试
-  await agent.send('请根据建议调整后重试');
-}
-```
-
-### 系统运维
-
-```typescript
-// 错误统计和分析
-const errorStats = {
-  validation: 0,
-  runtime: 0,
-  logical: 0,
-  aborted: 0,
-  exception: 0
-};
-
-agent.subscribe(['monitor']).on('error', (event) => {
-  if (event.phase === 'tool') {
-    const type = event.detail?.errorType || 'unknown';
-    errorStats[type]++;
-    
-    // 定期分析错误模式
-    if (errorStats.validation > 100) {
-      alert('参数验证错误过多，请检查工具schema配置');
-    }
-  }
-});
-```
-
-## 新增事件类型
-
-### ProgressToolErrorEvent
-
-```typescript
-export interface ProgressToolErrorEvent {
-  channel: 'progress';
-  type: 'tool:error';
-  call: ToolCallSnapshot;  // 工具调用快照
-  error: string;           // 错误信息
-  bookmark?: Bookmark;     // 事件书签
-}
-```
-
-**用途：** 让用户和前端能实时感知工具错误，进行UI提示或策略调整。
-
-## 总结
-
-通过这套完整的错误处理机制，实现了：
-
-✅ **模型智能感知**
-- 错误类型明确（validation/runtime/logical/aborted/exception）
-- 可重试性清晰（retryable: true/false）
-- 建议具体可操作（根据工具和错误类型定制）
-
-✅ **程序永不崩溃**
-- 工具层 try-catch 兜底
-- Agent层 try-catch 保护
-- 参数验证 safeParse
-- Hook执行隔离
-
-✅ **完整监听记录**
-- 进度事件（tool:error）- 用户可见
-- 监控事件（error）- 系统记录
-- 工具记录（ToolCallRecord）- 完整审计
-- 事件时间线（EventBus）- 可回溯
-
-这套机制确保了Agent在长时间运行中能够稳定运行，模型能够自主感知和调整，同时系统提供完整的可观测性。
diff --git a/docs/api.md b/docs/api.md
deleted file mode 100644
index 239b0ed..0000000
--- a/docs/api.md
+++ /dev/null
@@ -1,278 +0,0 @@
-# API 参考（v2.7）
-
-本文聚焦 KODE SDK 的核心入口：依赖注入、Agent 创建/恢复、事件订阅、常用管理器与工具系统。其余专题文档请参考 `docs/` 目录。
-
----
-
-## 依赖注入（AgentDependencies）
-
-所有 Agent 都运行在明确的依赖容器里：
-
-| 字段 | 类型 | 说明 |
-| --- | --- | --- |
-| `store` | `Store` | 统一 WAL 的持久化实现（默认使用 `JSONStore`）。|
-| `templateRegistry` | `AgentTemplateRegistry` | 模板注册中心，定义系统提示、默认工具、运行时策略。|
-| `sandboxFactory` | `SandboxFactory` | 根据配置创建沙箱（local/docker/k8s/remote/vfs）。|
-| `toolRegistry` | `ToolRegistry` | 注册所有可用工具（内置 & 自定义 & MCP）。|
-| `modelFactory` | `(config: ModelConfig) => ModelProvider` | 可选。未提供时默认支持 `provider = anthropic`。|
-
-```typescript
-import {
-  AgentDependencies,
-  AgentTemplateRegistry,
-  JSONStore,
-  SandboxFactory,
-  ToolRegistry,
-  builtin,
-  AnthropicProvider,
-} from '@kode/sdk';
-
-export function createDependencies(): AgentDependencies {
-  const store = new JSONStore('./.kode');
-  const templates = new AgentTemplateRegistry();
-  const tools = new ToolRegistry();
-  const sandboxFactory = new SandboxFactory();
-
-  templates.register({
-    id: 'repo-assistant',
-    systemPrompt: 'You are the repo teammate.',
-    tools: ['fs_read', 'fs_write', 'fs_edit', 'bash_run', 'todo_read', 'todo_write'],
-    runtime: { todo: { enabled: true, reminderOnStart: true, remindIntervalSteps: 25 } },
-  });
-
-  for (const tool of [...builtin.fs(), ...builtin.bash(), ...builtin.todo()]) {
-    tools.register(tool.name, () => tool);
-  }
-
-  return {
-    store,
-    templateRegistry: templates,
-    sandboxFactory,
-    toolRegistry: tools,
-    // Configuration-driven model factory with provider-specific options
-    modelFactory: (config) => {
-      if (config.provider === 'anthropic') {
-        return new AnthropicProvider(config.apiKey!, config.model, config.baseUrl, config.proxyUrl, {
-          reasoningTransport: config.reasoningTransport,
-          thinking: config.thinking,
-          beta: { interleavedThinking: true },
-        });
-      }
-      if (config.provider === 'openai') {
-        return new OpenAIProvider(config.apiKey!, config.model, config.baseUrl, config.proxyUrl, {
-          api: config.api ?? 'chat',
-          reasoningTransport: config.reasoningTransport,
-          responses: config.responses,
-          reasoning: config.reasoning,
-        });
-      }
-      if (config.provider === 'gemini') {
-        return new GeminiProvider(config.apiKey!, config.model, config.baseUrl, config.proxyUrl, {
-          reasoningTransport: config.reasoningTransport,
-          thinking: config.thinking,
-        });
-      }
-      // Default to OpenAI-compatible provider
-      return new OpenAIProvider(config.apiKey!, config.model, config.baseUrl, config.proxyUrl, {
-        reasoningTransport: config.reasoningTransport,
-        reasoning: config.reasoning,
-      });
-    },
-  };
-}
-```
-
----
-
-## Agent.create(config, deps)
-
-| 字段 | 说明 |
-| --- | --- |
-| `templateId` | 必填。引用已注册模板。|
-| `agentId?` | 可选。未指定时自动生成 `agt-` 前缀 ULID。|
-| `model` / `modelConfig` | 提供 `ModelProvider` 实例或配置。|
-| `sandbox` | `Sandbox` 实例或 `SandboxConfig`（kind/workDir/enforceBoundary/allowPaths 等）。|
-| `tools` | 工具名称数组。默认遵循模板：`'*'` 表示注册表所有工具。|
-| `exposeThinking` | 是否推送 `progress.think_*`。模板 metadata 也可配置。|
-| `overrides.*` | 覆盖模板的 permission/todo/subagents/hooks。|
-| `context` | 上下文管理参数（maxTokens / compressToTokens / compressionModel 等）。|
-| `metadata` | 透传字段：`toolTimeoutMs`、`maxToolConcurrency`、`maxTokens`、`temperature` 等。|
-
-返回值为 `Promise<Agent>`。初始化流程：
-
-1. 根据模板及 `config.tools` 解析工具实例 → 自动注入工具说明书。
-2. 构建沙箱 + FilePool watcher。
-3. 从 Store 恢复消息、工具记录、Todo、断点、Lineage。
-4. 设置事件总线，READY 状态等待消息。
-
----
-
-## Agent.resume / Agent.resumeFromStore
-
-```typescript
-const agent = await Agent.resume('agt-demo', {
-  templateId: 'repo-assistant',
-  modelConfig: { provider: 'anthropic', model: 'claude-3-5-sonnet-20241022', apiKey: process.env.ANTHROPIC_API_KEY! },
-  sandbox: { kind: 'local', workDir: './workspace', enforceBoundary: true },
-}, deps, {
-  strategy: 'crash',  // 自动封口未完成工具
-  autoRun: true,      // 恢复后继续处理队列
-});
-
-const agent2 = await Agent.resumeFromStore('agt-demo', deps, {
-  overrides: { modelConfig: { provider: 'anthropic', model: 'claude-3-5-sonnet-20241022', apiKey: process.env.ANTHROPIC_API_KEY! } },
-});
-```
-
-- `strategy: 'manual' | 'crash'`：`crash` 会自动封口未完成工具并发 `monitor.agent_resumed`。
-- `autoRun`：恢复后立即继续处理队列。
-- `overrides`：对读取到的 metadata 做精细覆盖（模型、权限、sandbox 等）。
-
-恢复后务必重新绑定事件监听（Control/Monitor 回调不会随 metadata 存储）。
-
----
-
-## Agent 实例 API
-
-| 方法 | 返回 | 说明 |
-| --- | --- | --- |
-| `send(text, opts?)` | `Promise<string>` | 入队消息（`kind: 'user' | 'reminder'`）。REMINDER 会自动包裹系统提示。|
-| `chat(text, opts?)` | `Promise<CompleteResult>` | 阻塞式单轮对话，返回最后一条文本与 `permissionIds`。|
-| `chatStream(text, opts?)` / `stream` | `AsyncIterable<ProgressEvent>` | 推拉式流，见 Progress 通道。|
-| `subscribe(channels, opts?)` | `AsyncIterable<AgentEventEnvelope>` | 订阅任意组合的 Progress/Control/Monitor，支持 `since`/`kinds` 过滤。|
-| `on(type, handler)` | `() => void` | 订阅 Control/Monitor 回调（审批、错误、tool_executed、todo_changed 等）。|
-| `schedule()` | `Scheduler` | 注册步数调度、外部触发。详见 Scheduler 文档。|
-| `snapshot(label?)` | `Promise<SnapshotId>` | 创建 Safe-Fork-Point。|
-| `fork(sel?)` | `Promise<Agent>` | 基于 snapshot 派生新 Agent（继承工具/权限/lineage）。|
-| `status()` / `info()` | `Promise<AgentStatus/AgentInfo>` | 读取运行状态、断点、cursor、lineage。|
-| `getTodos()` / `setTodos()` / `updateTodo()` / `deleteTodo()` | 管理 Todo 列表，自动触发 Monitor 事件。|
-| `decide(id, decision, note?)` | `Promise<void>` | 审批 Control 请求（通常配合自定义审批服务）。|
-| `interrupt(opts?)` | `Promise<void>` | 中断当前工具，封口 `tool_result`，恢复 READY。|
-
----
-
-## 事件通道
-
-### Progress（数据面）
-
-| 事件 | 说明 |
-| --- | --- |
-| `think_chunk_start/think_chunk/think_chunk_end` | 暴露模型思考。受 `exposeThinking` 控制。|
-| `text_chunk_start/text_chunk/text_chunk_end` | 文本增量输送。|
-| `tool:start/tool:end/tool:error` | 工具生命周期 & 错误。`tool:end` 始终推送，即使失败。|
-| `done` | 当前轮结束，返回 `bookmark`（`seq/timestamp`）。|
-
-`subscribe(['progress'], { since, kinds })` 支持直播 + 续播。`bookmark` 可用于前端断点续传。
-
-### Control（审批面）
-
-| 事件 | 说明 |
-| --- | --- |
-| `permission_required` | 工具被策略/Hooks 判定需要审批。包含 `call`（快照）、`respond(decision, opts?)`。|
-| `permission_decided` | 审批结果广播，含 `callId`、`decision`、`decidedBy`。|
-
-### Monitor（治理面）
-
-核心事件：
-
-- `state_changed`：READY / WORKING / PAUSED 切换。
-- `tool_executed`：工具完成，含耗时、审批信息、结果摘要。
-- `error`：分类错误（model/tool/system），带 `severity` 与 `detail`。
-- `todo_changed` / `todo_reminder`：Todo 更新 & 提醒。
-- `file_changed`：FilePool 发现外部修改。
-- `context_compression`：上下文压缩摘要与比率。
-- `agent_resumed`：Resume 完成及自动封口列表。
-- `tool_manual_updated`：注入的工具说明书更新。
-
-Monitor 事件默认只在必要时推送，避免 UI 噪音。
-
----
-
-## 工具系统速览
-
-- 所有工具必须注册到 `ToolRegistry`，`Agent.create`/`resume` 会根据模板/配置实例化。
-- 内置工具分组：`builtin.fs()`、`builtin.bash()`、`builtin.todo()`、`builtin.task(templates)`、`builtin.skills(skillsManager)`。
-- 推荐使用 `defineTool`/`defineTools` 或 `tool()/tools()`（Zod）封装，自动生成 JSON Schema 与自定义事件。
-- 工具执行上下文(`ToolContext`)包含 `agent`, `sandbox`, `store`, `signal`, `events` 等；请响应 `AbortSignal`。
-- 工具返回结构若带 `{ ok: false, error, recommendations }`，会自动生成结构化审计事件。
-
-### Skills 工具注册
-
-Skills工具需要先创建`SkillsManager`实例，然后注册到工具注册表：
-
-```typescript
-import { createSkillsTool } from '@kode/sdk';
-import { SkillsManager } from '@kode/sdk';
-
-// 创建Skills管理器
-const skillsManager = new SkillsManager('./skills', ['skill1', 'skill2']);
-
-// 注册Skills工具
-deps.toolRegistry.register('skills', () => createSkillsTool(skillsManager));
-```
-
-Skills系统特性：
-- **热重载**：Skills代码修改后自动重新加载
-- **元数据注入**：自动将技能描述注入到系统提示
-- **沙箱隔离**：每个技能有独立的文件系统空间
-- **白名单机制**：支持选择性加载特定技能
-
-更多细节见 [`docs/tools.md`](./tools.md)、[`docs/simplified-tools.md`](./simplified-tools.md) 与 [`docs/skills.md`](./skills.md)。
-
----
-
-## Todo 与提醒
-
-启用模板的 `runtime.todo.enabled = true` 后：
-
-- `TodoService` 会从 Store 加载/持久化，限制同一时间最多一个 `in_progress`。
-- `agent.todoManager` 自动触发提醒：
-  - 初始化提醒（`reminderOnStart`）。
-  - `remindIntervalSteps` 间隔触发系统消息（Progress 不推送，默认 Monitor 提醒）。
-- `todo_read`/`todo_write` 工具会自动注入，直接被模型使用。
-
----
-
-## Scheduler
-
-`agent.schedule()` 返回 `Scheduler`：
-
-```typescript
-const scheduler = agent.schedule();
-const handle = scheduler.everySteps(5, ({ stepCount }) => {
-  console.log('reminder every 5 steps', stepCount);
-  agent.send('系统提醒：请总结当前进度。', { kind: 'reminder' });
-});
-
-// 外部触发
-scheduler.notifyExternalTrigger({ taskId: 'cron:daily', spec: '0 9 * * *', kind: 'cron' });
-```
-
-触发时会推送 `monitor.scheduler_triggered` 事件，便于审计。
-
----
-
-## FilePool 与 Sandbox
-
-- 所有文件类工具自动调用 FilePool：读取/写入记录时间戳、防止陈旧写入。
-- 外部修改 → `monitor.file_changed` + 系统提醒（通过 `agent.remind`）。
-- Sandbox 默认 `enforceBoundary: true`，可通过模板 metadata 的 `sandbox` 或 Agent 配置放权 `allowPaths`。
-- `LocalSandbox` 自动阻止危险命令（`rm -rf /`、`curl | bash` 等）。
-
----
-
-## 错误处理
-
-- 工具错误 → `tool:error` (Progress) + `monitor.error`（`severity: warn/error`）。
-- 模型错误 → `monitor.error`（`phase: model`）。
-- 自动封口（Seal）默认恢复 READY，并写入带建议的 `tool_result`。
-- 可通过 HookManager (`preToolUse`/`postToolUse`/`preModel`/`postModel`) 注入治理逻辑。
-
----
-
-## 相关文档
-
-- [`docs/events.md`](./events.md)：事件驱动最佳实践。
-- [`docs/resume.md`](./resume.md)：恢复/分叉策略 & 业务职责分工。
-- [`docs/playbooks.md`](./playbooks.md)：典型场景脚本（收件箱、审批、多 Agent、调度）。
-- [`docs/simplified-tools.md`](./simplified-tools.md)：工具定义 API。
diff --git a/docs/database.md b/docs/database.md
deleted file mode 100644
index fa2455b..0000000
--- a/docs/database.md
+++ /dev/null
@@ -1,1028 +0,0 @@
-# 数据库持久化指南
-
-KODE SDK 从 v2.7 开始支持 SQLite 和 PostgreSQL 作为持久化后端，提供高性能的查询、聚合和分析能力。本文档详细介绍数据库存储的架构设计、使用方法和最佳实践。
-
-## 目录
-
-- [为什么需要数据库？](#为什么需要数据库)
-- [架构设计](#架构设计)
-- [SQLite vs PostgreSQL](#sqlite-vs-postgresql)
-- [快速开始](#快速开始)
-- [查询 API 详解](#查询-api-详解)
-- [性能优化](#性能优化)
-- [生产部署](#生产部署)
-- [故障排查](#故障排查)
-
----
-
-## 为什么需要数据库？
-
-默认的 `JSONStore` 适合单 Agent 快速开发，但在生产环境中存在以下限制：
-
-| 场景 | JSONStore | 数据库存储 |
-|------|-----------|------------|
-| **查询会话列表** | 需要遍历所有目录 | 索引加速，毫秒级响应 |
-| **统计工具调用次数** | 需要读取所有文件并聚合 | SQL 聚合函数，高效计算 |
-| **按时间范围过滤** | 需要解析所有文件 | WHERE 条件过滤，索引优化 |
-| **多进程并发** | 文件锁冲突风险 | 数据库事务保证一致性 |
-| **备份与恢复** | 需要同步整个目录树 | 标准 SQL 工具（dump/restore）|
-| **审计与合规** | 需要自定义日志分析 | SQL 查询生成审计报告 |
-
-### 典型应用场景
-
-1. **多 Agent 管理平台**：需要列出所有 Agent 会话、按模板分类、按时间排序
-2. **工具调用分析**：统计哪些工具最常用、成功率如何、哪些 Agent 调用最多
-3. **成本监控**：按 Agent、模板、时间维度统计 Token 用量和成本
-4. **审计合规**：查询特定时间段内的所有工具调用记录、审批决策
-5. **多实例部署**：多个服务实例共享同一个 PostgreSQL 数据库
-
----
-
-## 架构设计
-
-### 混合存储策略
-
-KODE SDK 采用 **数据库 + 文件系统混合存储** 架构，在性能和灵活性之间取得平衡：
-
-```
-┌─────────────────────────────────────────────────────────┐
-│                  QueryableStore                         │
-│  (extends Store interface, 向后兼容)                    │
-├─────────────────────────────────────────────────────────┤
-│                                                         │
-│  ┌─────────────────────┐     ┌────────────────────┐   │
-│  │   Database          │     │   File System      │   │
-│  │   (SQL)             │     │   (JSONStore)      │   │
-│  ├─────────────────────┤     ├────────────────────┤   │
-│  │ • AgentInfo         │     │ • Events           │   │
-│  │ • Messages          │     │ • Todos            │   │
-│  │ • ToolCallRecords   │     │ • History          │   │
-│  │ • Snapshots         │     │ • Compression      │   │
-│  └─────────────────────┘     └────────────────────┘   │
-│         ↑ 查询优化                ↑ 高频写入             │
-│         ↑ 聚合分析                ↑ 顺序访问             │
-└─────────────────────────────────────────────────────────┘
-```
-
-### 数据分流原则
-
-| 数据类型 | 存储位置 | 原因 |
-|---------|---------|------|
-| **AgentInfo** | 数据库 | 需要按 templateId、createdAt 查询和过滤 |
-| **Messages** | 数据库 | 需要按 role、contentType、时间范围查询 |
-| **ToolCallRecords** | 数据库 | 需要按 toolName、isError、hasApproval 查询和统计 |
-| **Snapshots** | 数据库 | 需要列出所有快照、按时间排序 |
-| **Events** | 文件系统 | 高频写入，仅需顺序追加和读取 |
-| **Todos** | 文件系统 | 临时状态，频繁更新，无需查询 |
-| **History** | 文件系统 | 上下文窗口历史，仅需完整读取 |
-| **Compression** | 文件系统 | 压缩记录，仅需完整读取 |
-
-### 表结构设计
-
-#### SQLite 表结构
-
-```sql
--- Agent 基础信息表
-CREATE TABLE agents (
-  agent_id TEXT PRIMARY KEY,
-  template_id TEXT NOT NULL,
-  created_at TEXT NOT NULL,
-  config_version TEXT,
-  lineage TEXT,           -- JSON array
-  message_count INTEGER DEFAULT 0,
-  last_sfp_index INTEGER DEFAULT -1,
-  breakpoint TEXT,
-  last_bookmark TEXT      -- JSON object
-);
-
--- 消息表
-CREATE TABLE messages (
-  id INTEGER PRIMARY KEY AUTOINCREMENT,
-  agent_id TEXT NOT NULL,
-  seq INTEGER NOT NULL,
-  role TEXT NOT NULL,
-  content TEXT NOT NULL,  -- JSON array
-  metadata TEXT,          -- JSON object
-  created_at TEXT NOT NULL,
-  FOREIGN KEY (agent_id) REFERENCES agents(agent_id) ON DELETE CASCADE
-);
-
--- 工具调用记录表
-CREATE TABLE tool_call_records (
-  id INTEGER PRIMARY KEY AUTOINCREMENT,
-  agent_id TEXT NOT NULL,
-  tool_call_id TEXT NOT NULL,
-  tool_name TEXT NOT NULL,
-  input TEXT NOT NULL,    -- JSON object
-  output TEXT,            -- JSON object
-  is_error INTEGER DEFAULT 0,
-  approval TEXT,          -- JSON object
-  audit_trail TEXT,       -- JSON array
-  created_at TEXT NOT NULL,
-  FOREIGN KEY (agent_id) REFERENCES agents(agent_id) ON DELETE CASCADE
-);
-
--- 快照表
-CREATE TABLE snapshots (
-  id INTEGER PRIMARY KEY AUTOINCREMENT,
-  agent_id TEXT NOT NULL,
-  snapshot_id TEXT NOT NULL,
-  parent_snapshot_id TEXT,
-  created_at TEXT NOT NULL,
-  metadata TEXT,          -- JSON object
-  FOREIGN KEY (agent_id) REFERENCES agents(agent_id) ON DELETE CASCADE
-);
-```
-
-#### PostgreSQL 表结构
-
-PostgreSQL 版本使用 JSONB 类型优化 JSON 字段存储和查询：
-
-```sql
--- Agent 基础信息表（使用 JSONB）
-CREATE TABLE agents (
-  agent_id TEXT PRIMARY KEY,
-  template_id TEXT NOT NULL,
-  created_at TIMESTAMPTZ NOT NULL,
-  config_version TEXT,
-  lineage JSONB,          -- JSONB array
-  message_count INTEGER DEFAULT 0,
-  last_sfp_index INTEGER DEFAULT -1,
-  breakpoint TEXT,
-  last_bookmark JSONB     -- JSONB object
-);
-
--- 消息表（使用 JSONB）
-CREATE TABLE messages (
-  id SERIAL PRIMARY KEY,
-  agent_id TEXT NOT NULL,
-  seq INTEGER NOT NULL,
-  role TEXT NOT NULL,
-  content JSONB NOT NULL,
-  metadata JSONB,
-  created_at TIMESTAMPTZ NOT NULL,
-  FOREIGN KEY (agent_id) REFERENCES agents(agent_id) ON DELETE CASCADE
-);
-
--- 工具调用记录表（使用 JSONB）
-CREATE TABLE tool_call_records (
-  id SERIAL PRIMARY KEY,
-  agent_id TEXT NOT NULL,
-  tool_call_id TEXT NOT NULL,
-  tool_name TEXT NOT NULL,
-  input JSONB NOT NULL,
-  output JSONB,
-  is_error BOOLEAN DEFAULT FALSE,
-  approval JSONB,
-  audit_trail JSONB,
-  created_at TIMESTAMPTZ NOT NULL,
-  FOREIGN KEY (agent_id) REFERENCES agents(agent_id) ON DELETE CASCADE
-);
-
--- 快照表（使用 JSONB）
-CREATE TABLE snapshots (
-  id SERIAL PRIMARY KEY,
-  agent_id TEXT NOT NULL,
-  snapshot_id TEXT NOT NULL,
-  parent_snapshot_id TEXT,
-  created_at TIMESTAMPTZ NOT NULL,
-  metadata JSONB,
-  FOREIGN KEY (agent_id) REFERENCES agents(agent_id) ON DELETE CASCADE
-);
-```
-
-#### 索引设计
-
-```sql
--- SQLite 索引
-CREATE INDEX idx_agents_template ON agents(template_id);
-CREATE INDEX idx_agents_created ON agents(created_at);
-CREATE INDEX idx_messages_agent_seq ON messages(agent_id, seq);
-CREATE INDEX idx_messages_role ON messages(role);
-CREATE INDEX idx_messages_created ON messages(created_at);
-CREATE INDEX idx_tool_calls_agent ON tool_call_records(agent_id);
-CREATE INDEX idx_tool_calls_name ON tool_call_records(tool_name);
-CREATE INDEX idx_tool_calls_error ON tool_call_records(is_error);
-CREATE INDEX idx_tool_calls_created ON tool_call_records(created_at);
-CREATE INDEX idx_snapshots_agent ON snapshots(agent_id);
-
--- PostgreSQL 额外索引（利用 JSONB）
-CREATE INDEX idx_agents_lineage ON agents USING GIN (lineage);
-CREATE INDEX idx_messages_content ON messages USING GIN (content);
-CREATE INDEX idx_tool_calls_input ON tool_call_records USING GIN (input);
-CREATE INDEX idx_tool_calls_output ON tool_call_records USING GIN (output);
-```
-
----
-
-## SQLite vs PostgreSQL
-
-### 对比矩阵
-
-| 特性 | SQLite | PostgreSQL |
-|-----|--------|-----------|
-| **部署复杂度** | ⭐⭐⭐⭐⭐ 单文件，零配置 | ⭐⭐⭐ 需要数据库服务器 |
-| **并发写入** | ⭐⭐⭐ 单进程写入 | ⭐⭐⭐⭐⭐ 多进程并发 |
-| **查询性能** | ⭐⭐⭐⭐ 小数据集高效 | ⭐⭐⭐⭐⭐ 大数据集优化 |
-| **JSON 支持** | ⭐⭐⭐ JSON 函数 | ⭐⭐⭐⭐⭐ JSONB + GIN 索引 |
-| **备份恢复** | ⭐⭐⭐⭐⭐ 复制文件 | ⭐⭐⭐⭐ pg_dump/restore |
-| **运维成本** | ⭐⭐⭐⭐⭐ 无需维护 | ⭐⭐⭐ 需要监控、备份、调优 |
-| **扩展性** | ⭐⭐ 单机限制 | ⭐⭐⭐⭐⭐ 主从复制、分片 |
-| **数据量上限** | ~100GB 推荐 | TB 级无压力 |
-
-### 选择建议
-
-#### 选择 SQLite 当...
-
-- 单机部署，单个服务实例
-- Agent 数量 < 1000
-- 每日消息量 < 10 万条
-- 需要快速开发和原型验证
-- 希望零运维成本
-
-**示例场景**：
-- 个人开发和测试
-- 小团队内部工具
-- Edge 设备本地 Agent
-- 单机爬虫/自动化脚本
-
-#### 选择 PostgreSQL 当...
-
-- 多实例部署，负载均衡
-- Agent 数量 > 1000
-- 每日消息量 > 10 万条
-- 需要实时分析和复杂查询
-- 需要跨地域备份和容灾
-
-**示例场景**：
-- 企业级 Agent 平台
-- SaaS 多租户服务
-- 数据分析和 BI 看板
-- 审计合规要求严格的场景
-
----
-
-## 快速开始
-
-### 安装依赖
-
-```bash
-# SQLite（通常已内置）
-npm install better-sqlite3
-
-# PostgreSQL
-npm install pg
-```
-
-### SQLite 示例
-
-```typescript
-import { Agent } from '@shareai-lab/kode-sdk';
-import { SqliteStore } from '@shareai-lab/kode-sdk/infra/db/sqlite';
-import path from 'path';
-
-// 1. 创建 SQLite Store
-const dbPath = path.join(__dirname, 'data', 'agents.db');
-const storePath = path.join(__dirname, 'data', 'store');
-const store = new SqliteStore(dbPath, storePath);
-
-// 2. 创建 Agent
-const agent = await Agent.create({
-  provider,
-  store,
-  template: {
-    id: 'my-template',
-    systemPrompt: 'You are a helpful assistant.',
-    tools: []
-  }
-});
-
-// 3. 对话
-await agent.send({ role: 'user', content: 'Hello!' });
-
-// 4. 查询会话列表
-const sessions = await store.querySessions({
-  templateId: 'my-template',
-  limit: 10
-});
-console.log(`Found ${sessions.length} sessions`);
-
-// 5. 统计工具调用
-const stats = await store.aggregateStats({ agentId: agent.id });
-console.log(stats);
-
-// 6. 关闭数据库
-await store.close();
-```
-
-### PostgreSQL 示例
-
-```typescript
-import { Agent } from '@shareai-lab/kode-sdk';
-import { PostgresStore } from '@shareai-lab/kode-sdk/infra/db/postgres';
-
-// 1. 创建 PostgreSQL Store
-const store = new PostgresStore(
-  {
-    host: process.env.POSTGRES_HOST || 'localhost',
-    port: parseInt(process.env.POSTGRES_PORT || '5432'),
-    database: process.env.POSTGRES_DB || 'kode_agents',
-    user: process.env.POSTGRES_USER || 'kode',
-    password: process.env.POSTGRES_PASSWORD
-  },
-  './data/store'
-);
-
-// 2-5. 使用方法与 SQLite 完全一致
-
-// 6. 关闭连接池
-await store.close();
-```
-
-### Docker 快速启动
-
-#### PostgreSQL
-
-```bash
-# 开发环境
-docker run --name kode-postgres \
-  -e POSTGRES_PASSWORD=kode123 \
-  -e POSTGRES_DB=kode_agents \
-  -p 5432:5432 \
-  -d postgres:16-alpine
-
-# 生产环境（持久化数据）
-docker run --name kode-postgres \
-  -e POSTGRES_PASSWORD=kode123 \
-  -e POSTGRES_DB=kode_agents \
-  -v /data/postgres:/var/lib/postgresql/data \
-  -p 5432:5432 \
-  -d postgres:16-alpine
-```
-
----
-
-## 查询 API 详解
-
-### 会话查询：querySessions()
-
-查询 Agent 会话列表，支持按模板、时间范围过滤和分页。
-
-```typescript
-interface SessionQueryFilter {
-  templateId?: string;      // 按模板 ID 过滤
-  createdAfter?: Date;      // 创建时间晚于
-  createdBefore?: Date;     // 创建时间早于
-  limit?: number;           // 返回数量限制（默认 100）
-  offset?: number;          // 分页偏移量（默认 0）
-}
-
-const sessions = await store.querySessions({
-  templateId: 'chat-assistant',
-  createdAfter: new Date('2025-01-01'),
-  limit: 20,
-  offset: 0
-});
-
-// 返回结果
-sessions.forEach(session => {
-  console.log({
-    agentId: session.agentId,
-    templateId: session.templateId,
-    createdAt: session.createdAt,
-    messageCount: session.messageCount,
-    lineage: session.lineage  // 父 Agent ID 链
-  });
-});
-```
-
-**典型用例**：
-- 管理后台列出所有 Agent 会话
-- 按模板分类展示不同类型的 Agent
-- 时间范围过滤（今天、本周、本月）
-
-### 消息查询：queryMessages()
-
-查询消息记录，支持按 Agent、角色、内容类型过滤。
-
-```typescript
-interface MessageQueryFilter {
-  agentId?: string;         // 按 Agent ID 过滤
-  role?: 'user' | 'assistant';  // 按角色过滤
-  contentType?: 'text' | 'tool_use' | 'tool_result';  // 按内容类型过滤
-  createdAfter?: Date;      // 创建时间晚于
-  createdBefore?: Date;     // 创建时间早于
-  limit?: number;           // 返回数量限制（默认 100）
-  offset?: number;          // 分页偏移量（默认 0）
-}
-
-const messages = await store.queryMessages({
-  agentId: 'agt-abc123',
-  role: 'assistant',
-  contentType: 'tool_use',
-  limit: 50
-});
-
-// 返回结果
-messages.forEach(msg => {
-  console.log({
-    agentId: msg.agentId,
-    seq: msg.seq,
-    role: msg.role,
-    content: msg.content,  // ContentBlock[]
-    createdAt: msg.createdAt
-  });
-});
-```
-
-**典型用例**：
-- 查看特定 Agent 的对话历史
-- 分析 Assistant 生成的所有工具调用
-- 提取用户输入用于训练和分析
-
-### 工具调用查询：queryToolCalls()
-
-查询工具调用记录，支持按工具名、错误状态、审批状态过滤。
-
-```typescript
-interface ToolCallQueryFilter {
-  agentId?: string;         // 按 Agent ID 过滤
-  toolName?: string;        // 按工具名称过滤
-  isError?: boolean;        // 按错误状态过滤
-  hasApproval?: boolean;    // 按审批状态过滤
-  createdAfter?: Date;      // 创建时间晚于
-  createdBefore?: Date;     // 创建时间早于
-  limit?: number;           // 返回数量限制（默认 100）
-  offset?: number;          // 分页偏移量（默认 0）
-}
-
-const toolCalls = await store.queryToolCalls({
-  toolName: 'bash_run',
-  isError: true,
-  limit: 10
-});
-
-// 返回结果
-toolCalls.forEach(call => {
-  console.log({
-    toolCallId: call.toolCallId,
-    toolName: call.toolName,
-    input: call.input,
-    output: call.output,
-    isError: call.isError,
-    approval: call.approval,  // ToolCallApproval | undefined
-    auditTrail: call.auditTrail  // 审计日志
-  });
-});
-```
-
-**典型用例**：
-- 统计哪些工具最常失败
-- 查看所有需要审批的工具调用
-- 生成工具使用审计报告
-
-### 统计聚合：aggregateStats()
-
-聚合统计 Agent 的消息数量、工具调用次数、成功率等指标。
-
-```typescript
-interface StatsQueryFilter {
-  agentId: string;          // 必填：Agent ID
-}
-
-const stats = await store.aggregateStats({ agentId: 'agt-abc123' });
-
-console.log({
-  totalMessages: stats.totalMessages,           // 消息总数
-  totalToolCalls: stats.totalToolCalls,         // 工具调用总数
-  successfulToolCalls: stats.successfulToolCalls,  // 成功次数
-  failedToolCalls: stats.failedToolCalls        // 失败次数
-});
-
-// 计算成功率
-const successRate = (stats.successfulToolCalls / stats.totalToolCalls * 100).toFixed(2);
-console.log(`Tool call success rate: ${successRate}%`);
-```
-
-**典型用例**：
-- Agent 性能监控看板
-- 工具可靠性分析
-- 成本估算（基于消息数量）
-
----
-
-## 性能优化
-
-### 索引优化
-
-默认索引已覆盖常见查询场景，但如果有特定查询模式，可以添加自定义索引：
-
-```sql
--- 示例：按 Agent 和工具名组合查询
-CREATE INDEX idx_tool_calls_agent_tool ON tool_call_records(agent_id, tool_name);
-
--- 示例：按消息内容类型查询（需要解析 JSON）
--- PostgreSQL JSONB 索引
-CREATE INDEX idx_messages_content_type ON messages USING GIN ((content->0->>'type'));
-```
-
-### 查询优化
-
-#### 1. 使用分页避免大结果集
-
-```typescript
-// 不推荐：一次性加载所有数据
-const allMessages = await store.queryMessages({ agentId });
-
-// 推荐：分页加载
-const PAGE_SIZE = 100;
-let offset = 0;
-while (true) {
-  const messages = await store.queryMessages({
-    agentId,
-    limit: PAGE_SIZE,
-    offset
-  });
-  if (messages.length === 0) break;
-
-  // 处理当前页
-  processMessages(messages);
-  offset += PAGE_SIZE;
-}
-```
-
-#### 2. 使用时间范围过滤
-
-```typescript
-// 不推荐：查询所有历史数据
-const messages = await store.queryMessages({ agentId });
-
-// 推荐：限制时间范围
-const messages = await store.queryMessages({
-  agentId,
-  createdAfter: new Date(Date.now() - 7 * 24 * 60 * 60 * 1000)  // 最近 7 天
-});
-```
-
-#### 3. 按需查询字段
-
-数据库查询已经只返回必要字段，但可以进一步优化：
-
-```typescript
-// 如果只需要统计数量，不需要查询详细记录
-const stats = await store.aggregateStats({ agentId });
-// 比 queryMessages() 然后统计快得多
-```
-
-### 写入优化
-
-#### 1. 批量插入（内部已实现）
-
-Store 实现内部已使用事务批量插入：
-
-```typescript
-// saveMessages 内部实现
-db.transaction(() => {
-  for (const message of messages) {
-    stmt.run(message);
-  }
-})();
-```
-
-#### 2. 延迟写入（高频场景）
-
-对于高频写入场景（如实时流式输出），可以考虑缓冲后批量写入：
-
-```typescript
-class BufferedStore {
-  private buffer: Message[] = [];
-  private flushInterval = 5000;  // 5秒刷新一次
-
-  constructor(private store: QueryableStore) {
-    setInterval(() => this.flush(), this.flushInterval);
-  }
-
-  async saveMessage(agentId: string, message: Message) {
-    this.buffer.push({ agentId, ...message });
-    if (this.buffer.length >= 100) {
-      await this.flush();
-    }
-  }
-
-  private async flush() {
-    if (this.buffer.length === 0) return;
-
-    // 按 agentId 分组批量写入
-    const grouped = groupBy(this.buffer, m => m.agentId);
-    for (const [agentId, messages] of grouped) {
-      await this.store.saveMessages(agentId, messages);
-    }
-    this.buffer = [];
-  }
-}
-```
-
-### PostgreSQL 特定优化
-
-#### 1. 连接池配置
-
-```typescript
-const store = new PostgresStore(
-  {
-    host: 'localhost',
-    port: 5432,
-    database: 'kode_agents',
-    user: 'kode',
-    password: 'password',
-    // 连接池配置
-    max: 20,          // 最大连接数
-    idleTimeoutMillis: 30000,  // 空闲连接超时
-    connectionTimeoutMillis: 2000  // 连接超时
-  },
-  './data/store'
-);
-```
-
-#### 2. JSONB 查询优化
-
-利用 PostgreSQL 的 JSONB 操作符进行高效查询：
-
-```sql
--- 查询包含特定工具调用的消息
-SELECT * FROM messages
-WHERE content @> '[{"type": "tool_use", "name": "bash_run"}]';
-
--- 查询工具输入包含特定参数的记录
-SELECT * FROM tool_call_records
-WHERE input @> '{"command": "ls"}';
-```
-
----
-
-## 生产部署
-
-### 数据库初始化
-
-Store 在首次创建时会自动初始化表结构和索引，无需手动执行 SQL 脚本。
-
-```typescript
-// 第一次运行会自动创建表
-const store = new SqliteStore('./agents.db', './store');
-// 或
-const store = new PostgresStore(config, './store');
-```
-
-### 备份策略
-
-#### SQLite 备份
-
-```bash
-# 方法 1：文件复制（需要先停止写入）
-cp agents.db agents.db.backup
-
-# 方法 2：在线备份（推荐）
-sqlite3 agents.db ".backup agents.db.backup"
-
-# 方法 3：导出 SQL
-sqlite3 agents.db .dump > agents.sql
-```
-
-#### PostgreSQL 备份
-
-```bash
-# 逻辑备份（小数据库）
-pg_dump -h localhost -U kode -d kode_agents > backup.sql
-
-# 压缩备份
-pg_dump -h localhost -U kode -d kode_agents | gzip > backup.sql.gz
-
-# 物理备份（大数据库，需要 wal_level = replica）
-pg_basebackup -h localhost -U kode -D /backup/postgres -Fp -Xs -P
-
-# 定时备份（cron）
-0 2 * * * pg_dump -h localhost -U kode -d kode_agents | gzip > /backup/kode_$(date +\%Y\%m\%d).sql.gz
-```
-
-### 监控指标
-
-#### 关键指标
-
-| 指标 | 说明 | 告警阈值 |
-|-----|------|---------|
-| **数据库文件大小** | SQLite 文件大小 | > 50GB 考虑迁移 |
-| **连接数** | PostgreSQL 活跃连接 | > max_connections * 0.8 |
-| **慢查询** | 执行时间 > 1s 的查询 | > 100 次/小时 |
-| **锁等待** | 事务等待锁的时间 | > 100ms |
-| **磁盘 I/O** | 读写吞吐量 | > 80% 利用率 |
-
-#### SQLite 监控
-
-```typescript
-import Database from 'better-sqlite3';
-
-const db = new Database('./agents.db');
-
-// 查询数据库大小
-const size = db.prepare('SELECT page_count * page_size as size FROM pragma_page_count(), pragma_page_size()').get();
-console.log(`Database size: ${(size.size / 1024 / 1024).toFixed(2)} MB`);
-
-// 查询表行数
-const counts = db.prepare('SELECT COUNT(*) as count FROM messages').get();
-console.log(`Message count: ${counts.count}`);
-```
-
-#### PostgreSQL 监控
-
-```sql
--- 数据库大小
-SELECT pg_size_pretty(pg_database_size('kode_agents'));
-
--- 表大小
-SELECT schemaname, tablename, pg_size_pretty(pg_total_relation_size(schemaname||'.'||tablename))
-FROM pg_tables WHERE schemaname = 'public';
-
--- 活跃连接数
-SELECT count(*) FROM pg_stat_activity WHERE datname = 'kode_agents';
-
--- 慢查询（需要启用 pg_stat_statements）
-SELECT query, calls, mean_exec_time, stddev_exec_time
-FROM pg_stat_statements
-WHERE mean_exec_time > 1000  -- > 1s
-ORDER BY mean_exec_time DESC
-LIMIT 10;
-
--- 锁等待
-SELECT pid, usename, pg_blocking_pids(pid) as blocked_by, query
-FROM pg_stat_activity
-WHERE cardinality(pg_blocking_pids(pid)) > 0;
-```
-
-### 高可用部署
-
-#### PostgreSQL 主从复制
-
-```bash
-# 主库配置（postgresql.conf）
-wal_level = replica
-max_wal_senders = 3
-wal_keep_size = 64MB
-
-# 从库启动
-pg_basebackup -h master -U replication -D /data/postgres -Fp -Xs -P
-# 配置 standby.signal 和 primary_conninfo
-
-# 应用层连接池配置（读写分离）
-const masterStore = new PostgresStore(masterConfig, './store');
-const replicaStore = new PostgresStore(replicaConfig, './store');
-
-// 写操作用主库
-await masterStore.saveMessages(agentId, messages);
-
-// 读操作用从库
-const sessions = await replicaStore.querySessions({ limit: 10 });
-```
-
-#### 连接池管理（多实例）
-
-```typescript
-// 使用连接池单例模式避免重复连接
-class PostgresStoreFactory {
-  private static poolMap = new Map<string, Pool>();
-
-  static create(config: PoolConfig, storePath: string): PostgresStore {
-    const key = `${config.host}:${config.port}/${config.database}`;
-    if (!this.poolMap.has(key)) {
-      const pool = new Pool(config);
-      this.poolMap.set(key, pool);
-    }
-    return new PostgresStore(config, storePath, this.poolMap.get(key));
-  }
-}
-
-// 多个 Agent 共享同一个连接池
-const store1 = PostgresStoreFactory.create(config, './store1');
-const store2 = PostgresStoreFactory.create(config, './store2');
-```
-
----
-
-## 故障排查
-
-### SQLite 常见问题
-
-#### 问题 1：数据库锁定错误
-
-```
-Error: SQLITE_BUSY: database is locked
-```
-
-**原因**：多个进程同时写入 SQLite
-
-**解决**：
-1. 使用 WAL 模式（Write-Ahead Logging）：
-```typescript
-const db = new Database('./agents.db');
-db.pragma('journal_mode = WAL');
-```
-
-2. 增加 busy timeout：
-```typescript
-db.pragma('busy_timeout = 5000');  // 5秒
-```
-
-3. 考虑迁移到 PostgreSQL（多实例场景）
-
-#### 问题 2：数据库文件损坏
-
-```
-Error: database disk image is malformed
-```
-
-**解决**：
-```bash
-# 尝试恢复
-sqlite3 agents.db "PRAGMA integrity_check"
-
-# 如果失败，从备份恢复
-cp agents.db.backup agents.db
-
-# 或导出导入
-sqlite3 agents.db .dump > dump.sql
-sqlite3 agents_new.db < dump.sql
-```
-
-### PostgreSQL 常见问题
-
-#### 问题 1：连接被拒绝
-
-```
-Error: connect ECONNREFUSED 127.0.0.1:5432
-```
-
-**排查步骤**：
-```bash
-# 1. 检查 PostgreSQL 是否运行
-pg_isready -h localhost -p 5432
-
-# 2. 检查防火墙
-sudo ufw status
-sudo ufw allow 5432/tcp
-
-# 3. 检查 pg_hba.conf
-# 确保允许来自应用服务器的连接
-host    all    all    0.0.0.0/0    md5
-
-# 4. 检查 postgresql.conf
-listen_addresses = '*'
-```
-
-#### 问题 2：连接池耗尽
-
-```
-Error: sorry, too many clients already
-```
-
-**解决**：
-1. 增加 PostgreSQL max_connections：
-```sql
-ALTER SYSTEM SET max_connections = 200;
-SELECT pg_reload_conf();
-```
-
-2. 优化应用连接池：
-```typescript
-const store = new PostgresStore({
-  ...config,
-  max: 10,  // 减少单实例连接数
-  idleTimeoutMillis: 10000  // 更快释放空闲连接
-}, storePath);
-```
-
-#### 问题 3：慢查询
-
-**排查步骤**：
-```sql
--- 1. 启用慢查询日志
-ALTER SYSTEM SET log_min_duration_statement = 1000;  -- 1s
-SELECT pg_reload_conf();
-
--- 2. 查看执行计划
-EXPLAIN ANALYZE
-SELECT * FROM messages WHERE agent_id = 'agt-abc123' ORDER BY seq;
-
--- 3. 检查索引使用
-SELECT schemaname, tablename, indexname, idx_scan, idx_tup_read, idx_tup_fetch
-FROM pg_stat_user_indexes
-WHERE schemaname = 'public';
-
--- 4. 分析表统计信息
-ANALYZE messages;
-```
-
-### 数据一致性问题
-
-#### 问题：数据库与文件系统不一致
-
-**原因**：事务失败或进程异常退出导致部分数据未写入
-
-**排查**：
-```typescript
-// 检查 AgentInfo 是否存在
-const exists = await store.exists(agentId);
-const info = await store.loadInfo(agentId);
-
-// 检查文件系统数据是否存在
-const eventsExist = fs.existsSync(`./store/${agentId}/events.jsonl`);
-
-console.log({ exists, info, eventsExist });
-```
-
-**解决**：
-```typescript
-// 方法 1：从数据库恢复（如果数据库完整）
-const info = await store.loadInfo(agentId);
-const messages = await store.loadMessages(agentId);
-// 手动重建文件系统数据...
-
-// 方法 2：从备份恢复
-await store.delete(agentId);
-// 从备份文件恢复...
-```
-
----
-
-## 常见问题 (FAQ)
-
-### Q: 可以从 JSONStore 迁移到数据库存储吗？
-
-A: 可以，但目前需要手动迁移。未来版本会提供迁移工具。手动迁移步骤：
-
-```typescript
-// 1. 读取 JSONStore 数据
-const jsonStore = new JSONStore('./old-store');
-const agentIds = await jsonStore.list('agt-');
-
-// 2. 逐个迁移到数据库
-const dbStore = new SqliteStore('./agents.db', './new-store');
-for (const agentId of agentIds) {
-  const info = await jsonStore.loadInfo(agentId);
-  const messages = await jsonStore.loadMessages(agentId);
-  const toolCalls = await jsonStore.loadToolCallRecords(agentId);
-  const snapshots = await jsonStore.listSnapshots(agentId);
-
-  await dbStore.saveInfo(agentId, info);
-  await dbStore.saveMessages(agentId, messages);
-  await dbStore.saveToolCallRecords(agentId, toolCalls);
-  for (const snapshot of snapshots) {
-    await dbStore.saveSnapshot(agentId, snapshot);
-  }
-}
-```
-
-### Q: 数据库存储会影响性能吗？
-
-A: 不会。对于常规操作（create、send、resume），性能与 JSONStore 相当。数据库带来的额外开销主要在查询和聚合操作上，但这些操作在 JSONStore 中更慢或无法实现。
-
-### Q: 可以混用 SQLite 和 PostgreSQL 吗？
-
-A: 可以。`QueryableStore` 接口抽象了底层实现，你可以在不同环境使用不同的 Store：
-
-```typescript
-const store = process.env.NODE_ENV === 'production'
-  ? new PostgresStore(pgConfig, storePath)
-  : new SqliteStore('./dev.db', storePath);
-```
-
-### Q: 数据库文件可以跨平台使用吗？
-
-A: SQLite 文件在不同操作系统和架构之间是兼容的，可以直接复制使用。PostgreSQL 备份（pg_dump）也是跨平台的。
-
-### Q: 如何删除旧数据释放空间？
-
-A:
-
-```typescript
-// 删除指定 Agent
-await store.delete(agentId);
-
-// 批量删除旧 Agent（自定义逻辑）
-const sessions = await store.querySessions({
-  createdBefore: new Date(Date.now() - 90 * 24 * 60 * 60 * 1000)  // 90 天前
-});
-for (const session of sessions) {
-  await store.delete(session.agentId);
-}
-
-// SQLite vacuum 释放空间
-// （需要直接操作数据库）
-const db = new Database('./agents.db');
-db.exec('VACUUM');
-```
-
----
-
-## 下一步
-
-- 查看示例代码：[`examples/db-sqlite.ts`](../examples/db-sqlite.ts)、[`examples/db-postgres.ts`](../examples/db-postgres.ts)
-- 了解 Store 接口设计：[`docs/api.md#store`](./api.md#store)
-- 提交问题和建议：[GitHub Issues](https://github.com/shareai-lab/kode-sdk/issues)
diff --git a/docs/en/advanced/architecture.md b/docs/en/advanced/architecture.md
new file mode 100644
index 0000000..604ea9f
--- /dev/null
+++ b/docs/en/advanced/architecture.md
@@ -0,0 +1,427 @@
+# Architecture Guide
+
+> Deep dive into the mental model, design decisions, and runtime characteristics of KODE SDK.
+
+---
+
+## Table of Contents
+
+1. [Mental Model](#mental-model)
+2. [Core Architecture](#core-architecture)
+3. [Runtime Characteristics](#runtime-characteristics)
+4. [Decision Framework](#decision-framework)
+
+---
+
+## Mental Model
+
+### What KODE SDK Is
+
+```
+Think of KODE SDK like:
+
++------------------+     +------------------+     +------------------+
+|       V8         |     |     SQLite       |     |    KODE SDK      |
+|  JS Runtime      |     |  Database Engine |     |  Agent Runtime   |
++------------------+     +------------------+     +------------------+
+        |                        |                        |
+        v                        v                        v
++------------------+     +------------------+     +------------------+
+|    Express.js    |     |     Prisma       |     |   Your App       |
+|  Web Framework   |     |       ORM        |     | (CLI/Desktop/Web)|
++------------------+     +------------------+     +------------------+
+        |                        |                        |
+        v                        v                        v
++------------------+     +------------------+     +------------------+
+|      Vercel      |     |   PlanetScale    |     |   Your Infra     |
+|  Cloud Platform  |     |  Cloud Database  |     | (K8s/EC2/Local)  |
++------------------+     +------------------+     +------------------+
+```
+
+**KODE SDK is an engine, not a platform.**
+
+It provides:
+- Agent lifecycle management (create, run, pause, resume, fork)
+- State persistence (via pluggable Store interface)
+- Tool execution and permission governance
+- Event streams for observability
+
+It does NOT provide:
+- HTTP routing or API framework
+- User authentication or authorization
+- Multi-tenancy or resource isolation
+- Horizontal scaling or load balancing
+
+### The Single Responsibility
+
+```
+                     KODE SDK's Job
+                           |
+                           v
+    +----------------------------------------------+
+    |                                              |
+    |   "Keep this agent running, recover from    |
+    |    crashes, let it fork, and tell me        |
+    |    what's happening via events."            |
+    |                                              |
+    +----------------------------------------------+
+                           |
+                           v
+                     Your App's Job
+                           |
+                           v
+    +----------------------------------------------+
+    |                                              |
+    |   "Handle users, route requests, manage     |
+    |    permissions, scale infrastructure,       |
+    |    and integrate with my systems."          |
+    |                                              |
+    +----------------------------------------------+
+```
+
+---
+
+## Core Architecture
+
+### Component Overview
+
+```
++------------------------------------------------------------------+
+|                         Agent Instance                            |
++------------------------------------------------------------------+
+|                                                                   |
+|  +------------------+  +------------------+  +------------------+ |
+|  |  MessageQueue    |  | ContextManager   |  |   ToolRunner     | |
+|  |  (User inputs)   |  | (Token mgmt)     |  | (Parallel exec)  | |
+|  +--------+---------+  +--------+---------+  +--------+---------+ |
+|           |                     |                     |           |
+|           +---------------------+---------------------+           |
+|                                 |                                 |
+|                    +------------v------------+                    |
+|                    |    BreakpointManager    |                    |
+|                    |   (8-stage state track) |                    |
+|                    +------------+------------+                    |
+|                                 |                                 |
+|  +------------------+  +--------v---------+  +------------------+ |
+|  | PermissionManager|  |     EventBus     |  |   TodoManager    | |
+|  | (Approval flow)  |  | (3-channel emit) |  | (Task tracking)  | |
+|  +------------------+  +------------------+  +------------------+ |
+|                                                                   |
++----------------------------------+--------------------------------+
+                                   |
+                    +--------------+--------------+
+                    |              |              |
+           +--------v------+ +----v----+ +-------v-------+
+           |     Store     | | Sandbox | | ModelProvider |
+           | (Persistence) | | (Exec)  | | (LLM calls)   |
+           +---------------+ +---------+ +---------------+
+```
+
+### Key Classes & Interfaces
+
+| Component | Class | Description |
+|-----------|-------|-------------|
+| Agent | `Agent` | Core orchestrator for conversations and tool execution |
+| Pool | `AgentPool` | Manages multiple Agent instances with lifecycle control |
+| Room | `Room` | Multi-agent messaging and collaboration |
+| Store | `Store`, `JSONStore`, `SqliteStore`, `PostgresStore` | Persistence backends |
+| Sandbox | `LocalSandbox` | Isolated execution environment |
+| Provider | `AnthropicProvider`, `OpenAIProvider`, `GeminiProvider` | LLM API adapters |
+| Events | `EventBus` | Three-channel event distribution |
+| Hooks | `HookManager` | Pre/post execution interception |
+
+### Data Flow
+
+```
+User Message
+     |
+     v
++----+----+     +-----------+     +------------+
+| Message |---->|  Context  |---->|   Model    |
+|  Queue  |     |  Manager  |     |  Provider  |
++---------+     +-----------+     +-----+------+
+                                        |
+                              +---------+---------+
+                              |                   |
+                         Text Response      Tool Calls
+                              |                   |
+                              v                   v
+                    +---------+------+    +------+-------+
+                    |    EventBus    |    |  ToolRunner  |
+                    | (text_chunk)   |    | (parallel)   |
+                    +----------------+    +------+-------+
+                                                 |
+                              +------------------+------------------+
+                              |                  |                  |
+                         Permission         Execution          Result
+                           Check              (Sandbox)        Handling
+                              |                  |                  |
+                              v                  v                  v
+                    +--------------------+  +---------+  +------------------+
+                    | PermissionManager  |  | Sandbox |  |    EventBus      |
+                    | (Control channel)  |  | (exec)  |  | (tool:end)       |
+                    +--------------------+  +---------+  +------------------+
+```
+
+### Breakpoint State Machine
+
+The `BreakpointManager` tracks 8 states for crash recovery:
+
+```
+Agent Execution Flow:
+
+  READY -> PRE_MODEL -> STREAMING_MODEL -> TOOL_PENDING -> AWAITING_APPROVAL
+    |         |              |                 |                |
+    +-------- WAL Protected State -------------+-- Approval ----+
+                                                                |
+                        +---------------------------------------+
+                        |
+                        v
+            PRE_TOOL -> TOOL_EXECUTING -> POST_TOOL -> READY
+                |             |              |
+                +---- Tool Execution --------+
+
+On crash: Resume from last safe breakpoint, auto-seal incomplete tool calls
+```
+
+**BreakpointState Values** (from `src/core/types.ts:69`):
+- `READY` - Agent idle, waiting for input
+- `PRE_MODEL` - About to call LLM
+- `STREAMING_MODEL` - Receiving LLM response
+- `TOOL_PENDING` - Tool calls parsed, awaiting execution
+- `AWAITING_APPROVAL` - Waiting for permission decision
+- `PRE_TOOL` - About to execute tool
+- `TOOL_EXECUTING` - Tool running
+- `POST_TOOL` - Tool completed, processing result
+
+### State Persistence (WAL)
+
+```
+Every State Change
+        |
+        v
++-------+-------+
+|  Write-Ahead  |
+|     Log       |  <-- Write first (fast, append-only)
++-------+-------+
+        |
+        v
++-------+-------+
+|   Main File   |  <-- Then update (can be slow)
++-------+-------+
+        |
+        v
++-------+-------+
+|  Delete WAL   |  <-- Finally cleanup
++-------+-------+
+
+On Crash Recovery:
+1. Scan for WAL files
+2. If WAL exists but main file incomplete -> Restore from WAL
+3. Delete WAL after successful restore
+```
+
+### Three-Channel Event System
+
+```
++-------------+     +-------------+     +---------------+
+|  Progress   |     |   Control   |     |    Monitor    |
++-------------+     +-------------+     +---------------+
+| text_chunk  |     | permission  |     | state_changed |
+| tool:start  |     | _required   |     | token_usage   |
+| tool:end    |     | permission  |     | tool_executed |
+| done        |     | _decided    |     | error         |
++-------------+     +-------------+     +---------------+
+      |                   |                   |
+      v                   v                   v
+   Your UI         Approval Service     Observability
+```
+
+**Usage Pattern:**
+
+```typescript
+// Progress: Real-time streaming for UI
+for await (const envelope of agent.subscribe(['progress'])) {
+  if (envelope.event.type === 'text_chunk') {
+    process.stdout.write(envelope.event.delta);
+  }
+}
+
+// Control: Approval workflow
+agent.on('permission_required', async (event) => {
+  await event.respond('allow');
+});
+
+// Monitor: Observability
+agent.on('token_usage', (event) => {
+  console.log('Tokens:', event.totalTokens);
+});
+```
+
+---
+
+## Runtime Characteristics
+
+### Memory Model
+
+```
+Agent Memory Footprint (Typical):
+
++---------------------------+
+|     Agent Instance        |
++---------------------------+
+| messages[]: 10KB - 2MB    |  <-- Grows with conversation
+| toolRecords: 1KB - 100KB  |  <-- Grows with tool usage
+| eventTimeline: 5KB - 500KB|  <-- Recent events cached
+| mediaCache: 0 - 10MB      |  <-- If images/files involved
+| baseObjects: ~50KB        |  <-- Fixed overhead
++---------------------------+
+
+Typical range: 100KB - 5MB per agent
+AgentPool (50 agents): 5MB - 250MB
+```
+
+### I/O Patterns
+
+```
+Per Agent Step:
+
++-------------------+     +-------------------+     +-------------------+
+| persistMessages() |     | persistToolRecs() |     | emitEvents()      |
+| ~20-50ms (SSD)    |     | ~5-10ms           |     | ~1-5ms (buffered) |
++-------------------+     +-------------------+     +-------------------+
+
+Total per step: 30-70ms I/O overhead
+
+At Scale (100 concurrent agents):
+- Sequential bottleneck in JSONStore
+- Need SqliteStore/PostgresStore for parallel writes
+```
+
+### Event Loop Impact
+
+```
+Agent Processing:
+
+   +---------+
+   |  READY  |  <-- Agent waiting for input
+   +----+----+
+        |
+   +----v----+
+   | PROCESS |  <-- Model call (async, non-blocking)
+   +----+----+
+        |
+   +----v----+
+   |  TOOL   |  <-- Tool execution (may block if sync)
+   +----+----+
+        |
+   +----v----+
+   | PERSIST |  <-- File I/O (async)
+   +----+----+
+        |
+        v
+   +---------+
+   |  READY  |
+   +---------+
+
+Key: All heavy operations are async
+Risk: Sync operations in custom tools can block event loop
+```
+
+---
+
+## Decision Framework
+
+### When to Use KODE SDK
+
+```
++------------------+
+|  Decision Tree   |
++------------------+
+         |
+         v
++------------------+
+| Single user/     |----YES---> Use directly (CLI/Desktop)
+| local machine?   |
++--------+---------+
+         | NO
+         v
++------------------+
+| < 100 concurrent |----YES---> Single server (AgentPool)
+| users?           |
++--------+---------+
+         | NO
+         v
++------------------+
+| Can run long-    |----YES---> Worker microservice pattern
+| lived processes? |
++--------+---------+
+         | NO
+         v
++------------------+
+| Serverless only? |----YES---> Hybrid pattern (API + Workers)
++--------+---------+
+         | NO
+         v
++------------------+
+| Consider other   |
+| solutions        |
++------------------+
+```
+
+### Platform Compatibility Matrix
+
+| Platform | Compatible | Notes |
+|----------|------------|-------|
+| Node.js | 100% | Primary target |
+| Bun | 95% | Minor adjustments needed |
+| Deno | 80% | Permission flags required |
+| Electron | 90% | Use in main process |
+| VSCode Extension | 85% | workspace.fs integration |
+| Vercel Functions | 20% | API layer only, not agents |
+| Cloudflare Workers | 5% | Not compatible |
+| Browser | 10% | No fs/process, very limited |
+
+### Store Selection Guide
+
+| Store | Use Case | Throughput | Scaling |
+|-------|----------|------------|---------|
+| `JSONStore` | Development, CLI | Low | Single node |
+| `SqliteStore` | Desktop apps, small server | Medium | Single node |
+| `PostgresStore` | Production, multi-node | High | Multi-node |
+
+**Store Interface Hierarchy** (from `src/infra/store/types.ts`):
+
+```
+Store (base)
+  └── QueryableStore (adds query methods)
+        └── ExtendedStore (adds health check, metrics, distributed lock)
+```
+
+---
+
+## Summary
+
+### Core Principles
+
+1. **KODE SDK is a runtime kernel** - It manages agent lifecycle, not application infrastructure
+
+2. **Agents are stateful** - They need persistent storage and long-running processes
+
+3. **Scale through architecture** - Use worker patterns for large-scale deployments
+
+4. **Store is pluggable** - Implement custom Store for your infrastructure
+
+### Quick Reference
+
+| Scenario | Pattern | Store | Scale |
+|----------|---------|-------|-------|
+| CLI tool | Single Process | JSONStore | 1 user |
+| Desktop app | Single Process | SqliteStore | 1 user |
+| Internal tool | Single Server | SqliteStore/PostgresStore | ~100 users |
+| SaaS product | Worker Microservice | PostgresStore | 10K+ users |
+| Serverless app | Hybrid | External DB | Varies |
+
+---
+
+*See also: [Production Deployment](./production.md) | [Database Guide](../guides/database.md)*
diff --git a/docs/en/advanced/multi-agent.md b/docs/en/advanced/multi-agent.md
new file mode 100644
index 0000000..4b8f0df
--- /dev/null
+++ b/docs/en/advanced/multi-agent.md
@@ -0,0 +1,452 @@
+# Multi-Agent Systems
+
+This guide covers building multi-Agent systems using KODE SDK's coordination primitives: AgentPool, Room, and task_run.
+
+---
+
+## Overview
+
+| Component | Use Case |
+|-----------|----------|
+| `AgentPool` | Manage multiple Agent instances with shared dependencies |
+| `Room` | Coordinate communication between Agents with @mentions |
+| `task_run` | Delegate sub-tasks to specialized Agents |
+
+---
+
+## AgentPool
+
+Manages multiple Agent instances with lifecycle operations.
+
+### Basic Usage
+
+```typescript
+import { AgentPool } from '@shareai-lab/kode-sdk';
+
+const pool = new AgentPool({
+  dependencies: deps,
+  maxAgents: 50,  // Default: 50
+});
+
+// Create agents
+const agent1 = await pool.create('agent-1', {
+  templateId: 'researcher',
+  modelConfig: { provider: 'anthropic', apiKey: process.env.ANTHROPIC_API_KEY! },
+});
+
+const agent2 = await pool.create('agent-2', {
+  templateId: 'coder',
+  modelConfig: { provider: 'anthropic', apiKey: process.env.ANTHROPIC_API_KEY! },
+});
+
+// Get agent by ID
+const agent = pool.get('agent-1');
+
+// List all agents
+const agentIds = pool.list(); // ['agent-1', 'agent-2']
+
+// List with prefix filter
+const researchers = pool.list({ prefix: 'researcher-' });
+```
+
+### AgentPool API
+
+```typescript
+class AgentPool {
+  constructor(opts: AgentPoolOptions);
+
+  // Create new agent
+  async create(agentId: string, config: AgentConfig): Promise<Agent>;
+
+  // Get existing agent
+  get(agentId: string): Agent | undefined;
+
+  // List agent IDs
+  list(opts?: { prefix?: string }): string[];
+
+  // Get agent status
+  async status(agentId: string): Promise<AgentStatus | undefined>;
+
+  // Fork an agent
+  async fork(agentId: string, snapshotSel?: SnapshotId | { at?: string }): Promise<Agent>;
+
+  // Resume from storage
+  async resume(agentId: string, config: AgentConfig, opts?: {
+    autoRun?: boolean;
+    strategy?: 'crash' | 'manual';
+  }): Promise<Agent>;
+
+  // Destroy an agent
+  async destroy(agentId: string): Promise<void>;
+}
+```
+
+---
+
+## Room
+
+Coordinates communication between Agents with broadcast and directed messages.
+
+### Basic Usage
+
+```typescript
+import { AgentPool, Room } from '@shareai-lab/kode-sdk';
+
+const pool = new AgentPool({ dependencies: deps });
+const room = new Room(pool);
+
+// Create and join agents
+const alice = await pool.create('alice', config);
+const bob = await pool.create('bob', config);
+const charlie = await pool.create('charlie', config);
+
+room.join('Alice', 'alice');
+room.join('Bob', 'bob');
+room.join('Charlie', 'charlie');
+
+// Broadcast to all (except sender)
+await room.say('Alice', 'Hello everyone!');
+// Bob and Charlie receive: "[from:Alice] Hello everyone!"
+
+// Directed message with @mention
+await room.say('Alice', '@Bob What do you think about this?');
+// Only Bob receives: "[from:Alice] @Bob What do you think about this?"
+
+// Multiple mentions
+await room.say('Alice', '@Bob @Charlie Please review.');
+// Bob and Charlie both receive the message
+
+// Leave room
+room.leave('Charlie');
+
+// Get current members
+const members = room.getMembers();
+// [{ name: 'Alice', agentId: 'alice' }, { name: 'Bob', agentId: 'bob' }]
+```
+
+### Room API
+
+```typescript
+class Room {
+  constructor(pool: AgentPool);
+
+  // Join room
+  join(name: string, agentId: string): void;
+
+  // Leave room
+  leave(name: string): void;
+
+  // Send message (broadcast or directed)
+  async say(from: string, text: string): Promise<void>;
+
+  // Get members
+  getMembers(): RoomMember[];
+}
+
+interface RoomMember {
+  name: string;
+  agentId: string;
+}
+```
+
+---
+
+## task_run Tool
+
+Delegates tasks to specialized sub-Agents.
+
+### Setup
+
+```typescript
+import { createTaskRunTool, AgentTemplate } from '@shareai-lab/kode-sdk';
+
+// Define available templates
+const templates: AgentTemplate[] = [
+  {
+    id: 'researcher',
+    whenToUse: 'Research and gather information',
+    tools: ['fs_read', 'fs_glob', 'fs_grep'],
+  },
+  {
+    id: 'coder',
+    whenToUse: 'Write and modify code',
+    tools: ['fs_read', 'fs_write', 'fs_edit', 'bash_run'],
+  },
+  {
+    id: 'reviewer',
+    whenToUse: 'Review code and provide feedback',
+    tools: ['fs_read', 'fs_glob', 'fs_grep'],
+  },
+];
+
+// Create task_run tool
+const taskRunTool = createTaskRunTool(templates);
+
+// Register
+deps.toolRegistry.register('task_run', () => taskRunTool);
+```
+
+### How task_run Works
+
+When an Agent calls `task_run`:
+
+1. Agent specifies `description`, `prompt`, `agentTemplateId`, and optional `context`
+2. SDK creates a sub-Agent with the specified template
+3. Sub-Agent processes the task
+4. Result returns to parent Agent
+
+**Tool Parameters:**
+
+```typescript
+interface TaskRunParams {
+  description: string;      // Short task description (3-5 words)
+  prompt: string;           // Detailed instructions
+  agentTemplateId: string;  // Template ID to use
+  context?: string;         // Additional context
+}
+```
+
+**Tool Result:**
+
+```typescript
+interface TaskRunResult {
+  status: 'ok' | 'paused';
+  template: string;
+  text?: string;
+  permissionIds?: string[];
+}
+```
+
+### Sub-Agent Configuration
+
+Configure sub-agent behavior in the template:
+
+```typescript
+const template: AgentTemplateDefinition = {
+  id: 'coordinator',
+  systemPrompt: 'You coordinate tasks between specialists...',
+  tools: ['task_run', 'fs_read'],
+  runtime: {
+    subagents: {
+      depth: 2,           // Max nesting depth
+      templates: ['researcher', 'coder'],  // Allowed templates
+      inheritConfig: true,
+      overrides: {
+        permission: { mode: 'auto' },
+      },
+    },
+  },
+};
+```
+
+---
+
+## Patterns
+
+### Coordinator Pattern
+
+One Agent coordinates multiple specialists.
+
+```typescript
+// Coordinator template
+const coordinatorTemplate: AgentTemplateDefinition = {
+  id: 'coordinator',
+  systemPrompt: `You are a project coordinator. Break down complex tasks and delegate to specialists:
+- Use 'researcher' for information gathering
+- Use 'coder' for implementation
+- Use 'reviewer' for code review
+
+Coordinate the work and synthesize results.`,
+  tools: ['task_run', 'fs_read', 'fs_write'],
+  runtime: {
+    subagents: {
+      depth: 1,
+      templates: ['researcher', 'coder', 'reviewer'],
+    },
+  },
+};
+
+// Usage
+const coordinator = await Agent.create({
+  templateId: 'coordinator',
+  ...
+}, deps);
+
+await coordinator.send('Implement a user authentication system');
+// Coordinator will delegate:
+// 1. researcher: "Research auth best practices"
+// 2. coder: "Implement auth module"
+// 3. reviewer: "Review auth implementation"
+```
+
+### Pipeline Pattern
+
+Chain Agents in sequence.
+
+```typescript
+async function pipeline(input: string) {
+  // Step 1: Research
+  const researcher = await pool.create('researcher-1', {
+    templateId: 'researcher',
+    ...
+  });
+  const research = await researcher.send(`Research: ${input}`);
+
+  // Step 2: Implement
+  const coder = await pool.create('coder-1', {
+    templateId: 'coder',
+    ...
+  });
+  const implementation = await coder.send(`
+    Based on this research:
+    ${research}
+
+    Implement the solution.
+  `);
+
+  // Step 3: Review
+  const reviewer = await pool.create('reviewer-1', {
+    templateId: 'reviewer',
+    ...
+  });
+  const review = await reviewer.send(`
+    Review this implementation:
+    ${implementation}
+  `);
+
+  return { research, implementation, review };
+}
+```
+
+### Debate Pattern
+
+Multiple Agents discuss a topic.
+
+```typescript
+const room = new Room(pool);
+
+// Create debaters
+const alice = await pool.create('alice', {
+  templateId: 'debater',
+  metadata: { position: 'pro' },
+  ...
+});
+const bob = await pool.create('bob', {
+  templateId: 'debater',
+  metadata: { position: 'con' },
+  ...
+});
+
+room.join('Alice', 'alice');
+room.join('Bob', 'bob');
+
+// Start debate
+await room.say('Moderator', 'Topic: Should we use microservices?');
+
+// Continue debate rounds
+for (let round = 0; round < 3; round++) {
+  await room.say('Alice', `@Bob [Round ${round + 1}] Here's my argument...`);
+  await room.say('Bob', `@Alice [Round ${round + 1}] My counterargument...`);
+}
+```
+
+---
+
+## Best Practices
+
+### 1. Limit Depth
+
+Prevent infinite sub-agent chains:
+
+```typescript
+runtime: {
+  subagents: {
+    depth: 2,  // Maximum nesting depth
+  },
+}
+```
+
+### 2. Clear Templates
+
+Each template should have clear responsibilities:
+
+```typescript
+const templates: AgentTemplate[] = [
+  {
+    id: 'data-analyst',
+    whenToUse: 'Analyze data patterns and generate insights',
+    tools: ['fs_read', 'fs_glob'],
+  },
+  // Avoid overlapping responsibilities
+];
+```
+
+### 3. Resource Management
+
+Clean up agents when done:
+
+```typescript
+try {
+  const agent = await pool.create('temp-agent', config);
+  const result = await agent.send(message);
+  return result;
+} finally {
+  await pool.destroy('temp-agent');
+}
+```
+
+### 4. Permission Inheritance
+
+Consider permission settings for sub-agents:
+
+```typescript
+runtime: {
+  subagents: {
+    inheritConfig: true,
+    overrides: {
+      permission: { mode: 'approval' },  // Require approval
+    },
+  },
+}
+```
+
+---
+
+## Monitoring Multi-Agent Systems
+
+### Track Sub-Agent Events
+
+```typescript
+agent.on('tool_executed', (event) => {
+  if (event.call.name === 'task_run') {
+    console.log('Sub-agent completed:', {
+      template: event.call.result?.template,
+      status: event.call.result?.status,
+    });
+  }
+});
+```
+
+### Aggregate Metrics
+
+```typescript
+const allAgentIds = pool.list();
+const stats = await Promise.all(
+  allAgentIds.map(async (id) => {
+    const status = await pool.status(id);
+    return { id, ...status };
+  })
+);
+
+console.log('Total agents:', stats.length);
+console.log('Working:', stats.filter(s => s.state === 'WORKING').length);
+console.log('Paused:', stats.filter(s => s.state === 'PAUSED').length);
+```
+
+---
+
+## References
+
+- [API Reference](../reference/api.md)
+- [Events Guide](../guides/events.md)
+- [Production Deployment](./production.md)
diff --git a/docs/en/advanced/production.md b/docs/en/advanced/production.md
new file mode 100644
index 0000000..6b5b5b1
--- /dev/null
+++ b/docs/en/advanced/production.md
@@ -0,0 +1,702 @@
+# Production Deployment
+
+This guide covers production configuration, monitoring, and best practices for KODE SDK.
+
+---
+
+## Database Selection
+
+### Development vs Production
+
+| Store | Use Case | Features |
+|-------|----------|----------|
+| `JSONStore` | Development, single machine | Simple file-based storage |
+| `SqliteStore` | Development, medium scale | QueryableStore + ExtendedStore |
+| `PostgresStore` | Production, multi-worker | Full ExtendedStore, distributed locks |
+
+### PostgreSQL Configuration
+
+```typescript
+import { createStore } from '@shareai-lab/kode-sdk';
+
+const store = await createStore({
+  type: 'postgres',
+  connection: {
+    host: process.env.PG_HOST!,
+    port: 5432,
+    database: 'kode_agents',
+    user: process.env.PG_USER!,
+    password: process.env.PG_PASSWORD!,
+    ssl: { rejectUnauthorized: true },
+
+    // Connection pool settings
+    max: 20,                       // Pool size
+    idleTimeoutMillis: 30000,      // Idle connection timeout
+    connectionTimeoutMillis: 5000, // Connection timeout
+  },
+  fileStoreBaseDir: '/data/kode-files',
+});
+```
+
+---
+
+## Health Checks
+
+ExtendedStore provides built-in health check capabilities.
+
+### Health Check API
+
+```typescript
+const health = await store.healthCheck();
+
+// Response:
+// {
+//   healthy: true,
+//   database: { connected: true, latencyMs: 5 },
+//   fileSystem: { writable: true },
+//   checkedAt: 1706000000000
+// }
+```
+
+### HTTP Health Endpoint
+
+```typescript
+import express from 'express';
+
+const app = express();
+
+app.get('/health', async (req, res) => {
+  const status = await store.healthCheck();
+  res.status(status.healthy ? 200 : 503).json(status);
+});
+
+// Kubernetes readiness probe
+app.get('/ready', async (req, res) => {
+  const status = await store.healthCheck();
+  res.status(status.healthy ? 200 : 503).send();
+});
+```
+
+### Data Consistency Check
+
+```typescript
+const consistency = await store.checkConsistency(agentId);
+
+if (!consistency.consistent) {
+  console.error('Consistency issues:', consistency.issues);
+}
+```
+
+---
+
+## Metrics & Monitoring
+
+### Store Metrics
+
+```typescript
+const metrics = await store.getMetrics();
+
+// {
+//   operations: { saves: 1234, loads: 5678, queries: 910, deletes: 11 },
+//   performance: { avgLatencyMs: 15.5, maxLatencyMs: 250, minLatencyMs: 2 },
+//   storage: { totalAgents: 100, totalMessages: 50000, dbSizeBytes: 104857600 },
+//   collectedAt: 1706000000000
+// }
+```
+
+### Prometheus Integration
+
+```typescript
+import { register, Gauge, Histogram } from 'prom-client';
+
+const agentCount = new Gauge({ name: 'kode_agents_total', help: 'Total agents' });
+const toolLatency = new Histogram({
+  name: 'kode_tool_duration_seconds',
+  help: 'Tool execution duration',
+  buckets: [0.1, 0.5, 1, 2, 5, 10],
+});
+
+agent.on('tool_executed', (event) => {
+  if (event.call.durationMs) {
+    toolLatency.observe(event.call.durationMs / 1000);
+  }
+});
+
+app.get('/metrics', async (req, res) => {
+  res.set('Content-Type', register.contentType);
+  res.send(await register.metrics());
+});
+```
+
+---
+
+## Retry Strategy
+
+### Built-in Retry Configuration
+
+```typescript
+import { withRetry, DEFAULT_RETRY_CONFIG } from '@shareai-lab/kode-sdk/provider';
+
+// Default: { maxRetries: 3, baseDelayMs: 1000, maxDelayMs: 60000, jitterFactor: 0.2 }
+
+const result = await withRetry(
+  () => callExternalAPI(),
+  { maxRetries: 5, baseDelayMs: 500, provider: 'myservice' },
+  (error, attempt, delay) => console.log(`Retry ${attempt} after ${delay}ms`)
+);
+```
+
+### Retryable Errors
+
+| Error Type | Retryable | Description |
+|------------|-----------|-------------|
+| `RateLimitError` | Yes | Respects `retry-after` header |
+| `TimeoutError` | Yes | Request timeout |
+| `ServiceUnavailableError` | Yes | 5xx server errors |
+| `AuthenticationError` | No | Invalid credentials |
+| `QuotaExceededError` | No | Billing limit reached |
+
+---
+
+## Distributed Locking
+
+### Using Agent Locks
+
+```typescript
+const release = await store.acquireAgentLock(agentId, 30000);
+
+try {
+  const agent = await Agent.resumeFromStore(agentId, deps);
+  await agent.send('Process this task');
+} finally {
+  await release();
+}
+```
+
+- **SQLite**: In-memory lock (single process only)
+- **PostgreSQL**: Database-level advisory lock (multi-worker safe)
+
+---
+
+## Graceful Shutdown
+
+```typescript
+async function gracefulShutdown() {
+  // 1. Stop accepting new requests
+  server.close();
+
+  // 2. Interrupt running agents
+  for (const agentId of pool.list()) {
+    const agent = pool.get(agentId);
+    if (agent) await agent.interrupt();
+  }
+
+  // 3. Close database connections
+  await store.close();
+
+  process.exit(0);
+}
+
+process.on('SIGTERM', gracefulShutdown);
+process.on('SIGINT', gracefulShutdown);
+```
+
+---
+
+## Logging & Cost Management
+
+### Logger Interface
+
+```typescript
+const config: DebugConfig = {
+  verbose: false,
+  logTokenUsage: true,
+  logCache: true,
+  logRetries: true,
+  redactSensitive: true,
+};
+```
+
+### Cost Limiting
+
+```typescript
+let sessionCost = 0;
+const COST_LIMIT = 10.0;
+
+agent.on('token_usage', (event) => {
+  const cost = (event.inputTokens * 0.003 + event.outputTokens * 0.015) / 1000;
+  sessionCost += cost;
+
+  if (sessionCost > COST_LIMIT) {
+    agent.interrupt();
+  }
+});
+```
+
+---
+
+## Security Best Practices
+
+```typescript
+// Permission configuration
+const agent = await Agent.create({
+  templateId: 'secure-assistant',
+  overrides: {
+    permission: {
+      mode: 'approval',
+      requireApprovalTools: ['bash_run', 'fs_write'],
+      allowTools: ['fs_read', 'fs_glob'],
+    },
+  },
+}, deps);
+
+// Sandbox boundary
+const sandbox = new LocalSandbox({
+  workDir: '/app/workspace',
+  enforceBoundary: true,
+  allowPaths: ['/app/workspace', '/tmp'],
+});
+```
+
+---
+
+## Deployment Checklist
+
+- [ ] Use PostgreSQL for production
+- [ ] Configure connection pooling
+- [ ] Set up health check endpoints
+- [ ] Configure metrics collection
+- [ ] Implement graceful shutdown
+- [ ] Use environment variables for secrets
+- [ ] Enable SSL for database connections
+- [ ] Set sandbox boundaries
+
+---
+
+## Deployment Patterns
+
+### Decision Tree
+
+```
++------------------+
+|  Decision Tree   |
++------------------+
+         |
+         v
++----------------------+
+| Single user/         |----YES---> Pattern 1: Single Process
+| local machine?       |
++--------+-------------+
+         | NO
+         v
++----------------------+
+| < 100 concurrent     |----YES---> Pattern 2: Single Server
+| users?               |
++--------+-------------+
+         | NO
+         v
++----------------------+
+| Can run long-running |----YES---> Pattern 3: Worker Microservice
+| processes?           |
++--------+-------------+
+         | NO
+         v
++----------------------+
+| Serverless only?     |----YES---> Pattern 4: Hybrid (API + Workers)
++--------+-------------+
+```
+
+### Pattern 1: Single Process (CLI/Desktop)
+
+**Best for:** CLI tools, Electron apps, VSCode extensions
+
+```
+┌─────────────────────────────┐
+│         Your App            │
+│  ┌───────────────────────┐  │
+│  │      KODE SDK         │  │
+│  │  ┌─────────────────┐  │  │
+│  │  │   AgentPool     │  │  │
+│  │  │   + JSONStore   │  │  │
+│  │  └────────┬────────┘  │  │
+│  └───────────┼───────────┘  │
+└──────────────┼──────────────┘
+               │
+        ┌──────▼──────┐
+        │ Local Files │
+        └─────────────┘
+```
+
+```typescript
+import { Agent, AgentPool, JSONStore } from '@shareai-lab/kode-sdk';
+import * as path from 'path';
+import * as os from 'os';
+
+const store = new JSONStore(path.join(os.homedir(), '.my-agent'));
+const pool = new AgentPool({ dependencies: { store, templateRegistry, sandboxFactory, toolRegistry } });
+
+// Resume or create
+const agent = pool.get('main') ?? await pool.create('main', { templateId: 'cli-assistant' });
+
+// Interactive loop
+for await (const line of readline) {
+  await agent.send(line);
+  for await (const env of agent.subscribe(['progress'])) {
+    if (env.event.type === 'text_chunk') process.stdout.write(env.event.delta);
+    if (env.event.type === 'done') break;
+  }
+}
+```
+
+### Pattern 2: Single Server
+
+**Best for:** Internal tools, small teams, prototypes (<100 concurrent users)
+
+```
+┌──────────────────────────────────────────┐
+│               Node.js Server             │
+│  ┌────────────────────────────────────┐  │
+│  │          Express/Hono              │  │
+│  │  /api/agents/:id/message (POST)    │  │
+│  │  /api/agents/:id/events  (SSE)     │  │
+│  └──────────────────┬─────────────────┘  │
+│                     │                    │
+│  ┌──────────────────▼─────────────────┐  │
+│  │          AgentPool (50)            │  │
+│  │   SqliteStore / PostgresStore      │  │
+│  └──────────────────┬─────────────────┘  │
+└─────────────────────┼────────────────────┘
+                      │
+               ┌──────▼──────┐
+               │  Database   │
+               └─────────────┘
+```
+
+```typescript
+import { Hono } from 'hono';
+import { streamSSE } from 'hono/streaming';
+import { AgentPool, SqliteStore } from '@shareai-lab/kode-sdk';
+
+const app = new Hono();
+const store = new SqliteStore('./agents.db', './data');
+const pool = new AgentPool({ dependencies: { store, ... }, maxAgents: 50 });
+
+app.post('/api/agents/:id/message', async (c) => {
+  const { id } = c.req.param();
+  const { message } = await c.req.json();
+
+  let agent = pool.get(id);
+  if (!agent) {
+    const exists = await store.exists(id);
+    agent = exists
+      ? await pool.resume(id, getConfig())
+      : await pool.create(id, getConfig());
+  }
+
+  const result = await agent.complete(message);
+  return c.json(result);
+});
+
+app.get('/api/agents/:id/events', async (c) => {
+  const { id } = c.req.param();
+  const agent = pool.get(id);
+  if (!agent) return c.json({ error: 'Agent not found' }, 404);
+
+  return streamSSE(c, async (stream) => {
+    for await (const env of agent.subscribe(['progress'])) {
+      await stream.writeSSE({ data: JSON.stringify(env.event) });
+      if (env.event.type === 'done') break;
+    }
+  });
+});
+```
+
+### Pattern 3: Worker Microservice
+
+**Best for:** Production SaaS, 1000+ concurrent users
+
+```
+┌─────────────────────────────────────────────────────────────────┐
+│                        Load Balancer                            │
+└────────────────────────────┬────────────────────────────────────┘
+                             │
+         ┌───────────────────┼───────────────────┐
+         │                   │                   │
+┌────────▼────────┐ ┌────────▼────────┐ ┌────────▼────────┐
+│   API Server 1  │ │   API Server 2  │ │   API Server N  │
+│   (Stateless)   │ │   (Stateless)   │ │   (Stateless)   │
+└────────┬────────┘ └────────┬────────┘ └────────┬────────┘
+         │                   │                   │
+         └───────────────────┼───────────────────┘
+                             │
+                    ┌────────▼────────┐
+                    │   Job Queue     │
+                    │   (BullMQ)      │
+                    └────────┬────────┘
+                             │
+         ┌───────────────────┼───────────────────┐
+         │                   │                   │
+┌────────▼────────┐ ┌────────▼────────┐ ┌────────▼────────┐
+│   Worker 1      │ │   Worker 2      │ │   Worker N      │
+│  AgentPool(50)  │ │  AgentPool(50)  │ │  AgentPool(50)  │
+└────────┬────────┘ └────────┬────────┘ └────────┬────────┘
+         │                   │                   │
+         └───────────────────┼───────────────────┘
+                             │
+              ┌──────────────┼──────────────┐
+              │              │              │
+       ┌──────▼──────┐ ┌─────▼─────┐ ┌─────▼─────┐
+       │  PostgreSQL │ │   Redis   │ │    S3     │
+       │   (Store)   │ │  (Cache)  │ │  (Files)  │
+       └─────────────┘ └───────────┘ └───────────┘
+```
+
+**API Server (Stateless):**
+
+```typescript
+// api/routes/agent.ts
+import { Queue } from 'bullmq';
+
+const queue = new Queue('agent-tasks', { connection: redis });
+
+app.post('/api/agents/:id/message', async (c) => {
+  const { id } = c.req.param();
+  const { message } = await c.req.json();
+
+  const job = await queue.add('process-message', {
+    agentId: id,
+    message,
+    userId: c.get('userId'),
+  });
+
+  return c.json({ jobId: job.id, status: 'queued' });
+});
+
+app.get('/api/agents/:id/events', async (c) => {
+  const { id } = c.req.param();
+
+  return streamSSE(c, async (stream) => {
+    const sub = redis.duplicate();
+    await sub.subscribe(`agent:${id}:events`);
+
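+    sub.on('message', (_channel, message) => void stream.writeSSE({ data: message }));
+    // Keep this callback pending until the client disconnects; the SSE stream closes when it returns
+    await new Promise<void>((resolve) => stream.onAbort(() => { void sub.quit(); resolve(); }));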
+  });
+});
+```
+
+**Worker Process:**
+
+```typescript
+// worker/index.ts
+import { Worker } from 'bullmq';
+import { AgentPool, PostgresStore } from '@shareai-lab/kode-sdk';
+
+const store = new PostgresStore(pgConfig, './data');
+const pool = new AgentPool({ dependencies: { store, ... }, maxAgents: 50 });
+
+const worker = new Worker('agent-tasks', async (job) => {
+  const { agentId, message } = job.data;
+
+  // Acquire distributed lock
+  const release = await store.acquireAgentLock(agentId);
+
+  try {
+    let agent = pool.get(agentId);
+    if (!agent) {
+      const exists = await store.exists(agentId);
+      agent = exists
+        ? await pool.resume(agentId, getConfig(job.data))
+        : await pool.create(agentId, getConfig(job.data));
+    }
+
+    await agent.send(message);
+
+    // Stream events to Redis Pub/Sub
+    for await (const env of agent.subscribe(['progress'])) {
+      await redis.publish(`agent:${agentId}:events`, JSON.stringify(env.event));
+      if (env.event.type === 'done') break;
+    }
+  } finally {
+    await release();
+  }
+}, { connection: redis });
+
+// Periodic cleanup: hibernate idle agents
+setInterval(async () => {
+  for (const agentId of pool.list()) {
+    const agent = pool.get(agentId);
+    if (agent && agent.idleTime > 60_000) {
+      await agent.persistInfo();
+      pool.delete(agentId);
+    }
+  }
+}, 30_000);
+```
+
+### Pattern 4: Hybrid Serverless
+
+**Best for:** Serverless frontend + stateful backend
+
+```
+┌──────────────────────────────────────────────────────────────┐
+│                    Vercel / Cloudflare                       │
+│  ┌────────────────────────────────────────────────────────┐  │
+│  │  /api/chat       --> Validate, enqueue, return task ID │  │
+│  │  /api/status     --> Check task status from DB         │  │
+│  │  /api/stream     --> SSE from Redis Pub/Sub            │  │
+│  └──────────────────────────┬─────────────────────────────┘  │
+└─────────────────────────────┼────────────────────────────────┘
+                              │
+                     ┌────────▼────────┐
+                     │  Message Queue  │
+                     │  (Upstash Redis)│
+                     └────────┬────────┘
+                              │
+┌─────────────────────────────▼────────────────────────────────┐
+│                    Railway / Render / Fly.io                 │
+│  ┌────────────────────────────────────────────────────────┐  │
+│  │              Worker Pool (KODE SDK)                    │  │
+│  │              Long-running processes                    │  │
+│  └────────────────────────────────────────────────────────┘  │
+└──────────────────────────────────────────────────────────────┘
+```
+
+**Serverless API (Vercel):**
+
+```typescript
+// app/api/agent/[id]/route.ts
+export async function POST(req: Request, { params }: { params: { id: string } }) {
+  const { message } = await req.json();
+  const agentId = params.id;
+
+  // Enqueue for worker processing
+  await inngest.send({ name: 'agent/process', data: { agentId, message } });
+
+  return Response.json({ status: 'processing', agentId });
+}
+```
+
+**Inngest Worker Function:**
+
+```typescript
+// inngest/functions/agent-process.ts
+import { inngest } from '@/lib/inngest';
+import { Agent, PostgresStore } from '@shareai-lab/kode-sdk';
+
+export const agentProcess = inngest.createFunction(
+  { id: 'agent-process' },
+  { event: 'agent/process' },
+  async ({ event, step }) => {
+    const { agentId, message } = event.data;
+
+    const result = await step.run('process', async () => {
+      const store = new PostgresStore(pgConfig, '/tmp/data');
+      const deps = { store, templateRegistry, toolRegistry, sandboxFactory };
+      const exists = await store.exists(agentId);
+      const agent = exists
+        ? await Agent.resume(agentId, config, deps)
+        : await Agent.create({ ...config, agentId }, deps);
+
+      return agent.complete(message);
+    });
+
+    await step.run('notify', async () => {
+      await notifyUser(agentId, result);
+    });
+
+    return result;
+  }
+);
+```
+
+---
+
+## Scaling Strategies
+
+### Strategy 1: Vertical Scaling
+
+**Applicable:** Up to ~100 concurrent agents per process
+
+```typescript
+const pool = new AgentPool({
+  dependencies: { store: new SqliteStore('./agents.db', './data'), ... },
+  maxAgents: 100,  // Increase from default 50
+});
+```
+
+Optimizations:
+- Increase `maxAgents` in AgentPool
+- Use SqliteStore/PostgresStore (faster than JSONStore)
+- Add memory (agents are memory-bound)
+- Use SSD for persistence
+
+### Strategy 2: Agent Sharding
+
+**Applicable:** 100-1000 concurrent agents
+
+```
+                    agentId: "user-123-agent-456"
+                              |
+                              v
+                    hash(agentId) % N = worker_index
+                              |
+              +---------------+---------------+
+              |               |               |
+         Worker 0        Worker 1        Worker 2
+        (agents 0-33)   (agents 34-66)  (agents 67-99)
+```
+
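+Route each agent to a fixed worker by hashing its ID, as shown above. Plain modulo hashing remaps most agents whenever the worker count changes; prefer consistent hashing if workers are added or removed at runtime, so only a fraction of agents move.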
+
+### Strategy 3: LRU Scheduling
+
+**Applicable:** 1000+ total agents, limited active at once
+
+```typescript
+class AgentScheduler {
+  private active: LRUCache<string, Agent>;
+  private store: Store;
+
+  async get(agentId: string): Promise<Agent> {
+    if (this.active.has(agentId)) {
+      return this.active.get(agentId)!;
+    }
+
+    // Resume from storage
+    const agent = await Agent.resume(agentId, config, deps);
+    this.active.set(agentId, agent);  // LRU eviction handles hibernation
+
+    return agent;
+  }
+}
+```
+
+---
+
+## Capacity Planning
+
+| Deployment | Agents/Process | Memory/Agent | Concurrent Users |
+|------------|----------------|--------------|------------------|
+| CLI | 1 | 10-100 MB | 1 |
+| Desktop | 5-10 | 50-200 MB | 1 |
+| Single Server | 50 | 2-10 MB | 50-100 |
+| Worker Cluster (10 nodes) | 50 (500 total) | 2-10 MB | 500-1000 |
+| Worker Cluster (50 nodes) | 50 (2500 total) | 2-10 MB | 2500-5000 |
+
+**Memory Estimation per Agent:**
+- Base object: ~50 KB
+- Message history (100 messages): ~500 KB - 5 MB
+- Tool records: ~50-500 KB
+- Event timeline: ~100 KB - 1 MB
+- **Typical total: 1-10 MB**
+
+---
+
+## References
+
+- [Architecture Guide](./architecture.md)
+- [Database Guide](../guides/database.md)
+- [Error Handling](../guides/error-handling.md)
+- [Events Guide](../guides/events.md)
diff --git a/docs/en/examples/playbooks.md b/docs/en/examples/playbooks.md
new file mode 100644
index 0000000..6f46117
--- /dev/null
+++ b/docs/en/examples/playbooks.md
@@ -0,0 +1,676 @@
+# Playbooks: Common Scenario Scripts
+
+This page breaks down the most common usage scenarios from a practical perspective, providing mental maps, key APIs, example files, and considerations. Example code is in the `examples/` directory and can be run directly with `ts-node`.
+
+---
+
+## 1. Collaborative Inbox (Event-Driven UI)
+
+- **Goal**: Persistent single Agent, UI displays text/tool progress via Progress stream, Monitor for lightweight alerts.
+- **Example**: `examples/01-agent-inbox.ts`
+- **Run**: `npm run example:agent-inbox`
+- **Key Steps**:
+  1. `Agent.create` + `agent.subscribe(['progress'])` pushes text increments.
+  2. Use `bookmark` / `cursor` for checkpoint replay.
+  3. `agent.on('tool_executed')` / `agent.on('error')` writes governance events to logs or monitoring.
+  4. `agent.todoManager` for auto-reminders, UI can display Todo panel.
+- **Considerations**:
+  - Expose Progress stream to frontend via SSE/WebSocket.
+  - Enable `exposeThinking` in template metadata if UI needs thinking process.
+
+```typescript
+// Basic event subscription
+for await (const envelope of agent.subscribe(['progress'])) {
+  if (envelope.event.type === 'text_chunk') {
+    process.stdout.write(envelope.event.delta);
+  }
+  if (envelope.event.type === 'done') {
+    break;
+  }
+}
+```
+
+---
+
+## 2. Tool Approval & Governance
+
+- **Goal**: Approval for sensitive tools (e.g., `bash_run`, database writes); combine with Hooks for policy guards.
+- **Example**: `examples/02-approval-control.ts`
+- **Run**: `npm run example:approval`
+- **Key Steps**:
+  1. Configure `permission` in template (e.g., `mode: 'approval'` + `requireApprovalTools`).
+  2. Subscribe to `agent.on('permission_required')`, push approval tasks to business system.
+  3. Approval UI calls `agent.decide(id, 'allow' | 'deny', note)`.
+  4. Combine with `HookManager`'s `preToolUse` / `postToolUse` for finer-grained policies (path guards, result truncation).
+- **Considerations**:
+  - Agent is at `AWAITING_APPROVAL` breakpoint during approval; SDK auto-resumes after decision.
+  - Denying a tool automatically writes `tool_result`, UI can prompt retry strategies.
+
+```typescript
+// Permission configuration
+const template = {
+  id: 'secure-runner',
+  permission: {
+    mode: 'approval',
+    requireApprovalTools: ['bash_run'],
+  },
+  // Hook for additional guards
+  hooks: {
+    preToolUse(call) {
+      if (call.name === 'bash_run' && /rm -rf|sudo/.test(call.args.cmd)) {
+        return { decision: 'deny', reason: 'Command matches forbidden keywords' };
+      }
+    },
+  },
+};
+
+// Approval handling
+agent.on('permission_required', async (event) => {
+  const decision = await getApprovalFromAdmin(event.call);
+  await event.respond(decision, { note: 'Approved by admin' });
+});
+```
+
+---
+
+## 3. Multi-Agent Team Collaboration
+
+- **Goal**: One Planner coordinates multiple Specialists, all Agents persistent and forkable.
+- **Example**: `examples/03-room-collab.ts`
+- **Run**: `npm run example:room`
+- **Key Steps**:
+  1. Use singleton `AgentPool` to manage Agent lifecycle (`create` / `resume` / `fork`).
+  2. Use `Room` for broadcast/mention messages; messages use `[from:name]` pattern for collaboration.
+  3. Sub-Agents launched via `task_run` tool or explicit `pool.create`.
+  4. Use `agent.snapshot()` + `agent.fork()` to fork at Safe-Fork-Points.
+- **Considerations**:
+  - Template's `runtime.subagents` can limit dispatchable templates and depth.
+  - Persist lineage (SDK writes to metadata by default) for audit and replay.
+  - Disable `watchFiles` in template if not monitoring external files.
+
+```typescript
+const pool = new AgentPool({ dependencies: deps, maxAgents: 10 });
+const room = new Room(pool);
+
+const planner = await pool.create('agt-planner', { templateId: 'planner', ... });
+const dev = await pool.create('agt-dev', { templateId: 'executor', ... });
+
+room.join('planner', planner.agentId);
+room.join('dev', dev.agentId);
+
+// Broadcast to room
+await room.say('planner', 'Hi team, let us audit the repository. @dev please execute.');
+await room.say('dev', 'Acknowledged, working on it.');
+```
+
+---
+
+## 4. Scheduling & System Reminders
+
+- **Goal**: Agent executes periodic tasks, monitors file changes, sends system reminders during long-running operations.
+- **Example**: `examples/04-scheduler-watch.ts`
+- **Run**: `npm run example:scheduler`
+- **Key Steps**:
+  1. `const scheduler = agent.schedule(); scheduler.everySteps(N, callback)` registers step triggers.
+  2. Use `agent.remind(text, options)` for system-level reminders (via Monitor, doesn't pollute Progress).
+  3. FilePool monitors written files by default; combine `monitor.file_changed` with `scheduler.notifyExternalTrigger` to respond to changes automatically.
+  4. Todo with `remindIntervalSteps` for periodic reviews.
+- **Considerations**:
+  - Keep scheduled tasks idempotent, follow event-driven principles.
+  - For high-frequency tasks, combine with external Cron and call `scheduler.notifyExternalTrigger`.
+
+---
+
+## 5. Database Persistence
+
+- **Goal**: Persist Agent state to SQLite or PostgreSQL for production deployments.
+- **Example**: `examples/db-sqlite.ts`, `examples/db-postgres.ts`
+- **Key Steps**:
+  1. Use `createExtendedStore` factory function to create store.
+  2. Pass store to Agent dependencies.
+  3. Use Query APIs for session management and analytics.
+
+```typescript
+import { createExtendedStore, SqliteStore } from '@shareai-lab/kode-sdk';
+
+// Create SQLite store
+const store = createExtendedStore({
+  type: 'sqlite',
+  dbPath: './data/agents.db',
+  fileStoreBaseDir: './data/files',
+}) as SqliteStore;
+
+// Use with Agent
+const agent = await Agent.create(
+  { templateId: 'my-agent', ... },
+  { store, ... }
+);
+
+// Query APIs
+const sessions = await store.querySessions({ limit: 10 });
+const stats = await store.aggregateStats(agent.agentId);
+```
+
+---
+
+## 6. Combined: Approval + Collaboration + Scheduling
+
+- **Scenario**: Code review bot, Planner splits tasks and assigns to Specialists, tool operations need approval, scheduled reminders ensure SLA.
+- **Implementation**:
+  1. **Planner template**: Has `task_run` tool and scheduling hooks, auto-patrol each morning.
+  2. **Specialist template**: Focuses on `fs_*` + `todo_*` tools, approval only for `bash_run`.
+  3. **Unified approval service**: Listens to all Agent Control events, integrates with enterprise IM/approval workflow.
+  4. **Room collaboration**: Planner delivers tasks via `@executor`, executor reports back via `@planner`.
+  5. **SLA monitoring**: Monitor events feed into observability pipeline (Prometheus/ELK/Datadog).
+  6. **Scheduled reminders**: Use Scheduler to periodically check todos or external system signals.
+
+---
+
+## Quick API Reference
+
+| Category | API |
+|----------|-----|
+| Events | `agent.subscribe(['progress'])`, `agent.on('error', handler)`, `agent.on('tool_executed', handler)` |
+| Approval | `permission_required` → `event.respond()` / `agent.decide()` |
+| Multi-Agent | `new AgentPool({ dependencies, maxAgents })`, `const room = new Room(pool)` |
+| Fork | `const snapshot = await agent.snapshot(); const fork = await agent.fork(snapshot);` |
+| Scheduling | `agent.schedule().everySteps(10, ...)`, `scheduler.notifyExternalTrigger(...)` |
+| Todo | `agent.getTodos()` / `agent.setTodos()` / `todo_read` / `todo_write` |
+| Database | `createExtendedStore({ type: 'sqlite', ... })`, `store.querySessions()` |
+
+---
+
+## References
+
+- [Getting Started](../getting-started/quickstart.md)
+- [Events Guide](../guides/events.md)
+- [Multi-Agent Systems](../advanced/multi-agent.md)
+- [Database Guide](../guides/database.md)
+
+---
+
+## 7. CLI Agent Application
+
+Build command-line AI assistants like Claude Code or Cursor.
+
+### Minimal CLI Agent
+
+```typescript
+// cli-agent.ts
+import { Agent, AnthropicProvider, JSONStore, LocalSandbox } from '@shareai-lab/kode-sdk';
+import * as readline from 'readline';
+
+async function main() {
+  const store = new JSONStore('./.cli-agent');
+  const provider = new AnthropicProvider(process.env.ANTHROPIC_API_KEY!);
+  const sandbox = new LocalSandbox({ workDir: process.cwd() });
+
+  const agent = await Agent.create({
+    templateId: 'cli-assistant',
+    model: provider,
+    sandbox: { kind: 'local', workDir: process.cwd() },
+  }, {
+    store,
+    templateRegistry,
+    sandboxFactory,
+    toolRegistry,
+  });
+
+  // Stream output to terminal using subscribe
+  (async () => {
+    for await (const envelope of agent.subscribe(['progress'])) {
+      if (envelope.event.type === 'text_chunk') {
+        process.stdout.write(envelope.event.delta);
+      }
+      if (envelope.event.type === 'tool:start') {
+        console.log(`\n[Running: ${envelope.event.call.name}]`);
+      }
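+      // No `break` on 'done': that event ends a single reply, and this
+      // subscription has to stay open to stream replies to later messages
+      // entered in the interactive loop below.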
+    }
+  })();
+
+  // Interactive loop
+  const rl = readline.createInterface({
+    input: process.stdin,
+    output: process.stdout,
+  });
+
+  console.log('CLI Agent ready. Type your message (Ctrl+C to exit)\n');
+
+  const askQuestion = () => {
+    rl.question('You: ', async (input) => {
+      if (input.trim()) {
+        console.log('\nAssistant: ');
+        await agent.complete(input);  // complete() handles send + wait
+        console.log('\n');
+      }
+      askQuestion();
+    });
+  };
+
+  askQuestion();
+}
+
+main().catch(console.error);
+```
+
+### Production CLI with Session Management
+
+```typescript
+// production-cli.ts
+import { Agent, AgentPool, JSONStore } from '@shareai-lab/kode-sdk';
+import * as path from 'path';
+import * as os from 'os';
+import * as readline from 'readline';
+import { program } from 'commander';
+
+const DATA_DIR = path.join(os.homedir(), '.my-cli-agent');
+const store = new JSONStore(DATA_DIR);
+
+async function createDependencies() {
+  return {
+    store,
+    templateRegistry: /* ... */,
+    sandboxFactory: /* ... */,
+    toolRegistry: /* ... */,
+  };
+}
+
+async function main() {
+  program
+    .option('-s, --session <id>', 'Session ID to resume', 'default')
+    .option('-n, --new', 'Start new session (ignore existing)')
+    .option('-l, --list', 'List all sessions')
+    .parse();
+
+  const opts = program.opts();
+  const deps = await createDependencies();
+
+  // List sessions
+  if (opts.list) {
+    const sessions = await store.list();
+    console.log('Available sessions:');
+    sessions.forEach(s => console.log(`  - ${s}`));
+    return;
+  }
+
+  const pool = new AgentPool({ dependencies: deps, maxAgents: 5 });
+  const sessionId = opts.session;
+
+  // Resume or create agent
+  let agent: Agent;
+  const exists = await store.exists(sessionId);
+
+  if (exists && !opts.new) {
+    console.log(`Resuming session: ${sessionId}`);
+    agent = await pool.resume(sessionId, { templateId: 'cli-assistant' });
+  } else {
+    console.log(`Starting new session: ${sessionId}`);
+    agent = await pool.create(sessionId, { templateId: 'cli-assistant' });
+  }
+
+  // Event handlers: consume progress in the background so the interactive loop below can start
+  void (async () => {
+    for await (const envelope of agent.subscribe(['progress'])) {
+      switch (envelope.event.type) {
+        case 'text_chunk':
+          process.stdout.write(envelope.event.delta);
+          break;
+        case 'tool:start':
+          console.log(`\n[Tool: ${envelope.event.call.name}]`);
+          break;
+        case 'done': console.log('\n'); break;
+      }
+    }
+  })();
+
+  // Interactive loop with special commands
+  const rl = readline.createInterface({ input: process.stdin, output: process.stdout });
+
+  const processInput = async (input: string) => {
+    const trimmed = input.trim();
+
+    // Special commands
+    if (trimmed === '/exit' || trimmed === '/quit') {
+      console.log('Goodbye!');
+      process.exit(0);
+    }
+    if (trimmed === '/clear') {
+      // Fork to create fresh context
+      const snapshot = await agent.snapshot('clear-point');
+      agent = await agent.fork(snapshot);  // snapshot is already a SnapshotId
+      console.log('Context cleared.');
+      return;
+    }
+    if (trimmed === '/status') {
+      const status = agent.status();
+      console.log(`Session: ${status.agentId}`);
+      console.log(`Steps: ${status.stepCount}`);
+      console.log(`State: ${status.state}`);
+      return;
+    }
+
+    // Normal message
+    if (trimmed) {
+      console.log('\nAssistant: ');
+      await agent.complete(trimmed);
+    }
+  };
+
+  console.log('Ready. Commands: /exit, /clear, /status\n');
+
+  rl.on('line', async (line) => {
+    await processInput(line);
+    rl.prompt();
+  });
+
+  rl.prompt();
+}
+
+main().catch(console.error);
+```
+
+---
+
+## 8. Desktop App (Electron)
+
+Build desktop AI applications with Electron or Tauri.
+
+### Architecture Overview
+
+```
+┌────────────────────────────────────────────┐
+│              Electron App                  │
+│  ┌──────────────────────────────────────┐  │
+│  │           Renderer Process           │  │
+│  │  ┌──────────────────────────────┐    │  │
+│  │  │         React UI             │    │  │
+│  │  │  - Chat interface            │    │  │
+│  │  │  - Tool output display       │    │  │
+│  │  │  - Settings panel            │    │  │
+│  │  └──────────────┬───────────────┘    │  │
+│  └─────────────────┼────────────────────┘  │
+│                    │ IPC                    │
+│  ┌─────────────────▼────────────────────┐  │
+│  │            Main Process              │  │
+│  │  ┌──────────────────────────────┐    │  │
+│  │  │         AgentPool            │    │  │
+│  │  │  - Agent lifecycle           │    │  │
+│  │  │  - Event distribution        │    │  │
+│  │  │  - Store management          │    │  │
+│  │  └──────────────────────────────┘    │  │
+│  │  ┌──────────────────────────────┐    │  │
+│  │  │         JSONStore            │    │  │
+│  │  └──────────────┬───────────────┘    │  │
+│  └─────────────────┼────────────────────┘  │
+└────────────────────┼────────────────────────┘
+                     │
+              ┌──────▼──────┐
+              │  userData   │
+              │   folder    │
+              └─────────────┘
+```
+
+### Main Process Setup
+
+```typescript
+// main.ts
+import { app, ipcMain, BrowserWindow } from 'electron';
+import { AgentPool, JSONStore, Agent } from '@shareai-lab/kode-sdk';
+import * as path from 'path';
+
+let mainWindow: BrowserWindow;
+let pool: AgentPool;
+let store: JSONStore;
+
+async function initializeAgent() {
+  store = new JSONStore(path.join(app.getPath('userData'), 'agents'));
+
+  pool = new AgentPool({
+    dependencies: {
+      store,
+      templateRegistry: /* ... */,
+      sandboxFactory: /* ... */,
+      toolRegistry: /* ... */,
+    },
+    maxAgents: 10,
+  });
+}
+
+// IPC: Send message to agent
+ipcMain.handle('agent:send', async (event, { agentId, message }) => {
+  let agent = pool.get(agentId);
+
+  if (!agent) {
+    const exists = await store.exists(agentId);
+    agent = exists
+      ? await pool.resume(agentId, { templateId: 'desktop-assistant' })
+      : await pool.create(agentId, { templateId: 'desktop-assistant' });
+  }
+
+  return agent.complete(message);  // complete() handles send + wait
+});
+
+// IPC: Subscribe to events (streaming)
+ipcMain.on('agent:subscribe', (event, { agentId }) => {
+  const agent = pool.get(agentId);
+  if (!agent) return;
+
+  // Stream events to renderer
+  (async () => {
+    for await (const env of agent.subscribe(['progress'])) {
+      if (mainWindow && !mainWindow.isDestroyed()) {
+        mainWindow.webContents.send(`agent:event:${agentId}`, env.event);
+      }
+      if (env.event.type === 'done') break;
+    }
+  })();
+});
+
+// IPC: Create new agent
+ipcMain.handle('agent:create', async (event, { agentId, templateId }) => {
+  const agent = await pool.create(agentId, { templateId });
+  return { agentId: agent.agentId, status: 'created' };
+});
+
+// IPC: List agents
+ipcMain.handle('agent:list', async () => {
+  return store.list();
+});
+
+// IPC: Delete agent
+ipcMain.handle('agent:delete', async (event, { agentId }) => {
+  await pool.delete(agentId);  // pool.delete also removes from store
+  return { success: true };
+});
+
+// IPC: Handle permission requests
+ipcMain.on('agent:permission-subscribe', (event, { agentId }) => {
+  const agent = pool.get(agentId);
+  if (!agent) return;
+
+  agent.on('permission_required', async (permEvent) => {
+    mainWindow.webContents.send(`agent:permission:${agentId}`, {
+      callId: permEvent.call.id,
+      toolName: permEvent.call.name,
+      input: permEvent.call.inputPreview,
+    });
+  });
+});
+
+ipcMain.handle('agent:permission-respond', async (event, { agentId, callId, decision, note }) => {
+  const agent = pool.get(agentId);
+  if (!agent) return { error: 'Agent not found' };
+
+  await agent.decide(callId, decision, note);
+  return { success: true };
+});
+
+app.whenReady().then(async () => {
+  await initializeAgent();
+
+  mainWindow = new BrowserWindow({
+    width: 1200,
+    height: 800,
+    webPreferences: {
+      preload: path.join(__dirname, 'preload.js'),
+      contextIsolation: true,
+    },
+  });
+
+  mainWindow.loadFile('index.html');
+});
+
+// Graceful shutdown
+app.on('before-quit', async () => {
+  for (const agentId of pool.list()) {
+    const agent = pool.get(agentId);
+    if (agent) await agent.interrupt();
+  }
+});
+```
+
+### Preload Script
+
+```typescript
+// preload.ts
+import { contextBridge, ipcRenderer } from 'electron';
+
+contextBridge.exposeInMainWorld('agent', {
+  send: (agentId: string, message: string) =>
+    ipcRenderer.invoke('agent:send', { agentId, message }),
+
+  create: (agentId: string, templateId: string) =>
+    ipcRenderer.invoke('agent:create', { agentId, templateId }),
+
+  list: () => ipcRenderer.invoke('agent:list'),
+
+  delete: (agentId: string) =>
+    ipcRenderer.invoke('agent:delete', { agentId }),
+
+  subscribe: (agentId: string, callback: (event: any) => void) => {
+    ipcRenderer.send('agent:subscribe', { agentId });
+    ipcRenderer.on(`agent:event:${agentId}`, (_, event) => callback(event));
+  },
+
+  subscribePermission: (agentId: string, callback: (req: any) => void) => {
+    ipcRenderer.send('agent:permission-subscribe', { agentId });
+    ipcRenderer.on(`agent:permission:${agentId}`, (_, req) => callback(req));
+  },
+
+  respondPermission: (agentId: string, callId: string, decision: 'allow' | 'deny', note?: string) =>
+    ipcRenderer.invoke('agent:permission-respond', { agentId, callId, decision, note }),
+});
+```
+
+### Renderer (React)
+
+```tsx
+// App.tsx
+import React, { useState, useEffect, useRef } from 'react';
+
+declare global {
+  interface Window {
+    agent: {
+      send: (agentId: string, message: string) => Promise<any>;
+      create: (agentId: string, templateId: string) => Promise<any>;
+      list: () => Promise<string[]>;
+      subscribe: (agentId: string, callback: (event: any) => void) => void;
+      subscribePermission: (agentId: string, callback: (req: any) => void) => void;
+      respondPermission: (agentId: string, callId: string, decision: 'allow' | 'deny', note?: string) => Promise<any>;
+    };
+  }
+}
+
+function App() {
+  const [agentId] = useState('main-agent');
+  const [messages, setMessages] = useState<{ role: string; content: string }[]>([]);
+  const [input, setInput] = useState('');
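+  const [streaming, setStreaming] = useState('');
+  const streamingRef = useRef('');  // latest streamed text; readable from the long-lived subscription callback
+  const [pendingApproval, setPendingApproval] = useState<any>(null);
+
+  useEffect(() => {
+    // Subscribe to agent events
+    window.agent.subscribe(agentId, (event) => {
+      if (event.type === 'text_chunk') {
+        setStreaming((streamingRef.current += event.delta));
+      } else if (event.type === 'done') {
+        const content = streamingRef.current;  // the `streaming` state is stale inside this closure
+        streamingRef.current = '';
+        setMessages(prev => [...prev, { role: 'assistant', content }]);
+        setStreaming('');
+      }
+    });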
+
+    // Subscribe to permission requests
+    window.agent.subscribePermission(agentId, (req) => {
+      setPendingApproval(req);
+    });
+  }, [agentId]);
+
+  const handleSend = async () => {
+    if (!input.trim()) return;
+
+    setMessages(prev => [...prev, { role: 'user', content: input }]);
+    setInput('');
+
+    await window.agent.send(agentId, input);
+  };
+
+  const handleApproval = async (decision: 'allow' | 'deny') => {
+    if (!pendingApproval) return;
+    await window.agent.respondPermission(agentId, pendingApproval.callId, decision);
+    setPendingApproval(null);
+  };
+
+  return (
+    <div className="app">
+      <div className="messages">
+        {messages.map((msg, i) => (
+          <div key={i} className={`message ${msg.role}`}>
+            {msg.content}
+          </div>
+        ))}
+        {streaming && <div className="message assistant streaming">{streaming}</div>}
+      </div>
+
+      {pendingApproval && (
+        <div className="approval-dialog">
+          <p>Tool requires approval: {pendingApproval.toolName}</p>
+          <pre>{JSON.stringify(pendingApproval.input, null, 2)}</pre>
+          <button onClick={() => handleApproval('allow')}>Allow</button>
+          <button onClick={() => handleApproval('deny')}>Deny</button>
+        </div>
+      )}
+
+      <div className="input-area">
+        <input
+          value={input}
+          onChange={(e) => setInput(e.target.value)}
+          onKeyDown={(e) => e.key === 'Enter' && handleSend()}
+          placeholder="Type a message..."
+        />
+        <button onClick={handleSend}>Send</button>
+      </div>
+    </div>
+  );
+}
+
+export default App;
+```
+
+### Best Practices for Desktop Apps
+
+1. **Run KODE SDK in Main Process** - Renderer should only handle UI
+2. **Use IPC for Communication** - Never expose Node.js APIs directly to renderer
+3. **Graceful Shutdown** - Interrupt agents before app quit
+4. **Store in userData** - Use `app.getPath('userData')` for persistence
+5. **Stream Events** - Don't batch events, stream them for responsive UI
+6. **Handle Permissions** - Show approval dialogs for sensitive tools
+
+---
+
+*See also: [Production Deployment](../advanced/production.md) | [Architecture Guide](../advanced/architecture.md)*
diff --git a/docs/en/getting-started/concepts.md b/docs/en/getting-started/concepts.md
new file mode 100644
index 0000000..252bdc0
--- /dev/null
+++ b/docs/en/getting-started/concepts.md
@@ -0,0 +1,288 @@
+# Core Concepts
+
+## What is KODE SDK?
+
+KODE SDK is an **Agent Runtime Kernel** — it manages the complete lifecycle of AI agents including state persistence, crash recovery, and tool execution.
+
+Think of it like **V8 for JavaScript**, but for AI agents:
+
+```
++------------------+     +------------------+
+|       V8         |     |    KODE SDK      |
+|  JS Runtime      |     |  Agent Runtime   |
++------------------+     +------------------+
+        |                        |
+        v                        v
++------------------+     +------------------+
+|    Express.js    |     |   Your App       |
+|  Web Framework   |     | (CLI/Desktop/Web)|
++------------------+     +------------------+
+```
+
+**KODE SDK provides:**
+- Agent lifecycle management (create, run, pause, resume, fork)
+- State persistence with crash recovery (WAL-protected)
+- Tool execution with permission governance
+- Three-channel event system for observability
+
+**KODE SDK does NOT provide:**
+- HTTP routing or API framework
+- User authentication or authorization
+- Multi-tenancy or resource isolation
+- Horizontal scaling (you architect that layer)
+
+> For a deep dive into the architecture, see the [Architecture Guide](../advanced/architecture.md).
+
+---
+
+## Agent
+
+The central entity that manages conversations with LLM models.
+
+```typescript
+// Setup dependencies
+const templates = new AgentTemplateRegistry();
+templates.register({
+  id: 'assistant',
+  systemPrompt: 'You are a helpful assistant.',
+  tools: ['fs_read', 'fs_write'],  // Optional: tool names
+});
+
+// Create agent
+const agent = await Agent.create(
+  { templateId: 'assistant' },
+  { store, templateRegistry: templates, toolRegistry: tools, sandboxFactory, modelFactory }
+);
+```
+
+Key capabilities:
+- **Send messages**: `agent.send('...')` or `agent.send(contentBlocks)`
+- **Subscribe to events**: `agent.subscribe(['progress'])` or `agent.on('event_type', callback)`
+- **Resume from store**: `Agent.resume(agentId, config, deps)` or `Agent.resumeFromStore(agentId, deps)`
+- **Fork conversation**: `agent.fork()`
+
+## Three-Channel Event System
+
+KODE SDK separates events into three channels for clean architecture:
+
+### Progress Channel
+
+Real-time streaming data for UI display. Use `subscribe()`:
+
+```typescript
+for await (const envelope of agent.subscribe(['progress'])) {
+  switch (envelope.event.type) {
+    case 'text_chunk':      // Text chunk from model
+      process.stdout.write(envelope.event.delta);
+      break;
+    case 'tool:start':      // Tool execution started
+    case 'tool:end':        // Tool execution completed
+    case 'done':            // Response complete
+  }
+}
+```
+
+### Control Channel
+
+Approval requests that need a human or system decision. Use `on()`:
+
+```typescript
+agent.on('permission_required', async (event) => {
+  // Approve or reject tool execution
+  await event.respond('allow');  // or event.respond('deny', { note: 'reason' })
+});
+```
+
+### Monitor Channel
+
+Audit and observability events. Use `on()`:
+
+```typescript
+agent.on('tool_executed', (event) => {
+  console.log('Tool:', event.call.name, 'Duration:', event.call.durationMs);
+});
+
+agent.on('token_usage', (event) => {
+  console.log('Tokens:', event.totalTokens);
+});
+
+agent.on('error', (event) => {
+  console.error('Error:', event.message);
+});
+```
+
+## Tools
+
+Tools extend Agent capabilities. KODE provides built-in tools and supports custom tools.
+
+### Built-in Tools
+
+| Category | Tools |
+|----------|-------|
+| File System | `fs_read`, `fs_write`, `fs_edit`, `fs_glob`, `fs_grep` |
+| Shell | `bash_run`, `bash_logs`, `bash_kill` |
+| Task Management | `todo_read`, `todo_write` |
+
+### Custom Tools
+
+```typescript
+import { defineTool } from '@shareai-lab/kode-sdk';
+
+const weatherTool = defineTool({
+  name: 'get_weather',
+  description: 'Get weather for a city',
+  params: {
+    city: { type: 'string', description: 'City name' }
+  },
+  attributes: { readonly: true },
+  async exec(args, ctx) {
+    return { temp: 22, condition: 'sunny' };
+  }
+});
+```
+
+## Store
+
+Persistence backend for Agent state.
+
+| Store Type | Use Case |
+|------------|----------|
+| `JSONStore` | Development, single instance |
+| `SqliteStore` | Production, single machine |
+| `PostgresStore` | Production, multi-instance |
+
+```typescript
+// JSONStore (default)
+const store = new JSONStore('./.kode');
+
+// SQLite
+const store = new SqliteStore('./agents.db', './data');
+
+// PostgreSQL
+const store = new PostgresStore(connectionConfig, './data');
+
+// Factory function
+const store = createExtendedStore({
+  type: 'sqlite',
+  dbPath: './agents.db',
+  fileStoreBaseDir: './data'
+});
+```
+
+## Sandbox
+
+Isolated execution environment for tools.
+
+```typescript
+const agent = await Agent.create(
+  {
+    templateId: 'assistant',
+    sandbox: {
+      kind: 'local',
+      workDir: './workspace',
+      enforceBoundary: true,  // Restrict file access to workDir
+    }
+  },
+  deps
+);
+```
+
+## Provider
+
+Model provider adapters. KODE uses Anthropic-style messages internally.
+
+```typescript
+// Anthropic
+const provider = new AnthropicProvider(apiKey, modelId);
+
+// OpenAI
+const provider = new OpenAIProvider(apiKey, modelId);
+
+// Gemini
+const provider = new GeminiProvider(apiKey, modelId);
+```
+
+## Resume & Fork
+
+### Resume
+
+Recover from crash or continue later:
+
+```typescript
+// Resume existing agent
+const agent = await Agent.resume(agentId, config, deps);
+
+// Resume or create new
+const exists = await store.exists(agentId);
+const agent = exists
+  ? await Agent.resume(agentId, config, deps)
+  : await Agent.create(config, deps);
+```
+
+### Fork
+
+Branch conversation at a checkpoint:
+
+```typescript
+// Create snapshot
+const snapshotId = await agent.snapshot('before-risky-operation');
+
+// Fork from snapshot
+const forkedAgent = await agent.fork(snapshotId);
+
+// Each agent continues independently
+await forkedAgent.send('Try alternative approach');
+```
+
+## Multimodal Content
+
+KODE SDK supports multimodal input including images, PDF files, and audio:
+
+```typescript
+import { ContentBlock } from '@shareai-lab/kode-sdk';
+
+// Send image with text
+const content: ContentBlock[] = [
+  { type: 'text', text: 'What is in this image?' },
+  { type: 'image', base64: imageBase64, mime_type: 'image/png' }
+];
+
+await agent.send(content);
+```
+
+Configure multimodal behavior:
+
+```typescript
+const agent = await Agent.create({
+  templateId: 'vision-assistant',
+  multimodalContinuation: 'history',      // Keep multimodal in history
+  multimodalRetention: { keepRecent: 3 }, // Keep recent 3 multimodal messages
+}, deps);
+```
+
+## Extended Thinking
+
+Enable models to "think" through complex problems with extended thinking:
+
+```typescript
+const agent = await Agent.create({
+  templateId: 'reasoning-assistant',
+  exposeThinking: true,   // Emit thinking events to Progress channel
+  retainThinking: true,   // Persist thinking in message history
+}, deps);
+
+// Listen for thinking events
+for await (const envelope of agent.subscribe(['progress'])) {
+  if (envelope.event.type === 'think_chunk') {
+    console.log('[Thinking]', envelope.event.delta);
+  }
+}
+```
+
+## Next Steps
+
+- [Events Guide](../guides/events.md) - Deep dive into event system
+- [Tools Guide](../guides/tools.md) - Built-in and custom tools
+- [Database Guide](../guides/database.md) - Persistence options
+- [Multimodal Guide](../guides/multimodal.md) - Images, PDFs, and audio
+- [Thinking Guide](../guides/thinking.md) - Extended thinking and reasoning
diff --git a/docs/en/getting-started/installation.md b/docs/en/getting-started/installation.md
new file mode 100644
index 0000000..5e706f1
--- /dev/null
+++ b/docs/en/getting-started/installation.md
@@ -0,0 +1,111 @@
+# Installation
+
+## Requirements
+
+- **Node.js**: >= 18.0.0
+- **npm** or **pnpm** or **yarn**
+
+## Install
+
+```bash
+npm install @shareai-lab/kode-sdk
+```
+
+Or with pnpm/yarn:
+
+```bash
+pnpm add @shareai-lab/kode-sdk
+yarn add @shareai-lab/kode-sdk
+```
+
+## Environment Variables
+
+KODE SDK uses environment variables for API keys and model configuration.
+
+### Anthropic (Default)
+
+<!-- tabs:start -->
+#### **Linux / macOS**
+```bash
+export ANTHROPIC_API_KEY=sk-ant-...
+export ANTHROPIC_MODEL_ID=claude-sonnet-4-20250514  # optional
+export ANTHROPIC_BASE_URL=https://api.anthropic.com  # optional
+```
+
+#### **Windows (PowerShell)**
+```powershell
+$env:ANTHROPIC_API_KEY="sk-ant-..."
+$env:ANTHROPIC_MODEL_ID="claude-sonnet-4-20250514"  # optional
+$env:ANTHROPIC_BASE_URL="https://api.anthropic.com"  # optional
+```
+
+#### **Windows (CMD)**
+```cmd
+set ANTHROPIC_API_KEY=sk-ant-...
+set ANTHROPIC_MODEL_ID=claude-sonnet-4-20250514
+```
+<!-- tabs:end -->
+
+### OpenAI
+
+<!-- tabs:start -->
+#### **Linux / macOS**
+```bash
+export OPENAI_API_KEY=sk-...
+export OPENAI_MODEL_ID=gpt-4o  # optional
+```
+
+#### **Windows (PowerShell)**
+```powershell
+$env:OPENAI_API_KEY="sk-..."
+$env:OPENAI_MODEL_ID="gpt-4o"  # optional
+```
+<!-- tabs:end -->
+
+### Google Gemini
+
+<!-- tabs:start -->
+#### **Linux / macOS**
+```bash
+export GOOGLE_API_KEY=...
+export GEMINI_MODEL_ID=gemini-2.0-flash  # optional
+```
+
+#### **Windows (PowerShell)**
+```powershell
+$env:GOOGLE_API_KEY="..."
+$env:GEMINI_MODEL_ID="gemini-2.0-flash"  # optional
+```
+<!-- tabs:end -->
+
+## Using .env File
+
+Create a `.env` file in your project root:
+
+```bash
+# .env
+ANTHROPIC_API_KEY=sk-ant-...
+ANTHROPIC_MODEL_ID=claude-sonnet-4-20250514
+```
+
+Load it in your code:
+
+```typescript
+import 'dotenv/config';
+// or
+import { config } from 'dotenv';
+config();
+```
+
+## Verify Installation
+
+```typescript
+import { Agent, AnthropicProvider, JSONStore } from '@shareai-lab/kode-sdk';
+
+console.log('KODE SDK installed successfully!');
+```
+
+## Next Steps
+
+- [Quickstart](./quickstart.md) - Build your first Agent
+- [Concepts](./concepts.md) - Understand core concepts
diff --git a/docs/en/getting-started/quickstart.md b/docs/en/getting-started/quickstart.md
new file mode 100644
index 0000000..0466815
--- /dev/null
+++ b/docs/en/getting-started/quickstart.md
@@ -0,0 +1,198 @@
+# Quickstart
+
+Build your first Agent in 5 minutes.
+
+## Prerequisites
+
+- Completed [Installation](./installation.md)
+- Set the `ANTHROPIC_API_KEY` environment variable
+
+## Step 1: Setup Dependencies
+
+KODE SDK uses a dependency injection pattern. First, create the required dependencies:
+
+```typescript
+import {
+  Agent,
+  AnthropicProvider,
+  JSONStore,
+  AgentTemplateRegistry,
+  ToolRegistry,
+  SandboxFactory,
+} from '@shareai-lab/kode-sdk';
+
+// Create dependencies
+const store = new JSONStore('./.kode');
+const templates = new AgentTemplateRegistry();
+const tools = new ToolRegistry();
+const sandboxFactory = new SandboxFactory();
+
+// Create provider
+const provider = new AnthropicProvider(
+  process.env.ANTHROPIC_API_KEY!,
+  process.env.ANTHROPIC_MODEL_ID  // optional, uses default if not set
+);
+
+// Register a template
+templates.register({
+  id: 'assistant',
+  systemPrompt: 'You are a helpful assistant.',
+});
+```
+
+## Step 2: Create an Agent
+
+```typescript
+const agent = await Agent.create(
+  { templateId: 'assistant' },
+  {
+    store,
+    templateRegistry: templates,
+    toolRegistry: tools,
+    sandboxFactory,
+    modelFactory: () => provider,
+  }
+);
+```
+
+## Step 3: Subscribe to Events
+
+```typescript
+// Subscribe to progress events (text streaming) using subscribe()
+for await (const envelope of agent.subscribe(['progress'])) {
+  switch (envelope.event.type) {
+    case 'text_chunk':
+      process.stdout.write(envelope.event.delta);
+      break;
+    case 'done':
+      console.log('\n--- Message complete ---');
+      break;
+  }
+  if (envelope.event.type === 'done') break;
+}
+
+// Subscribe to control events using on()
+agent.on('permission_required', async (event) => {
+  console.log(`Tool ${event.call.name} needs approval`);
+  // Auto-approve for demo
+  await event.respond('allow');
+});
+```
+
+## Step 4: Send a Message
+
+```typescript
+await agent.send('Hello! What can you help me with?');
+```
+
+## Complete Example
+
+```typescript
+// getting-started.ts
+import 'dotenv/config';
+import {
+  Agent,
+  AnthropicProvider,
+  JSONStore,
+  AgentTemplateRegistry,
+  ToolRegistry,
+  SandboxFactory,
+} from '@shareai-lab/kode-sdk';
+
+async function main() {
+  const provider = new AnthropicProvider(
+    process.env.ANTHROPIC_API_KEY!,
+    process.env.ANTHROPIC_MODEL_ID
+  );
+
+  // Setup dependencies
+  const store = new JSONStore('./.kode');
+  const templates = new AgentTemplateRegistry();
+  const tools = new ToolRegistry();
+  const sandboxFactory = new SandboxFactory();
+
+  templates.register({
+    id: 'assistant',
+    systemPrompt: 'You are a helpful assistant.',
+  });
+
+  const agent = await Agent.create(
+    { templateId: 'assistant' },
+    { store, templateRegistry: templates, toolRegistry: tools, sandboxFactory, modelFactory: () => provider }
+  );
+
+  // Subscribe to progress using async iterator
+  const progressTask = (async () => {
+    for await (const envelope of agent.subscribe(['progress'])) {
+      if (envelope.event.type === 'text_chunk') {
+        process.stdout.write(envelope.event.delta);
+      }
+      if (envelope.event.type === 'done') break;
+    }
+  })();
+
+  await agent.send('Hello!');
+  await progressTask;
+  console.log('\n');
+}
+
+main().catch(console.error);
+```
+
+Run it:
+
+```bash
+npx ts-node getting-started.ts
+```
+
+## Using Built-in Tools
+
+Add file system, bash, and todo tools by registering them:
+
+```typescript
+import {
+  Agent,
+  AnthropicProvider,
+  JSONStore,
+  AgentTemplateRegistry,
+  ToolRegistry,
+  SandboxFactory,
+  builtin,
+} from '@shareai-lab/kode-sdk';
+
+const store = new JSONStore('./.kode');
+const templates = new AgentTemplateRegistry();
+const tools = new ToolRegistry();
+const sandboxFactory = new SandboxFactory();
+
+// Register built-in tools
+for (const tool of builtin.fs()) {
+  tools.register(tool.name, () => tool);
+}
+for (const tool of builtin.bash()) {
+  tools.register(tool.name, () => tool);
+}
+for (const tool of builtin.todo()) {
+  tools.register(tool.name, () => tool);
+}
+
+// Register template with tool names
+templates.register({
+  id: 'coding-assistant',
+  systemPrompt: 'You are a coding assistant.',
+  tools: ['fs_read', 'fs_write', 'fs_edit', 'fs_glob', 'fs_grep', 'bash_run', 'todo_read', 'todo_write'],
+});
+
+const provider = new AnthropicProvider(process.env.ANTHROPIC_API_KEY!);
+
+const agent = await Agent.create(
+  { templateId: 'coding-assistant' },
+  { store, templateRegistry: templates, toolRegistry: tools, sandboxFactory, modelFactory: () => provider }
+);
+```
+
+## Next Steps
+
+- [Concepts](./concepts.md) - Understand Agent, Events, Tools
+- [Events Guide](../guides/events.md) - Master the three-channel system
+- [Tools Guide](../guides/tools.md) - Learn about built-in and custom tools
diff --git a/docs/en/guides/database.md b/docs/en/guides/database.md
new file mode 100644
index 0000000..cc87e6d
--- /dev/null
+++ b/docs/en/guides/database.md
@@ -0,0 +1,781 @@
+# Database Persistence Guide
+
+KODE SDK supports SQLite and PostgreSQL as persistence backends, providing high-performance querying, aggregation, and analysis capabilities.
+
+---
+
+## Supported Backends
+
+| Backend | Use Case | Features |
+|---------|----------|----------|
+| SQLite | Development, Single Instance | Zero config, file-based |
+| PostgreSQL | Production, Multi-Instance | Concurrent writes, JSONB queries |
+
+---
+
+## Environment Variables
+
+<!-- tabs:start -->
+#### **Linux / macOS**
+```bash
+# SQLite
+export KODE_STORE_TYPE=sqlite
+export KODE_SQLITE_PATH=./data/agents.db
+export KODE_STORE_PATH=./data/store
+
+# PostgreSQL
+export KODE_STORE_TYPE=postgres
+export POSTGRES_HOST=localhost
+export POSTGRES_PORT=5432
+export POSTGRES_DB=kode_agents
+export POSTGRES_USER=kode
+export POSTGRES_PASSWORD=your_password
+```
+
+#### **Windows (PowerShell)**
+```powershell
+# SQLite
+$env:KODE_STORE_TYPE="sqlite"
+$env:KODE_SQLITE_PATH="./data/agents.db"
+$env:KODE_STORE_PATH="./data/store"
+
+# PostgreSQL
+$env:KODE_STORE_TYPE="postgres"
+$env:POSTGRES_HOST="localhost"
+$env:POSTGRES_PORT="5432"
+$env:POSTGRES_DB="kode_agents"
+$env:POSTGRES_USER="kode"
+$env:POSTGRES_PASSWORD="your_password"
+```
+
+#### **Windows (CMD)**
+```cmd
+set KODE_STORE_TYPE=sqlite
+set KODE_SQLITE_PATH=./data/agents.db
+set KODE_STORE_PATH=./data/store
+```
+<!-- tabs:end -->
+
+---
+
+## Quick Start
+
+### Using Factory Function (Recommended)
+
+```typescript
+import { createExtendedStore } from '@shareai-lab/kode-sdk';
+
+// Auto-selects backend based on KODE_STORE_TYPE
+const store = await createExtendedStore();
+
+// Or specify backend explicitly
+const sqliteStore = await createExtendedStore({
+  type: 'sqlite',
+  dbPath: './data/agents.db',
+  fileStoreBaseDir: './data/store',
+});
+
+const postgresStore = await createExtendedStore({
+  type: 'postgres',
+  connection: {
+    host: process.env.POSTGRES_HOST ?? 'localhost',
+    port: parseInt(process.env.POSTGRES_PORT ?? '5432'),
+    database: process.env.POSTGRES_DB ?? 'kode_agents',
+    user: process.env.POSTGRES_USER ?? 'kode',
+    password: process.env.POSTGRES_PASSWORD!,
+  },
+  fileStoreBaseDir: './data/store',
+});
+```
+
+### Direct Class Usage
+
+```typescript
+import { SqliteStore, PostgresStore } from '@shareai-lab/kode-sdk';
+
+// SQLite
+const sqliteStore = new SqliteStore('./data/agents.db', './data/store');
+
+// PostgreSQL
+const postgresStore = new PostgresStore(
+  {
+    host: 'localhost',
+    port: 5432,
+    database: 'kode_agents',
+    user: 'kode',
+    password: 'password',
+  },
+  './data/store'
+);
+```
+
+### Using with Agent
+
+```typescript
+import { Agent, createExtendedStore } from '@shareai-lab/kode-sdk';
+
+const store = await createExtendedStore();
+
+const agent = await Agent.create({
+  provider,
+  store,
+  template: {
+    id: 'assistant',
+    systemPrompt: 'You are a helpful assistant.',
+    tools: [],
+  },
+});
+
+await agent.send('Hello!');
+
+// Close database when done
+await store.close();
+```
+
+---
+
+## Query APIs
+
+### Query Sessions: `querySessions()`
+
+Query Agent session list with filtering and pagination.
+
+```typescript
+interface SessionQueryFilter {
+  templateId?: string;      // Filter by template ID
+  createdAfter?: Date;      // Created after date
+  createdBefore?: Date;     // Created before date
+  limit?: number;           // Max results (default: 100)
+  offset?: number;          // Pagination offset (default: 0)
+}
+
+const sessions = await store.querySessions({
+  templateId: 'chat-assistant',
+  createdAfter: new Date('2025-01-01'),
+  limit: 20,
+});
+
+sessions.forEach(session => {
+  console.log({
+    agentId: session.agentId,
+    templateId: session.templateId,
+    createdAt: session.createdAt,
+    messageCount: session.messageCount,
+  });
+});
+```
+
+### Query Messages: `queryMessages()`
+
+Query message records with filtering by role and content type.
+
+```typescript
+interface MessageQueryFilter {
+  agentId?: string;
+  role?: 'user' | 'assistant';
+  contentType?: 'text' | 'tool_use' | 'tool_result';
+  createdAfter?: Date;
+  createdBefore?: Date;
+  limit?: number;
+  offset?: number;
+}
+
+const messages = await store.queryMessages({
+  agentId: 'agt-abc123',
+  role: 'assistant',
+  contentType: 'tool_use',
+  limit: 50,
+});
+```
+
+### Query Tool Calls: `queryToolCalls()`
+
+Query tool call records with filtering by tool name and error status.
+
+```typescript
+interface ToolCallQueryFilter {
+  agentId?: string;
+  toolName?: string;        // Filter by tool name
+  isError?: boolean;        // Filter by error status
+  hasApproval?: boolean;    // Filter by approval status
+  createdAfter?: Date;
+  createdBefore?: Date;
+  limit?: number;
+  offset?: number;
+}
+
+const toolCalls = await store.queryToolCalls({
+  toolName: 'bash_run',
+  isError: true,
+  limit: 10,
+});
+
+toolCalls.forEach(call => {
+  console.log({
+    toolCallId: call.toolCallId,
+    toolName: call.toolName,
+    input: call.input,
+    output: call.output,
+    isError: call.isError,
+    approval: call.approval,
+  });
+});
+```
+
+### Aggregate Stats: `aggregateStats()`
+
+Aggregate statistics for an Agent including message counts and tool call metrics.
+
+```typescript
+const stats = await store.aggregateStats('agt-abc123');
+
+console.log({
+  totalMessages: stats.totalMessages,
+  totalToolCalls: stats.totalToolCalls,
+  totalSnapshots: stats.totalSnapshots,
+  toolCallsByState: stats.toolCallsByState,  // { completed: 10, failed: 2, ... }
+});
+
+// Calculate success rate using toolCallsByState
+if (stats.toolCallsByState) {
+  const completed = stats.toolCallsByState['completed'] || 0;
+  const successRate = (completed / stats.totalToolCalls * 100).toFixed(2);
+  console.log(`Tool call success rate: ${successRate}%`);
+}
+```
+
+---
+
+## SQLite vs PostgreSQL
+
+### Comparison
+
+| Feature | SQLite | PostgreSQL |
+|---------|--------|------------|
+| **Deployment** | Single file, zero config | Requires database server |
+| **Concurrent Writes** | Single process | Multi-process |
+| **Query Performance** | Good for small datasets | Optimized for large datasets |
+| **JSON Support** | JSON functions | JSONB + GIN indexes |
+| **Backup** | Copy file | pg_dump/restore |
+| **Scaling** | Single machine | Replication, sharding |
+
+### When to Choose SQLite
+
+- Single instance deployment
+- Fewer than 1000 Agents
+- Fewer than 100K messages per day
+- Quick prototyping
+- Zero maintenance overhead
+
+### When to Choose PostgreSQL
+
+- Multi-instance deployment
+- More than 1000 Agents
+- More than 100K messages per day
+- Complex queries and analytics
+- High availability requirements
+
+---
+
+## Docker Quick Start
+
+### PostgreSQL
+
+```bash
+# Development
+docker run --name kode-postgres \
+  -e POSTGRES_PASSWORD=kode123 \
+  -e POSTGRES_DB=kode_agents \
+  -p 5432:5432 \
+  -d postgres:16-alpine
+
+# Production (persistent data)
+docker run --name kode-postgres \
+  -e POSTGRES_PASSWORD=kode123 \
+  -e POSTGRES_DB=kode_agents \
+  -v /data/postgres:/var/lib/postgresql/data \
+  -p 5432:5432 \
+  -d postgres:16-alpine
+```
+
+---
+
+## Performance Tips
+
+### Use Pagination
+
+```typescript
+// Avoid loading all data at once
+const PAGE_SIZE = 100;
+let offset = 0;
+
+while (true) {
+  const messages = await store.queryMessages({
+    agentId,
+    limit: PAGE_SIZE,
+    offset,
+  });
+
+  if (messages.length === 0) break;
+  processMessages(messages);
+  offset += PAGE_SIZE;
+}
+```
+
+### Use Time Filters
+
+```typescript
+// Limit to recent data
+const messages = await store.queryMessages({
+  agentId,
+  createdAfter: new Date(Date.now() - 7 * 24 * 60 * 60 * 1000), // Last 7 days
+});
+```
+
+### PostgreSQL Connection Pool
+
+```typescript
+const store = await createExtendedStore({
+  type: 'postgres',
+  connection: {
+    host: 'localhost',
+    port: 5432,
+    database: 'kode_agents',
+    user: 'kode',
+    password: 'password',
+    max: 20,                    // Max connections
+    idleTimeoutMillis: 30000,   // Idle connection timeout
+    connectionTimeoutMillis: 2000,
+  },
+  fileStoreBaseDir: './data/store',
+});
+```
+
+---
+
+## Backup
+
+### SQLite
+
+```bash
+# Online backup (recommended)
+sqlite3 agents.db ".backup agents.db.backup"
+
+# Export SQL
+sqlite3 agents.db .dump > agents.sql
+```
+
+### PostgreSQL
+
+```bash
+# Logical backup
+pg_dump -h localhost -U kode -d kode_agents > backup.sql
+
+# Compressed backup
+pg_dump -h localhost -U kode -d kode_agents | gzip > backup.sql.gz
+
+# Scheduled backup (cron)
+0 2 * * * pg_dump -h localhost -U kode -d kode_agents | gzip > /backup/kode_$(date +\%Y\%m\%d).sql.gz
+```
+
+---
+
+## Troubleshooting
+
+### SQLite: Database Locked
+
+```
+Error: SQLITE_BUSY: database is locked
+```
+
+**Solution**: Enable WAL mode and set a busy timeout so that writers wait for the lock instead of failing immediately
+
+```typescript
+const db = new Database('./agents.db');
+db.pragma('journal_mode = WAL');
+db.pragma('busy_timeout = 5000');
+```
+
+### PostgreSQL: Connection Refused
+
+```
+Error: connect ECONNREFUSED 127.0.0.1:5432
+```
+
+**Checklist**:
+1. Check if PostgreSQL is running: `pg_isready -h localhost -p 5432`
+2. Check firewall settings
+3. Verify `pg_hba.conf` allows connections
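+4. Verify that `listen_addresses` in `postgresql.conf` includes the interface you are connecting to (e.g. `localhost` for local connections, or `'*'` for all interfaces)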
+
+### PostgreSQL: Too Many Clients
+
+```
+Error: sorry, too many clients already
+```
+
+**Solution**: Optimize connection pool
+
+```typescript
+const store = await createExtendedStore({
+  type: 'postgres',
+  connection: {
+    ...config,
+    max: 10,                    // Reduce per-instance connections
+    idleTimeoutMillis: 10000,   // Release idle connections faster
+  },
+  fileStoreBaseDir: './data/store',
+});
+```
+
+---
+
+## FAQ
+
+**Q: Can I migrate from JSONStore to a database?**
+
+A: Yes, but migration is currently a manual process. A migration tool will be provided in a future version.
+
+**Q: Does database storage affect performance?**
+
+A: No. For regular operations (create, send, resume), performance is comparable to JSONStore.
+
+**Q: Can I mix SQLite and PostgreSQL?**
+
+A: Yes. The `ExtendedStore` interface abstracts the underlying implementation:
+
+```typescript
+const store = process.env.NODE_ENV === 'production'
+  ? await createExtendedStore({ type: 'postgres', ... })
+  : await createExtendedStore({ type: 'sqlite', ... });
+```
+
+**Q: How do I delete old data?**
+
+```typescript
+// Delete specific Agent
+await store.delete(agentId);
+
+// Batch delete old Agents
+const sessions = await store.querySessions({
+  createdBefore: new Date(Date.now() - 90 * 24 * 60 * 60 * 1000), // 90 days ago
+});
+for (const session of sessions) {
+  await store.delete(session.agentId);
+}
+```
+
+---
+
+## References
+
+- Store interface: [API Reference](../reference/api.md#store)
+
+---
+
+## Custom Store Implementation
+
+If you need a different database backend (MongoDB, DynamoDB, etc.), you can implement the `Store` interface.
+
+### Store Interface Overview
+
+The Store interface has three layers:
+
+```
+Store (base)
+  └── QueryableStore (adds query methods)
+        └── ExtendedStore (adds health check, metrics, distributed lock)
+```
+
+**Basic Store** (required methods):
+
+```typescript
+interface Store {
+  // Runtime State
+  saveMessages(agentId: string, messages: Message[]): Promise<void>;
+  loadMessages(agentId: string): Promise<Message[]>;
+  saveToolCallRecords(agentId: string, records: ToolCallRecord[]): Promise<void>;
+  loadToolCallRecords(agentId: string): Promise<ToolCallRecord[]>;
+  saveTodos(agentId: string, snapshot: TodoSnapshot): Promise<void>;
+  loadTodos(agentId: string): Promise<TodoSnapshot | undefined>;
+
+  // Events
+  appendEvent(agentId: string, timeline: Timeline): Promise<void>;
+  readEvents(agentId: string, opts?: { since?: Bookmark; channel?: AgentChannel }): AsyncIterable<Timeline>;
+
+  // History & Compression
+  saveHistoryWindow(agentId: string, window: HistoryWindow): Promise<void>;
+  loadHistoryWindows(agentId: string): Promise<HistoryWindow[]>;
+  saveCompressionRecord(agentId: string, record: CompressionRecord): Promise<void>;
+  loadCompressionRecords(agentId: string): Promise<CompressionRecord[]>;
+  saveRecoveredFile(agentId: string, file: RecoveredFile): Promise<void>;
+  loadRecoveredFiles(agentId: string): Promise<RecoveredFile[]>;
+
+  // Multimodal Cache
+  saveMediaCache(agentId: string, records: MediaCacheRecord[]): Promise<void>;
+  loadMediaCache(agentId: string): Promise<MediaCacheRecord[]>;
+
+  // Snapshots
+  saveSnapshot(agentId: string, snapshot: Snapshot): Promise<void>;
+  loadSnapshot(agentId: string, snapshotId: string): Promise<Snapshot | undefined>;
+  listSnapshots(agentId: string): Promise<Snapshot[]>;
+
+  // Metadata
+  saveInfo(agentId: string, info: AgentInfo): Promise<void>;
+  loadInfo(agentId: string): Promise<AgentInfo | undefined>;
+
+  // Lifecycle
+  exists(agentId: string): Promise<boolean>;
+  delete(agentId: string): Promise<void>;
+  list(prefix?: string): Promise<string[]>;
+}
+```
+
+### Minimal Custom Store Example
+
+```typescript
+import {
+  Store,
+  Message,
+  ToolCallRecord,
+  Timeline,
+  Snapshot,
+  AgentInfo,
+  TodoSnapshot,
+  HistoryWindow,
+  CompressionRecord,
+  RecoveredFile,
+  MediaCacheRecord,
+  Bookmark,
+  AgentChannel,
+} from '@shareai-lab/kode-sdk';
+import { MongoClient, Collection } from 'mongodb';
+
+export class MongoStore implements Store {
+  private db: Db;
+  private agents: Collection;
+  private messages: Collection;
+  private events: Collection;
+
+  constructor(private client: MongoClient, dbName: string) {
+    this.db = client.db(dbName);
+    this.agents = this.db.collection('agents');
+    this.messages = this.db.collection('messages');
+    this.events = this.db.collection('events');
+  }
+
+  // === Runtime State ===
+
+  async saveMessages(agentId: string, messages: Message[]): Promise<void> {
+    await this.messages.updateOne(
+      { agentId },
+      { $set: { agentId, messages, updatedAt: new Date() } },
+      { upsert: true }
+    );
+  }
+
+  async loadMessages(agentId: string): Promise<Message[]> {
+    const doc = await this.messages.findOne({ agentId });
+    return doc?.messages || [];
+  }
+
+  async saveToolCallRecords(agentId: string, records: ToolCallRecord[]): Promise<void> {
+    await this.db.collection('tool_calls').updateOne(
+      { agentId },
+      { $set: { agentId, records, updatedAt: new Date() } },
+      { upsert: true }
+    );
+  }
+
+  async loadToolCallRecords(agentId: string): Promise<ToolCallRecord[]> {
+    const doc = await this.db.collection('tool_calls').findOne({ agentId });
+    return doc?.records || [];
+  }
+
+  // === Events ===
+
+  async appendEvent(agentId: string, timeline: Timeline): Promise<void> {
+    await this.events.insertOne({
+      agentId,
+      cursor: timeline.cursor,
+      bookmark: timeline.bookmark,
+      event: timeline.event,
+      createdAt: new Date(),
+    });
+  }
+
+  async *readEvents(agentId: string, opts?: { since?: Bookmark; channel?: AgentChannel }): AsyncIterable<Timeline> {
+    const query: any = { agentId };
+    if (opts?.since) {
+      query['bookmark.seq'] = { $gt: opts.since.seq };
+    }
+    if (opts?.channel) {
+      query['event.channel'] = opts.channel;
+    }
+
+    const cursor = this.events.find(query).sort({ 'bookmark.seq': 1 });
+    for await (const doc of cursor) {
+      yield {
+        cursor: doc.cursor,
+        bookmark: doc.bookmark,
+        event: doc.event,
+      };
+    }
+  }
+
+  // === Metadata ===
+
+  async saveInfo(agentId: string, info: AgentInfo): Promise<void> {
+    await this.agents.updateOne(
+      { agentId },
+      { $set: { ...info, updatedAt: new Date() } },
+      { upsert: true }
+    );
+  }
+
+  async loadInfo(agentId: string): Promise<AgentInfo | undefined> {
+    const doc = await this.agents.findOne({ agentId });
+    if (!doc) return undefined;
+    return {
+      agentId: doc.agentId,
+      templateId: doc.templateId,
+      createdAt: doc.createdAt,
+      lineage: doc.lineage,
+      configVersion: doc.configVersion,
+      messageCount: doc.messageCount,
+      lastSfpIndex: doc.lastSfpIndex,
+      lastBookmark: doc.lastBookmark,
+      breakpoint: doc.breakpoint,
+      metadata: doc.metadata,
+    };
+  }
+
+  // === Lifecycle ===
+
+  async exists(agentId: string): Promise<boolean> {
+    const count = await this.agents.countDocuments({ agentId });
+    return count > 0;
+  }
+
+  async delete(agentId: string): Promise<void> {
+    await Promise.all([
+      this.agents.deleteOne({ agentId }),
+      this.messages.deleteOne({ agentId }),
+      this.events.deleteMany({ agentId }),
+      this.db.collection('tool_calls').deleteOne({ agentId }),
+      this.db.collection('snapshots').deleteMany({ agentId }),
+      // ... delete other collections
+    ]);
+  }
+
+  async list(prefix?: string): Promise<string[]> {
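+    const literal = prefix?.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); // escape regex metacharacters so the prefix is matched literally
+    const docs = await this.agents.find(literal ? { agentId: { $regex: `^${literal}` } } : {}, { projection: { agentId: 1 } }).toArray();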
+    return docs.map(d => d.agentId);
+  }
+
+  // ... implement remaining methods (snapshots, history, compression, media cache, todos)
+}
+```
+
+### Hybrid Storage Pattern
+
+For high-performance scenarios, use a hybrid approach like `PostgresStore`:
+
+```
+┌─────────────────────────────────────────────────────┐
+│                   Your Custom Store                  │
+├─────────────────────────────────────────────────────┤
+│                                                      │
+│  Database (for queryable data):       File System:   │
+│  ┌─────────────────────────┐    ┌──────────────────┐│
+│  │ AgentInfo               │    │ Events (append)  ││
+│  │ Messages                │    │ Todos            ││
+│  │ ToolCallRecords         │    │ History Windows  ││
+│  │ Snapshots               │    │ Media Cache      ││
+│  └─────────────────────────┘    └──────────────────┘│
+│                                                      │
+└─────────────────────────────────────────────────────┘
+```
+
+**Why hybrid?**
+- Database: Supports queries, indexes, transactions
+- File System: Better for high-frequency append operations (events)
+
+```typescript
+export class HybridStore implements ExtendedStore {
+  private db: Database;           // Your database client
+  private fileStore: JSONStore;   // Delegate file operations
+
+  constructor(dbConfig: any, fileDir: string) {
+    this.db = new Database(dbConfig);
+    this.fileStore = new JSONStore(fileDir);
+  }
+
+  // Database operations
+  async saveMessages(agentId: string, messages: Message[]): Promise<void> {
+    await this.db.query('INSERT INTO messages ...');
+  }
+
+  // Delegate to JSONStore for events
+  async appendEvent(agentId: string, timeline: Timeline): Promise<void> {
+    return this.fileStore.appendEvent(agentId, timeline);
+  }
+
+  async *readEvents(agentId: string, opts?: any): AsyncIterable<Timeline> {
+    yield* this.fileStore.readEvents(agentId, opts);
+  }
+}
+```
+
+### Testing Your Store
+
+```typescript
+import { beforeAll, describe, expect, it } from 'vitest';
+import { MongoClient } from 'mongodb';
+import { MongoStore } from './mongo-store';
+describe('MongoStore', () => {
+  let store: MongoStore;
+
+  beforeAll(async () => {
+    const client = await MongoClient.connect('mongodb://localhost:27017');
+    store = new MongoStore(client, 'kode_test');
+  });
+
+  it('should save and load messages', async () => {
+    const agentId = 'test-agent-1';
+    const messages = [
+      { role: 'user', content: [{ type: 'text', text: 'Hello' }] },
+    ];
+
+    await store.saveMessages(agentId, messages);
+    const loaded = await store.loadMessages(agentId);
+
+    expect(loaded).toHaveLength(1);
+    expect(loaded[0].content[0].text).toBe('Hello');
+  });
+
+  it('should check existence', async () => {
+    const agentId = 'test-agent-2';
+    await store.saveInfo(agentId, { agentId, templateId: 'test', ... });
+
+    expect(await store.exists(agentId)).toBe(true);
+    expect(await store.exists('non-existent')).toBe(false);
+  });
+
+  // ... more tests for all Store methods
+});
+```
+
+### Best Practices
+
+1. **Implement all methods** - Store interface has no optional methods
+2. **Use transactions** - For operations that modify multiple tables
+3. **Index agentId** - All queries filter by agentId
+4. **Handle concurrent writes** - Use optimistic locking or upserts
+5. **Implement cleanup** - `delete()` must remove all agent data
+6. **Test edge cases** - Empty results, missing agents, large payloads
+
+---
+
+*See also: [Architecture Guide](../advanced/architecture.md) | [Production Guide](../advanced/production.md)*
diff --git a/docs/en/guides/error-handling.md b/docs/en/guides/error-handling.md
new file mode 100644
index 0000000..d0a7562
--- /dev/null
+++ b/docs/en/guides/error-handling.md
@@ -0,0 +1,309 @@
+# Error Handling Guide
+
+KODE SDK implements a comprehensive error handling mechanism with three core principles:
+
+1. **Model-Aware Errors** - All errors are visible and actionable by the model
+2. **Never Crash** - Multi-layer error catching ensures system stability
+3. **Full Observability** - All errors trigger events for monitoring and debugging
+
+---
+
+## Error Types
+
+| Error Type | Identifier | Retryable | Typical Scenarios |
+|------------|-----------|-----------|-------------------|
+| `validation` | `_validationError: true` | No | Parameter type error, missing required params |
+| `runtime` | `_thrownError: true` | Yes | File not found, permission denied, network error |
+| `logical` | Tool returns `{ok: false}` | Yes | Content mismatch, command execution failed |
+| `aborted` | Timeout/interrupt | No | Tool execution timeout, user interrupt |
+| `exception` | Unexpected exception | Yes | System exception, unknown error |
+
+---
+
+## Error Flow
+
+```
+Tool Execution
+  ├─ Parameter validation fails → {ok: false, error: ..., _validationError: true}
+  ├─ Execution throws → {ok: false, error: ..., _thrownError: true}
+  ├─ Returns {ok: false} → Keep as-is (logical error)
+  └─ Normal return → Keep as-is
+     ↓
+Agent Processing
+  ├─ Identify error type: validation | runtime | logical | aborted | exception
+  ├─ Determine retryability: validation and aborted are not retryable, others are retryable
+  ├─ Generate recommendations: based on error type and tool name
+  ├─ Emit tool:error event (ProgressEvent - user visible)
+  └─ Emit error event (MonitorEvent - monitoring system)
+     ↓
+Return to Model
+  └─ {
+       ok: false,
+       error: "Specific error message",
+       errorType: "error type",
+       retryable: true/false,
+       recommendations: ["suggestion 1", "suggestion 2", ...]
+     }
+```
+
+---
+
+## Listening to Errors
+
+### Progress Events (User Layer)
+
+```typescript
+// Listen to tool errors for UI
+agent.on('tool:error', (event) => {
+  console.log('Tool error:', event.error);
+  console.log('Tool state:', event.call.state);
+  // Show UI notification
+});
+
+// Using stream
+for await (const envelope of agent.stream(input)) {
+  if (envelope.event.type === 'tool:error') {
+    showNotification({
+      type: 'error',
+      message: envelope.event.error,
+    });
+  }
+}
+```
+
+### Monitor Events (System Layer)
+
+```typescript
+// Listen to all errors
+agent.on('error', (event) => {
+  if (event.phase === 'tool') {
+    const { errorType, retryable } = event.detail || {};
+
+    // Log to logging system
+    logger.warn('Tool Error', {
+      message: event.message,
+      errorType,
+      retryable,
+      severity: event.severity,
+      timestamp: Date.now(),
+    });
+
+    // Send alerts
+    if (event.severity === 'error') {
+      alerting.send('Tool execution failed', event);
+    }
+  }
+});
+```
+
+---
+
+## Model Self-Adjustment
+
+### Example: File Not Found
+
+**Tool returns:**
+```json
+{
+  "ok": false,
+  "error": "File not found: /src/utils/helper.ts",
+  "errorType": "logical",
+  "retryable": true,
+  "recommendations": [
+    "Verify the file path is correct",
+    "Use fs_glob to search for files",
+    "Check if file was externally modified"
+  ]
+}
+```
+
+**Model analysis:**
+1. `errorType: "logical"` - Not a parameter issue, file genuinely doesn't exist
+2. `retryable: true` - Can try alternative approaches
+3. Recommendations suggest "Verify the file path"
+
+**Model adjustment:**
+```
+1. Use fs_glob("src/**/*.ts") to find all ts files
+2. Use fs_grep("helper", "src/**/*.ts") to search for helper
+3. Continue with the correct file path
+```
+
+### Example: Validation Error
+
+**Tool returns:**
+```json
+{
+  "ok": false,
+  "error": "Invalid parameters: path is required",
+  "errorType": "validation",
+  "retryable": false,
+  "recommendations": [
+    "Check tool parameters against schema",
+    "Ensure all required parameters are provided",
+    "Verify parameter types are correct"
+  ]
+}
+```
+
+**Model adjustment:**
+```
+1. Check tool call, found missing path parameter
+2. Add the required path parameter
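+3. Call the tool again with the corrected parameters (retryable: false means the same call must not be repeated unchanged)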
+```
+
+---
+
+## Multi-Layer Protection
+
+```
+Layer 1: Tool Execution (tool.ts)
+  └─ try-catch catches all exceptions → {ok: false, _thrownError: true}
+
+Layer 2: Agent Call (agent.ts)
+  └─ try-catch catches call exceptions → errorType: 'exception'
+
+Layer 3: Parameter Validation
+  └─ safeParse prevents validation exceptions → {ok: false, _validationError: true}
+
+Layer 4: Hook Execution
+  └─ Hook failures don't affect main flow → Log error and continue
+```
+
+### Error Isolation Principles
+
+- Single tool error ≠ Agent crash
+- Agent error ≠ System crash
+- Tools are completely isolated
+- All errors are traceable
+
+---
+
+## Best Practices
+
+### For Tool Developers
+
+```typescript
+// ✅ Recommended: Use {ok: false} for expected business errors
+if (!fileExists) {
+  return {
+    ok: false,
+    error: 'File not found',
+    recommendations: ['Check file path', 'Use fs_glob to search'],
+  };
+}
+
+// ❌ Avoid: Throwing exceptions for business errors
+throw new Error('File not found');  // Only use for unexpected exceptions
+```
+
+### For Application Developers
+
+```typescript
+// Listen to errors and show UI
+agent.on('tool:error', (event) => {
+  showNotification({
+    type: 'error',
+    message: event.error,
+    action: event.call.state === 'FAILED' ? 'retry' : null,
+  });
+});
+
+// Smart retry logic
+if (result.status === 'paused' && result.permissionIds?.length) {
+  // Pending permissions, wait for user decision
+} else if (lastError?.retryable && retryCount < 3) {
+  // Retryable error, auto-retry
+  await agent.send('Please adjust and retry based on recommendations');
+}
+```
+
+### For Operations
+
+```typescript
+// Error statistics and analysis
+const errorStats = {
+  validation: 0,
+  runtime: 0,
+  logical: 0,
+  aborted: 0,
+  exception: 0,
+};
+
+agent.on('error', (event) => {
+  if (event.phase === 'tool') {
+    const type = event.detail?.errorType || 'unknown';
+    errorStats[type] = (errorStats[type] ?? 0) + 1;  // also counts types missing from the initial map (e.g. 'unknown')
+
+    // Analyze error patterns periodically
+    if (errorStats.validation > 100) {
+      alert('Too many validation errors, check tool schema config');
+    }
+  }
+});
+```
+
+---
+
+## Error Event Types
+
+### ProgressToolErrorEvent
+
+```typescript
+interface ProgressToolErrorEvent {
+  channel: 'progress';
+  type: 'tool:error';
+  call: ToolCallSnapshot;  // Tool call snapshot
+  error: string;           // Error message
+  bookmark?: Bookmark;
+}
+```
+
+### MonitorErrorEvent
+
+```typescript
+interface MonitorErrorEvent {
+  channel: 'monitor';
+  type: 'error';
+  severity: 'warn' | 'error';
+  phase: 'model' | 'tool' | 'sandbox' | 'system';
+  message: string;
+  detail?: {
+    errorType?: string;
+    retryable?: boolean;
+    [key: string]: any;
+  };
+}
+```
+
+---
+
+## Summary
+
+The error handling mechanism provides:
+
+**Model Intelligence**
+- Clear error types (validation/runtime/logical/aborted/exception)
+- Explicit retryability (retryable: true/false)
+- Actionable recommendations (customized by tool and error type)
+
+**System Stability**
+- Tool layer try-catch fallback
+- Agent layer try-catch protection
+- Parameter validation safeParse
+- Hook execution isolation
+
+**Full Observability**
+- Progress events (tool:error) - user visible
+- Monitor events (error) - system logging
+- Tool records (ToolCallRecord) - complete audit
+- Event timeline (EventBus) - traceable
+
+---
+
+## References
+
+- [Events Guide](./events.md)
+- [Tools Guide](./tools.md)
+- [Resume/Fork Guide](./resume-fork.md)
diff --git a/docs/en/guides/events.md b/docs/en/guides/events.md
new file mode 100644
index 0000000..2269f3c
--- /dev/null
+++ b/docs/en/guides/events.md
@@ -0,0 +1,166 @@
+# Event System Guide
+
+KODE SDK's core philosophy is "push only necessary events by default, everything else goes through callbacks". We split interactions into three independent channels:
+
+```
+Progress  → Data plane (UI rendering)
+Control   → Approval plane (human decisions)
+Monitor   → Governance plane (audit/alerting)
+```
+
+This guide covers event types, best practices, and common pitfalls for each channel.
+
+---
+
+## Progress: Data Plane
+
+Progress handles all user-visible data streams: text deltas, tool lifecycle, and completion signals. Events are pushed in chronological order and support `cursor`/`bookmark` for resumable streaming.
+
+| Event | Description |
+|-------|-------------|
+| `think_chunk_start / think_chunk / think_chunk_end` | Model thinking phase (enable via template `exposeThinking`). |
+| `text_chunk_start / text_chunk / text_chunk_end` | Text deltas and final segments. |
+| `tool:start / tool:error / tool:end` | Tool execution lifecycle; `tool:end` always fires (even on failure). |
+| `done` | Current turn complete, includes `bookmark { seq, timestamp }`. |
+
+```typescript
+for await (const envelope of agent.subscribe(['progress'], { since: lastBookmark })) {
+  switch (envelope.event.type) {
+    case 'text_chunk':
+      ui.append(envelope.event.delta);
+      break;
+    case 'tool:start':
+      ui.showToolSpinner(envelope.event.call);
+      break;
+    case 'tool:end':
+      ui.hideToolSpinner(envelope.event.call);
+      break;
+    case 'done':
+      lastBookmark = envelope.bookmark;
+      break;
+  }
+}
+```
+
+**Best Practices**
+
+- Use **SSE/WebSocket** to push Progress to frontend.
+- Save `bookmark`/`cursor`, resume with `since` after disconnection.
+- UI only handles display; business logic (approval, governance) goes to Control/Monitor or Hooks.
+- Enable `exposeThinking` only when needed; keep it off by default to reduce noise.
+
+**Common Pitfalls**
+
+- Forgetting to consume `done` causes frontend to wait indefinitely.
+- Putting approval logic in Progress makes the system hard to extend.
+
+---
+
+## Control: Approval Plane
+
+Control handles moments requiring human decisions. Events are few but critical, typically persisted to approval systems.
+
+| Event | Description |
+|-------|-------------|
+| `permission_required` | Tool execution needs approval, includes `call` snapshot and `respond(decision, opts?)` callback. |
+| `permission_decided` | Approval result broadcast, includes `callId`, `decision`, `decidedBy`, `note`. |
+
+```typescript
+agent.on('permission_required', async (event) => {
+  const ticketId = await approvalStore.create({
+    agentId: agent.agentId,
+    callId: event.call.id,
+    tool: event.call.name,
+    preview: event.call.inputPreview,
+  });
+
+  // Give immediate default response, or wait for UI/approval flow
+  await event.respond('deny', { note: `Pending approval ticket ${ticketId}` });
+});
+```
+
+**Best Practices**
+
+- Combine template `permission.requireApprovalTools` with Hook `preToolUse` for approval strategy.
+- If approval needs user decision, save `event.call.id` and call `agent.decide(callId, 'allow' | 'deny', note)` later.
+- Re-bind Control event listeners after Resume.
+
+**Common Pitfalls**
+
+- Forgetting to handle `permission_required` causes tool to stay in `AWAITING_APPROVAL`.
+- Deciding the same approval twice: `agent.decide` can only be called once per call ID; duplicate calls throw "Permission not pending".
+
+---
+
+## Monitor: Governance Plane
+
+Monitor is for platform governance, audit, and alerting. Pushes only when necessary, suitable for logs and metrics.
+
+| Event | Description |
+|-------|-------------|
+| `state_changed` | Agent state transition (READY / WORKING / PAUSED). |
+| `tool_executed` | Tool execution complete, includes duration, approval, audit info. |
+| `error` | Categorized error (`phase: model/tool/system`), with detailed context. |
+| `todo_changed` / `todo_reminder` | Todo lifecycle events. |
+| `file_changed` | FilePool detected external modification. |
+| `context_compression` | Context compression summary and ratio. |
+| `agent_resumed` | Resume complete, includes auto-sealed list. |
+| `tool_manual_updated` | Tool manual injected/refreshed. |
+
+```typescript
+agent.on('tool_executed', (event) => {
+  auditLogger.info({
+    agentId: agent.agentId,
+    tool: event.call.name,
+    durationMs: event.call.durationMs,
+    approval: event.call.approval,
+  });
+});
+
+agent.on('error', (event) => {
+  alerting.notify(`Agent ${agent.agentId} error`, {
+    phase: event.phase,
+    severity: event.severity,
+    detail: event.detail,
+  });
+});
+```
+
+**Best Practices**
+
+- Send Monitor events to logging/monitoring platforms for audit and SLA tracking.
+- On `file_changed`, auto-trigger reminders or scheduled tasks.
+- Log `agent_resumed` events for audit trail of auto-sealing.
+
+**Common Pitfalls**
+
+- Pushing Monitor directly to end users creates noise; filter on backend first.
+- Ignoring `severity` field mixes critical errors with informational messages.
+
+---
+
+## subscribe vs on: When to Use Which?
+
+- `agent.subscribe([...])` → **Ordered event stream**, ideal for frontend/SSE/WebSocket. Supports `{ since, kinds }` filtering. Returns `AsyncIterable`, remember to handle `done` and close connection.
+- `agent.on(type, handler)` → **Callback-style listener**, ideal for backend logic (approval, audit, alerting). Returns `unsubscribe` function, must re-bind after Resume.
+
+```typescript
+const stream = agent.subscribe(['progress', 'monitor']);
+const iterator = stream[Symbol.asyncIterator]();
+
+// Backend governance
+const off = agent.on('tool_executed', handler);
+// Call off() to unsubscribe when appropriate
+```
+
+> **Convention**: UI subscribes to Progress; approval systems listen to Control; governance/monitoring consumes Monitor. For other scenarios, use Hooks or built-in events, avoid custom polling.
+
+---
+
+## Debugging Tips
+
+- Enable `monitor.state_changed` logging to check if Agent is stuck at a breakpoint (e.g., `AWAITING_APPROVAL`).
+- Use `agent.status()` to view `lastSfpIndex`, `cursor`, `state` for debugging stalls.
+- Combine `EventBus.getTimeline()` (internal API) or Store event logs for replay.
+
+Master the three-channel mindset to build Agent experiences that feel like collaborating with a colleague.
diff --git a/docs/en/guides/multimodal.md b/docs/en/guides/multimodal.md
new file mode 100644
index 0000000..dc8aa10
--- /dev/null
+++ b/docs/en/guides/multimodal.md
@@ -0,0 +1,323 @@
+# Multimodal Content Guide
+
+KODE SDK supports multimodal input including images, audio, and files (PDF). This guide covers how to send multimodal content to LLM models and manage multimodal history.
+
+---
+
+## Supported Content Types
+
+| Type | Block Type | Supported Providers |
+|------|------------|---------------------|
+| Images | `image` | Anthropic, OpenAI, Gemini, GLM, Minimax |
+| PDF Files | `file` | Anthropic, OpenAI (Responses API), Gemini |
+| Audio | `audio` | OpenAI, Gemini |
+
+---
+
+## Sending Multimodal Content
+
+### Image Input
+
+Send images using `ContentBlock[]` with `agent.send()`:
+
+```typescript
+import { Agent, ContentBlock } from '@shareai-lab/kode-sdk';
+import * as fs from 'fs';
+
+// Read image as base64
+const imageBuffer = fs.readFileSync('./image.png');
+const base64 = imageBuffer.toString('base64');
+
+// Build content blocks
+const content: ContentBlock[] = [
+  { type: 'text', text: 'What animals are in this image?' },
+  { type: 'image', base64, mime_type: 'image/png' }
+];
+
+// Send to agent
+const response = await agent.send(content);
+```
+
+### URL-based Images
+
+You can also use URLs instead of base64:
+
+```typescript
+const content: ContentBlock[] = [
+  { type: 'text', text: 'Describe this image.' },
+  { type: 'image', url: 'https://example.com/image.jpg' }
+];
+
+const response = await agent.send(content);
+```
+
+### PDF File Input
+
+```typescript
+const pdfBuffer = fs.readFileSync('./document.pdf');
+const base64 = pdfBuffer.toString('base64');
+
+const content: ContentBlock[] = [
+  { type: 'text', text: 'Extract the main topics from this PDF.' },
+  { type: 'file', base64, mime_type: 'application/pdf', filename: 'document.pdf' }
+];
+
+const response = await agent.send(content);
+```
+
+---
+
+## Multimodal Configuration
+
+### Agent Configuration
+
+Configure multimodal behavior when creating an Agent:
+
+```typescript
+const agent = await Agent.create({
+  templateId: 'multimodal-assistant',
+  // Keep multimodal content in conversation history
+  multimodalContinuation: 'history',
+  // Keep recent 3 messages with multimodal content when compressing context
+  multimodalRetention: { keepRecent: 3 },
+}, deps);
+```
+
+| Option | Type | Default | Description |
+|--------|------|---------|-------------|
+| `multimodalContinuation` | `'history'` | `'history'` | Preserve multimodal content in conversation history |
+| `multimodalRetention.keepRecent` | `number` | `3` | Number of recent multimodal messages to keep during context compression |
+
+### Provider Configuration
+
+Configure multimodal options in the model configuration:
+
+```typescript
+const provider = new AnthropicProvider(
+  process.env.ANTHROPIC_API_KEY!,
+  'claude-sonnet-4-20250514',
+  undefined, // baseUrl
+  undefined, // proxyUrl
+  {
+    multimodal: {
+      mode: 'url+base64',           // Allow both URL and base64
+      maxBase64Bytes: 20_000_000,   // 20MB max for base64
+      allowMimeTypes: [             // Allowed MIME types
+        'image/jpeg',
+        'image/png',
+        'image/gif',
+        'image/webp',
+        'application/pdf',
+      ],
+    },
+  }
+);
+```
+
+| Option | Type | Default | Description |
+|--------|------|---------|-------------|
+| `mode` | `'url'` \| `'url+base64'` | `'url'` | Accepted content sources: `'url'` rejects inline base64 payloads; `'url+base64'` allows both URLs and base64 |
+| `maxBase64Bytes` | `number` | `20000000` | Maximum size for base64 content |
+| `allowMimeTypes` | `string[]` | Common image + PDF types | Allowed MIME types |
+
+---
+
+## Supported MIME Types
+
+### Images
+
+| MIME Type | Extension | Notes |
+|-----------|-----------|-------|
+| `image/jpeg` | `.jpg`, `.jpeg` | All providers |
+| `image/png` | `.png` | All providers |
+| `image/webp` | `.webp` | All providers |
+| `image/gif` | `.gif` | Not supported by Gemini |
+
+### Documents
+
+| MIME Type | Extension | Notes |
+|-----------|-----------|-------|
+| `application/pdf` | `.pdf` | Anthropic, OpenAI (Responses API), Gemini |
+
+---
+
+## Provider-Specific Notes
+
+### Anthropic
+
+- Supports images and PDF files
+- Use `files-api-2025-04-14` beta for file uploads
+- Base64 images embedded directly in messages
+
+```typescript
+const provider = new AnthropicProvider(apiKey, model, baseUrl, proxyUrl, {
+  beta: {
+    filesApi: true,  // Enable Files API
+  },
+  multimodal: {
+    mode: 'url+base64',
+  },
+});
+```
+
+### OpenAI
+
+- Images: Supported in Chat Completions API
+- PDF/Files: Requires Responses API (`openaiApi: 'responses'`)
+
+```typescript
+const provider = new OpenAIProvider(apiKey, model, baseUrl, proxyUrl, {
+  api: 'responses',  // Required for PDF support
+  multimodal: {
+    mode: 'url+base64',
+  },
+});
+```
+
+### Gemini
+
+- Supports images and PDF files
+- GIF format not supported
+- Use `mediaResolution` option for image quality
+
+```typescript
+const provider = new GeminiProvider(apiKey, model, baseUrl, proxyUrl, {
+  mediaResolution: 'high',  // 'low' | 'medium' | 'high'
+  multimodal: {
+    mode: 'url+base64',
+  },
+});
+```
+
+---
+
+## Best Practices
+
+### 1. Use Appropriate Image Sizes
+
+Large images increase token usage and latency. Resize images before sending:
+
+```typescript
+// Recommendation: Keep images under 1MB for optimal performance
+const maxBytes = 1024 * 1024; // 1MB
+
+function validateImageSize(base64: string): boolean {
+  const bytes = Math.ceil(base64.length * 3 / 4);
+  return bytes <= maxBytes;
+}
+```
+
+### 2. Handle Multimodal Context Retention
+
+For long conversations with many images, configure retention to avoid context overflow:
+
+```typescript
+const agent = await Agent.create({
+  templateId: 'vision-assistant',
+  multimodalRetention: { keepRecent: 2 },  // Keep only recent 2 images
+  context: {
+    maxTokens: 100_000,
+    compressToTokens: 60_000,
+  },
+}, deps);
+```
+
+### 3. Validate MIME Types
+
+Always validate MIME types before sending:
+
+```typescript
+const ALLOWED_IMAGE_TYPES = ['image/jpeg', 'image/png', 'image/webp'];
+
+function getImageMimeType(filename: string): string {
+  const ext = filename.toLowerCase().split('.').pop();
+  const mimeMap: Record<string, string> = {
+    jpg: 'image/jpeg',
+    jpeg: 'image/jpeg',
+    png: 'image/png',
+    webp: 'image/webp',
+  };
+  const mimeType = mimeMap[ext!];
+  if (!mimeType || !ALLOWED_IMAGE_TYPES.includes(mimeType)) {
+    throw new Error(`Unsupported image type: ${ext}`);
+  }
+  return mimeType;
+}
+```
+
+---
+
+## Error Handling
+
+Common multimodal errors:
+
+| Error | Cause | Solution |
+|-------|-------|----------|
+| `MultimodalValidationError: Base64 is not allowed` | `mode` set to `'url'` only | Set `mode: 'url+base64'` |
+| `MultimodalValidationError: base64 payload too large` | Exceeds `maxBase64Bytes` | Resize image or increase limit |
+| `MultimodalValidationError: mime_type not allowed` | MIME type not in allowlist | Add to `allowMimeTypes` |
+| `MultimodalValidationError: Missing url/file_id/base64` | No content source provided | Provide `url`, `file_id`, or `base64` |
+
+---
+
+## Complete Example
+
+```typescript
+import { Agent, AnthropicProvider, JSONStore, ContentBlock } from '@shareai-lab/kode-sdk';
+import * as fs from 'fs';
+
+async function analyzeImage() {
+  const provider = new AnthropicProvider(
+    process.env.ANTHROPIC_API_KEY!,
+    'claude-sonnet-4-20250514',
+    undefined,
+    undefined,
+    {
+      multimodal: {
+        mode: 'url+base64',
+        maxBase64Bytes: 10_000_000,
+      },
+    }
+  );
+
+  const store = new JSONStore('./.kode');
+
+  const agent = await Agent.create({
+    templateId: 'vision-assistant',
+    multimodalContinuation: 'history',
+    multimodalRetention: { keepRecent: 3 },
+  }, {
+    store,
+    templateRegistry,
+    toolRegistry,
+    sandboxFactory,
+    modelFactory: () => provider,
+  });
+
+  // Read and send image
+  const imageBuffer = fs.readFileSync('./photo.jpg');
+  const base64 = imageBuffer.toString('base64');
+
+  const content: ContentBlock[] = [
+    { type: 'text', text: 'What objects are in this photo?' },
+    { type: 'image', base64, mime_type: 'image/jpeg' }
+  ];
+
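+  // Start consuming first, then send: awaiting the loop before send() would hang, because `done` only fires after a message is processed.
+  const printed = (async () => {
+    for await (const envelope of agent.subscribe(['progress'])) {
+      if (envelope.event.type === 'text_chunk') process.stdout.write(envelope.event.delta);
+      if (envelope.event.type === 'done') break;
+    }
+  })();
+  await Promise.all([agent.send(content), printed]);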
+}
+```
+
+---
+
+## References
+
+- [Provider Guide](./providers.md) - Provider-specific configuration
+- [Events Guide](./events.md) - Progress event handling
+- [API Reference](../reference/api.md) - ContentBlock types
diff --git a/docs/en/guides/providers.md b/docs/en/guides/providers.md
new file mode 100644
index 0000000..e45779a
--- /dev/null
+++ b/docs/en/guides/providers.md
@@ -0,0 +1,409 @@
+# Provider Configuration Guide
+
+KODE SDK provides three built-in Provider implementations that support any model service conforming to the corresponding API protocol.
+
+---
+
+## Built-in Providers
+
+| Provider | API Protocol | Compatible Services |
+|----------|--------------|---------------------|
+| `AnthropicProvider` | Anthropic Messages API | Anthropic, compatible services |
+| `OpenAIProvider` | OpenAI Chat/Responses API | OpenAI, DeepSeek, GLM, Qwen, Minimax, OpenRouter, etc. |
+| `GeminiProvider` | Google Generative AI API | Google Gemini |
+
+> **Note**: Any service with a compatible API protocol can use the corresponding Provider. For example, DeepSeek, GLM, Qwen, etc. all use OpenAI-compatible APIs and can be used via `OpenAIProvider` with a custom `baseURL`.
+
+---
+
+## Environment Variables
+
+<!-- tabs:start -->
+#### **Linux / macOS**
+```bash
+export ANTHROPIC_API_KEY=sk-ant-...
+export ANTHROPIC_BASE_URL=https://api.anthropic.com  # optional
+export OPENAI_API_KEY=sk-...
+export OPENAI_BASE_URL=https://api.openai.com/v1  # optional
+export GOOGLE_API_KEY=...
+```
+
+#### **Windows (PowerShell)**
+```powershell
+$env:ANTHROPIC_API_KEY="sk-ant-..."
+$env:ANTHROPIC_BASE_URL="https://api.anthropic.com"  # optional
+$env:OPENAI_API_KEY="sk-..."
+$env:OPENAI_BASE_URL="https://api.openai.com/v1"  # optional
+$env:GOOGLE_API_KEY="..."
+```
+<!-- tabs:end -->
+
+---
+
+## AnthropicProvider
+
+For Anthropic Claude models and services compatible with the Anthropic API.
+
+### Basic Configuration
+
+```typescript
+import { AnthropicProvider } from '@shareai-lab/kode-sdk';
+
+const provider = new AnthropicProvider(
+  process.env.ANTHROPIC_API_KEY!,
+  'claude-sonnet-4-5-20250929',  // any supported model ID
+  process.env.ANTHROPIC_BASE_URL  // optional, default: https://api.anthropic.com
+);
+```
+
+### Enable Extended Thinking
+
+```typescript
+const provider = new AnthropicProvider(
+  process.env.ANTHROPIC_API_KEY!,
+  'claude-sonnet-4-5-20250929',
+  undefined,
+  undefined,
+  {
+    extraBody: {
+      thinking: {
+        type: 'enabled',
+        budget_tokens: 10000,  // minimum 1024
+      },
+    },
+  }
+);
+```
+
+### Enable Caching
+
+```typescript
+const provider = new AnthropicProvider(
+  process.env.ANTHROPIC_API_KEY!,
+  'claude-sonnet-4-5-20250929',
+  undefined,
+  undefined,
+  {
+    cache: {
+      breakpoints: 4,  // 1-4 cache breakpoints
+      defaultTtl: '1h', // '5m' or '1h'
+    },
+    beta: {
+      extendedCacheTtl: true,
+    },
+  }
+);
+```
+
+### Example Models
+
+The following are common model examples. Any model compatible with the Anthropic API is supported:
+
+| Model | Description |
+|-------|-------------|
+| `claude-sonnet-4-5-20250929` | Claude 4.5 Sonnet (recommended) |
+| `claude-opus-4-5-20251101` | Claude 4.5 Opus |
+| `claude-haiku-4-5-20251001` | Claude 4.5 Haiku (fast, low-cost) |
+
+---
+
+## OpenAIProvider
+
+For OpenAI and all OpenAI API-compatible services (DeepSeek, GLM, Qwen, Minimax, OpenRouter, etc.).
+
+### Basic Configuration
+
+```typescript
+import { OpenAIProvider } from '@shareai-lab/kode-sdk';
+
+// OpenAI official
+const provider = new OpenAIProvider(
+  process.env.OPENAI_API_KEY!,
+  'gpt-5-2025-08-07',  // any supported model ID
+  process.env.OPENAI_BASE_URL  // optional, default: https://api.openai.com/v1
+);
+```
+
+### Using DeepSeek
+
+```typescript
+const provider = new OpenAIProvider(
+  process.env.DEEPSEEK_API_KEY!,
+  'deepseek-chat',
+  'https://api.deepseek.com/v1'
+);
+
+// DeepSeek reasoning model
+const reasonerProvider = new OpenAIProvider(
+  process.env.DEEPSEEK_API_KEY!,
+  'deepseek-reasoner',
+  'https://api.deepseek.com/v1',
+  undefined,
+  {
+    reasoning: {
+      fieldName: 'reasoning_content',
+      stripFromHistory: true,
+    },
+  }
+);
+```
+
+### Using GLM (Zhipu)
+
+```typescript
+const provider = new OpenAIProvider(
+  process.env.GLM_API_KEY!,
+  'glm-4-plus',
+  'https://open.bigmodel.cn/api/paas/v4'
+);
+```
+
+### Using Qwen (Tongyi Qianwen)
+
+```typescript
+const provider = new OpenAIProvider(
+  process.env.QWEN_API_KEY!,
+  'qwen-plus',
+  'https://dashscope.aliyuncs.com/compatible-mode/v1'
+);
+```
+
+### Using Minimax
+
+```typescript
+const provider = new OpenAIProvider(
+  process.env.MINIMAX_API_KEY!,
+  'abab6.5s-chat',
+  'https://api.minimax.chat/v1'
+);
+```
+
+### Using OpenRouter
+
+```typescript
+const provider = new OpenAIProvider(
+  process.env.OPENROUTER_API_KEY!,
+  'anthropic/claude-sonnet-4.5',  // OpenRouter model format
+  'https://openrouter.ai/api/v1'
+);
+```
+
+### Enable Reasoning (o4 models)
+
+```typescript
+const provider = new OpenAIProvider(
+  process.env.OPENAI_API_KEY!,
+  'o4-mini',
+  undefined,
+  undefined,
+  {
+    api: 'responses',
+    responses: {
+      reasoning: {
+        effort: 'medium',  // 'none' | 'minimal' | 'low' | 'medium' | 'high' | 'xhigh'
+      },
+    },
+  }
+);
+```
+
+### Example Models
+
+The following are common model examples. Any model compatible with the OpenAI API is supported:
+
+| Service | Example Models |
+|---------|----------------|
+| OpenAI | `gpt-5.2-pro-2025-12-11`, `gpt-5-2025-08-07`, `o4-mini-2025-04-16` |
+| DeepSeek | `deepseek-chat`, `deepseek-reasoner` |
+| GLM | `glm-4-plus`, `glm-4-flash` |
+| Qwen | `qwen-plus`, `qwen-turbo` |
+| OpenRouter | `anthropic/claude-sonnet-4.5`, `openai/gpt-5` |
+
+---
+
+## GeminiProvider
+
+For Google Gemini models.
+
+### Basic Configuration
+
+```typescript
+import { GeminiProvider } from '@shareai-lab/kode-sdk';
+
+const provider = new GeminiProvider(
+  process.env.GOOGLE_API_KEY!,
+  'gemini-3-flash'  // any supported model ID
+);
+```
+
+### Enable Thinking
+
+```typescript
+const provider = new GeminiProvider(
+  process.env.GOOGLE_API_KEY!,
+  'gemini-2.5-pro',
+  undefined,
+  undefined,
+  {
+    thinking: {
+      level: 'medium',  // 'minimal' | 'low' | 'medium' | 'high'
+      includeThoughts: true,
+    },
+  }
+);
+```
+
+### Example Models
+
+The following are common model examples. Any model compatible with the Gemini API is supported:
+
+| Model | Description |
+|-------|-------------|
+| `gemini-3-flash` | Gemini 3 Flash (latest, recommended) |
+| `gemini-2.5-pro` | Gemini 2.5 Pro (stable, supports thinking) |
+| `gemini-2.5-flash` | Gemini 2.5 Flash (stable) |
+
+---
+
+## Using with Agent
+
+### Provider Factory Pattern
+
+```typescript
+import { Agent, AnthropicProvider } from '@shareai-lab/kode-sdk';
+
+const agent = await Agent.create(
+  {
+    templateId: 'default',
+    sandbox: { kind: 'local', workDir: './workspace' },
+  },
+  {
+    store,
+    templateRegistry,
+    toolRegistry,
+    sandboxFactory,
+    // Simple factory - ignores config, uses env vars
+    modelFactory: () => new AnthropicProvider(
+      process.env.ANTHROPIC_API_KEY!,
+      process.env.ANTHROPIC_MODEL_ID ?? 'claude-sonnet-4-5-20250929'
+    ),
+  }
+);
+```
+
+### Using ModelConfig from Template
+
+The `modelFactory` receives a `ModelConfig` object that may include the model ID from the template:
+
+```typescript
+// Template with model specification
+templates.register({
+  id: 'gpt-assistant',
+  systemPrompt: 'You are a helpful assistant.',
+  model: 'gpt-4o',  // This is passed to modelFactory
+});
+
+// Factory that uses the config
+modelFactory: (config: ModelConfig) => {
+  const modelId = config.model ?? 'gpt-4o';
+  return new OpenAIProvider(
+    process.env.OPENAI_API_KEY!,
+    modelId
+  );
+}
+```
+
+### Multi-Provider Factory
+
+For applications supporting multiple providers, create a factory that selects based on config:
+
+```typescript
+function createModelFactory(): (config: ModelConfig) => ModelProvider {
+  return (config: ModelConfig) => {
+    // Use config.provider or infer from model name
+    const provider = config.provider ?? inferProvider(config.model);
+
+    switch (provider) {
+      case 'anthropic':
+        return new AnthropicProvider(
+          config.apiKey ?? process.env.ANTHROPIC_API_KEY!,
+          config.model ?? 'claude-sonnet-4-5-20250929',
+          config.baseUrl,
+          config.proxyUrl
+        );
+      case 'openai':
+        return new OpenAIProvider(
+          config.apiKey ?? process.env.OPENAI_API_KEY!,
+          config.model ?? 'gpt-4o',
+          config.baseUrl,
+          config.proxyUrl
+        );
+      case 'gemini':
+        return new GeminiProvider(
+          config.apiKey ?? process.env.GOOGLE_API_KEY!,
+          config.model ?? 'gemini-3-flash'
+        );
+      default:
+        throw new Error(`Unknown provider: ${provider}`);
+    }
+  };
+}
+
+function inferProvider(model?: string): string {
+  if (!model) return 'anthropic';
+  if (model.startsWith('claude')) return 'anthropic';
+  if (model.startsWith('gpt')) return 'openai';
+  if (model.startsWith('gemini')) return 'gemini';
+  return 'anthropic';
+}
+```
+
+---
+
+## Proxy Configuration
+
+All Providers support proxy configuration:
+
+```typescript
+const provider = new AnthropicProvider(
+  process.env.ANTHROPIC_API_KEY!,
+  'claude-sonnet-4-5-20250929',
+  undefined,  // baseUrl
+  process.env.HTTPS_PROXY  // proxyUrl
+);
+```
+
+---
+
+## Error Handling
+
+```typescript
+try {
+  await agent.send('Hello');
+} catch (error) {
+  if (error.message.includes('rate limit')) {
+    // Rate limited, retry after delay
+  } else if (error.message.includes('authentication')) {
+    // Invalid API key
+  }
+}
+```
+
+---
+
+## Best Practices
+
+1. **Use environment variables** for API keys and baseURL
+2. **Set reasonable timeouts** based on expected response times
+3. **Enable caching** for repeated prompts (Anthropic, Gemini)
+4. **Handle rate limits** with exponential backoff
+
+---
+
+## References
+
+- [Anthropic API Documentation](https://docs.anthropic.com/)
+- [OpenAI API Documentation](https://platform.openai.com/docs/)
+- [Google AI Documentation](https://ai.google.dev/docs)
+- [DeepSeek API Documentation](https://platform.deepseek.com/docs)
+- [OpenRouter Documentation](https://openrouter.ai/docs)
diff --git a/docs/en/guides/resume-fork.md b/docs/en/guides/resume-fork.md
new file mode 100644
index 0000000..8f454fb
--- /dev/null
+++ b/docs/en/guides/resume-fork.md
@@ -0,0 +1,239 @@
+# Resume / Fork Guide
+
+Long-running Agents must have the ability to "resume anytime, fork, and audit". KODE SDK implements a unified persistence protocol at the kernel level (messages, tool calls, Todo, events, breakpoints, lineage).
+
+---
+
+## Key Concepts
+
+| Concept | Description |
+|---------|-------------|
+| **Metadata** | Serializes template, tool descriptors, permissions, Todo, sandbox config, breakpoints, lineage |
+| **Safe-Fork-Point (SFP)** | Every user message or tool result creates a recoverable node for snapshot/fork |
+| **BreakpointState** | Marks current execution phase (`READY` → `PRE_MODEL` → ... → `POST_TOOL`) |
+| **Auto-Seal** | When a crash occurs during tool execution, Resume auto-seals the call with a compensating `tool_result` |
+
+---
+
+## Resume Methods
+
+### Method 1: Explicit Configuration
+
+```typescript
+import { Agent } from '@shareai-lab/kode-sdk';
+
+const agent = await Agent.resume('agt-demo', {
+  templateId: 'repo-assistant',
+  modelConfig: {
+    provider: 'anthropic',
+    model: process.env.ANTHROPIC_MODEL_ID ?? 'claude-sonnet-4-20250514',
+    apiKey: process.env.ANTHROPIC_API_KEY!,
+  },
+  sandbox: { kind: 'local', workDir: './workspace', enforceBoundary: true },
+}, deps, {
+  strategy: 'crash',  // Auto-seal incomplete tools
+  autoRun: true,      // Continue processing queue after resume
+});
+```
+
+### Method 2: Resume from Store (Recommended)
+
+```typescript
+const agent = await Agent.resumeFromStore('agt-demo', deps, {
+  overrides: {
+    modelConfig: {
+      provider: 'anthropic',
+      model: process.env.ANTHROPIC_MODEL_ID ?? 'claude-sonnet-4-20250514',
+      apiKey: process.env.ANTHROPIC_API_KEY!,
+    },
+  },
+});
+```
+
+### Resume Options
+
+| Option | Values | Description |
+|--------|--------|-------------|
+| `strategy` | `'manual'` \| `'crash'` | `crash` auto-seals incomplete tools |
+| `autoRun` | `boolean` | Continue processing message queue after resume |
+| `overrides` | `Partial<AgentConfig>` | Override metadata (model upgrade, permission changes, etc.) |
+
+> **Important**: You **must** re-bind event listeners after Resume (Control/Monitor callbacks are not auto-restored).
+
+---
+
+## SDK vs Application Responsibilities
+
+| Capability | SDK | Application |
+|------------|-----|-------------|
+| Template, tools, sandbox restore | Auto-rebuild | Not needed |
+| Messages, tool records, Todo, Lineage | Auto-load | Not needed |
+| FilePool watching | Auto-restore | Not needed |
+| Hooks | Auto-register | Not needed |
+| Control/Monitor listeners | Not handled | Must re-bind after Resume |
+| Approval flows, alerts | Not handled | Integrate with business systems |
+| Dependency singleton management | Not handled | Ensure `store`/`registry` global reuse |
+
+---
+
+## Snapshot and Fork
+
+### Creating Snapshots
+
+```typescript
+// Create snapshot at current point
+const bookmarkId = await agent.snapshot('pre-release-audit');
+```
+
+### Forking an Agent
+
+```typescript
+// Fork from a snapshot
+const forked = await agent.fork(bookmarkId);
+
+// Fork from latest point
+const forked2 = await agent.fork();
+
+// Use forked Agent
+await forked.send('This is a new task forked from the original conversation.');
+```
+
+- `snapshot(label?)` returns `SnapshotId` (default: `sfp-{index}`)
+- `fork(sel?)` creates new Agent: inherits tools/permissions/lineage, copies messages to new Store namespace
+- Forked Agent needs independent event binding
+
+---
+
+## Auto-Seal Mechanism
+
+When a crash occurs during one of these phases, Resume automatically writes a compensating `tool_result`:
+
+| Phase | Seal Info | Recommended Action |
+|-------|-----------|-------------------|
+| `PENDING` | Tool not executed | Validate params and retry |
+| `APPROVAL_REQUIRED` | Waiting for approval | Re-trigger approval or manually complete |
+| `APPROVED` | Ready to execute | Confirm input still valid and retry |
+| `EXECUTING` | Execution interrupted | Check side effects, manual confirm if needed |
+
+Auto-sealing emits the following events:
+
+- `monitor.agent_resumed`: Contains `sealed` list and `strategy`
+- `progress.tool:end`: Adds failed `tool_result` with `recommendations`
+
+---
+
+## Re-binding Events After Resume
+
+```typescript
+const agent = await Agent.resumeFromStore('agt-demo', deps);
+
+// Re-bind Control/Monitor event listeners
+agent.on('tool_executed', (event) => {
+  console.log('Tool executed:', event.call.name);
+});
+
+agent.on('error', (event) => {
+  console.error('Error:', event.message);
+});
+
+agent.on('permission_required', async (event) => {
+  await event.respond('allow');
+});
+
+// For Progress events, use subscribe()
+const progressSubscription = (async () => {
+  for await (const envelope of agent.subscribe(['progress'])) {
+    if (envelope.event.type === 'text_chunk') {
+      process.stdout.write(envelope.event.delta);
+    }
+    if (envelope.event.type === 'done') break;
+  }
+})();
+
+// Continue processing
+await agent.run();
+await progressSubscription;
+```
+
+---
+
+## Multi-Instance / Serverless Best Practices
+
+1. **Singleton Dependencies**: Create `AgentDependencies` at module level so that multiple instances do not write to the same Store directory
+
+2. **Event Re-binding**: Call event binding immediately after every `resume`
+
+3. **Concurrency Control**: A given AgentId should run in only one instance at a time; use external locks or queues to enforce this
+
+4. **Persistence Directory**: `JSONStore` works for single-machine or shared disk environments. For distributed deployments, implement custom Store (e.g., S3 + DynamoDB)
+
+5. **Observability**: Listen to `monitor.state_changed` and `monitor.error` for quick issue identification
+
+---
+
+## Troubleshooting
+
+| Symptom | Investigation |
+|---------|--------------|
+| `AGENT_NOT_FOUND` on Resume | Store directory missing or not persisted. Check `store.baseDir` mount |
+| `TEMPLATE_NOT_FOUND` on Resume | Template not registered at startup; ensure template ID matches metadata |
+| Missing tools | ToolRegistry not registered; built-in tools need manual registration |
+| FilePool not restored | Custom Sandbox not implementing `watchFiles`; disable watch or complete implementation |
+| Event listeners not working | Not calling `agent.on(...)` after Resume |
+
+---
+
+## Complete Resume Example
+
+```typescript
+import { Agent, createExtendedStore } from '@shareai-lab/kode-sdk';
+
+async function resumeAgent(agentId: string) {
+  const store = await createExtendedStore();
+  const deps = createDependencies({ store });
+
+  // Check if Agent exists
+  const exists = await store.exists(agentId);
+  if (!exists) {
+    throw new Error(`Agent ${agentId} not found`);
+  }
+
+  // Resume from store
+  const agent = await Agent.resumeFromStore(agentId, deps, {
+    strategy: 'crash',
+    autoRun: false,
+  });
+
+  // Re-bind Monitor event listeners (on() only supports Control/Monitor events)
+  agent.on('tool_executed', (e) => console.log('Tool:', e.call.name));
+  agent.on('agent_resumed', (e) => {
+    if (e.sealed.length > 0) {
+      console.log('Auto-sealed tools:', e.sealed);
+    }
+  });
+  agent.on('error', (e) => console.error('Error:', e.message));
+
+  // For Progress events, use subscribe()
+  const progressTask = (async () => {
+    for await (const env of agent.subscribe(['progress'])) {
+      if (env.event.type === 'text_chunk') {
+        process.stdout.write(env.event.delta);
+      }
+      if (env.event.type === 'done') break;
+    }
+  })();
+
+  // Continue processing, then wait for the progress stream to finish
+  await agent.run();
+  await progressTask;
+  return agent;
+}
+```
+
+---
+
+## References
+
+- [Events Guide](./events.md)
+- [Error Handling Guide](./error-handling.md)
+- [Database Guide](./database.md)
diff --git a/docs/en/guides/skills.md b/docs/en/guides/skills.md
new file mode 100644
index 0000000..fcd59c0
--- /dev/null
+++ b/docs/en/guides/skills.md
@@ -0,0 +1,329 @@
+# Skills System Guide
+
+KODE SDK provides a complete Skills system supporting modular, reusable capability units that allow Agents to dynamically load and execute specific skills.
+
+---
+
+## Core Features
+
+| Feature | Description |
+|---------|-------------|
+| **Hot Reload** | Skills auto-reload when code changes |
+| **Metadata Injection** | Auto-inject skill descriptions into system prompt |
+| **Sandbox Isolation** | Each skill has independent file system space |
+| **Whitelist Filter** | Selectively load specific skills |
+
+---
+
+## Directory Structure
+
+```
+skills/
+├── skill-name/              # Skill directory
+│   ├── SKILL.md            # Skill definition (required)
+│   ├── metadata.json       # Skill metadata (optional)
+│   ├── references/         # Reference documents
+│   ├── scripts/            # Executable scripts
+│   └── assets/             # Static resources
+└── .archived/              # Archived skills
+    └── archived-skill/
+```
+
+### SKILL.md Format
+
+```markdown
+<!-- skill: skill-name -->
+<!-- version: 1.0.0 -->
+<!-- author: Your Name -->
+
+# Skill Name
+
+Brief description of the skill's functionality.
+
+## Use Cases
+
+- Case 1
+- Case 2
+
+## Usage Guide
+
+Detailed instructions for using this skill...
+```
+
+### metadata.json Format
+
+```json
+{
+  "name": "skill-name",
+  "description": "Skill description",
+  "version": "1.0.0",
+  "author": "Author",
+  "baseDir": "/path/to/skill"
+}
+```
+
+---
+
+## Environment Variables
+
+<!-- tabs:start -->
+#### **Linux / macOS**
+```bash
+export SKILLS_DIR=/path/to/skills
+```
+
+#### **Windows (PowerShell)**
+```powershell
+$env:SKILLS_DIR="/path/to/skills"
+```
+
+#### **Windows (CMD)**
+```cmd
+set SKILLS_DIR=/path/to/skills
+```
+<!-- tabs:end -->
+
+---
+
+## SkillsManager (Agent Runtime)
+
+SkillsManager is used at Agent runtime for hot updates and dynamic loading.
+
+### Basic Usage
+
+```typescript
+import { SkillsManager } from '@shareai-lab/kode-sdk';
+
+// Create Skills manager
+const skillsManager = new SkillsManager(
+  './skills',           // Skills directory path
+  ['skill1', 'skill2']  // Optional: whitelist
+);
+
+// Scan all skills
+const skills = await skillsManager.getSkillsMetadata();
+console.log(`Found ${skills.length} skills`);
+
+// Load specific skill content
+const skillContent = await skillsManager.loadSkillContent('skill-name');
+if (skillContent) {
+  console.log('Metadata:', skillContent.metadata);
+  console.log('Content:', skillContent.content);
+  console.log('References:', skillContent.references);
+  console.log('Scripts:', skillContent.scripts);
+}
+```
+
+### Hot Reload
+
+SkillsManager rescans the file system on each call to ensure fresh data:
+
+```typescript
+await skillsManager.getSkillsMetadata();  // Scan 1
+// ... modify files ...
+await skillsManager.getSkillsMetadata();  // Scan 2, gets latest data
+```
+
+### Whitelist Filtering
+
+Limit Agent to only load specific skills:
+
+```typescript
+// Only load whitelisted skills
+const manager = new SkillsManager('./skills', ['allowed-skill-1', 'allowed-skill-2']);
+const skills = await manager.getSkillsMetadata();
+// Returns only whitelisted skills
+```
+
+---
+
+## SkillsManagementManager (CRUD Operations)
+
+SkillsManagementManager provides skill CRUD operations including create, update, and archive.
+
+### Basic Operations
+
+```typescript
+import { SkillsManagementManager } from '@shareai-lab/kode-sdk';
+
+const manager = new SkillsManagementManager('./skills');
+
+// List all online skills
+const skills = await manager.listSkills();
+
+// Get skill details
+const skillDetail = await manager.getSkillInfo('skill-name');
+
+// Create new skill
+await manager.createSkill('new-skill', {
+  description: 'New skill description',
+  content: '# New Skill\n\nDetailed content...'
+});
+
+// Update skill
+await manager.updateSkill('skill-name', {
+  content: '# Updated content'
+});
+
+// Delete skill (move to archive)
+await manager.deleteSkill('skill-name');
+
+// List archived skills
+const archived = await manager.listArchivedSkills();
+
+// Restore archived skill
+await manager.restoreSkill('archived-skill');
+```
+
+### File Operations
+
+```typescript
+// Get skill file tree
+const files = await manager.getSkillFileTree('skill-name');
+
+// Read skill file
+const content = await manager.readSkillFile('skill-name', 'SKILL.md');
+
+// Write skill file
+await manager.writeSkillFile('skill-name', 'references/doc.md', 'content');
+
+// Delete skill file
+await manager.deleteSkillFile('skill-name', 'references/old-doc.md');
+
+// Upload file to skill directory
+await manager.uploadSkillFile('skill-name', 'assets/image.png', fileBuffer);
+```
+
+---
+
+## Agent Integration
+
+### Register Skills Tool
+
+```typescript
+import { Agent, createSkillsTool, SkillsManager } from '@shareai-lab/kode-sdk';
+
+const deps = createDependencies();
+
+// Create Skills manager
+const skillsManager = new SkillsManager('./skills');
+
+// Register Skills tool
+const skillsTool = createSkillsTool(skillsManager);
+deps.toolRegistry.register('skills', () => skillsTool);
+
+// Create Agent
+const agent = await Agent.create({
+  templateId: 'my-agent',
+  tools: ['skills', 'fs_read', 'fs_write'],
+}, deps);
+```
+
+### Skills Tool Usage
+
+Agent can dynamically load skills via the `skills` tool:
+
+```
+User: I need to format code
+
+Agent: Let me load the code formatting skill.
+
+[Calls skills tool, action=load, skill_name=code-formatter]
+
+Agent: Code formatting skill loaded. Now I can help you format code.
+```
+
+---
+
+## Best Practices
+
+### 1. Skill Design Principles
+
+- **Single Responsibility**: Each skill does one thing
+- **Composable**: Skills can call each other
+- **Well Documented**: Provide clear usage instructions
+- **Version Control**: Use semantic versioning
+
+### 2. Whitelist Management
+
+```typescript
+// Production: use whitelist
+const allowedSkills = ['safe-skill-1', 'safe-skill-2'];
+const manager = new SkillsManager('./skills', allowedSkills);
+
+// Development: load all skills
+const devManager = new SkillsManager('./skills');
+```
+
+### 3. Error Handling
+
+```typescript
+const content = await skillsManager.loadSkillContent('skill-name');
+if (!content) {
+  console.error('Skill not found or failed to load');
+  // Fallback handling
+}
+```
+
+---
+
+## Monitoring
+
+### Monitor Events
+
+```typescript
+// Listen to skill tool calls
+agent.on('tool_executed', (event) => {
+  if (event.call.name === 'skills') {
+    console.log('Skill loaded:', event.call.input.skill_name);
+  }
+});
+
+// Listen to tool manual updates
+agent.on('tool_manual_updated', (event) => {
+  console.log('Tools manual updated:', event.tools);
+});
+```
+
+---
+
+## Troubleshooting
+
+### Common Issues
+
+**Skill not found**
+- Check skills directory path
+- Confirm SKILL.md file exists
+- Check whitelist configuration
+
+**Hot reload not working**
+- Confirm file saved successfully
+- Check file system permissions
+- Review logs for scan timing
+
+**Sandbox permission error**
+- Check sandbox work directory configuration
+- Confirm file path is within allowed range
+- Check sandbox logs
+
+### Debug Tips
+
+```typescript
+// Enable verbose logging
+process.env.LOG_LEVEL = 'debug';
+
+// Check skill metadata
+console.log(JSON.stringify(skills, null, 2));
+
+// Verify skills directory
+const fs = require('fs');
+console.log(fs.readdirSync('./skills'));
+```
+
+---
+
+## References
+
+- [Tools Guide](./tools.md)
+- [Events Guide](./events.md)
+- [API Reference](../reference/api.md)
diff --git a/docs/en/guides/thinking.md b/docs/en/guides/thinking.md
new file mode 100644
index 0000000..c7a17a3
--- /dev/null
+++ b/docs/en/guides/thinking.md
@@ -0,0 +1,463 @@
+# Extended Thinking Guide
+
+KODE SDK supports extended thinking (also known as reasoning or chain-of-thought) features from various LLM providers. This guide covers how to enable, configure, and use thinking capabilities including interleaved thinking.
+
+---
+
+## Overview
+
+Extended thinking allows models to "think" through complex problems step-by-step before providing a final answer. Different providers implement this differently:
+
+| Provider | Feature Name | Implementation |
+|----------|--------------|----------------|
+| Anthropic | Extended Thinking | `thinking` blocks with budget tokens |
+| OpenAI | Reasoning | `reasoning_effort` parameter |
+| Gemini | Thinking | `thinkingLevel` parameter |
+| DeepSeek | Deep Think | `reasoning_content` field |
+| GLM | Thinking | `reasoning_content` field |
+| Minimax | Reasoning | `reasoning_details` field |
+
+---
+
+## Agent Configuration
+
+### Enable Thinking Exposure
+
+Configure thinking exposure when creating an Agent:
+
+```typescript
+const agent = await Agent.create({
+  templateId: 'reasoning-assistant',
+  // Expose thinking events to Progress channel
+  exposeThinking: true,
+  // Retain thinking blocks in message history
+  retainThinking: true,
+}, deps);
+```
+
+| Option | Type | Default | Description |
+|--------|------|---------|-------------|
+| `exposeThinking` | `boolean` | `false` | Emit `think_chunk_start`, `think_chunk`, `think_chunk_end` events |
+| `retainThinking` | `boolean` | `false` | Persist reasoning blocks in message history |
+
+---
+
+## Provider Configuration
+
+### Anthropic Extended Thinking
+
+```typescript
+const provider = new AnthropicProvider(
+  process.env.ANTHROPIC_API_KEY!,
+  'claude-sonnet-4-20250514',
+  undefined,
+  undefined,
+  {
+    // Enable extended thinking
+    extraBody: {
+      thinking: {
+        type: 'enabled',
+        budget_tokens: 10000,  // Minimum 1024
+      },
+    },
+    // How to transport reasoning in history
+    reasoningTransport: 'provider',  // 'provider' | 'text' | 'omit'
+    // Enable interleaved thinking beta
+    beta: {
+      interleavedThinking: true,  // interleaved-thinking-2025-05-14
+    },
+  }
+);
+```
+
+### OpenAI Reasoning
+
+```typescript
+const provider = new OpenAIProvider(
+  process.env.OPENAI_API_KEY!,
+  'o3-mini',
+  undefined,
+  undefined,
+  {
+    api: 'responses',  // Responses API required for reasoning
+    responses: {
+      reasoning: {
+        effort: 'medium',  // 'none' | 'minimal' | 'low' | 'medium' | 'high' | 'xhigh'
+      },
+    },
+    reasoningTransport: 'text',
+  }
+);
+```
+
+### Gemini Thinking
+
+```typescript
+const provider = new GeminiProvider(
+  process.env.GOOGLE_API_KEY!,
+  'gemini-2.5-pro',
+  undefined,
+  undefined,
+  {
+    thinking: {
+      level: 'medium',  // 'minimal' | 'low' | 'medium' | 'high'
+      includeThoughts: true,
+    },
+    reasoningTransport: 'text',
+  }
+);
+```
+
+### DeepSeek / GLM / Qwen
+
+These providers use an OpenAI-compatible API with custom reasoning fields:
+
+```typescript
+// DeepSeek
+const provider = new OpenAIProvider(
+  process.env.DEEPSEEK_API_KEY!,
+  'deepseek-reasoner',
+  'https://api.deepseek.com/v1',
+  undefined,
+  {
+    reasoning: {
+      fieldName: 'reasoning_content',
+      stripFromHistory: true,  // Required for DeepSeek
+    },
+    reasoningTransport: 'text',
+  }
+);
+
+// GLM
+const glmProvider = new OpenAIProvider(
+  process.env.GLM_API_KEY!,
+  'glm-zero-preview',
+  process.env.GLM_BASE_URL!,
+  undefined,
+  {
+    reasoning: {
+      fieldName: 'reasoning_content',
+      requestParams: {
+        thinking: { type: 'enabled', clear_thinking: false },
+      },
+    },
+    reasoningTransport: 'provider',
+  }
+);
+```
+
+---
+
+## Reasoning Transport
+
+The `reasoningTransport` option controls how thinking content is handled in message history:
+
+| Value | Behavior | Use Case |
+|-------|----------|----------|
+| `'provider'` | Keep as native `reasoning` blocks | Full thinking preservation, multi-turn continuity |
+| `'text'` | Wrap in `<think></think>` tags | Cross-provider compatibility |
+| `'omit'` | Remove from history | Save tokens, privacy |
+
+```typescript
+// Provider native format
+const nativeConfig = {
+  reasoningTransport: 'provider',  // { type: 'reasoning', reasoning: '...' }
+};
+
+// Text format
+const textConfig = {
+  reasoningTransport: 'text',  // { type: 'text', text: '<think>...</think>' }
+};
+
+// Omit from history
+const omitConfig = {
+  reasoningTransport: 'omit',  // Thinking blocks removed
+};
+```
+
+---
+
+## Interleaved Thinking
+
+Interleaved thinking allows the model to think between tool calls, enabling more sophisticated reasoning:
+
+```
+User: Search for X, then summarize
+Model: <thinking> Let me search for X first... </thinking>
+Model: [tool_use: search_tool]
+[tool_result]
+Model: <thinking> Got results, now I should summarize... </thinking>
+Model: [tool_use: summarize_tool]
+[tool_result]
+Model: <thinking> Combining everything... </thinking>
+Model: Here's the summary...
+```
+
+### Enable Interleaved Thinking
+
+```typescript
+// Anthropic with interleaved thinking
+const provider = new AnthropicProvider(
+  process.env.ANTHROPIC_API_KEY!,
+  'claude-sonnet-4-20250514',
+  undefined,
+  undefined,
+  {
+    extraBody: {
+      thinking: { type: 'enabled', budget_tokens: 10000 },
+    },
+    beta: {
+      interleavedThinking: true,
+    },
+    reasoningTransport: 'provider',
+  }
+);
+
+const agent = await Agent.create({
+  templateId: 'reasoning-agent',
+  exposeThinking: true,
+  retainThinking: true,
+}, deps);
+```
+
+---
+
+## Thinking Events
+
+When `exposeThinking: true`, thinking events are emitted to the Progress channel:
+
+```typescript
+for await (const envelope of agent.subscribe(['progress'])) {
+  switch (envelope.event.type) {
+    case 'think_chunk_start':
+      // Thinking block started
+      console.log('[Thinking...]');
+      break;
+
+    case 'think_chunk':
+      // Thinking content delta
+      process.stdout.write(envelope.event.delta);
+      break;
+
+    case 'think_chunk_end':
+      // Thinking block ended
+      console.log('[/Thinking]');
+      break;
+
+    case 'tool:start':
+      console.log(`[Tool: ${envelope.event.call.name}]`);
+      break;
+
+    case 'text_chunk':
+      process.stdout.write(envelope.event.delta);
+      break;
+
+    case 'done':
+      break; // leaves only the switch; use a labeled break or return to stop listening
+  }
+}
+```
+
+### Event Sequence
+
+Typical interleaved thinking sequence:
+
+```
+think_chunk_start -> think_chunk (x N) -> think_chunk_end
+  -> tool:start -> tool:end
+think_chunk_start -> think_chunk (x N) -> think_chunk_end
+  -> tool:start -> tool:end
+think_chunk_start -> think_chunk (x N) -> think_chunk_end
+  -> text_chunk_start -> text_chunk (x N) -> text_chunk_end
+  -> done
+```
+
+---
+
+## ThinkingOptions
+
+Configure thinking via `CompletionOptions.thinking`:
+
+```typescript
+interface ThinkingOptions {
+  enabled?: boolean;          // Enable thinking mode
+  budgetTokens?: number;      // Token budget (Anthropic, Gemini 2.5)
+  effort?: 'none' | 'minimal' | 'low' | 'medium' | 'high' | 'xhigh';  // OpenAI
+  level?: 'minimal' | 'low' | 'medium' | 'high';  // Gemini 3.x
+}
+```
+
+---
+
+## Best Practices
+
+### 1. Choose Appropriate Budget
+
+A higher budget allows more thorough thinking, but responses are slower and more expensive:
+
+```typescript
+// Quick tasks: lower budget
+const quickThinking = { type: 'enabled', budget_tokens: 2000 };
+
+// Complex reasoning: higher budget
+const deepThinking = { type: 'enabled', budget_tokens: 16000 };
+```
+
+### 2. Use `retainThinking` for Multi-Turn Reasoning
+
+For conversations requiring continuity of reasoning:
+
+```typescript
+const agent = await Agent.create({
+  templateId: 'analyst',
+  exposeThinking: true,
+  retainThinking: true,  // Keep reasoning for context
+}, deps);
+```
+
+### 3. Strip Thinking for Token Savings
+
+If thinking is only needed within a single turn and does not need to be kept in history:
+
+```typescript
+const provider = new AnthropicProvider(apiKey, model, undefined, undefined, {
+  reasoningTransport: 'omit',  // Don't persist thinking
+  extraBody: {
+    thinking: { type: 'enabled', budget_tokens: 5000 },
+  },
+});
+
+const agent = await Agent.create({
+  templateId: 'solver',
+  exposeThinking: true,   // Show thinking to user
+  retainThinking: false,  // Don't persist
+}, deps);
+```
+
+### 4. Prompt for Interleaved Thinking
+
+Encourage the model to think between steps:
+
+```typescript
+const prompt = `
+I need to analyze this data. Please:
+1. First, use the fetch_data tool to get the data
+2. Think about what patterns you see
+3. Use the analyze_tool to run analysis
+4. Think about the implications
+5. Provide your conclusions
+
+Think carefully between each step.
+`;
+
+await agent.send(prompt);
+```
+
+---
+
+## Complete Example
+
+```typescript
+import {
+  Agent,
+  AnthropicProvider,
+  JSONStore,
+  defineTool,
+} from '@shareai-lab/kode-sdk';
+
+// Define tools
+const searchTool = defineTool({
+  name: 'search',
+  description: 'Search for information',
+  params: {
+    query: { type: 'string', description: 'Search query' }
+  },
+  async exec(args) {
+    return { results: `Results for: ${args.query}` };
+  }
+});
+
+async function reasoningAgent() {
+  // Configure provider with extended thinking
+  const provider = new AnthropicProvider(
+    process.env.ANTHROPIC_API_KEY!,
+    'claude-sonnet-4-20250514',
+    undefined,
+    undefined,
+    {
+      extraBody: {
+        thinking: { type: 'enabled', budget_tokens: 10000 },
+      },
+      beta: {
+        interleavedThinking: true,
+      },
+      reasoningTransport: 'provider',
+    }
+  );
+
+  const store = new JSONStore('./.kode');
+
+  // Create agent with thinking enabled
+  const agent = await Agent.create({
+    templateId: 'reasoning-assistant',
+    exposeThinking: true,
+    retainThinking: true,
+  }, {
+    store,
+    templateRegistry,
+    toolRegistry,
+    sandboxFactory,
+    modelFactory: () => provider,
+  });
+
+  // Listen for progress events
+  const progressTask = (async () => {
+    for await (const envelope of agent.subscribe(['progress'])) {
+      const event = envelope.event;
+
+      if (event.type === 'think_chunk_start') {
+        process.stdout.write('\n[Thinking] ');
+      } else if (event.type === 'think_chunk') {
+        process.stdout.write(event.delta);
+      } else if (event.type === 'think_chunk_end') {
+        process.stdout.write(' [/Thinking]\n');
+      } else if (event.type === 'tool:start') {
+        console.log(`\n[Tool: ${event.call.name}]`);
+      } else if (event.type === 'text_chunk') {
+        process.stdout.write(event.delta);
+      } else if (event.type === 'done') {
+        break;
+      }
+    }
+  })();
+
+  // Send task requiring reasoning
+  await agent.send(`
+    Research "machine learning trends" using the search tool,
+    then provide a thoughtful analysis. Think step by step.
+  `);
+
+  await progressTask;
+}
+```
+
+---
+
+## Troubleshooting
+
+| Issue | Cause | Solution |
+|-------|-------|----------|
+| No thinking events | `exposeThinking: false` | Set `exposeThinking: true` |
+| Thinking not retained | `retainThinking: false` | Set `retainThinking: true` |
+| Thinking stripped from history | `reasoningTransport: 'omit'` | Use `'provider'` or `'text'` |
+| No interleaving with tools | Beta not enabled | Enable `beta.interleavedThinking` |
+| "Thinking signature invalid" error | Modified thinking blocks | Don't modify reasoning content |
+
+---
+
+## References
+
+- [Provider Guide](./providers.md) - Provider-specific thinking configuration
+- [Events Guide](./events.md) - Progress event handling
+- [Tools Guide](./tools.md) - Tool integration
+- [API Reference](../reference/api.md) - ThinkingOptions interface
diff --git a/docs/en/guides/tools.md b/docs/en/guides/tools.md
new file mode 100644
index 0000000..6fc5d40
--- /dev/null
+++ b/docs/en/guides/tools.md
@@ -0,0 +1,533 @@
+# Tool System Guide
+
+KODE SDK provides a comprehensive tool system with built-in tools, custom tool definition APIs, and MCP integration. All tools follow these conventions:
+
+- **Prompt Instructions**: Each tool includes detailed prompts guiding the model's safe usage
+- **Structured Returns**: Tools return JSON structures (e.g., `fs_read` returns `{ path, offset, limit, truncated, content }`)
+- **FilePool Integration**: File tools automatically validate and record through FilePool, preventing freshness conflicts
+- **Audit Trail**: ToolCallRecord captures approval, duration, and errors, fully restored on Resume
+
+---
+
+## Built-in Tools
+
+### File System Tools
+
+| Tool | Description | Returns |
+|------|-------------|---------|
+| `fs_read` | Read file segment | `{ path, offset, limit, truncated, content }` |
+| `fs_write` | Create/overwrite file with freshness validation | `{ ok, path, bytes, length }` |
+| `fs_edit` | Precise text replacement (supports `replace_all`) | `{ ok, path, replacements, length }` |
+| `fs_glob` | Match files using glob patterns | `{ ok, pattern, cwd, matches, truncated }` |
+| `fs_grep` | Search text/regex in files or wildcard sets | `{ ok, pattern, path, matches[] }` |
+| `fs_multi_edit` | Batch edit multiple files | `{ ok, results[{ path, status, replacements, message? }] }` |
+
+#### FilePool
+
+- `recordRead` / `recordEdit`: Track last read/write times for conflict detection
+- `validateWrite`: Verify file wasn't externally modified after Agent's last read
+- `watchFiles`: Auto-monitor file changes, triggers `monitor.file_changed` event
+
+### Bash Tools
+
+- `bash_run`: Execute commands (foreground/background), controllable via Hooks or `permission.mode='approval'`
+- `bash_logs`: Read background command output
+- `bash_kill`: Terminate background commands
+
+**Recommended Security Strategy:**
+
+```typescript
+const agent = await Agent.create({
+  templateId: 'secure-runner',
+  sandbox: { kind: 'local', workDir: './workspace', enforceBoundary: true },
+  overrides: {
+    hooks: {
+      preToolUse(call) {
+        if (call.name === 'bash_run' && !/^git /.test(call.args.cmd)) {
+          return { decision: 'ask', meta: { reason: 'Non-whitelisted command' } };
+        }
+        return undefined;
+      },
+    },
+  },
+}, deps);
+```
+
+### Todo Tools
+
+- `todo_read`: Return Todo list
+- `todo_write`: Write complete Todo list (validates unique IDs, max 1 in-progress). Integrates with `TodoManager` for auto-reminders and events.
+
+### Task (Sub-Agent)
+
+- `task_run`: Dispatch sub-Agents from template pool, supports `subagent_type`, `context`, `model_name` parameters
+- Templates can limit depth and available templates via `runtime.subagents`
+
+### Skills Tool
+
+- `skills`: Load specific skill content (instructions, references, scripts, assets)
+  - **Parameters**:
+    - `action`: Operation type (currently only `load`)
+    - `skill_name`: Skill name (required when action=load)
+  - **Returns**:
+    ```typescript
+    {
+      ok: true,
+      data: {
+        name: string,           // Skill name
+        description: string,    // Skill description
+        content: string,        // SKILL.md content
+        base_dir: string,       // Skill base directory
+        references: string[],   // Reference document list
+        scripts: string[],      // Available scripts
+        assets: string[]        // Asset files
+      }
+    }
+    ```
+
+See [skills.md](./skills.md) for complete Skills system documentation.
+
+---
+
+## Defining Custom Tools
+
+### Quick Start with `defineTool()` (Recommended)
+
+The simplified API (v2.7+) auto-generates JSON Schema from parameter definitions:
+
+```typescript
+import { defineTool } from '@shareai-lab/kode-sdk';
+
+const weatherTool = defineTool({
+  name: 'get_weather',
+  description: 'Get weather information',
+
+  // Concise parameter definition - auto-generates Schema
+  params: {
+    city: {
+      type: 'string',
+      description: 'City name'
+    },
+    units: {
+      type: 'string',
+      description: 'Temperature units',
+      enum: ['celsius', 'fahrenheit'],
+      required: false,
+      default: 'celsius'
+    }
+  },
+
+  // Simplified attributes
+  attributes: {
+    readonly: true,   // Read-only tool
+    noEffect: true    // No side effects, safe to retry
+  },
+
+  async exec(args, ctx) {
+    // Custom events
+    ctx.emit('weather_fetched', { city: args.city });
+    return { temperature: 22, condition: 'sunny' };
+  }
+});
+```
+
+### Batch Definition with `defineTools()`
+
+```typescript
+import { defineTools } from '@shareai-lab/kode-sdk';
+
+const calculatorTools = defineTools([
+  {
+    name: 'add',
+    description: 'Add two numbers',
+    params: {
+      a: { type: 'number' },
+      b: { type: 'number' }
+    },
+    attributes: { readonly: true, noEffect: true },
+    async exec(args, ctx) {
+      return args.a + args.b;
+    }
+  },
+  {
+    name: 'multiply',
+    description: 'Multiply two numbers',
+    params: {
+      a: { type: 'number' },
+      b: { type: 'number' }
+    },
+    attributes: { readonly: true, noEffect: true },
+    async exec(args, ctx) {
+      return args.a * args.b;
+    }
+  }
+]);
+```
+
+### Traditional ToolInstance Interface
+
+For fine-grained control, use the classic interface:
+
+```typescript
+const registry = new ToolRegistry();
+
+registry.register('greet', () => ({
+  name: 'greet',
+  description: 'Greet a person by name',
+  input_schema: {
+    type: 'object',
+    properties: { name: { type: 'string' } },
+    required: ['name']
+  },
+  prompt: 'Use this tool to greet teammates by name.',
+  async exec(args) {
+    return `Hello, ${args.name}!`;
+  },
+  toDescriptor() {
+    return { source: 'registered', name: 'greet', registryId: 'greet' };
+  },
+}));
+```
+
+---
+
+## Parameter Definition
+
+### Basic Types
+
+```typescript
+params: {
+  str: { type: 'string', description: 'A string' },
+  num: { type: 'number', description: 'A number' },
+  bool: { type: 'boolean', description: 'A boolean' },
+
+  // Optional parameter
+  optional: { type: 'string', required: false },
+
+  // Default value
+  withDefault: { type: 'number', default: 42 },
+
+  // Enum
+  choice: {
+    type: 'string',
+    enum: ['option1', 'option2', 'option3']
+  }
+}
+```
+
+### Complex Types
+
+```typescript
+params: {
+  // Array
+  tags: {
+    type: 'array',
+    description: 'List of tags',
+    items: { type: 'string' }
+  },
+
+  // Nested object
+  profile: {
+    type: 'object',
+    description: 'User profile',
+    properties: {
+      email: { type: 'string' },
+      age: { type: 'number', required: false },
+      roles: {
+        type: 'array',
+        items: { type: 'string' }
+      }
+    }
+  }
+}
+```
+
+### Direct JSON Schema (Advanced)
+
+For constraints like `pattern`, `minLength`, use `input_schema` directly:
+
+```typescript
+defineTool({
+  name: 'advanced_tool',
+  description: 'Advanced tool',
+  input_schema: {
+    type: 'object',
+    properties: {
+      data: {
+        type: 'string',
+        pattern: '^[A-Z]{3}$',
+        minLength: 3,
+        maxLength: 3
+      }
+    },
+    required: ['data']
+  },
+  async exec(args, ctx) {
+    // ...
+  }
+});
+```
+
+---
+
+## Tool Attributes
+
+### `readonly` - Read-only Tool
+
+Indicates the tool doesn't modify any state (files, database, external APIs):
+
+```typescript
+attributes: {
+  readonly: true
+}
+```
+
+**Use Cases**:
+- Auto-approved in `readonly` permission mode
+- Suitable for queries, reads, computations
+
+### `noEffect` - No Side Effects
+
+Indicates the tool can be safely retried with identical results:
+
+```typescript
+attributes: {
+  noEffect: true
+}
+```
+
+**Use Cases**:
+- Safe for re-execution on Resume
+- Suitable for idempotent operations (GET requests, pure calculations)
+
+### Default Behavior
+
+Without `attributes`, tools are treated as:
+- Non-readonly (may write)
+- Side-effecting (not safe to retry)
+
+---
+
+## Custom Events
+
+### Basic Usage
+
+```typescript
+defineTool({
+  name: 'process_data',
+  description: 'Process data',
+  params: { input: { type: 'string' } },
+
+  async exec(args, ctx: EnhancedToolContext) {
+    ctx.emit('processing_started', { input: args.input });
+    const result = await heavyComputation(args.input);
+    ctx.emit('processing_completed', { result, duration: 1234 });
+    return result;
+  }
+});
+```
+
+### Listening to Custom Events
+
+```typescript
+agent.on('tool_custom_event', (event) => {
+  console.log(`[${event.toolName}] ${event.eventType}:`, event.data);
+});
+```
+
+### Event Structure
+
+```typescript
+interface MonitorToolCustomEvent {
+  channel: 'monitor';
+  type: 'tool_custom_event';
+  toolName: string;        // Tool name
+  eventType: string;       // Custom event type
+  data?: any;              // Event data
+  timestamp: number;
+  bookmark?: Bookmark;
+}
+```
+
+---
+
+## Tool Timeout & AbortSignal
+
+### Timeout Configuration
+
+The default tool execution timeout is **60 seconds**; it can be customized via the Agent config:
+
+```typescript
+const agent = await Agent.create({
+  templateId: 'my-assistant',
+  metadata: {
+    toolTimeoutMs: 120000, // 2 minutes
+  }
+}, deps);
+```
+
+### Handling AbortSignal (Required)
+
+All custom tools receive `context.signal`, which they **must** check during long-running operations:
+
+```typescript
+export class MyLongRunningTool implements ToolInstance {
+  async exec(args: any, context: ToolContext) {
+    // Check before long operations
+    if (context.signal?.aborted) {
+      throw new Error('Operation aborted');
+    }
+
+    // Pass signal to underlying APIs
+    const response = await fetch(url, { signal: context.signal });
+
+    // Check periodically in loops
+    for (const item of items) {
+      if (context.signal?.aborted) {
+        throw new Error('Operation aborted');
+      }
+      await processItem(item);
+    }
+
+    return result;
+  }
+}
+```
+
+### CPU-Intensive Tasks
+
+For pure computation tasks, actively check in loops:
+
+```typescript
+for (let i = 0; i < args.iterations; i++) {
+  // Check every 100 iterations
+  if (i % 100 === 0 && context.signal?.aborted) {
+    throw new Error('Computation aborted');
+  }
+  result.push(this.compute(i));
+}
+```
+
+### Timeout Recovery
+
+After timeout, Agent will:
+1. Send `abort` signal
+2. Mark tool call as `FAILED`
+3. Generate `tool_result` with timeout info
+4. Continue to next `runStep`
+
+On Resume, timed-out tool calls are auto-sealed (Auto-Seal), not re-executed.
+
+---
+
+## MCP Integration
+
+Register MCP loaders in the ToolRegistry, with `registryId` pointing to the MCP service:
+
+```typescript
+const registry = new ToolRegistry();
+
+// Register MCP tool loader
+registry.registerMCPLoader('my-mcp-server', async () => {
+  const client = await connectToMCPServer('my-mcp-server');
+  return client.getTools();
+});
+```
+
+Together with the TemplateRegistry, specify which templates enable MCP tools so that they are restored correctly on Resume.
+
+---
+
+## Best Practices
+
+1. **Always check `context.signal?.aborted`** in long-running operations
+2. **Pass signal to APIs supporting AbortSignal** (fetch, axios, etc.)
+3. **Set appropriate `attributes`** to help permission system
+4. **Use custom events** for tool execution observability
+5. **Prefer `defineTool()`** for cleaner, type-safe code
+6. **Use `input_schema`** only for advanced Schema constraints
+7. **Monitor timeout events** for alerting
+
+```typescript
+agent.on('error', (event) => {
+  if (event.phase === 'tool' && event.message.includes('aborted')) {
+    console.log('Tool execution timed out:', event.detail);
+  }
+});
+```
+
+---
+
+## Migration from Legacy API
+
+### Metadata Mapping
+
+| Legacy | New |
+|--------|-----|
+| `{ access: 'read', mutates: false }` | `{ readonly: true }` |
+| `{ access: 'write', mutates: true }` | (default, no need to set) |
+| `{ safe: true }` | `{ noEffect: true }` |
+
+### Adding Custom Events
+
+```typescript
+// Legacy - cannot emit events
+async exec(args, ctx: ToolContext) {
+  return result;
+}
+
+// New - can emit events
+async exec(args, ctx: EnhancedToolContext) {
+  ctx.emit('event_name', { data: 'value' });
+  return result;
+}
+```
+
+---
+
+## FAQ
+
+**Q: Must I use the new API?**
+
+A: No, the legacy `ToolInstance` interface is fully compatible. The new API is an optional enhancement.
+
+**Q: What's the difference between `readonly` and `noEffect`?**
+
+A:
+- `readonly`: Tool doesn't modify any state (files, database, etc.)
+- `noEffect`: Tool can be safely retried with identical results
+
+A read-only tool is usually also side-effect-free, but not vice versa.
+
+**Q: Are custom events persisted?**
+
+A: Yes, custom events are persisted to the WAL as `MonitorToolCustomEvent` and are recoverable on Resume.
+
+**Q: Can I mix old and new APIs?**
+
+A: Yes, you can mix them freely. Register the tools in a ToolRegistry and reference them by name:
+
+```typescript
+const tools = new ToolRegistry();
+
+// Register different styles
+tools.register('old_tool', () => oldStyleTool);
+tools.register('new_tool', () => defineTool({ name: 'new_tool', /* ... */ }));
+tools.register('fs_read', () => new FsRead());
+
+// Reference in template
+templates.register({
+  id: 'my-assistant',
+  tools: ['old_tool', 'new_tool', 'fs_read'],
+});
+
+const agent = await Agent.create({ templateId: 'my-assistant' }, deps);
+```
+
+---
+
+## Reference
+
+- Example code: `examples/tooling/simplified-tools.ts`
+- Type definitions: `src/tools/define.ts`
+- Event system: [events.md](./events.md)
diff --git a/docs/en/reference/api.md b/docs/en/reference/api.md
new file mode 100644
index 0000000..4c723a1
--- /dev/null
+++ b/docs/en/reference/api.md
@@ -0,0 +1,691 @@
+# API Reference
+
+This document provides a complete API reference for KODE SDK v2.7.0.
+
+---
+
+## Agent
+
+The core class for creating and managing AI agents.
+
+### Static Methods
+
+#### `Agent.create(config, deps)`
+
+Creates a new Agent instance.
+
+```typescript
+static async create(config: AgentConfig, deps: AgentDependencies): Promise<Agent>
+```
+
+**Parameters:**
+- `config: AgentConfig` - Agent configuration
+- `deps: AgentDependencies` - Required dependencies
+
+**Example:**
+```typescript
+const agent = await Agent.create({
+  templateId: 'assistant',
+  modelConfig: {
+    provider: 'anthropic',
+    apiKey: process.env.ANTHROPIC_API_KEY!,
+  },
+  sandbox: { kind: 'local', workDir: './workspace' },
+}, deps);
+```
+
+#### `Agent.resume(agentId, config, deps, opts?)`
+
+Resumes an existing Agent from storage.
+
+```typescript
+static async resume(
+  agentId: string,
+  config: AgentConfig,
+  deps: AgentDependencies,
+  opts?: { autoRun?: boolean; strategy?: ResumeStrategy }
+): Promise<Agent>
+```
+
+**Parameters:**
+- `agentId: string` - Agent ID to resume
+- `config: AgentConfig` - Agent configuration
+- `deps: AgentDependencies` - Required dependencies
+- `opts.autoRun?: boolean` - Continue processing after resume (default: false)
+- `opts.strategy?: ResumeStrategy` - `'crash'` (auto-seal) or `'manual'`
+
+#### `Agent.resumeFromStore(agentId, deps, opts?)`
+
+Resumes an Agent using metadata from store (recommended).
+
+```typescript
+static async resumeFromStore(
+  agentId: string,
+  deps: AgentDependencies,
+  opts?: { overrides?: Partial<AgentConfig>; autoRun?: boolean; strategy?: ResumeStrategy }
+): Promise<Agent>
+```
+
+### Instance Methods
+
+#### `agent.send(message, options?)`
+
+Sends a message and returns the text response.
+
+```typescript
+async send(message: string | ContentBlock[], options?: SendOptions): Promise<string>
+```
+
+#### `agent.chat(input, opts?)`
+
+Sends a message and returns structured result with status.
+
+```typescript
+async chat(input: string | ContentBlock[], opts?: StreamOptions): Promise<CompleteResult>
+```
+
+**Returns:**
+```typescript
+interface CompleteResult {
+  status: 'ok' | 'paused';
+  text?: string;
+  last?: Bookmark;
+  permissionIds?: string[];
+}
+```
+
+#### `agent.complete(input, opts?)`
+
+Alias for `chat()`.
+
+#### `agent.decide(permissionId, decision, note?)`
+
+Responds to a permission request.
+
+```typescript
+async decide(permissionId: string, decision: 'allow' | 'deny', note?: string): Promise<void>
+```
+
+#### `agent.interrupt(opts?)`
+
+Interrupts the current processing.
+
+```typescript
+async interrupt(opts?: { note?: string }): Promise<void>
+```
+
+#### `agent.snapshot(label?)`
+
+Creates a snapshot at the current Safe-Fork-Point.
+
+```typescript
+async snapshot(label?: string): Promise<SnapshotId>
+```
+
+#### `agent.fork(sel?)`
+
+Creates a forked Agent from a snapshot.
+
+```typescript
+async fork(sel?: SnapshotId | { at?: string }): Promise<Agent>
+```
+
+#### `agent.status()`
+
+Returns current Agent status.
+
+```typescript
+async status(): Promise<AgentStatus>
+```
+
+**Returns:**
+```typescript
+interface AgentStatus {
+  agentId: string;
+  state: AgentRuntimeState;  // 'READY' | 'WORKING' | 'PAUSED'
+  stepCount: number;
+  lastSfpIndex: number;
+  lastBookmark?: Bookmark;
+  cursor: number;
+  breakpoint: BreakpointState;
+}
+```
+
+#### `agent.info()`
+
+Returns Agent metadata.
+
+```typescript
+async info(): Promise<AgentInfo>
+```
+
+#### `agent.setTodos(todos)`
+
+Sets the entire Todo list.
+
+```typescript
+async setTodos(todos: TodoInput[]): Promise<void>
+```
+
+#### `agent.updateTodo(todo)`
+
+Updates a single Todo item.
+
+```typescript
+async updateTodo(todo: TodoInput): Promise<void>
+```
+
+#### `agent.deleteTodo(id)`
+
+Deletes a Todo item.
+
+```typescript
+async deleteTodo(id: string): Promise<void>
+```
+
+#### `agent.on(event, handler)`
+
+Subscribes to Control and Monitor events. Returns an unsubscribe function.
+
+```typescript
+on<T extends ControlEvent['type'] | MonitorEvent['type']>(
+  event: T,
+  handler: (evt: any) => void
+): () => void
+```
+
+**Supported events:**
+- Control: `'permission_required'`, `'permission_decided'`
+- Monitor: `'state_changed'`, `'step_complete'`, `'error'`, `'token_usage'`, `'tool_executed'`, `'agent_resumed'`, `'todo_changed'`, `'file_changed'`, `'tool_custom_event'`
+
+**Example:**
+```typescript
+// Monitor events
+const unsubscribe = agent.on('tool_executed', (event) => {
+  console.log(`Tool ${event.call.name} executed`);
+});
+
+agent.on('error', (event) => {
+  console.error('Error:', event.message);
+});
+
+// Control events
+agent.on('permission_required', (event) => {
+  console.log(`Permission needed for: ${event.call.name}`);
+});
+
+// Unsubscribe when done
+unsubscribe();
+```
+
+> **Note:** For Progress events (`text_chunk`, `tool:start`, `done`, etc.), use `agent.subscribe(['progress'])` instead.
+
+---
+
+## AgentConfig
+
+Configuration for creating an Agent.
+
+```typescript
+interface AgentConfig {
+  agentId?: string;                    // Auto-generated if not provided
+  templateId: string;                  // Required: template ID
+  templateVersion?: string;            // Optional: template version
+  model?: ModelProvider;               // Direct model provider
+  modelConfig?: ModelConfig;           // Or model configuration
+  sandbox?: Sandbox | SandboxConfig;   // Sandbox instance or config
+  tools?: string[];                    // Tool names to enable
+  exposeThinking?: boolean;            // Emit thinking events
+  retainThinking?: boolean;            // Keep thinking in message history
+  overrides?: {
+    permission?: PermissionConfig;
+    todo?: TodoConfig;
+    subagents?: SubAgentConfig;
+    hooks?: Hooks;
+  };
+  context?: ContextManagerOptions;
+  metadata?: Record<string, any>;
+}
+```
+
+---
+
+## AgentDependencies
+
+Required dependencies for Agent creation.
+
+```typescript
+interface AgentDependencies {
+  store: Store;                        // Storage backend
+  templateRegistry: AgentTemplateRegistry;
+  sandboxFactory: SandboxFactory;
+  toolRegistry: ToolRegistry;
+  modelFactory?: ModelFactory;         // Optional factory for model creation
+  skillsManager?: SkillsManager;       // Optional skills manager
+}
+```
+
+---
+
+## Store
+
+Interface for Agent data persistence.
+
+### Core Methods
+
+```typescript
+interface Store {
+  // Messages
+  saveMessages(agentId: string, messages: Message[]): Promise<void>;
+  loadMessages(agentId: string): Promise<Message[]>;
+
+  // Tool Records
+  saveToolCallRecords(agentId: string, records: ToolCallRecord[]): Promise<void>;
+  loadToolCallRecords(agentId: string): Promise<ToolCallRecord[]>;
+
+  // Todos
+  saveTodos(agentId: string, snapshot: TodoSnapshot): Promise<void>;
+  loadTodos(agentId: string): Promise<TodoSnapshot | undefined>;
+
+  // Events
+  appendEvent(agentId: string, timeline: Timeline): Promise<void>;
+  readEvents(agentId: string, opts?: { since?: Bookmark; channel?: AgentChannel }): AsyncIterable<Timeline>;
+
+  // Snapshots
+  saveSnapshot(agentId: string, snapshot: Snapshot): Promise<void>;
+  loadSnapshot(agentId: string, snapshotId: string): Promise<Snapshot | undefined>;
+  listSnapshots(agentId: string): Promise<Snapshot[]>;
+
+  // Metadata
+  saveInfo(agentId: string, info: AgentInfo): Promise<void>;
+  loadInfo(agentId: string): Promise<AgentInfo | undefined>;
+
+  // Lifecycle
+  exists(agentId: string): Promise<boolean>;
+  delete(agentId: string): Promise<void>;
+  list(prefix?: string): Promise<string[]>;
+}
+```
+
+### Store Implementations
+
+| Class | Description |
+|-------|-------------|
+| `JSONStore` | File-based storage (default) |
+| `SqliteStore` | SQLite database storage |
+| `PostgresStore` | PostgreSQL database storage |
+
+### Factory Function
+
+```typescript
+import { createExtendedStore } from '@shareai-lab/kode-sdk';
+
+// SQLite
+const store = await createExtendedStore({
+  type: 'sqlite',
+  dbPath: './data/agents.db',
+  fileStoreBaseDir: './data/store',
+});
+
+// PostgreSQL
+const store = await createExtendedStore({
+  type: 'postgres',
+  connection: {
+    host: 'localhost',
+    port: 5432,
+    database: 'kode_agents',
+    user: 'kode',
+    password: 'password',
+  },
+  fileStoreBaseDir: './data/store',
+});
+```
+
+---
+
+## QueryableStore
+
+Extended Store interface with query capabilities.
+
+```typescript
+interface QueryableStore extends Store {
+  querySessions(filters: SessionFilters): Promise<SessionInfo[]>;
+  queryMessages(filters: MessageFilters): Promise<Message[]>;
+  queryToolCalls(filters: ToolCallFilters): Promise<ToolCallRecord[]>;
+  aggregateStats(agentId: string): Promise<AgentStats>;
+}
+```
+
+### SessionFilters
+
+```typescript
+interface SessionFilters {
+  agentId?: string;
+  templateId?: string;
+  userId?: string;
+  startDate?: number;      // Unix timestamp (ms)
+  endDate?: number;
+  limit?: number;
+  offset?: number;
+  sortBy?: 'created_at' | 'updated_at' | 'message_count';
+  sortOrder?: 'asc' | 'desc';
+}
+```
+
+### MessageFilters
+
+```typescript
+interface MessageFilters {
+  agentId?: string;
+  role?: 'user' | 'assistant' | 'system';
+  startDate?: number;
+  endDate?: number;
+  limit?: number;
+  offset?: number;
+}
+```
+
+### ToolCallFilters
+
+```typescript
+interface ToolCallFilters {
+  agentId?: string;
+  toolName?: string;
+  state?: ToolCallState;
+  startDate?: number;
+  endDate?: number;
+  limit?: number;
+  offset?: number;
+}
+```
+
+---
+
+## ExtendedStore
+
+Store with advanced features.
+
+```typescript
+interface ExtendedStore extends QueryableStore {
+  healthCheck(): Promise<StoreHealthStatus>;
+  checkConsistency(agentId: string): Promise<ConsistencyCheckResult>;
+  getMetrics(): Promise<StoreMetrics>;
+  acquireAgentLock(agentId: string, timeoutMs?: number): Promise<LockReleaseFn>;
+  batchFork(agentId: string, count: number): Promise<string[]>;
+  close(): Promise<void>;
+}
+```
+
+---
+
+## ToolRegistry
+
+Registry for tool factories.
+
+```typescript
+class ToolRegistry {
+  register(id: string, factory: ToolFactory): void;
+  has(id: string): boolean;
+  create(id: string, config?: Record<string, any>): ToolInstance;
+  list(): string[];
+}
+```
+
+### ToolInstance
+
+```typescript
+interface ToolInstance {
+  name: string;
+  description: string;
+  input_schema: any;                   // JSON Schema
+  hooks?: Hooks;
+  prompt?: string | ((ctx: ToolContext) => string | Promise<string>);
+  exec(args: any, ctx: ToolContext): Promise<any>;
+  toDescriptor(): ToolDescriptor;
+}
+```
+
+### defineTool()
+
+Simplified API for creating tools.
+
+```typescript
+import { defineTool } from '@shareai-lab/kode-sdk';
+
+const myTool = defineTool({
+  name: 'my_tool',
+  description: 'Does something useful',
+  params: {
+    input: { type: 'string', description: 'Input value' },
+    count: { type: 'number', required: false, default: 1 },
+  },
+  attributes: {
+    readonly: true,
+    noEffect: true,
+  },
+  async exec(args, ctx) {
+    ctx.emit('custom_event', { data: 'value' });
+    return { result: args.input };
+  },
+});
+```
+
+---
+
+## AgentTemplateRegistry
+
+Registry for Agent templates.
+
+```typescript
+class AgentTemplateRegistry {
+  register(template: AgentTemplateDefinition): void;
+  bulkRegister(templates: AgentTemplateDefinition[]): void;
+  has(id: string): boolean;
+  get(id: string): AgentTemplateDefinition;
+  list(): string[];
+}
+```
+
+### AgentTemplateDefinition
+
+```typescript
+interface AgentTemplateDefinition {
+  id: string;                          // Required: unique identifier
+  name?: string;                       // Display name
+  desc?: string;                       // Description
+  version?: string;                    // Template version
+  systemPrompt: string;                // Required: system prompt
+  model?: string;                      // Default model
+  sandbox?: Record<string, any>;       // Sandbox configuration
+  tools?: '*' | string[];              // '*' for all, or specific tools
+  permission?: PermissionConfig;       // Permission configuration
+  runtime?: TemplateRuntimeConfig;     // Runtime options
+  hooks?: Hooks;                       // Hook functions
+  metadata?: Record<string, any>;      // Custom metadata
+}
+```
+
+---
+
+## AgentPool
+
+Manages multiple Agent instances.
+
+```typescript
+class AgentPool {
+  constructor(opts: AgentPoolOptions);
+
+  async create(agentId: string, config: AgentConfig): Promise<Agent>;
+  get(agentId: string): Agent | undefined;
+  list(opts?: { prefix?: string }): string[];
+  async status(agentId: string): Promise<AgentStatus | undefined>;
+  async fork(agentId: string, snapshotSel?: SnapshotId | { at?: string }): Promise<Agent>;
+  async resume(agentId: string, config: AgentConfig, opts?: { autoRun?: boolean; strategy?: ResumeStrategy }): Promise<Agent>;
+  async destroy(agentId: string): Promise<void>;
+}
+```
+
+---
+
+## Room
+
+Multi-Agent collaboration space.
+
+```typescript
+class Room {
+  constructor(pool: AgentPool);
+
+  join(name: string, agentId: string): void;
+  leave(name: string): void;
+  async say(from: string, text: string): Promise<void>;
+  getMembers(): RoomMember[];
+}
+```
+
+**Example:**
+```typescript
+const pool = new AgentPool({ dependencies: deps });
+const room = new Room(pool);
+
+// Create and join agents
+const agent1 = await pool.create('agent-1', config);
+const agent2 = await pool.create('agent-2', config);
+
+room.join('Alice', 'agent-1');
+room.join('Bob', 'agent-2');
+
+// Broadcast message
+await room.say('Alice', 'Hello everyone!');
+
+// Directed message
+await room.say('Alice', '@Bob What do you think?');
+```
+
+---
+
+## Providers
+
+### AnthropicProvider
+
+```typescript
+import { AnthropicProvider } from '@shareai-lab/kode-sdk';
+
+const provider = new AnthropicProvider(
+  process.env.ANTHROPIC_API_KEY!,
+  process.env.ANTHROPIC_MODEL_ID ?? 'claude-sonnet-4-20250514',
+  {
+    thinking: { enabled: true, budgetTokens: 10000 },
+    cache: { breakpoints: 4 },
+  }
+);
+```
+
+### OpenAIProvider
+
+```typescript
+import { OpenAIProvider } from '@shareai-lab/kode-sdk';
+
+const provider = new OpenAIProvider(
+  process.env.OPENAI_API_KEY!,
+  process.env.OPENAI_MODEL_ID ?? 'gpt-4o',
+  {
+    api: 'responses',
+    responses: { reasoning: { effort: 'medium' } },
+  }
+);
+```
+
+### GeminiProvider
+
+```typescript
+import { GeminiProvider } from '@shareai-lab/kode-sdk';
+
+const provider = new GeminiProvider(
+  process.env.GOOGLE_API_KEY!,
+  process.env.GEMINI_MODEL_ID ?? 'gemini-2.0-flash',
+  {
+    thinking: { level: 'medium', includeThoughts: true },
+  }
+);
+```
+
+---
+
+## Built-in Tools
+
+| Tool | Description |
+|------|-------------|
+| `fs_read` | Read file content |
+| `fs_write` | Create/overwrite file |
+| `fs_edit` | Edit file with replacements |
+| `fs_glob` | Match files with glob patterns |
+| `fs_grep` | Search text/regex in files |
+| `fs_multi_edit` | Batch edit multiple files |
+| `bash_run` | Execute shell commands |
+| `bash_logs` | Read background command output |
+| `bash_kill` | Terminate background commands |
+| `todo_read` | Read Todo list |
+| `todo_write` | Write Todo list |
+| `task_run` | Dispatch sub-Agent |
+| `skills` | Load skills |
+
+### Registering Built-in Tools
+
+```typescript
+import { builtin, ToolRegistry } from '@shareai-lab/kode-sdk';
+
+const registry = new ToolRegistry();
+
+// builtin is an object with methods that return ToolInstance[]
+for (const tool of [...builtin.fs(), ...builtin.bash(), ...builtin.todo()]) {
+  registry.register(tool.name, () => tool);
+}
+
+// Or register specific tool groups
+builtin.fs().forEach(tool => registry.register(tool.name, () => tool));
+builtin.bash().forEach(tool => registry.register(tool.name, () => tool));
+builtin.todo().forEach(tool => registry.register(tool.name, () => tool));
+```
+
+**Available builtin groups:**
+- `builtin.fs()` - File system tools: `fs_read`, `fs_write`, `fs_edit`, `fs_glob`, `fs_grep`, `fs_multi_edit`
+- `builtin.bash()` - Shell tools: `bash_run`, `bash_logs`, `bash_kill`
+- `builtin.todo()` - Todo tools: `todo_read`, `todo_write`
+- `builtin.task(templates)` - Sub-agent tool: `task_run` (requires templates)
+
+---
+
+## SkillsManager
+
+Manages skills at Agent runtime.
+
+```typescript
+class SkillsManager {
+  constructor(skillsDir: string, whitelist?: string[]);
+
+  async getSkillsMetadata(): Promise<SkillMetadata[]>;
+  async loadSkillContent(skillName: string): Promise<SkillContent | null>;
+}
+```
+
+---
+
+## Utility Functions
+
+### generateAgentId()
+
+Generates a unique Agent ID.
+
+```typescript
+import { generateAgentId } from '@shareai-lab/kode-sdk';
+
+const agentId = generateAgentId(); // e.g., 'agt-abc123xyz'
+```
+
+---
+
+## References
+
+- [Types Reference](./types.md)
+- [Events Reference](./events-reference.md)
+- [Events Guide](../guides/events.md)
diff --git a/docs/en/reference/events-reference.md b/docs/en/reference/events-reference.md
new file mode 100644
index 0000000..e6bde21
--- /dev/null
+++ b/docs/en/reference/events-reference.md
@@ -0,0 +1,576 @@
+# Events Reference
+
+Complete reference for all KODE SDK events organized by channel.
+
+---
+
+## Event Channels
+
+| Channel | Purpose | Subscriber |
+|---------|---------|------------|
+| `progress` | Streaming output (text, tool calls) | User interface |
+| `control` | Permission requests and decisions | Business logic |
+| `monitor` | System observability | Monitoring/logging |
+
+---
+
+## Progress Events
+
+Events for streaming output to users.
+
+### ProgressTextChunkStartEvent
+
+Emitted when text streaming begins.
+
+```typescript
+interface ProgressTextChunkStartEvent {
+  channel: 'progress';
+  type: 'text_chunk_start';
+  step: number;
+  bookmark?: Bookmark;
+}
+```
+
+### ProgressTextChunkEvent
+
+Emitted for each text chunk during streaming.
+
+```typescript
+interface ProgressTextChunkEvent {
+  channel: 'progress';
+  type: 'text_chunk';
+  step: number;
+  delta: string;           // Text chunk content
+  bookmark?: Bookmark;
+}
+```
+
+### ProgressTextChunkEndEvent
+
+Emitted when text streaming completes.
+
+```typescript
+interface ProgressTextChunkEndEvent {
+  channel: 'progress';
+  type: 'text_chunk_end';
+  step: number;
+  text: string;            // Complete text
+  bookmark?: Bookmark;
+}
+```
+
+### ProgressThinkChunkStartEvent
+
+Emitted when thinking/reasoning streaming begins.
+
+```typescript
+interface ProgressThinkChunkStartEvent {
+  channel: 'progress';
+  type: 'think_chunk_start';
+  step: number;
+  bookmark?: Bookmark;
+}
+```
+
+### ProgressThinkChunkEvent
+
+Emitted for each thinking chunk.
+
+```typescript
+interface ProgressThinkChunkEvent {
+  channel: 'progress';
+  type: 'think_chunk';
+  step: number;
+  delta: string;           // Thinking chunk content
+  bookmark?: Bookmark;
+}
+```
+
+### ProgressThinkChunkEndEvent
+
+Emitted when thinking streaming completes.
+
+```typescript
+interface ProgressThinkChunkEndEvent {
+  channel: 'progress';
+  type: 'think_chunk_end';
+  step: number;
+  bookmark?: Bookmark;
+}
+```
+
+### ProgressToolStartEvent
+
+Emitted when tool execution starts.
+
+```typescript
+interface ProgressToolStartEvent {
+  channel: 'progress';
+  type: 'tool:start';
+  call: ToolCallSnapshot;
+  bookmark?: Bookmark;
+}
+```
+
+### ProgressToolEndEvent
+
+Emitted when tool execution completes.
+
+```typescript
+interface ProgressToolEndEvent {
+  channel: 'progress';
+  type: 'tool:end';
+  call: ToolCallSnapshot;
+  bookmark?: Bookmark;
+}
+```
+
+### ProgressToolErrorEvent
+
+Emitted when tool execution fails.
+
+```typescript
+interface ProgressToolErrorEvent {
+  channel: 'progress';
+  type: 'tool:error';
+  call: ToolCallSnapshot;
+  error: string;
+  bookmark?: Bookmark;
+}
+```
+
+### ProgressDoneEvent
+
+Emitted when processing completes.
+
+```typescript
+interface ProgressDoneEvent {
+  channel: 'progress';
+  type: 'done';
+  step: number;
+  reason: 'completed' | 'interrupted';
+  bookmark?: Bookmark;
+}
+```
+
+---
+
+## Control Events
+
+Events for permission handling.
+
+### ControlPermissionRequiredEvent
+
+Emitted when a tool call requires approval.
+
+```typescript
+interface ControlPermissionRequiredEvent {
+  channel: 'control';
+  type: 'permission_required';
+  call: ToolCallSnapshot;
+  respond(decision: 'allow' | 'deny', opts?: { note?: string }): Promise<void>;
+  bookmark?: Bookmark;
+}
+```
+
+**Usage:**
+```typescript
+agent.on('permission_required', async (event) => {
+  // Review the tool call
+  console.log('Tool:', event.call.name);
+  console.log('Input:', event.call.inputPreview);
+
+  // Make decision
+  await event.respond('allow', { note: 'Approved by admin' });
+});
+```
+
+### ControlPermissionDecidedEvent
+
+Emitted when a permission decision is made.
+
+```typescript
+interface ControlPermissionDecidedEvent {
+  channel: 'control';
+  type: 'permission_decided';
+  callId: string;
+  decision: 'allow' | 'deny';
+  decidedBy: string;
+  note?: string;
+  bookmark?: Bookmark;
+}
+```
+
+---
+
+## Monitor Events
+
+Events for system observability.
+
+### MonitorStateChangedEvent
+
+Emitted when the Agent's runtime state changes.
+
+```typescript
+interface MonitorStateChangedEvent {
+  channel: 'monitor';
+  type: 'state_changed';
+  state: AgentRuntimeState;   // 'READY' | 'WORKING' | 'PAUSED'
+  bookmark?: Bookmark;
+}
+```
+
+### MonitorStepCompleteEvent
+
+Emitted when a processing step completes.
+
+```typescript
+interface MonitorStepCompleteEvent {
+  channel: 'monitor';
+  type: 'step_complete';
+  step: number;
+  durationMs?: number;
+  bookmark: Bookmark;
+}
+```
+
+### MonitorErrorEvent
+
+Emitted when an error occurs.
+
+```typescript
+interface MonitorErrorEvent {
+  channel: 'monitor';
+  type: 'error';
+  severity: 'info' | 'warn' | 'error';
+  phase: 'model' | 'tool' | 'system' | 'lifecycle';
+  message: string;
+  detail?: any;
+  bookmark?: Bookmark;
+}
+```
+
+### MonitorTokenUsageEvent
+
+Emitted with token usage statistics.
+
+```typescript
+interface MonitorTokenUsageEvent {
+  channel: 'monitor';
+  type: 'token_usage';
+  inputTokens: number;
+  outputTokens: number;
+  totalTokens: number;
+  bookmark?: Bookmark;
+}
+```
+
+### MonitorToolExecutedEvent
+
+Emitted when a tool execution completes.
+
+```typescript
+interface MonitorToolExecutedEvent {
+  channel: 'monitor';
+  type: 'tool_executed';
+  call: ToolCallSnapshot;
+  bookmark?: Bookmark;
+}
+```
+
+### MonitorAgentResumedEvent
+
+Emitted when an Agent resumes from storage.
+
+```typescript
+interface MonitorAgentResumedEvent {
+  channel: 'monitor';
+  type: 'agent_resumed';
+  strategy: 'crash' | 'manual';
+  sealed: ToolCallSnapshot[];    // Auto-sealed tool calls
+  bookmark?: Bookmark;
+}
+```
+
+### MonitorBreakpointChangedEvent
+
+Emitted when breakpoint state changes.
+
+```typescript
+interface MonitorBreakpointChangedEvent {
+  channel: 'monitor';
+  type: 'breakpoint_changed';
+  previous: BreakpointState;
+  current: BreakpointState;
+  timestamp: number;
+  bookmark?: Bookmark;
+}
+```
+
+### MonitorTodoChangedEvent
+
+Emitted when the Todo list changes.
+
+```typescript
+interface MonitorTodoChangedEvent {
+  channel: 'monitor';
+  type: 'todo_changed';
+  current: TodoItem[];
+  previous: TodoItem[];
+  bookmark?: Bookmark;
+}
+```
+
+### MonitorTodoReminderEvent
+
+Emitted when a Todo reminder is triggered.
+
+```typescript
+interface MonitorTodoReminderEvent {
+  channel: 'monitor';
+  type: 'todo_reminder';
+  todos: TodoItem[];
+  reason: string;
+  bookmark?: Bookmark;
+}
+```
+
+### MonitorFileChangedEvent
+
+Emitted when a watched file changes.
+
+```typescript
+interface MonitorFileChangedEvent {
+  channel: 'monitor';
+  type: 'file_changed';
+  path: string;
+  mtime: number;
+  bookmark?: Bookmark;
+}
+```
+
+### MonitorReminderSentEvent
+
+Emitted when a reminder is sent to the model.
+
+```typescript
+interface MonitorReminderSentEvent {
+  channel: 'monitor';
+  type: 'reminder_sent';
+  category: 'file' | 'todo' | 'security' | 'performance' | 'general';
+  content: string;
+  bookmark?: Bookmark;
+}
+```
+
+### MonitorContextCompressionEvent
+
+Emitted during context compression.
+
+```typescript
+interface MonitorContextCompressionEvent {
+  channel: 'monitor';
+  type: 'context_compression';
+  phase: 'start' | 'end';
+  summary?: string;
+  ratio?: number;
+  bookmark?: Bookmark;
+}
+```
+
+### MonitorSchedulerTriggeredEvent
+
+Emitted when a scheduled task triggers.
+
+```typescript
+interface MonitorSchedulerTriggeredEvent {
+  channel: 'monitor';
+  type: 'scheduler_triggered';
+  taskId: string;
+  spec: string;
+  kind: 'steps' | 'time' | 'cron';
+  triggeredAt: number;
+  bookmark?: Bookmark;
+}
+```
+
+### MonitorToolManualUpdatedEvent
+
+Emitted when tool manuals are updated.
+
+```typescript
+interface MonitorToolManualUpdatedEvent {
+  channel: 'monitor';
+  type: 'tool_manual_updated';
+  tools: string[];
+  timestamp: number;
+  bookmark?: Bookmark;
+}
+```
+
+### MonitorSkillsMetadataUpdatedEvent
+
+Emitted when skills metadata is updated.
+
+```typescript
+interface MonitorSkillsMetadataUpdatedEvent {
+  channel: 'monitor';
+  type: 'skills_metadata_updated';
+  skills: string[];
+  timestamp: number;
+  bookmark?: Bookmark;
+}
+```
+
+### MonitorToolCustomEvent
+
+Custom events emitted by tools.
+
+```typescript
+interface MonitorToolCustomEvent {
+  channel: 'monitor';
+  type: 'tool_custom_event';
+  toolName: string;
+  eventType: string;
+  data?: any;
+  timestamp: number;
+  bookmark?: Bookmark;
+}
+```
+
+---
+
+## Subscribing to Events
+
+### Using `agent.on()` (Control/Monitor only)
+
+`agent.on()` only supports Control and Monitor events.
+
+```typescript
+// Control events
+agent.on('permission_required', async (event) => {
+  console.log('Permission needed for:', event.call.name);
+  await event.respond('allow');
+});
+
+agent.on('permission_decided', (event) => {
+  console.log(`Decision: ${event.decision} by ${event.decidedBy}`);
+});
+
+// Monitor events
+agent.on('error', (event) => {
+  console.error(`[${event.severity}] ${event.message}`);
+});
+
+agent.on('token_usage', (event) => {
+  console.log(`Tokens: ${event.totalTokens}`);
+});
+
+agent.on('tool_executed', (event) => {
+  console.log(`Tool ${event.call.name} executed`);
+});
+
+agent.on('state_changed', (event) => {
+  console.log(`State: ${event.state}`);
+});
+```
+
+### Using `agent.subscribe()` (All channels)
+
+Progress events are not available through `agent.on()`; subscribe to the `progress` channel with `agent.subscribe()` instead:
+
+```typescript
+for await (const envelope of agent.subscribe(['progress'])) {
+  const { event } = envelope;
+
+  switch (event.type) {
+    case 'text_chunk':
+      process.stdout.write(event.delta);
+      break;
+    case 'tool:start':
+      console.log('Tool:', event.call.name);
+      break;
+    case 'done':
+      console.log('Completed');
+      break;
+  }
+}
+```
+
+### Using Async Iterator with `stream()`
+
+```typescript
+for await (const envelope of agent.stream('Hello')) {
+  const { event } = envelope;
+
+  switch (event.type) {
+    case 'text_chunk':
+      process.stdout.write(event.delta);
+      break;
+    case 'tool:start':
+      console.log('Tool:', event.call.name);
+      break;
+    case 'done':
+      console.log('Completed');
+      break;
+  }
+}
+```
+
+---
+
+## Event Type Unions
+
+### ProgressEvent
+
+```typescript
+type ProgressEvent =
+  | ProgressThinkChunkStartEvent
+  | ProgressThinkChunkEvent
+  | ProgressThinkChunkEndEvent
+  | ProgressTextChunkStartEvent
+  | ProgressTextChunkEvent
+  | ProgressTextChunkEndEvent
+  | ProgressToolStartEvent
+  | ProgressToolEndEvent
+  | ProgressToolErrorEvent
+  | ProgressDoneEvent;
+```
+
+### ControlEvent
+
+```typescript
+type ControlEvent =
+  | ControlPermissionRequiredEvent
+  | ControlPermissionDecidedEvent;
+```
+
+### MonitorEvent
+
+```typescript
+type MonitorEvent =
+  | MonitorStateChangedEvent
+  | MonitorStepCompleteEvent
+  | MonitorErrorEvent
+  | MonitorTokenUsageEvent
+  | MonitorToolExecutedEvent
+  | MonitorAgentResumedEvent
+  | MonitorTodoChangedEvent
+  | MonitorTodoReminderEvent
+  | MonitorFileChangedEvent
+  | MonitorReminderSentEvent
+  | MonitorContextCompressionEvent
+  | MonitorSchedulerTriggeredEvent
+  | MonitorBreakpointChangedEvent
+  | MonitorToolManualUpdatedEvent
+  | MonitorSkillsMetadataUpdatedEvent
+  | MonitorToolCustomEvent;
+```
+
+---
+
+## References
+
+- [Events Guide](../guides/events.md)
+- [API Reference](./api.md)
+- [Types Reference](./types.md)
diff --git a/docs/en/reference/types.md b/docs/en/reference/types.md
new file mode 100644
index 0000000..003d167
--- /dev/null
+++ b/docs/en/reference/types.md
@@ -0,0 +1,483 @@
+# Types Reference
+
+This document provides a reference for all TypeScript types exported by KODE SDK.
+
+---
+
+## Message Types
+
+### MessageRole
+
+```typescript
+type MessageRole = 'user' | 'assistant' | 'system';
+```
+
+### Message
+
+```typescript
+interface Message {
+  role: MessageRole;
+  content: ContentBlock[];
+  metadata?: MessageMetadata;
+}
+```
+
+### MessageMetadata
+
+```typescript
+interface MessageMetadata {
+  content_blocks?: ContentBlock[];
+  transport?: 'provider' | 'text' | 'omit';
+}
+```
+
+---
+
+## Content Blocks
+
+### ContentBlock
+
+Union type for all content block types.
+
+```typescript
+type ContentBlock =
+  | { type: 'text'; text: string }
+  | { type: 'image_url'; image_url: { url: string } }
+  | { type: 'tool_use'; id: string; name: string; input: any; meta?: Record<string, any> }
+  | { type: 'tool_result'; tool_use_id: string; content: any; is_error?: boolean }
+  | ReasoningContentBlock
+  | ImageContentBlock
+  | AudioContentBlock
+  | FileContentBlock;
+```
+
+### ReasoningContentBlock
+
+```typescript
+type ReasoningContentBlock = {
+  type: 'reasoning';
+  reasoning: string;
+  meta?: Record<string, any>;
+};
+```
+
+### ImageContentBlock
+
+```typescript
+type ImageContentBlock = {
+  type: 'image';
+  url?: string;
+  file_id?: string;
+  base64?: string;
+  mime_type?: string;
+  meta?: Record<string, any>;
+};
+```
+
+### AudioContentBlock
+
+```typescript
+type AudioContentBlock = {
+  type: 'audio';
+  url?: string;
+  file_id?: string;
+  base64?: string;
+  mime_type?: string;
+  meta?: Record<string, any>;
+};
+```
+
+### FileContentBlock
+
+```typescript
+type FileContentBlock = {
+  type: 'file';
+  url?: string;
+  file_id?: string;
+  filename?: string;
+  base64?: string;
+  mime_type?: string;
+  meta?: Record<string, any>;
+};
+```
+
+---
+
+## Agent State Types
+
+### AgentRuntimeState
+
+```typescript
+type AgentRuntimeState = 'READY' | 'WORKING' | 'PAUSED';
+```
+
+| State | Description |
+|-------|-------------|
+| `READY` | Agent is idle and ready to receive messages |
+| `WORKING` | Agent is processing a message |
+| `PAUSED` | Agent is paused, waiting for a permission decision |
+
+### BreakpointState
+
+```typescript
+type BreakpointState =
+  | 'READY'
+  | 'PRE_MODEL'
+  | 'STREAMING_MODEL'
+  | 'TOOL_PENDING'
+  | 'AWAITING_APPROVAL'
+  | 'PRE_TOOL'
+  | 'TOOL_EXECUTING'
+  | 'POST_TOOL';
+```
+
+### AgentStatus
+
+```typescript
+interface AgentStatus {
+  agentId: string;
+  state: AgentRuntimeState;
+  stepCount: number;
+  lastSfpIndex: number;
+  lastBookmark?: Bookmark;
+  cursor: number;
+  breakpoint: BreakpointState;
+}
+```
+
+### AgentInfo
+
+```typescript
+interface AgentInfo {
+  agentId: string;
+  templateId: string;
+  createdAt: string;
+  lineage: string[];
+  configVersion: string;
+  messageCount: number;
+  lastSfpIndex: number;
+  lastBookmark?: Bookmark;
+  breakpoint?: BreakpointState;
+  metadata?: Record<string, any>;
+}
+```
+
+---
+
+## Tool Call Types
+
+### ToolCallState
+
+```typescript
+type ToolCallState =
+  | 'PENDING'
+  | 'APPROVAL_REQUIRED'
+  | 'APPROVED'
+  | 'EXECUTING'
+  | 'COMPLETED'
+  | 'FAILED'
+  | 'DENIED'
+  | 'SEALED';
+```
+
+| State | Description |
+|-------|-------------|
+| `PENDING` | Tool call received, not yet processed |
+| `APPROVAL_REQUIRED` | Waiting for user approval |
+| `APPROVED` | Approved, ready to execute |
+| `EXECUTING` | Currently executing |
+| `COMPLETED` | Execution completed successfully |
+| `FAILED` | Execution failed |
+| `DENIED` | User denied the tool call |
+| `SEALED` | Auto-sealed during resume |
+
+### ToolCallRecord
+
+```typescript
+interface ToolCallRecord {
+  id: string;
+  name: string;
+  input: any;
+  state: ToolCallState;
+  approval: ToolCallApproval;
+  result?: any;
+  error?: string;
+  isError?: boolean;
+  startedAt?: number;
+  completedAt?: number;
+  durationMs?: number;
+  createdAt: number;
+  updatedAt: number;
+  auditTrail: ToolCallAuditEntry[];
+}
+```
+
+### ToolCallSnapshot
+
+```typescript
+type ToolCallSnapshot = Pick<
+  ToolCallRecord,
+  'id' | 'name' | 'state' | 'approval' | 'result' | 'error' | 'isError' | 'durationMs' | 'startedAt' | 'completedAt'
+> & {
+  inputPreview?: any;
+  auditTrail?: ToolCallAuditEntry[];
+};
+```
+
+### ToolCallApproval
+
+```typescript
+interface ToolCallApproval {
+  required: boolean;
+  decision?: 'allow' | 'deny';
+  decidedBy?: string;
+  decidedAt?: number;
+  note?: string;
+  meta?: Record<string, any>;
+}
+```
+
+### ToolCallAuditEntry
+
+```typescript
+interface ToolCallAuditEntry {
+  state: ToolCallState;
+  timestamp: number;
+  note?: string;
+}
+```
+
+### ToolOutcome
+
+```typescript
+interface ToolOutcome {
+  id: string;
+  name: string;
+  ok: boolean;
+  content: any;
+  durationMs?: number;
+}
+```
+
+### ToolCall
+
+```typescript
+interface ToolCall {
+  id: string;
+  name: string;
+  args: any;
+  agentId: string;
+}
+```
+
+### ToolContext
+
+```typescript
+interface ToolContext {
+  agentId: string;
+  sandbox: Sandbox;
+  agent: any;
+  services?: Record<string, any>;
+  signal?: AbortSignal;
+  emit?: (eventType: string, data?: any) => void;
+}
+```
+
+---
+
+## Event Types
+
+### Bookmark
+
+```typescript
+interface Bookmark {
+  seq: number;
+  timestamp: number;
+}
+```
+
+### AgentChannel
+
+```typescript
+type AgentChannel = 'progress' | 'control' | 'monitor';
+```
+
+### AgentEvent
+
+```typescript
+type AgentEvent = ProgressEvent | ControlEvent | MonitorEvent;
+```
+
+### AgentEventEnvelope
+
+```typescript
+interface AgentEventEnvelope<T extends AgentEvent = AgentEvent> {
+  cursor: number;
+  bookmark: Bookmark;
+  event: T;
+}
+```
+
+### Timeline
+
+```typescript
+interface Timeline {
+  cursor: number;
+  bookmark: Bookmark;
+  event: AgentEvent;
+}
+```
+
+---
+
+## Snapshot Types
+
+### SnapshotId
+
+```typescript
+type SnapshotId = string;
+```
+
+### Snapshot
+
+```typescript
+interface Snapshot {
+  id: SnapshotId;
+  messages: Message[];
+  lastSfpIndex: number;
+  lastBookmark: Bookmark;
+  createdAt: string;
+  metadata?: Record<string, any>;
+}
+```
+
+---
+
+## Hook Types
+
+### HookDecision
+
+```typescript
+type HookDecision =
+  | { decision: 'ask'; meta?: any }
+  | { decision: 'deny'; reason?: string; toolResult?: any }
+  | { result: any }
+  | void;
+```
+
+### PostHookResult
+
+```typescript
+type PostHookResult =
+  | void
+  | { update: Partial<ToolOutcome> }
+  | { replace: ToolOutcome };
+```
+
+---
+
+## Configuration Types
+
+### PermissionConfig
+
+```typescript
+interface PermissionConfig {
+  mode: PermissionDecisionMode;
+  requireApprovalTools?: string[];
+  allowTools?: string[];
+  denyTools?: string[];
+  metadata?: Record<string, any>;
+}
+```
+
+### PermissionDecisionMode
+
+```typescript
+type PermissionDecisionMode = 'auto' | 'approval' | 'readonly' | (string & {});
+```
+
+| Mode | Description |
+|------|-------------|
+| `auto` | Automatically allow all tool calls |
+| `approval` | Require approval for all tool calls |
+| `readonly` | Allow read-only tools, require approval for others |
+
+### SubAgentConfig
+
+```typescript
+interface SubAgentConfig {
+  templates?: string[];
+  depth: number;
+  inheritConfig?: boolean;
+  overrides?: {
+    permission?: PermissionConfig;
+    todo?: TodoConfig;
+  };
+}
+```
+
+### TodoConfig
+
+```typescript
+interface TodoConfig {
+  enabled: boolean;
+  remindIntervalSteps?: number;
+  storagePath?: string;
+  reminderOnStart?: boolean;
+}
+```
+
+### SandboxConfig
+
+```typescript
+interface SandboxConfig {
+  kind: SandboxKind;
+  workDir?: string;
+  enforceBoundary?: boolean;
+  allowPaths?: string[];
+  watchFiles?: boolean;
+  [key: string]: any;
+}
+```
+
+### SandboxKind
+
+```typescript
+type SandboxKind = 'local' | 'docker' | 'remote';
+```
+
+---
+
+## Resume Types
+
+### ResumeStrategy
+
+```typescript
+type ResumeStrategy = 'crash' | 'manual';
+```
+
+| Strategy | Description |
+|----------|-------------|
+| `crash` | Auto-seal incomplete tool calls and emit an `agent_resumed` event |
+| `manual` | Leave incomplete tool calls as-is for manual handling |
+
+---
+
+## Reminder Types
+
+### ReminderOptions
+
+```typescript
+interface ReminderOptions {
+  skipStandardEnding?: boolean;
+  priority?: 'low' | 'medium' | 'high';
+  category?: 'file' | 'todo' | 'security' | 'performance' | 'general';
+}
+```
+
+---
+
+## References
+
+- [API Reference](./api.md)
+- [Events Reference](./events-reference.md)
diff --git a/docs/playbooks.md b/docs/playbooks.md
deleted file mode 100644
index 79f6c87..0000000
--- a/docs/playbooks.md
+++ /dev/null
@@ -1,94 +0,0 @@
-# Playbooks：典型场景脚本
-
-本页从实践角度拆解四个最常见的使用场景，给出心智地图、关键 API、示例文件以及注意事项。示例代码位于 `examples/` 目录，可直接 `ts-node` 运行。
-
----
-
-## 1. 协作收件箱（事件驱动 UI）
-
-- **目标**：持续运行的单 Agent，UI 通过 Progress 流展示文本/工具进度，Monitor 做轻量告警。
-- **示例**：`examples/01-agent-inbox.ts`
-- **如何运行**：`npm run example:agent-inbox`
-- **关键步骤**：
-  1. `Agent.create` + `agent.subscribe(['progress'])` 推送文本增量。
-  2. 使用 `bookmark` / `cursor` 做断点续播。
-  3. `agent.on('tool_executed')` / `agent.on('error')` 将治理事件写入日志或监控。
-  4. `agent.todoManager` 自动提醒，UI 可展示 Todo 面板。
-- **注意事项**：
-  - 建议将 Progress 流通过 SSE/WebSocket 暴露给前端。
-  - 若 UI 需要思考过程，可在模板 metadata 中开启 `exposeThinking`。
-
----
-
-## 2. 工具审批 & 治理
-
-- **目标**：对敏感工具（如 `bash_run`、数据库写入）进行审批；结合 Hook 实现策略守卫。
-- **示例**：`examples/02-approval-control.ts`
-- **如何运行**：`npm run example:approval`
-- **关键步骤**：
-  1. 模板中配置 `permission`（如 `mode: 'approval'` + `requireApprovalTools`）。
-  2. 订阅 `agent.on('permission_required')`，将审批任务推送到业务系统。
-  3. 审批 UI 调用 `agent.decide(id, 'allow' | 'deny', note)`。
-  4. 结合 `HookManager` 的 `preToolUse` / `postToolUse` 做更细粒度的策略（如路径守卫、结果截断）。
-- **注意事项**：
-  - 审批过程中 Agent 处于 `AWAITING_APPROVAL` 断点，恢复后需调用 `ensureProcessing`（SDK 自动处理）。
-  - 拒绝工具会自动写入 `tool_result`，UI 可以提示用户重试策略。
-
----
-
-## 3. 多 Agent 小组协作
-
-- **目标**：一个 Planner 调度多个 Specialist，所有 Agent 长驻且可随时分叉。
-- **示例**：`examples/03-room-collab.ts`
-- **如何运行**：`npm run example:room`
-- **关键步骤**：
-  1. 使用单例 `AgentPool` 管理 Agent 生命周期（`create` / `resume` / `fork`）。
-  2. 通过 `Room` 实现广播/点名消息；消息带 `[from:name]` 模式进行协作。
-  3. 子 Agent 通过 `task_run` 工具或显式 `pool.create` 拉起。
-  4. 利用 `agent.snapshot()` + `agent.fork()` 在 Safe-Fork-Point 分叉出新任务。
-- **注意事项**：
-  - 模板的 `runtime.subagents` 可限制可分派模板与深度。
-  - 需要持久化 lineage（SDK 默认写入 metadata），便于审计和回放。
-  - 如果不希望监控不存在的文件，可以在模板中关闭 `watchFiles`（示例已设置）。
-
----
-
-## 4. 调度与系统提醒
-
-- **目标**：让 Agent 在长时运行中定期执行任务、监控文件变更、发送系统提醒。
-- **示例**：`examples/04-scheduler-watch.ts`
-- **如何运行**：`npm run example:scheduler`
-- **关键步骤**：
-  1. `const scheduler = agent.schedule(); scheduler.everySteps(N, callback)` 注册步数触发。
-  2. 使用 `agent.remind(text, options)` 发送系统级提醒（走 Monitor，不污染 Progress）。
-  3. FilePool 默认会监听写入文件，`monitor.file_changed` 触发后可结合 `scheduler.notifyExternalTrigger` 做自动响应。
-  4. Todo 结合 `remindIntervalSteps` 做定期回顾。
-- **注意事项**：
-  - 调度任务应保持幂等，遵循事件驱动思想。
-  - 对高频任务可结合外部 Cron，在触发时调用 `scheduler.notifyExternalTrigger`。
-
----
-
-## 5. 组合拳：审批 + 协作 + 调度
-
-- **场景**：代码审查机器人，Planner 负责拆分任务并分配到不同 Specialist，工具操作需审批，定时提醒确保 SLA。
-- **实现路径**：
-  1. Planner 模板：具备 `task_run` 工具与调度 Hook，每日早晨自动巡检。
-  2. Specialist 模板：聚焦 `fs_*` + `todo_*` 工具，审批策略只对 `bash_run` 开启。
-  3. 统一的审批服务：监听全部 Agent 的 Control 事件，打通企业 IM / 审批流。
-  4. Room 协作：Planner 将任务以 `@executor` 形式投递，执行完成再 @planner 汇报。
-  5. SLA 监控：Monitor 事件进入 observability pipeline（Prometheus / ELK / Datadog）。
-  6. 调度提醒：使用 Scheduler 定期检查待办或外部系统信号。
-
----
-
-## 常用组合 API 速查
-
-- 事件：`agent.subscribe(['progress'])`、`agent.on('error', handler)`、`agent.on('tool_executed', handler)`
-- 审批：`permission_required` → `event.respond()` / `agent.decide()`
-- 多 Agent：`new AgentPool({ dependencies, maxAgents })`、`const room = new Room(pool)`
-- 分叉：`const snapshot = await agent.snapshot(); const fork = await agent.fork(snapshot);`
-- 调度：`agent.schedule().everySteps(10, ...)`、`scheduler.notifyExternalTrigger(...)`
-- Todo：`agent.getTodos()` / `agent.setTodos()` / `todo_read` / `todo_write`
-
-结合这些 playbook，可以快速落地从“单人助手”到“多人团队协作”的完整产品体验。
diff --git a/docs/provider-architecture.md b/docs/provider-architecture.md
deleted file mode 100644
index a5c8b29..0000000
--- a/docs/provider-architecture.md
+++ /dev/null
@@ -1,1989 +0,0 @@
-# Provider Architecture V2: 100-Point Best Practice Design
-
-Based on comprehensive research of official documentation from Anthropic, OpenAI, Google Gemini, DeepSeek, Qwen, GLM, Kimi, Minimax, OpenRouter, Groq, and Cerebras.
-
-## Executive Summary
-
-This document defines a production-grade provider architecture that:
-- Supports 11+ model providers with unified internal format
-- Handles thinking/reasoning across all providers correctly
-- Implements prompt caching with provider-specific strategies
-- Provides typed error handling with retry logic
-- Tracks usage statistics with cache metrics
-- Supports agent resume/fork mechanisms seamlessly
-
----
-
-## Part 1: Unified Type System
-
-### 1.1 Core Message Types
-
-```typescript
-// core/types.ts
-
-export type MessageRole = 'user' | 'assistant' | 'system';
-
-export interface Message {
-  role: MessageRole;
-  content: ContentBlock[];
-  metadata?: MessageMetadata;
-}
-
-export interface MessageMetadata {
-  // Original content blocks before any transformation
-  content_blocks?: ContentBlock[];
-
-  // How reasoning was transported
-  transport?: ReasoningTransport;
-
-  // Cache control for this message
-  cacheControl?: CacheControl;
-
-  // Message-level tracking
-  messageId?: string;
-  timestamp?: number;
-}
-
-export type ReasoningTransport = 'provider' | 'text' | 'omit';
-```
-
-### 1.2 Content Block Types
-
-```typescript
-// Unified content blocks - Anthropic-style as canonical format
-
-export type ContentBlock =
-  | TextBlock
-  | ReasoningBlock
-  | ImageBlock
-  | AudioBlock
-  | FileBlock
-  | ToolUseBlock
-  | ToolResultBlock;
-
-export interface TextBlock {
-  type: 'text';
-  text: string;
-  cacheControl?: CacheControl;
-}
-
-export interface ReasoningBlock {
-  type: 'reasoning';
-  reasoning: string;
-  meta?: ReasoningMeta;
-}
-
-export interface ReasoningMeta {
-  // Anthropic: signature for multi-turn verification
-  signature?: string;
-
-  // Gemini: thought signature for function calls
-  thoughtSignature?: string;
-
-  // OpenAI Responses: reasoning item ID for state persistence
-  reasoningId?: string;
-
-  // DeepSeek/Qwen: whether to include in next turn
-  includeInHistory?: boolean;
-}
-
-export interface ImageBlock {
-  type: 'image';
-  // Source variants
-  base64?: string;
-  url?: string;
-  file_id?: string;
-  // Metadata
-  mime_type?: string;
-  detail?: 'low' | 'high' | 'auto';
-}
-
-export interface AudioBlock {
-  type: 'audio';
-  base64?: string;
-  url?: string;
-  mime_type?: string;
-}
-
-export interface FileBlock {
-  type: 'file';
-  base64?: string;
-  url?: string;
-  file_id?: string;
-  filename?: string;
-  mime_type?: string;
-}
-
-export interface ToolUseBlock {
-  type: 'tool_use';
-  id: string;
-  name: string;
-  input: Record<string, unknown>;
-  meta?: ToolUseMeta;
-}
-
-export interface ToolUseMeta {
-  // For tracking parallel tool calls
-  index?: number;
-  // Provider-specific tool call ID format
-  originalId?: string;
-}
-
-export interface ToolResultBlock {
-  type: 'tool_result';
-  tool_use_id: string;
-  content: string | ContentBlock[];
-  is_error?: boolean;
-}
-```
-
-### 1.3 Cache Control Types
-
-```typescript
-export interface CacheControl {
-  type: 'ephemeral';
-  ttl?: '5m' | '1h';  // Anthropic extended TTL
-}
-
-export interface CacheMetrics {
-  // Tokens written to cache this request
-  cacheCreationTokens: number;
-
-  // Tokens read from cache (cache hits)
-  cacheReadTokens: number;
-
-  // Cost savings from cache
-  cacheSavingsEstimate?: number;
-
-  // Provider-specific cache details
-  provider: {
-    anthropic?: {
-      breakpointsUsed: number;  // 0-4
-      ttlUsed: '5m' | '1h';
-    };
-    gemini?: {
-      cachedContentName?: string;
-      implicitCacheHit: boolean;
-    };
-    openai?: {
-      automaticCacheHit: boolean;
-    };
-    deepseek?: {
-      prefixCacheHit: boolean;
-    };
-  };
-}
-```
-
----
-
-## Part 2: Provider-Specific Options
-
-### 2.1 Provider Options Interface Hierarchy
-
-```typescript
-// Each provider has its own options type - no pseudo-abstraction
-
-export interface BaseProviderOptions {
-  // How to handle reasoning blocks
-  reasoningTransport?: ReasoningTransport;
-
-  // Proxy configuration
-  proxyUrl?: string;
-
-  // Request timeout in ms
-  timeout?: number;
-}
-
-export interface AnthropicProviderOptions extends BaseProviderOptions {
-  thinking?: {
-    enabled: boolean;
-    budgetTokens?: number;  // Minimum 1024
-  };
-
-  // Beta features
-  beta?: {
-    interleavedThinking?: boolean;  // interleaved-thinking-2025-05-14
-    filesApi?: boolean;  // files-api-2025-04-14
-    extendedCacheTtl?: boolean;  // extended-cache-ttl-2025-04-11
-  };
-
-  // Cache strategy
-  cache?: {
-    breakpoints?: number;  // 1-4
-    defaultTtl?: '5m' | '1h';
-  };
-}
-
-export interface OpenAIProviderOptions extends BaseProviderOptions {
-  // Which API to use
-  api: 'chat' | 'responses';
-
-  // For Responses API only
-  responses?: {
-    reasoning?: {
-      effort: 'none' | 'minimal' | 'low' | 'medium' | 'high' | 'xhigh';
-    };
-    store?: boolean;  // Enable state persistence
-    previousResponseId?: string;  // For multi-turn
-  };
-
-  // Streaming options
-  streamOptions?: {
-    includeUsage?: boolean;
-  };
-}
-
-export interface GeminiProviderOptions extends BaseProviderOptions {
-  // Gemini 3.x thinking config
-  thinking?: {
-    level: 'minimal' | 'low' | 'medium' | 'high';
-    includeThoughts?: boolean;
-  };
-
-  // Context caching
-  cache?: {
-    // Explicit cache name to use
-    cachedContentName?: string;
-    // Create new cache with TTL
-    createCache?: {
-      displayName: string;
-      ttlSeconds: number;  // e.g., 3600
-    };
-  };
-
-  // Media resolution for multimodal
-  mediaResolution?: 'low' | 'medium' | 'high';
-}
-
-export interface DeepSeekProviderOptions extends BaseProviderOptions {
-  thinking?: {
-    enabled: boolean;
-  };
-
-  // DeepSeek uses automatic prefix caching
-  // No explicit cache config needed
-}
-
-export interface QwenProviderOptions extends BaseProviderOptions {
-  thinking?: {
-    enabled: boolean;
-    budget?: number;  // thinking_budget parameter
-  };
-
-  // Region selection
-  region?: 'beijing' | 'singapore' | 'virginia';
-}
-
-export interface GLMProviderOptions extends BaseProviderOptions {
-  thinking?: {
-    enabled: boolean;
-  };
-
-  // Max 128 functions
-  maxFunctions?: number;
-}
-
-export interface MinimaxProviderOptions extends BaseProviderOptions {
-  // reasoning_split parameter
-  reasoningSplit?: boolean;
-}
-```
-
----
-
-## Part 3: Usage Statistics Module
-
-### 3.1 Unified Usage Interface
-
-```typescript
-// core/usage.ts
-
-export interface UsageStatistics {
-  // Core token counts
-  inputTokens: number;
-  outputTokens: number;
-  totalTokens: number;
-
-  // Reasoning/thinking tokens (separate from output)
-  reasoningTokens?: number;
-
-  // Cache metrics
-  cache: CacheMetrics;
-
-  // Cost calculation
-  cost: CostBreakdown;
-
-  // Request metadata
-  request: RequestMetrics;
-
-  // Provider-specific raw usage
-  raw?: Record<string, unknown>;
-}
-
-export interface CostBreakdown {
-  // Input token cost (after cache discounts)
-  inputCost: number;
-
-  // Output token cost (includes reasoning)
-  outputCost: number;
-
-  // Cache write cost (Anthropic: 1.25x for 5m, 2x for 1h)
-  cacheWriteCost: number;
-
-  // Total cost
-  totalCost: number;
-
-  // Savings from cache
-  cacheSavings: number;
-
-  // Currency (always USD)
-  currency: 'USD';
-}
-
-export interface RequestMetrics {
-  // Request timing
-  startTime: number;
-  endTime: number;
-  latencyMs: number;
-
-  // First token timing
-  timeToFirstTokenMs?: number;
-
-  // Tokens per second
-  tokensPerSecond?: number;
-
-  // Request ID from provider
-  requestId?: string;
-
-  // Model actually used (important for OpenRouter fallbacks)
-  modelUsed: string;
-
-  // Stop reason
-  stopReason?: string;
-}
-```
-
-### 3.2 Usage Normalization Functions
-
-```typescript
-// providers/usage-normalizer.ts
-
-export function normalizeAnthropicUsage(raw: any): UsageStatistics {
-  return {
-    inputTokens: raw.input_tokens || 0,
-    outputTokens: raw.output_tokens || 0,
-    totalTokens: (raw.input_tokens || 0) + (raw.output_tokens || 0) +
-                 (raw.cache_creation_input_tokens || 0) +
-                 (raw.cache_read_input_tokens || 0),
-
-    cache: {
-      cacheCreationTokens: raw.cache_creation_input_tokens || 0,
-      cacheReadTokens: raw.cache_read_input_tokens || 0,
-      provider: {
-        anthropic: {
-          breakpointsUsed: 0,  // Inferred from request
-          ttlUsed: '5m',
-        },
-      },
-    },
-
-    // Cost calculation uses model-specific pricing
-    cost: calculateAnthropicCost(raw),
-
-    request: {
-      startTime: 0,
-      endTime: 0,
-      latencyMs: 0,
-      modelUsed: '',
-    },
-
-    raw,
-  };
-}
-
-export function normalizeOpenAIUsage(raw: any, api: 'chat' | 'responses'): UsageStatistics {
-  const details = raw.output_tokens_details || {};
-
-  return {
-    inputTokens: raw.prompt_tokens || raw.input_tokens || 0,
-    outputTokens: raw.completion_tokens || raw.output_tokens || 0,
-    totalTokens: raw.total_tokens || 0,
-
-    reasoningTokens: details.reasoning_tokens || 0,
-
-    cache: {
-      cacheCreationTokens: 0,
-      cacheReadTokens: raw.prompt_tokens_details?.cached_tokens || 0,
-      provider: {
-        openai: {
-          automaticCacheHit: (raw.prompt_tokens_details?.cached_tokens || 0) > 0,
-        },
-      },
-    },
-
-    cost: calculateOpenAICost(raw, api),
-
-    request: {
-      startTime: 0,
-      endTime: 0,
-      latencyMs: 0,
-      modelUsed: '',
-    },
-
-    raw,
-  };
-}
-
-export function normalizeGeminiUsage(raw: any): UsageStatistics {
-  return {
-    inputTokens: raw.promptTokenCount || 0,
-    outputTokens: raw.candidatesTokenCount || 0,
-    totalTokens: raw.totalTokenCount || 0,
-
-    reasoningTokens: raw.thoughtsTokenCount || 0,
-
-    cache: {
-      cacheCreationTokens: 0,
-      cacheReadTokens: raw.cachedContentTokenCount || 0,
-      provider: {
-        gemini: {
-          cachedContentName: undefined,
-          implicitCacheHit: (raw.cachedContentTokenCount || 0) > 0,
-        },
-      },
-    },
-
-    cost: calculateGeminiCost(raw),
-
-    request: {
-      startTime: 0,
-      endTime: 0,
-      latencyMs: 0,
-      modelUsed: '',
-    },
-
-    raw,
-  };
-}
-```
-
----
-
-## Part 4: Error Handling Hierarchy
-
-### 4.1 Error Class Hierarchy
-
-```typescript
-// core/errors.ts
-
-export abstract class ProviderError extends Error {
-  abstract readonly code: ProviderErrorCode;
-  abstract readonly retryable: boolean;
-
-  readonly provider: string;
-  readonly requestId?: string;
-  readonly timestamp: number;
-
-  constructor(message: string, provider: string, requestId?: string) {
-    super(message);
-    this.name = this.constructor.name;
-    this.provider = provider;
-    this.requestId = requestId;
-    this.timestamp = Date.now();
-  }
-
-  toJSON(): ProviderErrorDetails {
-    return {
-      name: this.name,
-      code: this.code,
-      message: this.message,
-      provider: this.provider,
-      requestId: this.requestId,
-      retryable: this.retryable,
-      timestamp: this.timestamp,
-    };
-  }
-}
-
-export type ProviderErrorCode =
-  | 'RATE_LIMIT'
-  | 'AUTH_FAILED'
-  | 'CONTEXT_LENGTH'
-  | 'INVALID_REQUEST'
-  | 'SERVER_ERROR'
-  | 'TIMEOUT'
-  | 'NETWORK_ERROR'
-  | 'CONTENT_FILTER'
-  | 'MODEL_NOT_FOUND'
-  | 'QUOTA_EXCEEDED'
-  | 'SERVICE_UNAVAILABLE'
-  | 'THINKING_SIGNATURE_INVALID';
-
-export class RateLimitError extends ProviderError {
-  readonly code = 'RATE_LIMIT' as const;
-  readonly retryable = true;
-
-  readonly retryAfter?: number;
-  readonly limitType?: 'requests' | 'tokens';
-
-  constructor(
-    provider: string,
-    retryAfter?: number,
-    limitType?: 'requests' | 'tokens',
-    requestId?: string
-  ) {
-    super(
-      `Rate limit exceeded${retryAfter ? `, retry after ${retryAfter}s` : ''}`,
-      provider,
-      requestId
-    );
-    this.retryAfter = retryAfter;
-    this.limitType = limitType;
-  }
-}
-
-export class AuthenticationError extends ProviderError {
-  readonly code = 'AUTH_FAILED' as const;
-  readonly retryable = false;
-
-  constructor(provider: string, requestId?: string) {
-    super('Authentication failed - check API key', provider, requestId);
-  }
-}
-
-export class ContextLengthError extends ProviderError {
-  readonly code = 'CONTEXT_LENGTH' as const;
-  readonly retryable = false;
-
-  readonly maxTokens: number;
-  readonly requestedTokens: number;
-
-  constructor(
-    provider: string,
-    maxTokens: number,
-    requestedTokens: number,
-    requestId?: string
-  ) {
-    super(
-      `Context length ${requestedTokens} exceeds maximum ${maxTokens}`,
-      provider,
-      requestId
-    );
-    this.maxTokens = maxTokens;
-    this.requestedTokens = requestedTokens;
-  }
-}
-
-export class ThinkingSignatureError extends ProviderError {
-  readonly code = 'THINKING_SIGNATURE_INVALID' as const;
-  readonly retryable = false;
-
-  constructor(provider: string, requestId?: string) {
-    super(
-      'Thinking signature invalid - blocks may have been modified',
-      provider,
-      requestId
-    );
-  }
-}
-
-export class ServerError extends ProviderError {
-  readonly code = 'SERVER_ERROR' as const;
-  readonly retryable = true;
-
-  readonly statusCode?: number;
-
-  constructor(provider: string, statusCode?: number, requestId?: string) {
-    super(
-      `Server error${statusCode ? ` (${statusCode})` : ''}`,
-      provider,
-      requestId
-    );
-    this.statusCode = statusCode;
-  }
-}
-
-export class ContentFilterError extends ProviderError {
-  readonly code = 'CONTENT_FILTER' as const;
-  readonly retryable = false;
-
-  readonly category?: string;
-
-  constructor(provider: string, category?: string, requestId?: string) {
-    super(
-      `Content filtered${category ? `: ${category}` : ''}`,
-      provider,
-      requestId
-    );
-    this.category = category;
-  }
-}
-```
-
-### 4.2 Error Parser
-
-```typescript
-// providers/error-parser.ts
-
-export function parseProviderError(
-  error: any,
-  provider: string
-): ProviderError {
-  const statusCode = error.status || error.statusCode;
-  const requestId = error.request_id || error.requestId;
-
-  // Rate limit
-  if (statusCode === 429) {
-    const retryAfter = parseRetryAfter(error);
-    return new RateLimitError(provider, retryAfter, undefined, requestId);
-  }
-
-  // Auth errors
-  if (statusCode === 401 || statusCode === 403) {
-    return new AuthenticationError(provider, requestId);
-  }
-
-  // Server errors (retryable)
-  if (statusCode === 529 || statusCode >= 500) {
-    return new ServerError(provider, statusCode, requestId);
-  }
-
-  // Context length
-  if (error.code === 'context_length_exceeded' ||
-      error.message?.includes('context') ||
-      error.message?.includes('token')) {
-    return new ContextLengthError(
-      provider,
-      error.max_tokens || 0,
-      error.requested_tokens || 0,
-      requestId
-    );
-  }
-
-  // Content filter
-  if (error.code === 'content_policy_violation' ||
-      error.message?.includes('safety') ||
-      error.message?.includes('filter')) {
-    return new ContentFilterError(provider, error.category, requestId);
-  }
-
-  // Anthropic thinking signature
-  if (error.message?.includes('signature')) {
-    return new ThinkingSignatureError(provider, requestId);
-  }
-
-  // Default to server error
-  return new ServerError(provider, statusCode, requestId);
-}
-
-function parseRetryAfter(error: any): number | undefined {
-  const header = error.headers?.['retry-after'];
-  if (header) {
-    const seconds = parseInt(header, 10);
-    if (!isNaN(seconds)) return seconds;
-  }
-  return undefined;
-}
-```
-
-### 4.3 Retry Strategy
-
-```typescript
-// core/retry.ts
-
-export interface RetryConfig {
-  maxRetries: number;
-  baseDelayMs: number;
-  maxDelayMs: number;
-  jitterFactor: number;
-}
-
-export const DEFAULT_RETRY_CONFIG: RetryConfig = {
-  maxRetries: 3,
-  baseDelayMs: 1000,
-  maxDelayMs: 60000,
-  jitterFactor: 0.2,
-};
-
-export async function withRetry<T>(
-  fn: () => Promise<T>,
-  config: RetryConfig = DEFAULT_RETRY_CONFIG,
-  onRetry?: (error: ProviderError, attempt: number, delayMs: number) => void
-): Promise<T> {
-  let lastError: ProviderError | undefined;
-
-  for (let attempt = 0; attempt <= config.maxRetries; attempt++) {
-    try {
-      return await fn();
-    } catch (error) {
-      const providerError = error instanceof ProviderError
-        ? error
-        : parseProviderError(error, 'unknown');
-
-      lastError = providerError;
-
-      // Don't retry non-retryable errors
-      if (!providerError.retryable || attempt === config.maxRetries) {
-        throw providerError;
-      }
-
-      // Calculate delay with exponential backoff and jitter
-      let delay = Math.min(
-        config.baseDelayMs * Math.pow(2, attempt),
-        config.maxDelayMs
-      );
-
-      // Use retry-after header if available
-      if (providerError instanceof RateLimitError && providerError.retryAfter) {
-        delay = Math.max(delay, providerError.retryAfter * 1000);
-      }
-
-      // Add jitter
-      const jitter = delay * config.jitterFactor * (Math.random() - 0.5);
-      delay = Math.floor(delay + jitter);
-
-      onRetry?.(providerError, attempt + 1, delay);
-
-      await sleep(delay);
-    }
-  }
-
-  throw lastError;
-}
-
-function sleep(ms: number): Promise<void> {
-  return new Promise(resolve => setTimeout(resolve, ms));
-}
-```
-
----
-
-## Part 5: Logging and Debugging Module
-
-### 5.1 Logger Interface
-
-```typescript
-// core/logger.ts
-
-export type LogLevel = 'debug' | 'info' | 'warn' | 'error';
-
-export interface LogEntry {
-  level: LogLevel;
-  message: string;
-  timestamp: number;
-  context?: Record<string, unknown>;
-
-  // Request correlation
-  requestId?: string;
-  agentId?: string;
-  sessionId?: string;
-}
-
-export interface Logger {
-  debug(message: string, context?: Record<string, unknown>): void;
-  info(message: string, context?: Record<string, unknown>): void;
-  warn(message: string, context?: Record<string, unknown>): void;
-  error(message: string, context?: Record<string, unknown>): void;
-
-  // Create child logger with additional context
-  child(context: Record<string, unknown>): Logger;
-}
-
-export interface ProviderLogger extends Logger {
-  // Log request/response pairs
-  logRequest(request: ProviderRequest): void;
-  logResponse(response: ProviderResponse, durationMs: number): void;
-  logError(error: ProviderError): void;
-
-  // Log streaming events
-  logStreamEvent(event: StreamEvent): void;
-
-  // Log cache operations
-  logCacheHit(tokens: number): void;
-  logCacheWrite(tokens: number, ttl: string): void;
-}
-```
-
-### 5.2 Debug Configuration
-
-```typescript
-// core/debug.ts
-
-export interface DebugConfig {
-  // Enable verbose logging
-  verbose: boolean;
-
-  // Log raw API requests/responses
-  logRawRequests: boolean;
-  logRawResponses: boolean;
-
-  // Log thinking/reasoning content
-  logThinking: boolean;
-
-  // Log token counts
-  logTokenUsage: boolean;
-
-  // Log cache operations
-  logCache: boolean;
-
-  // Log retry attempts
-  logRetries: boolean;
-
-  // Redact sensitive data (API keys, etc.)
-  redactSensitive: boolean;
-
-  // Max content length in logs
-  maxContentLength: number;
-}
-
-export const DEFAULT_DEBUG_CONFIG: DebugConfig = {
-  verbose: false,
-  logRawRequests: false,
-  logRawResponses: false,
-  logThinking: false,
-  logTokenUsage: true,
-  logCache: true,
-  logRetries: true,
-  redactSensitive: true,
-  maxContentLength: 500,
-};
-```
-
-### 5.3 Audit Trail
-
-```typescript
-// core/audit.ts
-
-export interface AuditRecord {
-  id: string;
-  timestamp: number;
-
-  // Request info
-  provider: string;
-  model: string;
-  requestId?: string;
-
-  // Token usage
-  usage: UsageStatistics;
-
-  // Cache performance
-  cacheHit: boolean;
-  cacheSavings?: number;
-
-  // Error info
-  error?: ProviderErrorDetails;
-
-  // Timing
-  latencyMs: number;
-  timeToFirstTokenMs?: number;
-
-  // Agent context
-  agentId?: string;
-  sessionId?: string;
-  stepNumber?: number;
-}
-
-export interface AuditStore {
-  record(audit: AuditRecord): Promise<void>;
-  query(filter: AuditFilter): Promise<AuditRecord[]>;
-  aggregate(filter: AuditFilter): Promise<AuditAggregation>;
-}
-
-export interface AuditAggregation {
-  totalRequests: number;
-  totalTokens: number;
-  totalCost: number;
-  cacheHitRate: number;
-  averageLatencyMs: number;
-  errorRate: number;
-
-  byProvider: Map<string, {
-    requests: number;
-    tokens: number;
-    cost: number;
-  }>;
-
-  byModel: Map<string, {
-    requests: number;
-    tokens: number;
-    cost: number;
-  }>;
-}
-```
-
----
-
-## Part 6: Provider Interface
-
-### 6.1 Minimal Provider Interface
-
-```typescript
-// providers/types.ts
-
-export interface ModelProvider<TOptions extends BaseProviderOptions = BaseProviderOptions> {
-  readonly id: string;
-  readonly model: string;
-  readonly capabilities: ProviderCapabilities;
-
-  // Core operations
-  complete(
-    messages: Message[],
-    options?: CompletionOptions
-  ): Promise<ModelResponse>;
-
-  stream(
-    messages: Message[],
-    options?: CompletionOptions
-  ): AsyncIterable<StreamChunk>;
-
-  // Provider configuration
-  configure(options: Partial<TOptions>): void;
-  getConfig(): TOptions;
-}
-
-export interface ProviderCapabilities {
-  // Feature support
-  supportsThinking: boolean;
-  supportsInterleavedThinking: boolean;
-  supportsImages: boolean;
-  supportsAudio: boolean;
-  supportsFiles: boolean;
-  supportsTools: boolean;
-  supportsStreaming: boolean;
-  supportsCache: boolean;
-
-  // Limits
-  maxContextTokens: number;
-  maxOutputTokens: number;
-
-  // Cache requirements
-  minCacheableTokens?: number;
-  maxCacheBreakpoints?: number;
-}
-
-export interface ModelResponse {
-  role: 'assistant';
-  content: ContentBlock[];
-  usage: UsageStatistics;
-  stopReason?: string;
-}
-
-export interface StreamChunk {
-  type: StreamEventType;
-  index?: number;
-  delta?: ContentBlockDelta;
-  block?: ContentBlock;
-  usage?: Partial<UsageStatistics>;
-}
-
-export type StreamEventType =
-  | 'message_start'
-  | 'content_block_start'
-  | 'content_block_delta'
-  | 'content_block_stop'
-  | 'message_delta'
-  | 'message_stop'
-  | 'error'
-  | 'ping';
-```
-
-### 6.2 Optional Extension Interfaces
-
-```typescript
-// providers/extensions.ts
-
-export interface FileUploadProvider {
-  uploadFile(input: FileUploadInput): Promise<FileUploadResult>;
-  listFiles?(): Promise<FileInfo[]>;
-  deleteFile?(fileId: string): Promise<void>;
-}
-
-export interface TokenCountProvider {
-  countTokens(messages: Message[]): Promise<number>;
-  countTokensSync?(text: string): number;
-}
-
-export interface CacheProvider {
-  createCache(input: CacheCreateInput): Promise<CacheInfo>;
-  useCache(cacheId: string): void;
-  clearCache?(cacheId: string): Promise<void>;
-  listCaches?(): Promise<CacheInfo[]>;
-}
-```
-
----
-
-## Part 7: Message Transformation (Pure Functions)
-
-### 7.1 Internal to Provider Format
-
-```typescript
-// providers/transformers/anthropic.ts
-
-export function toAnthropicMessages(
-  messages: Message[],
-  options: AnthropicProviderOptions
-): AnthropicMessage[] {
-  return messages.map(msg => toAnthropicMessage(msg, options));
-}
-
-function toAnthropicMessage(
-  msg: Message,
-  options: AnthropicProviderOptions
-): AnthropicMessage {
-  const blocks = getMessageBlocks(msg);
-
-  return {
-    role: msg.role === 'system' ? 'user' : msg.role,
-    content: blocks.map(block => toAnthropicBlock(block, options)),
-  };
-}
-
-function toAnthropicBlock(
-  block: ContentBlock,
-  options: AnthropicProviderOptions
-): AnthropicBlock {
-  switch (block.type) {
-    case 'text':
-      return {
-        type: 'text',
-        text: block.text,
-        ...(block.cacheControl && { cache_control: block.cacheControl }),
-      };
-
-    case 'reasoning':
-      // Only include if transport is 'provider'
-      if (options.reasoningTransport !== 'provider') {
-        throw new Error('Reasoning block with non-provider transport');
-      }
-      return {
-        type: 'thinking',
-        thinking: block.reasoning,
-        ...(block.meta?.signature && { signature: block.meta.signature }),
-      };
-
-    case 'image':
-      return toAnthropicImageBlock(block);
-
-    case 'file':
-      return toAnthropicFileBlock(block);
-
-    case 'tool_use':
-      return {
-        type: 'tool_use',
-        id: block.id,
-        name: block.name,
-        input: block.input,
-      };
-
-    case 'tool_result':
-      return {
-        type: 'tool_result',
-        tool_use_id: block.tool_use_id,
-        content: formatToolResultContent(block.content),
-        ...(block.is_error && { is_error: true }),
-      };
-
-    default:
-      throw new Error(`Unsupported block type: ${(block as any).type}`);
-  }
-}
-```
-
-### 7.2 Provider Response to Internal Format
-
-```typescript
-// providers/transformers/anthropic-response.ts
-
-export function fromAnthropicResponse(
-  response: AnthropicAPIResponse
-): ModelResponse {
-  return {
-    role: 'assistant',
-    content: response.content.map(fromAnthropicBlock),
-    usage: normalizeAnthropicUsage(response.usage),
-    stopReason: response.stop_reason,
-  };
-}
-
-function fromAnthropicBlock(block: AnthropicResponseBlock): ContentBlock {
-  switch (block.type) {
-    case 'text':
-      return { type: 'text', text: block.text };
-
-    case 'thinking':
-      return {
-        type: 'reasoning',
-        reasoning: block.thinking,
-        meta: {
-          signature: block.signature,
-        },
-      };
-
-    case 'redacted_thinking':
-      // Preserve redacted blocks for multi-turn
-      return {
-        type: 'reasoning',
-        reasoning: '[redacted]',
-        meta: {
-          signature: block.data,  // Encrypted data
-        },
-      };
-
-    case 'tool_use':
-      return {
-        type: 'tool_use',
-        id: block.id,
-        name: block.name,
-        input: block.input,
-      };
-
-    default:
-      throw new Error(`Unknown block type: ${(block as any).type}`);
-  }
-}
-```
-
----
-
-## Part 8: Resume/Fork Compatibility
-
-### 8.1 Safe Fork Point Detection
-
-```typescript
-// core/fork.ts
-
-export interface ForkPoint {
-  messageIndex: number;
-  isSafe: boolean;
-  reason?: string;
-}
-
-export function findSafeForkPoints(messages: Message[]): ForkPoint[] {
-  const points: ForkPoint[] = [];
-
-  for (let i = 0; i < messages.length; i++) {
-    const msg = messages[i];
-    const point = analyzeForkSafety(msg, i, messages);
-    points.push(point);
-  }
-
-  return points;
-}
-
-function analyzeForkSafety(
-  msg: Message,
-  index: number,
-  messages: Message[]
-): ForkPoint {
-  // User messages are always safe fork points
-  if (msg.role === 'user') {
-    return { messageIndex: index, isSafe: true };
-  }
-
-  // Assistant messages without tool_use are safe
-  if (msg.role === 'assistant') {
-    const hasToolUse = msg.content.some(b => b.type === 'tool_use');
-    if (!hasToolUse) {
-      return { messageIndex: index, isSafe: true };
-    }
-
-    // Check if all tool calls have results
-    const toolUseIds = msg.content
-      .filter((b): b is ToolUseBlock => b.type === 'tool_use')
-      .map(b => b.id);
-
-    const nextMsg = messages[index + 1];
-    if (nextMsg?.role === 'user') {
-      const resultIds = nextMsg.content
-        .filter((b): b is ToolResultBlock => b.type === 'tool_result')
-        .map(b => b.tool_use_id);
-
-      const allHaveResults = toolUseIds.every(id => resultIds.includes(id));
-      if (allHaveResults) {
-        return { messageIndex: index + 1, isSafe: true };
-      }
-    }
-
-    return {
-      messageIndex: index,
-      isSafe: false,
-      reason: 'Pending tool calls without results',
-    };
-  }
-
-  return { messageIndex: index, isSafe: false, reason: 'Unknown message role' };
-}
-```
-
-### 8.2 Message Serialization for Resume
-
-```typescript
-// core/serialization.ts
-
-export interface SerializedMessage {
-  role: MessageRole;
-  content: ContentBlock[];
-  metadata?: MessageMetadata;
-}
-
-export function serializeForResume(
-  messages: Message[],
-  options: SerializationOptions
-): SerializedMessage[] {
-  return messages.map(msg => serializeMessage(msg, options));
-}
-
-function serializeMessage(
-  msg: Message,
-  options: SerializationOptions
-): SerializedMessage {
-  const serialized: SerializedMessage = {
-    role: msg.role,
-    content: [],
-    metadata: msg.metadata,
-  };
-
-  for (const block of msg.content) {
-    const serializedBlock = serializeBlock(block, options);
-    if (serializedBlock) {
-      serialized.content.push(serializedBlock);
-    }
-  }
-
-  return serialized;
-}
-
-function serializeBlock(
-  block: ContentBlock,
-  options: SerializationOptions
-): ContentBlock | null {
-  // Handle reasoning blocks based on transport
-  if (block.type === 'reasoning') {
-    switch (options.reasoningTransport) {
-      case 'provider':
-        // Keep as-is for Anthropic/OpenAI
-        return block;
-
-      case 'text':
-        // Convert to text block with <think> tags
-        return {
-          type: 'text',
-          text: `<think>${block.reasoning}</think>`,
-        };
-
-      case 'omit':
-        // Exclude from serialized output
-        return null;
-    }
-  }
-
-  return block;
-}
-
-export interface SerializationOptions {
-  reasoningTransport: ReasoningTransport;
-
-  // Whether to preserve thinking signatures
-  preserveSignatures: boolean;
-
-  // Max content length for truncation
-  maxContentLength?: number;
-}
-```
-
-### 8.3 Provider-Specific Resume Requirements
-
-```typescript
-// providers/resume-handlers.ts
-
-export interface ResumeHandler {
-  // Prepare messages for resuming conversation
-  prepareForResume(messages: Message[]): Message[];
-
-  // Validate messages are suitable for resume
-  validateForResume(messages: Message[]): ValidationResult;
-}
-
-export const anthropicResumeHandler: ResumeHandler = {
-  prepareForResume(messages) {
-    // Anthropic requires thinking blocks with signatures for Claude 4+
-    // Claude Opus 4.5 preserves thinking by default
-    return messages.map(msg => {
-      if (msg.role !== 'assistant') return msg;
-
-      // Ensure reasoning blocks have signatures
-      const validBlocks = msg.content.filter(block => {
-        if (block.type === 'reasoning') {
-          // Blocks without signatures can still be passed (they'll be ignored)
-          return true;
-        }
-        return true;
-      });
-
-      return { ...msg, content: validBlocks };
-    });
-  },
-
-  validateForResume(messages) {
-    // Check for sequence integrity
-    const errors: string[] = [];
-
-    for (let i = 0; i < messages.length; i++) {
-      const msg = messages[i];
-
-      // Check tool_use has corresponding tool_result
-      if (msg.role === 'assistant') {
-        const toolUses = msg.content.filter(b => b.type === 'tool_use');
-        if (toolUses.length > 0 && i < messages.length - 1) {
-          const nextMsg = messages[i + 1];
-          if (nextMsg.role !== 'user') {
-            errors.push(`Tool use at index ${i} not followed by user message`);
-          }
-        }
-      }
-    }
-
-    return {
-      valid: errors.length === 0,
-      errors,
-    };
-  },
-};
-
-export const deepseekResumeHandler: ResumeHandler = {
-  prepareForResume(messages) {
-    // DeepSeek: MUST NOT include reasoning_content in next turn
-    // Only include content field
-    return messages.map(msg => {
-      if (msg.role !== 'assistant') return msg;
-
-      // Filter out reasoning blocks
-      const filteredBlocks = msg.content.filter(b => b.type !== 'reasoning');
-
-      return { ...msg, content: filteredBlocks };
-    });
-  },
-
-  validateForResume(messages) {
-    // Check that reasoning is not included in history
-    const errors: string[] = [];
-
-    for (let i = 0; i < messages.length - 1; i++) {  // Skip last message
-      const msg = messages[i];
-      if (msg.role === 'assistant') {
-        const hasReasoning = msg.content.some(b => b.type === 'reasoning');
-        if (hasReasoning) {
-          errors.push(
-            `DeepSeek: reasoning_content must not be included at index ${i}`
-          );
-        }
-      }
-    }
-
-    return {
-      valid: errors.length === 0,
-      errors,
-    };
-  },
-};
-
-export const qwenResumeHandler: ResumeHandler = {
-  prepareForResume(messages) {
-    // Qwen: Similar to DeepSeek, reasoning_content should be omitted
-    return messages.map(msg => {
-      if (msg.role !== 'assistant') return msg;
-
-      const filteredBlocks = msg.content.filter(b => b.type !== 'reasoning');
-
-      return { ...msg, content: filteredBlocks };
-    });
-  },
-
-  validateForResume(messages) {
-    return { valid: true, errors: [] };
-  },
-};
-```
-
----
-
-## Part 9: Provider-Specific Implementations
-
-### 9.1 Anthropic Provider
-
-```typescript
-// providers/anthropic.ts
-
-export class AnthropicProvider implements ModelProvider<AnthropicProviderOptions> {
-  readonly id = 'anthropic';
-  readonly model: string;
-  readonly capabilities: ProviderCapabilities;
-
-  private options: AnthropicProviderOptions;
-  private client: AnthropicClient;
-
-  constructor(
-    apiKey: string,
-    model: string,
-    options: AnthropicProviderOptions = {}
-  ) {
-    this.model = model;
-    this.options = {
-      reasoningTransport: 'provider',
-      ...options,
-    };
-
-    this.capabilities = {
-      supportsThinking: true,
-      supportsInterleavedThinking: true,
-      supportsImages: true,
-      supportsAudio: false,
-      supportsFiles: true,
-      supportsTools: true,
-      supportsStreaming: true,
-      supportsCache: true,
-      maxContextTokens: 200000,
-      maxOutputTokens: 8192,
-      minCacheableTokens: this.getMinCacheableTokens(),
-      maxCacheBreakpoints: 4,
-    };
-
-    this.client = new AnthropicClient(apiKey, options.proxyUrl);
-  }
-
-  async complete(
-    messages: Message[],
-    options?: CompletionOptions
-  ): Promise<ModelResponse> {
-    const body = this.buildRequestBody(messages, options);
-
-    const response = await withRetry(
-      () => this.client.post('/v1/messages', body),
-      DEFAULT_RETRY_CONFIG
-    );
-
-    return fromAnthropicResponse(response);
-  }
-
-  async *stream(
-    messages: Message[],
-    options?: CompletionOptions
-  ): AsyncIterable<StreamChunk> {
-    const body = {
-      ...this.buildRequestBody(messages, options),
-      stream: true,
-    };
-
-    const response = await this.client.postStream('/v1/messages', body);
-
-    for await (const event of response) {
-      yield normalizeAnthropicStreamEvent(event);
-    }
-  }
-
-  private buildRequestBody(
-    messages: Message[],
-    options?: CompletionOptions
-  ): AnthropicRequestBody {
-    const body: AnthropicRequestBody = {
-      model: this.model,
-      messages: toAnthropicMessages(messages, this.options),
-      max_tokens: options?.maxTokens ?? 4096,
-    };
-
-    // System prompt
-    if (options?.system) {
-      body.system = this.buildSystemPrompt(options.system);
-    }
-
-    // Thinking configuration
-    if (this.options.thinking?.enabled) {
-      body.thinking = {
-        type: 'enabled',
-        budget_tokens: this.options.thinking.budgetTokens ?? 10000,
-      };
-    }
-
-    // Tools
-    if (options?.tools?.length) {
-      body.tools = options.tools.map(toAnthropicTool);
-    }
-
-    return body;
-  }
-
-  private buildHeaders(): Record<string, string> {
-    const headers: Record<string, string> = {
-      'anthropic-version': '2023-06-01',
-    };
-
-    const betas: string[] = [];
-
-    if (this.options.beta?.interleavedThinking) {
-      betas.push('interleaved-thinking-2025-05-14');
-    }
-
-    if (this.options.beta?.filesApi) {
-      betas.push('files-api-2025-04-14');
-    }
-
-    if (this.options.beta?.extendedCacheTtl) {
-      betas.push('extended-cache-ttl-2025-04-11');
-    }
-
-    if (betas.length > 0) {
-      headers['anthropic-beta'] = betas.join(',');
-    }
-
-    return headers;
-  }
-
-  private getMinCacheableTokens(): number {
-    if (this.model.includes('opus')) return 4096;
-    if (this.model.includes('haiku-4-5')) return 4096;
-    if (this.model.includes('haiku')) return 2048;
-    return 1024;  // Sonnet
-  }
-}
-```
-
-### 9.2 OpenAI Provider (Unified)
-
-```typescript
-// providers/openai.ts
-
-export class OpenAIProvider implements ModelProvider<OpenAIProviderOptions> {
-  readonly id = 'openai';
-  readonly model: string;
-  readonly capabilities: ProviderCapabilities;
-
-  private options: OpenAIProviderOptions;
-  private client: OpenAIClient;
-
-  constructor(
-    apiKey: string,
-    model: string,
-    options: OpenAIProviderOptions
-  ) {
-    this.model = model;
-    this.options = options;
-
-    this.capabilities = this.deriveCapabilities();
-    this.client = new OpenAIClient(apiKey, options.proxyUrl);
-  }
-
-  async complete(
-    messages: Message[],
-    options?: CompletionOptions
-  ): Promise<ModelResponse> {
-    if (this.options.api === 'responses') {
-      return this.completeResponses(messages, options);
-    }
-    return this.completeChat(messages, options);
-  }
-
-  private async completeChat(
-    messages: Message[],
-    options?: CompletionOptions
-  ): Promise<ModelResponse> {
-    const body = {
-      model: this.model,
-      messages: toOpenAIChatMessages(messages),
-      max_tokens: options?.maxTokens,
-      ...(options?.tools && { tools: options.tools.map(toOpenAITool) }),
-    };
-
-    const response = await withRetry(
-      () => this.client.post('/chat/completions', body)
-    );
-
-    return fromOpenAIChatResponse(response);
-  }
-
-  private async completeResponses(
-    messages: Message[],
-    options?: CompletionOptions
-  ): Promise<ModelResponse> {
-    const body: OpenAIResponsesBody = {
-      model: this.model,
-      input: toOpenAIResponsesInput(messages),
-    };
-
-    // Reasoning configuration
-    if (this.options.responses?.reasoning) {
-      body.reasoning = this.options.responses.reasoning;
-    }
-
-    // State persistence
-    if (this.options.responses?.store) {
-      body.store = true;
-    }
-
-    // Multi-turn continuation
-    if (this.options.responses?.previousResponseId) {
-      body.previous_response_id = this.options.responses.previousResponseId;
-    }
-
-    // Tools
-    if (options?.tools?.length) {
-      body.tools = options.tools.map(toOpenAIResponsesTool);
-    }
-
-    const response = await withRetry(
-      () => this.client.post('/responses', body)
-    );
-
-    return fromOpenAIResponsesResponse(response);
-  }
-
-  private deriveCapabilities(): ProviderCapabilities {
-    const isResponses = this.options.api === 'responses';
-
-    return {
-      supportsThinking: isResponses,
-      supportsInterleavedThinking: false,
-      supportsImages: true,
-      supportsAudio: !isResponses,  // Audio not yet in Responses API
-      supportsFiles: isResponses,
-      supportsTools: true,
-      supportsStreaming: true,
-      supportsCache: true,
-      maxContextTokens: 128000,
-      maxOutputTokens: 16384,
-      minCacheableTokens: 1024,
-    };
-  }
-}
-```
-
----
-
-## Part 10: Factory and Registry
-
-### 10.1 Provider Factory
-
-```typescript
-// providers/factory.ts
-
-export interface ProviderConfig {
-  provider: ProviderType;
-  apiKey: string;
-  model: string;
-  baseUrl?: string;
-  options?: Record<string, unknown>;
-}
-
-export type ProviderType =
-  | 'anthropic'
-  | 'openai'
-  | 'openai-responses'
-  | 'gemini'
-  | 'deepseek'
-  | 'qwen'
-  | 'glm'
-  | 'kimi'
-  | 'minimax'
-  | 'openrouter'
-  | 'groq'
-  | 'cerebras';
-
-export function createProvider(config: ProviderConfig): ModelProvider {
-  switch (config.provider) {
-    case 'anthropic':
-      return new AnthropicProvider(
-        config.apiKey,
-        config.model,
-        config.options as AnthropicProviderOptions
-      );
-
-    case 'openai':
-      return new OpenAIProvider(
-        config.apiKey,
-        config.model,
-        { api: 'chat', ...config.options } as OpenAIProviderOptions
-      );
-
-    case 'openai-responses':
-      return new OpenAIProvider(
-        config.apiKey,
-        config.model,
-        { api: 'responses', ...config.options } as OpenAIProviderOptions
-      );
-
-    case 'gemini':
-      return new GeminiProvider(
-        config.apiKey,
-        config.model,
-        config.options as GeminiProviderOptions
-      );
-
-    case 'deepseek':
-      return new DeepSeekProvider(
-        config.apiKey,
-        config.model,
-        config.options as DeepSeekProviderOptions
-      );
-
-    case 'qwen':
-      return new QwenProvider(
-        config.apiKey,
-        config.model,
-        config.options as QwenProviderOptions
-      );
-
-    // ... other providers
-
-    default:
-      throw new Error(`Unknown provider: ${config.provider}`);
-  }
-}
-```
-
-### 10.2 Model Registry
-
-```typescript
-// providers/registry.ts
-
-export interface ModelInfo {
-  provider: ProviderType;
-  modelId: string;
-  displayName: string;
-
-  // Capabilities
-  capabilities: ProviderCapabilities;
-
-  // Pricing (per 1M tokens)
-  pricing: {
-    input: number;
-    output: number;
-    cacheWrite?: number;
-    cacheRead?: number;
-    reasoning?: number;
-  };
-
-  // Context limits
-  contextWindow: number;
-  maxOutput: number;
-
-  // Feature flags
-  features: {
-    thinking: boolean;
-    vision: boolean;
-    audio: boolean;
-    files: boolean;
-    cache: boolean;
-  };
-}
-
-export const MODEL_REGISTRY: Record<string, ModelInfo> = {
-  'claude-opus-4-5': {
-    provider: 'anthropic',
-    modelId: 'claude-opus-4-5-20251101',
-    displayName: 'Claude Opus 4.5',
-    capabilities: {
-      supportsThinking: true,
-      supportsInterleavedThinking: true,
-      supportsImages: true,
-      supportsAudio: false,
-      supportsFiles: true,
-      supportsTools: true,
-      supportsStreaming: true,
-      supportsCache: true,
-      maxContextTokens: 200000,
-      maxOutputTokens: 8192,
-      minCacheableTokens: 4096,
-      maxCacheBreakpoints: 4,
-    },
-    pricing: {
-      input: 5.0,
-      output: 25.0,
-      cacheWrite: 6.25,
-      cacheRead: 0.5,
-    },
-    contextWindow: 200000,
-    maxOutput: 8192,
-    features: {
-      thinking: true,
-      vision: true,
-      audio: false,
-      files: true,
-      cache: true,
-    },
-  },
-
-  'gpt-5.2': {
-    provider: 'openai-responses',
-    modelId: 'gpt-5.2',
-    displayName: 'GPT-5.2',
-    capabilities: {
-      supportsThinking: true,
-      supportsInterleavedThinking: false,
-      supportsImages: true,
-      supportsAudio: false,
-      supportsFiles: true,
-      supportsTools: true,
-      supportsStreaming: true,
-      supportsCache: true,
-      maxContextTokens: 128000,
-      maxOutputTokens: 16384,
-      minCacheableTokens: 1024,
-    },
-    pricing: {
-      input: 5.0,
-      output: 15.0,
-    },
-    contextWindow: 128000,
-    maxOutput: 16384,
-    features: {
-      thinking: true,
-      vision: true,
-      audio: false,
-      files: true,
-      cache: true,
-    },
-  },
-
-  'gemini-3-pro': {
-    provider: 'gemini',
-    modelId: 'gemini-3-pro',
-    displayName: 'Gemini 3 Pro',
-    capabilities: {
-      supportsThinking: true,
-      supportsInterleavedThinking: false,
-      supportsImages: true,
-      supportsAudio: true,
-      supportsFiles: true,
-      supportsTools: true,
-      supportsStreaming: true,
-      supportsCache: true,
-      maxContextTokens: 1000000,
-      maxOutputTokens: 8192,
-      minCacheableTokens: 4096,
-    },
-    pricing: {
-      input: 2.5,
-      output: 10.0,
-    },
-    contextWindow: 1000000,
-    maxOutput: 8192,
-    features: {
-      thinking: true,
-      vision: true,
-      audio: true,
-      files: true,
-      cache: true,
-    },
-  },
-
-  'deepseek-reasoner': {
-    provider: 'deepseek',
-    modelId: 'deepseek-reasoner',
-    displayName: 'DeepSeek Reasoner (V3.2)',
-    capabilities: {
-      supportsThinking: true,
-      supportsInterleavedThinking: false,
-      supportsImages: false,
-      supportsAudio: false,
-      supportsFiles: false,
-      supportsTools: true,
-      supportsStreaming: true,
-      supportsCache: true,
-      maxContextTokens: 64000,
-      maxOutputTokens: 64000,
-    },
-    pricing: {
-      input: 0.28,
-      output: 1.10,
-      cacheRead: 0.028,
-    },
-    contextWindow: 64000,
-    maxOutput: 64000,
-    features: {
-      thinking: true,
-      vision: false,
-      audio: false,
-      files: false,
-      cache: true,
-    },
-  },
-
-  // ... more models
-};
-
-export function getModelInfo(modelId: string): ModelInfo | undefined {
-  return MODEL_REGISTRY[modelId];
-}
-
-export function getModelsForProvider(provider: ProviderType): ModelInfo[] {
-  return Object.values(MODEL_REGISTRY).filter(m => m.provider === provider);
-}
-```
-
----
-
-## Appendix A: Provider Compatibility Matrix
-
-| Feature | Anthropic | OpenAI Chat | OpenAI Responses | Gemini 3 | DeepSeek | Qwen | GLM | Kimi | Minimax |
-|---------|-----------|-------------|------------------|----------|----------|------|-----|------|---------|
-| Thinking | Yes | No | Yes | Yes | Yes | Yes | Yes | Yes | Yes |
-| Interleaved Thinking | Yes | No | No | No | No | No | No | No | Yes |
-| Thinking Signature | Yes | N/A | ID-based | Yes | N/A | N/A | N/A | N/A | N/A |
-| Images | Yes | Yes | Yes | Yes | No | Yes | Yes | No | No |
-| Audio | No | Yes | No | Yes | No | Yes | No | No | No |
-| Files API | Yes | No | Yes | Yes | No | No | No | No | No |
-| Prompt Cache | Explicit | Auto | Auto | Both | Auto | Explicit | No | Yes | No |
-| Cache Breakpoints | 4 | N/A | N/A | 1 | N/A | 1 | N/A | 1 | N/A |
-| Cache TTL | 5m/1h | 24h | 24h | Custom | Auto | Custom | N/A | N/A | N/A |
-| Min Cache Tokens | 1024-4096 | 1024 | 1024 | 2048 | 64 | 2048 | N/A | N/A | N/A |
-| Max Context | 200K | 128K | 128K | 1M | 64K | 32K | 200K | 256K | 32K |
-| Tool Calling | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes |
-| Parallel Tools | Yes | Yes | Yes | Yes | Yes | Yes | Yes | N/A | Yes |
-| Streaming | SSE | SSE | Semantic | SSE | SSE | SSE | SSE | SSE | SSE |
-
-## Appendix B: Multi-Turn Thinking Requirements
-
-| Provider | Thinking in History | Signature Required | Notes |
-|----------|---------------------|-------------------|-------|
-| Anthropic | Yes (with signature) | Yes | Claude Opus 4.5 preserves by default |
-| OpenAI Responses | Via ID | N/A | Use previous_response_id |
-| Gemini | Optional | Yes (Flash) | thoughtSignature for function calls |
-| DeepSeek | **NO** | N/A | Returns 400 if reasoning_content included |
-| Qwen | No | N/A | Similar to DeepSeek |
-| Minimax | Yes | N/A | Must preserve full response |
-
-## Appendix C: Pricing Reference (per 1M tokens, USD)
-
-| Provider | Model | Input | Output | Cache Write | Cache Read |
-|----------|-------|-------|--------|-------------|------------|
-| Anthropic | Opus 4.5 | $5.00 | $25.00 | $6.25 | $0.50 |
-| Anthropic | Sonnet 4.5 | $3.00 | $15.00 | $3.75 | $0.30 |
-| Anthropic | Haiku 4.5 | $1.00 | $5.00 | $1.25 | $0.10 |
-| OpenAI | GPT-5.2 | $5.00 | $15.00 | Auto | 75% off |
-| Gemini | 3 Pro | $2.50 | $10.00 | N/A | 75% off |
-| Gemini | 3 Flash | $0.075 | $0.30 | N/A | 75% off |
-| DeepSeek | Reasoner | $0.28 | $1.10 | N/A | $0.028 |
-| Qwen | 3 Max | $0.80 | $2.00 | N/A | Varies |
-
----
-
-## Summary
-
-This architecture provides:
-
-1. **Unified Type System**: Anthropic-style ContentBlocks as canonical format
-2. **Provider-Specific Options**: No pseudo-abstractions, each provider has typed options
-3. **Usage Statistics**: Normalized across all providers with cache metrics
-4. **Error Hierarchy**: Typed errors with retry logic
-5. **Logging/Audit**: Comprehensive logging with audit trail
-6. **Resume/Fork Support**: Provider-aware message preparation
-7. **Pure Transformations**: Testable conversion functions
-8. **Registry Pattern**: Centralized model information
-
-Key design decisions:
-- DeepSeek/Qwen: Must NOT include reasoning_content in history (returns 400)
-- Anthropic: Preserve thinking signatures for multi-turn
-- OpenAI Responses: Use previous_response_id for state
-- Gemini: Use thinkingLevel (not thinkingBudget) for 3.x models
-- Cache strategies vary significantly by provider
diff --git a/docs/quickstart.md b/docs/quickstart.md
deleted file mode 100644
index 30362df..0000000
--- a/docs/quickstart.md
+++ /dev/null
@@ -1,215 +0,0 @@
-# Quickstart：10 分钟搭建事件驱动 Agent 收件箱
-
-本文演示如何快速完成从“依赖启动 → Agent 创建 → 事件推送 → 审批回调 → Resume”的闭环。示例使用 Node.js + Express，但同样适用于 Next.js、Fastify、NestJS 等框架。
-
-> 所有代码均基于 `@kode/sdk` v2.7。目录结构与示例可参考 `examples/01-agent-inbox.ts` 与 `examples/nextjs-api-route.ts`。
-
-> **环境变量**：示例默认直连 Anthropic。请预先设置 `ANTHROPIC_API_KEY`（或 `ANTHROPIC_API_TOKEN`），可选配置 `ANTHROPIC_BASE_URL` 与 `ANTHROPIC_MODEL_ID`（默认 `claude-sonnet-4.5-20250929`）。缺失密钥时示例会直接报错，防止误用 Mock 数据。
-
----
-
-## 1. 初始化依赖容器
-
-```typescript
-// bootstrap/runtime.ts
-import { createRuntime } from '../examples/shared/runtime';
-
-const modelId = process.env.ANTHROPIC_MODEL_ID ?? 'claude-sonnet-4.5-20250929';
-
-export const deps = createRuntime(({ templates, registerBuiltin }) => {
-  registerBuiltin('fs', 'bash', 'todo');
-
-  templates.register({
-    id: 'repo-assistant',
-    systemPrompt: 'You are the repo teammate. Always reason step-by-step.',
-    tools: ['fs_read', 'fs_write', 'fs_edit', 'bash_run', 'todo_read', 'todo_write'],
-    model: modelId,
-    runtime: {
-      todo: { enabled: true, reminderOnStart: true, remindIntervalSteps: 20 },
-      metadata: { exposeThinking: false },
-    },
-  });
-});
-```
-
-`createRuntime` 会自动注入 JSONStore、SandboxFactory、ToolRegistry，并使用 `.env` 中的 Anthropic 配置构建模型 Provider。
-> 提示：该 helper 示例位于仓库的 `examples/shared/runtime.ts`，可复制到你的项目中使用。
-
----
-
-## 2. Resume or Create Agent
-
-```typescript
-// bootstrap/agents.ts
-import { Agent, AgentConfig } from '@kode/sdk';
-import { createDependencies } from './dependencies';
-
-const templateId = 'repo-assistant';
-
-export async function resumeOrCreate(agentId: string, overrides?: Partial<AgentConfig>) {
-  const exists = await deps.store.exists(agentId);
-  if (exists) {
-    return Agent.resumeFromStore(agentId, deps, { overrides });
-  }
-
-  const base: AgentConfig = {
-    agentId,
-    templateId,
-    sandbox: { kind: 'local', workDir: './workspace', enforceBoundary: true },
-  };
-
-  return Agent.create({ ...base, ...overrides }, deps);
-}
-```
-
----
-
-## 3. Progress → 前端（SSE/WebSocket）
-
-```typescript
-// api/agents/[id]/stream.ts (Express 版本)
-import express from 'express';
-import { resumeOrCreate } from '../bootstrap/agents';
-
-export const router = express.Router();
-
-router.get('/:agentId/stream', async (req, res) => {
-  const agentId = req.params.agentId;
-  const agent = await resumeOrCreate(agentId);
-
-  res.setHeader('Content-Type', 'text/event-stream');
-  res.setHeader('Cache-Control', 'no-cache');
-  res.flushHeaders();
-
-  const iterator = agent.subscribe(['progress', 'monitor'], {
-    since: req.query.since ? { seq: Number(req.query.since), timestamp: Date.now() } : undefined,
-  })[Symbol.asyncIterator]();
-
-  (async () => {
-    for await (const envelope of { [Symbol.asyncIterator]: () => iterator }) {
-      res.write(`data: ${JSON.stringify(envelope)}\n\n`);
-    }
-  })().catch((error) => {
-    console.error('stream error', error);
-    res.end();
-  });
-});
-```
-
-前端即可用 `EventSource` / `WebSocket` 订阅数据面事件。
-
----
-
-## 4. Control → 审批服务
-
-```typescript
-// api/agents/[id]/control.ts
-router.post('/:agentId/decision', async (req, res) => {
-  const { agentId } = req.params;
-  const { callId, decision, note } = req.body; // 由审批 UI 提交
-
-  const agent = await resumeOrCreate(agentId);
-  await agent.decide(callId, decision, note);
-
-  res.status(204).end();
-});
-
-async function bindControl(agent: Agent) {
-  agent.on('permission_required', (event) => {
-    // 推送到审批系统（webhook/bus），或写入数据库待审批
-    enqueueApprovalTask({
-      agentId: agent.agentId,
-      callId: event.call.id,
-      tool: event.call.name,
-      inputPreview: event.call.inputPreview,
-      note: event.respond.toString(),
-    });
-  });
-}
-```
-
-`permission_required` 回调只会在必要时触发，配合策略/Hook 可细化审批逻辑。
-
----
-
-## 5. 发送消息 & 断点续播
-
-```typescript
-router.post('/:agentId/messages', async (req, res) => {
-  const { agentId } = req.params;
-  const { text } = req.body;
-
-  const agent = await resumeOrCreate(agentId);
-  await agent.send(text);
-
-  res.status(202).json({ status: 'queued' });
-});
-
-// Progress 流中的 `bookmark` 可写入数据库，前端断线后携带 ?since=cursor 续播。
-```
-
----
-
-## 6. Monitor → 告警/审计
-
-```typescript
-async function bindMonitor(agent: Agent) {
-  agent.on('error', (event) => {
-    logger.warn({ agentId: agent.agentId, phase: event.phase, detail: event.detail }, 'agent error');
-  });
-
-  agent.on('tool_executed', (event) => {
-    auditStore.write({
-      agentId: agent.agentId,
-      tool: event.call.name,
-      durationMs: event.call.durationMs,
-      approval: event.call.approval,
-    });
-  });
-}
-```
-
-Monitor 事件只在必要时推送，日志/告警系统可以聚合这些事件。
-
----
-
-## 7. Resume / Fork
-
-- 服务重启或实例迁移时，通过 `Agent.resumeFromStore` 恢复。
-- 如果希望“分叉”出新任务，调用 `agent.snapshot()` → `agent.fork()`，新的 Agent 会继承工具配置与 lineage。
-- `monitor.agent_resumed` 事件会告知自动封口的工具列表，可用于报表或人工确认。
-
-```typescript
-const forked = await agent.fork();
-await forked.send('这是分叉后的新任务，请从 snapshot 接着处理。');
-```
-
----
-
-## 8. 测试建议
-
-- 使用 `MockModelProvider`（自定义）或 `AnthropicProvider` 的测试 key 做集成测试。
-- 针对审批流程模拟 `permission_required` → `decide` 的正反用例。
-- 断线重连：模拟 SSE 中断后继续携带 `since`。
-- 恢复测试：`snapshot → Agent.resumeFromStore → agent.status()`，确认断点与工具记录完整。
-
----
-
-## 9. 常见问题排查
-
-| 现象 | 排查建议 |
-| --- | --- |
-| Resume 报模板缺失 | 确认服务启动时已注册模板，并与 metadata 中的 `templateId` 一致。|
-| 工具未找到 | ToolRegistry 未注册对应名称。请确保注册工厂返回 `ToolInstance`。|
-| 事件流无输出 | 检查是否调用了 `agent.send`；确认前端 SSE 连接未被代理裁剪。|
-| 提醒过多 | 调整模板的 `runtime.todo.remindIntervalSteps` 或使用 Hook 抑制提醒。|
-| Bash 工具被拒绝 | `LocalSandbox` 默认阻止危险命令，可通过模板 overrides 放宽 `allowPaths` 或自定义 Sandbox。|
-
----
-
-完成上述步骤，你已经拥有一个"协作收件箱"级别的 Agent 服务。接下来可以继续阅读：
-
-- [`docs/playbooks.md`](./playbooks.md)：针对审批、团队协作、调度的进阶脚本。
-- [`docs/events.md`](./events.md)：三通道事件流的心智模型与最佳实践。
-- [`docs/tools.md`](./tools.md)：如何扩展自定义工具、接入 MCP。
-- [`docs/skills.md`](./skills.md)：Skills系统，支持模块化、可重用的能力单元。
diff --git a/docs/resume.md b/docs/resume.md
deleted file mode 100644
index 8d96a88..0000000
--- a/docs/resume.md
+++ /dev/null
@@ -1,119 +0,0 @@
-# Resume / Fork 指南
-
-长时运行的 Agent 必须具备“随时恢复、可分叉、可审计”的能力。KODE SDK 在内核层实现了统一的持久化协议（消息、工具调用、Todo、事件、断点、Lineage），业务侧只需正确注入依赖并重绑事件即可。
-
----
-
-## 关键概念
-
-- **Metadata**：`persistInfo()` 会序列化模板、工具描述符、权限、Todo、沙箱配置、上下文策略、断点、lineage 等信息写入 Store。
-- **Safe-Fork-Point (SFP)**：每次用户消息或工具结果都会形成可恢复节点，`snapshot`/`fork` 都基于 SFP。
-- **BreakpointState**：标记当前执行阶段（`READY` → `PRE_MODEL` → ... → `POST_TOOL`），Resume 时用于自愈与治理事件。
-- **Auto-Seal**：当崩溃或中断发生在工具执行阶段，Resume 时会自动封口，落下一条 `tool_result`，并通过 `monitor.agent_resumed` 报告。
-
----
-
-## Resume 的两种方式
-
-```typescript
-import { Agent, AgentConfig } from '@kode/sdk';
-import { createDependencies } from '../bootstrap/dependencies';
-
-const deps = createDependencies();
-
-// 方式一：显式配置
-const agent = await Agent.resume('agt-demo', {
-  templateId: 'repo-assistant',
-  modelConfig: { provider: 'anthropic', model: 'claude-3-5-sonnet-20241022', apiKey: process.env.ANTHROPIC_API_KEY! },
-  sandbox: { kind: 'local', workDir: './workspace', enforceBoundary: true },
-}, deps, {
-  strategy: 'crash',  // 自动封口未完成工具
-  autoRun: true,      // 恢复后继续处理队列
-});
-
-// 方式二：读取 metadata（推荐）
-const agent2 = await Agent.resumeFromStore('agt-demo', deps, {
-  overrides: {
-    modelConfig: { provider: 'anthropic', model: 'claude-3-5-sonnet-20241022', apiKey: process.env.ANTHROPIC_API_KEY! },
-  },
-});
-```
-
-- `strategy: 'manual' | 'crash'`：`crash` 会封口未完成工具并触发 `monitor.agent_resumed`。
-- `autoRun`：恢复后立即继续处理消息队列。
-- `overrides`：对 metadata 进行最小化覆盖（模型升级、权限调整、沙箱迁移等）。
-
-Resume 后**必须**重新绑定事件监听（Control/Monitor 回调不会自动恢复）。
-
----
-
-## 业务 vs SDK 的职责分界
-
-| 能力 | SDK | 业务方 |
-| --- | --- | --- |
-| 模板、工具、沙箱恢复 | ✅ 自动重建 | ❌ 无需处理 |
-| 消息、工具记录、Todo、Lineage | ✅ 自动加载 | ❌ |
-| FilePool 监听 | ✅ 自动恢复（需支持 `sandbox.watchFiles`） | ❌ |
-| Hooks | ✅ 自动重新注册 | ❌ |
-| Control/Monitor 监听 | ❌ | ✅ Resume 后需重新绑定 |
-| 审批流程、告警 | ❌ | ✅ 结合业务系统处理 |
-| 依赖单例管理 | ❌ | ✅ 确保 `store` / `registry` 全局复用 |
-
----
-
-## Safe-Fork-Point 与分叉
-
-```typescript
-const bookmarkId = await agent.snapshot('pre-release-audit');
-const forked = await agent.fork(bookmarkId);
-
-await forked.send('这是一个基于原对话分叉出的新任务。');
-```
-
-- `snapshot(label?)` 返回 `SnapshotId`（默认为 `sfp-{index}`）。
-- `fork(sel?)` 创建新 Agent：继承工具/权限配置与 lineage，并把消息复制到新 Store 命名空间。
-- 分叉后的 Agent 需要独立绑定事件监听。
-
----
-
-## 自动封口（Auto-Seal）
-
-当崩溃发生在以下阶段，Resume 会自动写入补偿性的 `tool_result`：
-
-| 阶段 | 封口信息 | 推荐处理 |
-| --- | --- | --- |
-| `PENDING` | 工具尚未执行 | 验证参数后重新触发工具。|
-| `APPROVAL_REQUIRED` | 等待审批 | 再次触发审批或手动完成审批。|
-| `APPROVED` | 准备执行 | 确认输入仍然有效后重试。|
-| `EXECUTING` | 执行中断 | 检查副作用，必要时人工确认再重试。|
-
-封口会触发：
-
-- `monitor.agent_resumed`：包含 `sealed` 列表与 `strategy`。
-- `progress.tool:end`：补上一条失败的 `tool_result`，附带 `recommendations`。
-
----
-
-## 多实例 / Serverless 环境建议
-
-1. **依赖单例**：在模块级创建 `AgentDependencies`，避免多个实例写入同一 Store 目录。
-2. **事件重绑**：每次 `resume` 后立刻调用 `bindProgress/Control/Monitor`。
-3. **并发控制**：同一个 AgentId 最好只在单实例中运行，可通过外部锁或队列保证。
-4. **持久化目录**：`JSONStore` 适用于单机/有共享磁盘环境。分布式部署请实现自定义 Store（例如 S3 + DynamoDB）。
-5. **可观测性**：监听 `monitor.state_changed` 与 `monitor.error`，在异常时迅速定位。
-
----
-
-## 常见问题排查
-
-| 现象 | 排查方向 |
-| --- | --- |
-| Resume 报 `AGENT_NOT_FOUND` | Store 目录缺失或未持久化。确认 `store.baseDir` 是否正确挂载。|
-| Resume 报 `TEMPLATE_NOT_FOUND` | 启动时未注册模板；确保模板 ID 与 metadata 中一致。|
-| 工具缺失 | ToolRegistry 未注册对应名称；内置工具需手动注册。|
-| FilePool 未恢复 | 自定义 Sandbox 未实现 `watchFiles`；可关闭 watch 或补齐实现。|
-| 事件监听失效 | Resume 后未重新调用 `agent.on(...)` 绑定。|
-
----
-
-掌握 Resume/Fork 心智后，就可以构建“永不断线”的 Agent 服务：随时恢复、随时分叉、随时审计。
diff --git a/docs/scenarios/cli-tools.md b/docs/scenarios/cli-tools.md
deleted file mode 100644
index ac1113a..0000000
--- a/docs/scenarios/cli-tools.md
+++ /dev/null
@@ -1,352 +0,0 @@
-# Scenario: CLI Agent Tools
-
-> Build command-line AI assistants like Claude Code, Cursor, or custom developer tools.
-
----
-
-## Why CLI is Perfect for KODE SDK
-
-| Feature | Benefit |
-|---------|---------|
-| Single process | No distributed complexity |
-| Local filesystem | JSONStore works perfectly |
-| Long-running | Agent loops run naturally |
-| Single user | No multi-tenancy needed |
-
-**Compatibility: 100%** - This is KODE SDK's sweet spot.
-
----
-
-## Quick Start: Minimal CLI Agent
-
-```typescript
-// cli-agent.ts
-import { Agent, AnthropicProvider, LocalSandbox } from '@anthropic/kode-sdk';
-import * as readline from 'readline';
-
-async function main() {
-  // Create agent with local persistence
-  const agent = await Agent.create({
-    agentId: 'cli-assistant',
-    template: {
-      systemPrompt: `You are a helpful CLI assistant.
-You can execute bash commands and help with file operations.`,
-    },
-    deps: {
-      modelProvider: new AnthropicProvider(process.env.ANTHROPIC_API_KEY!),
-      sandbox: new LocalSandbox({ workDir: process.cwd() }),
-    },
-  });
-
-  // Stream output to terminal
-  agent.subscribeProgress({ kinds: ['text_chunk'] }, (event) => {
-    process.stdout.write(event.text);
-  });
-
-  // Show tool execution
-  agent.subscribeProgress({ kinds: ['tool:start', 'tool:complete'] }, (event) => {
-    if (event.kind === 'tool:start') {
-      console.log(`\n[Running: ${event.name}]`);
-    }
-  });
-
-  // Interactive loop
-  const rl = readline.createInterface({
-    input: process.stdin,
-    output: process.stdout,
-  });
-
-  console.log('CLI Agent ready. Type your message (Ctrl+C to exit)\n');
-
-  const askQuestion = () => {
-    rl.question('You: ', async (input) => {
-      if (input.trim()) {
-        console.log('\nAssistant: ');
-        await agent.chat(input);
-        console.log('\n');
-      }
-      askQuestion();
-    });
-  };
-
-  askQuestion();
-}
-
-main().catch(console.error);
-```
-
-Run it:
-```bash
-npx ts-node cli-agent.ts
-```
-
----
-
-## Production CLI: Resume & Persistence
-
-For a production CLI tool, you want:
-1. **Session persistence** - Resume conversations across runs
-2. **Crash recovery** - Don't lose progress
-3. **Multiple sessions** - Switch between contexts
-
-```typescript
-// production-cli.ts
-import { Agent, AgentPool, AnthropicProvider, LocalSandbox, JSONStore } from '@anthropic/kode-sdk';
-import * as path from 'path';
-import * as os from 'os';
-
-// Store data in user's home directory
-const DATA_DIR = path.join(os.homedir(), '.my-cli-agent');
-const store = new JSONStore(DATA_DIR);
-
-async function getOrCreateAgent(sessionId: string): Promise<Agent> {
-  const pool = new AgentPool({ store, maxAgents: 10 });
-
-  // Try to resume existing session
-  try {
-    const agent = await pool.resume(sessionId, {
-      template: { systemPrompt: '...' },
-    }, {
-      modelProvider: new AnthropicProvider(process.env.ANTHROPIC_API_KEY!),
-      sandbox: new LocalSandbox({ workDir: process.cwd() }),
-    });
-    console.log(`Resumed session: ${sessionId}`);
-    return agent;
-  } catch {
-    // Create new session
-    const agent = await pool.create(sessionId, {
-      template: { systemPrompt: '...' },
-    }, {
-      modelProvider: new AnthropicProvider(process.env.ANTHROPIC_API_KEY!),
-      sandbox: new LocalSandbox({ workDir: process.cwd() }),
-    });
-    console.log(`Created new session: ${sessionId}`);
-    return agent;
-  }
-}
-
-// Usage
-const sessionId = process.argv[2] || 'default';
-const agent = await getOrCreateAgent(sessionId);
-```
-
----
-
-## Tool Approval Flow
-
-For dangerous operations, implement approval:
-
-```typescript
-import { PermissionMode } from '@anthropic/kode-sdk';
-
-const agent = await Agent.create({
-  agentId: 'safe-cli',
-  config: {
-    permission: {
-      mode: 'approval',  // Require approval for all tools
-      // Or custom mode:
-      // mode: 'custom',
-      // customMode: async (call, ctx) => {
-      //   if (call.name === 'bash_run') {
-      //     return { decision: 'ask' };  // Prompt user
-      //   }
-      //   return { decision: 'allow' };
-      // }
-    },
-  },
-  // ...
-});
-
-// Handle approval requests
-agent.subscribeControl((event) => {
-  if (event.kind === 'permission_required') {
-    console.log(`\nTool requires approval: ${event.toolName}`);
-    console.log(`Input: ${JSON.stringify(event.input, null, 2)}`);
-
-    const rl = readline.createInterface({
-      input: process.stdin,
-      output: process.stdout,
-    });
-
-    rl.question('Approve? (y/n): ', (answer) => {
-      agent.approveToolUse(event.callId, answer.toLowerCase() === 'y');
-      rl.close();
-    });
-  }
-});
-```
-
----
-
-## Example: Developer Assistant CLI
-
-Complete example with file operations, git commands, and safety:
-
-```typescript
-// dev-assistant.ts
-import {
-  Agent,
-  AnthropicProvider,
-  LocalSandbox,
-  JSONStore,
-  defineSimpleTool,
-} from '@anthropic/kode-sdk';
-
-// Custom tools
-const gitStatusTool = defineSimpleTool({
-  name: 'git_status',
-  description: 'Check git repository status',
-  parameters: {},
-  execute: async () => {
-    const { execSync } = await import('child_process');
-    return execSync('git status --porcelain').toString();
-  },
-});
-
-const searchCodeTool = defineSimpleTool({
-  name: 'search_code',
-  description: 'Search for patterns in code files',
-  parameters: {
-    type: 'object',
-    properties: {
-      pattern: { type: 'string', description: 'Search pattern (regex)' },
-      fileType: { type: 'string', description: 'File extension (e.g., ts, js)' },
-    },
-    required: ['pattern'],
-  },
-  execute: async ({ pattern, fileType }) => {
-    const { execSync } = await import('child_process');
-    const glob = fileType ? `--include="*.${fileType}"` : '';
-    return execSync(`grep -r ${glob} "${pattern}" . 2>/dev/null || echo "No matches"`).toString();
-  },
-});
-
-async function main() {
-  const agent = await Agent.create({
-    agentId: 'dev-assistant',
-    template: {
-      systemPrompt: `You are a developer assistant.
-You help with:
-- Code navigation and search
-- Git operations
-- File management
-- Running tests and builds
-
-Always explain what you're doing before executing commands.
-Be cautious with destructive operations.`,
-      tools: [gitStatusTool, searchCodeTool],  // Add custom tools
-    },
-    config: {
-      permission: {
-        mode: 'auto',  // Auto-approve safe operations
-        autoApprove: ['git_status', 'search_code', 'file_read'],
-        requireApproval: ['bash_run', 'file_write', 'file_delete'],
-      },
-    },
-    deps: {
-      modelProvider: new AnthropicProvider(process.env.ANTHROPIC_API_KEY!, 'claude-sonnet-4-20250514'),
-      sandbox: new LocalSandbox({ workDir: process.cwd() }),
-      store: new JSONStore('./.dev-assistant'),
-    },
-  });
-
-  // ... rest of CLI implementation
-}
-```
-
----
-
-## Best Practices for CLI Agents
-
-### 1. Progress Indication
-
-```typescript
-// Show spinner during model calls
-agent.subscribeMonitor((event) => {
-  if (event.kind === 'model_start') {
-    process.stdout.write('Thinking...');
-  }
-  if (event.kind === 'model_complete') {
-    process.stdout.write('\r          \r');  // Clear spinner
-  }
-});
-```
-
-### 2. Graceful Shutdown
-
-```typescript
-// Handle Ctrl+C
-process.on('SIGINT', async () => {
-  console.log('\nSaving session...');
-  await agent.persistInfo();
-  process.exit(0);
-});
-
-process.on('SIGTERM', async () => {
-  await agent.persistInfo();
-  process.exit(0);
-});
-```
-
-### 3. Token Usage Tracking
-
-```typescript
-let totalTokens = 0;
-
-agent.subscribeMonitor((event) => {
-  if (event.kind === 'token_usage') {
-    totalTokens += event.inputTokens + event.outputTokens;
-    // Show in status bar or on exit
-  }
-});
-
-process.on('exit', () => {
-  console.log(`\nTotal tokens used: ${totalTokens}`);
-});
-```
-
-### 4. History Navigation
-
-```typescript
-// Show conversation history on start
-const messages = await agent.getMessages();
-console.log(`Session has ${messages.length} messages`);
-
-// Allow user to see recent context
-if (messages.length > 0) {
-  const last = messages[messages.length - 1];
-  console.log(`Last message: ${last.role}: ${last.content.slice(0, 100)}...`);
-}
-```
-
----
-
-## File Structure
-
-Recommended project structure for a CLI agent:
-
-```
-my-cli-agent/
-├── src/
-│   ├── index.ts          # Entry point
-│   ├── agent.ts          # Agent configuration
-│   ├── tools/            # Custom tools
-│   │   ├── git.ts
-│   │   ├── search.ts
-│   │   └── index.ts
-│   └── ui/               # Terminal UI
-│       ├── spinner.ts
-│       ├── prompt.ts
-│       └── colors.ts
-├── data/                 # Agent persistence (gitignored)
-├── package.json
-└── tsconfig.json
-```
-
----
-
-## Next Steps
-
-- See [Tools Guide](../tools.md) for building custom tools
-- See [Events Guide](../events.md) for advanced event handling
-- See [Playbooks](../playbooks.md) for common patterns
diff --git a/docs/scenarios/desktop-apps.md b/docs/scenarios/desktop-apps.md
deleted file mode 100644
index 0c9e96b..0000000
--- a/docs/scenarios/desktop-apps.md
+++ /dev/null
@@ -1,234 +0,0 @@
-# Scenario: Desktop Applications
-
-> Build Electron/Tauri apps with embedded AI agents.
-
----
-
-## Why Desktop is Perfect for KODE SDK
-
-| Feature | Benefit |
-|---------|---------|
-| Full filesystem access | JSONStore works natively |
-| Long-running process | Agent loops run without timeout |
-| Local resources | No network latency for persistence |
-| Single user | No multi-tenancy complexity |
-
-**Compatibility: 95%** - Minor adjustments for IPC.
-
----
-
-## Electron Integration
-
-### Main Process Setup
-
-```typescript
-// main/agent-service.ts
-import { Agent, AgentPool, AnthropicProvider, LocalSandbox, JSONStore } from '@anthropic/kode-sdk';
-import { app, ipcMain } from 'electron';
-import * as path from 'path';
-
-// Store data in app's user data directory
-const DATA_DIR = path.join(app.getPath('userData'), 'agents');
-const store = new JSONStore(DATA_DIR);
-const pool = new AgentPool({ store, maxAgents: 20 });
-
-// Create agent
-ipcMain.handle('agent:create', async (event, { agentId, systemPrompt }) => {
-  const agent = await pool.create(agentId, {
-    template: { systemPrompt },
-  }, {
-    modelProvider: new AnthropicProvider(process.env.ANTHROPIC_API_KEY!),
-    sandbox: new LocalSandbox({ workDir: app.getPath('documents') }),
-  });
-
-  // Forward events to renderer
-  agent.subscribeProgress({ kinds: ['text_chunk', 'tool:start', 'tool:complete', 'done'] }, (event) => {
-    event.sender.send(`agent:progress:${agentId}`, event);
-  });
-
-  return { success: true, agentId };
-});
-
-// Send message
-ipcMain.handle('agent:chat', async (event, { agentId, message }) => {
-  const agent = pool.get(agentId);
-  if (!agent) throw new Error('Agent not found');
-
-  await agent.chat(message);
-  return { success: true };
-});
-
-// List agents
-ipcMain.handle('agent:list', async () => {
-  const agents = await store.listAgents();
-  return agents;
-});
-
-// Graceful shutdown
-app.on('before-quit', async (event) => {
-  event.preventDefault();
-  for (const [id, agent] of pool.agents) {
-    await agent.persistInfo();
-  }
-  app.quit();
-});
-```
-
-### Renderer Process (React)
-
-```typescript
-// renderer/hooks/useAgent.ts
-import { useState, useEffect, useCallback } from 'react';
-
-export function useAgent(agentId: string) {
-  const [messages, setMessages] = useState<Message[]>([]);
-  const [isProcessing, setIsProcessing] = useState(false);
-
-  useEffect(() => {
-    // Listen for progress events
-    const handler = (event: any, data: ProgressEvent) => {
-      if (data.kind === 'text_chunk') {
-        setMessages(prev => {
-          const last = prev[prev.length - 1];
-          if (last?.role === 'assistant') {
-            return [...prev.slice(0, -1), {
-              ...last,
-              content: last.content + data.text,
-            }];
-          }
-          return [...prev, { role: 'assistant', content: data.text }];
-        });
-      }
-      if (data.kind === 'done') {
-        setIsProcessing(false);
-      }
-    };
-
-    window.electron.on(`agent:progress:${agentId}`, handler);
-    return () => window.electron.off(`agent:progress:${agentId}`, handler);
-  }, [agentId]);
-
-  const sendMessage = useCallback(async (text: string) => {
-    setMessages(prev => [...prev, { role: 'user', content: text }]);
-    setIsProcessing(true);
-    await window.electron.invoke('agent:chat', { agentId, message: text });
-  }, [agentId]);
-
-  return { messages, isProcessing, sendMessage };
-}
-```
-
----
-
-## Tauri Integration
-
-```rust
-// src-tauri/src/main.rs
-use tauri::Manager;
-
-#[tauri::command]
-async fn create_agent(app: tauri::AppHandle, agent_id: String) -> Result<(), String> {
-    // Use sidecar process for Node.js agent runtime
-    let sidecar = app.shell()
-        .sidecar("agent-runtime")
-        .expect("failed to create sidecar");
-
-    sidecar.spawn().expect("failed to spawn sidecar");
-    Ok(())
-}
-```
-
-```typescript
-// agent-runtime/index.ts (sidecar)
-// Same KODE SDK code as Electron main process
-// Communicate via Tauri's shell commands
-```
-
----
-
-## Best Practices
-
-### 1. Data Directory
-
-```typescript
-// Cross-platform data directory
-import { app } from 'electron';
-
-const getDataDir = () => {
-  // macOS: ~/Library/Application Support/YourApp/agents
-  // Windows: %APPDATA%/YourApp/agents
-  // Linux: ~/.config/YourApp/agents
-  return path.join(app.getPath('userData'), 'agents');
-};
-```
-
-### 2. Workspace Integration
-
-```typescript
-// Let user choose workspace
-const workspace = await dialog.showOpenDialog({
-  properties: ['openDirectory'],
-  title: 'Select Agent Workspace',
-});
-
-const sandbox = new LocalSandbox({
-  workDir: workspace.filePaths[0],
-  allowedPaths: [workspace.filePaths[0]],  // Restrict to selected folder
-});
-```
-
-### 3. Auto-update Agents
-
-```typescript
-// On app update, migrate agent data if needed
-app.on('ready', async () => {
-  const version = app.getVersion();
-  const lastVersion = store.get('lastVersion');
-
-  if (lastVersion !== version) {
-    await migrateAgentData(lastVersion, version);
-    store.set('lastVersion', version);
-  }
-});
-```
-
----
-
-## Example: AI Writing Assistant
-
-```typescript
-// Complete desktop writing assistant
-const writingAssistant = await Agent.create({
-  agentId: 'writing-assistant',
-  template: {
-    systemPrompt: `You are a writing assistant embedded in a desktop app.
-You help users write, edit, and improve their documents.
-You can read and write files in the user's workspace.`,
-    tools: [
-      // Custom tool to interact with the editor
-      defineSimpleTool({
-        name: 'insert_text',
-        description: 'Insert text at cursor position in the editor',
-        parameters: {
-          type: 'object',
-          properties: {
-            text: { type: 'string' },
-            position: { type: 'number' },
-          },
-          required: ['text'],
-        },
-        execute: async ({ text, position }) => {
-          // Send to renderer via IPC
-          mainWindow.webContents.send('editor:insert', { text, position });
-          return 'Text inserted';
-        },
-      }),
-    ],
-  },
-  // ...
-});
-```
-
----
-
-See [CLI Tools Guide](./cli-tools.md) for more patterns that apply to desktop apps.
diff --git a/docs/scenarios/ide-plugins.md b/docs/scenarios/ide-plugins.md
deleted file mode 100644
index d27719c..0000000
--- a/docs/scenarios/ide-plugins.md
+++ /dev/null
@@ -1,359 +0,0 @@
-# Scenario: IDE Plugins
-
-> Build VSCode, JetBrains, or other IDE extensions with AI coding assistants.
-
----
-
-## Why IDE Plugins Work Well
-
-| Feature | Benefit |
-|---------|---------|
-| Extension host process | Long-running, like desktop |
-| workspace.fs API | File operations available |
-| Single user context | No multi-tenancy |
-| Rich UI integration | WebView for chat, decorations for highlights |
-
-**Compatibility: 85%** - Requires workspace.fs integration.
-
----
-
-## VSCode Extension
-
-### Extension Activation
-
-```typescript
-// src/extension.ts
-import * as vscode from 'vscode';
-import { Agent, AgentPool, AnthropicProvider } from '@anthropic/kode-sdk';
-import { VSCodeSandbox } from './vscode-sandbox';
-import { VSCodeStore } from './vscode-store';
-
-let pool: AgentPool;
-
-export async function activate(context: vscode.ExtensionContext) {
-  // Store in extension's global storage
-  const store = new VSCodeStore(context.globalStorageUri);
-  pool = new AgentPool({ store, maxAgents: 5 });
-
-  // Register commands
-  context.subscriptions.push(
-    vscode.commands.registerCommand('myExtension.startChat', startChat),
-    vscode.commands.registerCommand('myExtension.explainCode', explainCode),
-    vscode.commands.registerCommand('myExtension.refactor', refactorCode),
-  );
-
-  // Create chat webview panel provider
-  context.subscriptions.push(
-    vscode.window.registerWebviewViewProvider('myExtension.chatView', new ChatViewProvider(pool))
-  );
-}
-
-export async function deactivate() {
-  // Save all agents before deactivation
-  for (const [id, agent] of pool.agents) {
-    await agent.persistInfo();
-  }
-}
-```
-
-### VSCode-Specific Sandbox
-
-```typescript
-// src/vscode-sandbox.ts
-import * as vscode from 'vscode';
-import { Sandbox, SandboxConfig } from '@anthropic/kode-sdk';
-
-export class VSCodeSandbox implements Sandbox {
-  private workspaceFolder: vscode.WorkspaceFolder;
-
-  constructor(workspaceFolder: vscode.WorkspaceFolder) {
-    this.workspaceFolder = workspaceFolder;
-  }
-
-  async readFile(relativePath: string): Promise<string> {
-    const uri = vscode.Uri.joinPath(this.workspaceFolder.uri, relativePath);
-    const content = await vscode.workspace.fs.readFile(uri);
-    return new TextDecoder().decode(content);
-  }
-
-  async writeFile(relativePath: string, content: string): Promise<void> {
-    const uri = vscode.Uri.joinPath(this.workspaceFolder.uri, relativePath);
-    await vscode.workspace.fs.writeFile(uri, new TextEncoder().encode(content));
-  }
-
-  async listFiles(pattern: string): Promise<string[]> {
-    const files = await vscode.workspace.findFiles(pattern);
-    return files.map(f => vscode.workspace.asRelativePath(f));
-  }
-
-  async executeCommand(command: string): Promise<{ stdout: string; stderr: string }> {
-    // Use VSCode's terminal API for command execution
-    const terminal = vscode.window.createTerminal({
-      name: 'Agent Command',
-      cwd: this.workspaceFolder.uri,
-    });
-
-    // Note: VSCode terminal doesn't return output directly
-    // Consider using child_process if extension has Node.js access
-    terminal.sendText(command);
-
-    return { stdout: 'Command sent to terminal', stderr: '' };
-  }
-}
-```
-
-### VSCode-Specific Store
-
-```typescript
-// src/vscode-store.ts
-import * as vscode from 'vscode';
-import { Store, Message, AgentInfo } from '@anthropic/kode-sdk';
-
-export class VSCodeStore implements Store {
-  constructor(private storageUri: vscode.Uri) {}
-
-  private getAgentUri(agentId: string, file: string): vscode.Uri {
-    return vscode.Uri.joinPath(this.storageUri, agentId, file);
-  }
-
-  async saveMessages(agentId: string, messages: Message[]): Promise<void> {
-    const uri = this.getAgentUri(agentId, 'messages.json');
-    const content = JSON.stringify(messages, null, 2);
-    await vscode.workspace.fs.writeFile(uri, new TextEncoder().encode(content));
-  }
-
-  async loadMessages(agentId: string): Promise<Message[]> {
-    try {
-      const uri = this.getAgentUri(agentId, 'messages.json');
-      const content = await vscode.workspace.fs.readFile(uri);
-      return JSON.parse(new TextDecoder().decode(content));
-    } catch {
-      return [];
-    }
-  }
-
-  // ... implement other Store methods
-}
-```
-
-### Chat WebView
-
-```typescript
-// src/chat-view-provider.ts
-import * as vscode from 'vscode';
-
-export class ChatViewProvider implements vscode.WebviewViewProvider {
-  constructor(private pool: AgentPool) {}
-
-  resolveWebviewView(webviewView: vscode.WebviewView) {
-    webviewView.webview.options = { enableScripts: true };
-    webviewView.webview.html = this.getHtmlContent();
-
-    // Handle messages from webview
-    webviewView.webview.onDidReceiveMessage(async (message) => {
-      if (message.type === 'chat') {
-        const agent = await this.getOrCreateAgent();
-
-        // Stream responses to webview
-        agent.subscribeProgress({ kinds: ['text_chunk'] }, (event) => {
-          webviewView.webview.postMessage({
-            type: 'text_chunk',
-            text: event.text,
-          });
-        });
-
-        await agent.chat(message.text);
-
-        webviewView.webview.postMessage({ type: 'done' });
-      }
-    });
-  }
-
-  private getHtmlContent(): string {
-    return `<!DOCTYPE html>
-    <html>
-      <head>
-        <style>
-          /* Chat UI styles */
-        </style>
-      </head>
-      <body>
-        <div id="chat-container"></div>
-        <input id="input" type="text" placeholder="Ask about the code...">
-        <script>
-          const vscode = acquireVsCodeApi();
-
-          document.getElementById('input').addEventListener('keypress', (e) => {
-            if (e.key === 'Enter') {
-              vscode.postMessage({ type: 'chat', text: e.target.value });
-              e.target.value = '';
-            }
-          });
-
-          window.addEventListener('message', (e) => {
-            if (e.data.type === 'text_chunk') {
-              // Append to chat
-            }
-          });
-        </script>
-      </body>
-    </html>`;
-  }
-}
-```
-
----
-
-## Context-Aware Coding Assistant
-
-```typescript
-// Provide code context to the agent
-async function explainCode() {
-  const editor = vscode.window.activeTextEditor;
-  if (!editor) return;
-
-  const selection = editor.selection;
-  const selectedText = editor.document.getText(selection);
-  const fileName = editor.document.fileName;
-  const languageId = editor.document.languageId;
-
-  const agent = await getOrCreateAgent();
-
-  // Include file context
-  const context = `
-File: ${fileName}
-Language: ${languageId}
-
-Selected code:
-\`\`\`${languageId}
-${selectedText}
-\`\`\`
-`;
-
-  await agent.chat(`Explain this code:\n${context}`);
-}
-
-// Inline code actions
-async function refactorCode() {
-  const editor = vscode.window.activeTextEditor;
-  if (!editor) return;
-
-  const selection = editor.selection;
-  const selectedText = editor.document.getText(selection);
-
-  const agent = await getOrCreateAgent();
-
-  // Custom tool to apply edits
-  agent.registerTool({
-    name: 'apply_edit',
-    description: 'Replace the selected code with improved version',
-    parameters: {
-      type: 'object',
-      properties: {
-        newCode: { type: 'string', description: 'The refactored code' },
-      },
-      required: ['newCode'],
-    },
-    execute: async ({ newCode }) => {
-      await editor.edit(editBuilder => {
-        editBuilder.replace(selection, newCode);
-      });
-      return 'Code replaced successfully';
-    },
-  });
-
-  await agent.chat(`Refactor this code to be cleaner and more efficient:\n${selectedText}`);
-}
-```
-
----
-
-## JetBrains Plugin (Kotlin)
-
-```kotlin
-// For JetBrains, run KODE SDK as a sidecar Node.js process
-// and communicate via JSON-RPC or WebSocket
-
-class AgentService(private val project: Project) {
-    private var process: Process? = null
-
-    fun start() {
-        val nodeScript = PluginUtil.getPluginPath() + "/agent-runtime/index.js"
-        process = ProcessBuilder("node", nodeScript)
-            .directory(File(project.basePath))
-            .start()
-
-        // Read output
-        thread {
-            process?.inputStream?.bufferedReader()?.forEachLine { line ->
-                handleAgentOutput(line)
-            }
-        }
-    }
-
-    fun sendMessage(message: String) {
-        process?.outputStream?.let {
-            it.write("$message\n".toByteArray())
-            it.flush()
-        }
-    }
-}
-```
-
----
-
-## Best Practices for IDE Plugins
-
-### 1. Workspace-Scoped Agents
-
-```typescript
-// One agent per workspace
-function getAgentId(workspaceFolder: vscode.WorkspaceFolder): string {
-  return `workspace-${hashString(workspaceFolder.uri.toString())}`;
-}
-```
-
-### 2. Respect User Settings
-
-```typescript
-const config = vscode.workspace.getConfiguration('myExtension');
-const apiKey = config.get<string>('apiKey');
-const modelId = config.get<string>('model') || 'claude-sonnet-4-20250514';
-```
-
-### 3. Progress Indication
-
-```typescript
-await vscode.window.withProgress({
-  location: vscode.ProgressLocation.Notification,
-  title: 'AI Assistant',
-  cancellable: true,
-}, async (progress, token) => {
-  token.onCancellationRequested(() => {
-    agent.stop();
-  });
-
-  progress.report({ message: 'Thinking...' });
-  await agent.chat(message);
-});
-```
-
-### 4. Diagnostic Integration
-
-```typescript
-// Show agent suggestions as diagnostics
-const diagnosticCollection = vscode.languages.createDiagnosticCollection('ai-suggestions');
-
-agent.subscribeProgress({ kinds: ['suggestion'] }, (event) => {
-  const diagnostic = new vscode.Diagnostic(
-    new vscode.Range(event.line, 0, event.line, 100),
-    event.message,
-    vscode.DiagnosticSeverity.Information
-  );
-  diagnosticCollection.set(editor.document.uri, [diagnostic]);
-});
-```
-
----
-
-See [Desktop Apps Guide](./desktop-apps.md) for more patterns that apply to IDE plugins.
diff --git a/docs/scenarios/large-scale-toc.md b/docs/scenarios/large-scale-toc.md
deleted file mode 100644
index 3470b5e..0000000
--- a/docs/scenarios/large-scale-toc.md
+++ /dev/null
@@ -1,749 +0,0 @@
-# Scenario: Large-Scale ToC Applications
-
-> Build ChatGPT/Manus-like applications serving thousands of concurrent users with hundreds of agents each.
-
----
-
-## The Challenge
-
-Building a consumer-facing AI application at scale requires solving:
-
-| Challenge | Description |
-|-----------|-------------|
-| **High Concurrency** | 10K+ users, each with multiple agents |
-| **Agent Hibernation** | Inactive agents must sleep to save resources |
-| **Crash Recovery** | Server restart must restore all running agents |
-| **Fork Exploration** | Users fork agents to explore different paths |
-| **Multi-Machine** | Scale horizontally across servers |
-| **Serverless Frontend** | Deploy UI on Vercel/Cloudflare |
-
-**Direct KODE SDK Usage: Not Suitable**
-
-KODE SDK is designed as a runtime kernel, not a distributed platform. For large-scale ToC, you need the **Worker Microservice Pattern**.
-
----
-
-## Recommended Architecture
-
-```
-+------------------------------------------------------------------+
-|                        User Devices                               |
-+------------------------------------------------------------------+
-                               |
-                               v
-+------------------------------------------------------------------+
-|                    CDN / Edge (Cloudflare)                       |
-+------------------------------------------------------------------+
-                               |
-                               v
-+------------------------------------------------------------------+
-|                 API Gateway (Vercel/Cloudflare)                  |
-|                                                                   |
-|   /api/agents        - List user's agents                        |
-|   /api/agents/:id    - Get agent status                          |
-|   /api/chat          - Send message (enqueue)                    |
-|   /api/fork          - Fork agent (enqueue)                      |
-|   /api/stream/:id    - SSE stream (from Redis)                   |
-+------------------------------------------------------------------+
-                               |
-                               v
-+------------------------------------------------------------------+
-|                    Message Queue (Upstash Redis)                 |
-|                                                                   |
-|   Queue: agent:messages      - Chat messages                     |
-|   Queue: agent:commands      - Fork, hibernate, resume           |
-|   PubSub: agent:events:{id}  - Real-time events                  |
-+------------------------------------------------------------------+
-                               |
-              +----------------+----------------+
-              |                |                |
-              v                v                v
-+------------------+  +------------------+  +------------------+
-|   Worker Pool 1  |  |   Worker Pool 2  |  |   Worker Pool N  |
-|   (Railway)      |  |   (Railway)      |  |   (Railway)      |
-|                  |  |                  |  |                  |
-|   +----------+   |  |   +----------+   |  |   +----------+   |
-|   | KODE SDK |   |  |   | KODE SDK |   |  |   | KODE SDK |   |
-|   | Scheduler|   |  |   | Scheduler|   |  |   | Scheduler|   |
-|   +----------+   |  |   +----------+   |  |   +----------+   |
-|                  |  |                  |  |                  |
-|   Agents: 0-999  |  |  Agents: 1K-2K   |  |  Agents: 2K-3K   |
-+--------+---------+  +--------+---------+  +--------+---------+
-         |                     |                     |
-         +---------------------+---------------------+
-                               |
-                               v
-+------------------------------------------------------------------+
-|                    Distributed Store                              |
-|                                                                   |
-|   PostgreSQL (Supabase)     - Agent state, messages, metadata    |
-|   Redis Cluster             - Locks, sessions, hot cache         |
-|   S3/R2                     - File attachments, archives         |
-+------------------------------------------------------------------+
-```
-
----
-
-## Component Implementation
-
-### 1. API Layer (Serverless)
-
-```typescript
-// app/api/chat/route.ts (Next.js App Router)
-import { NextRequest } from 'next/server';
-import { Redis } from '@upstash/redis';
-import { createClient } from '@supabase/supabase-js';
-
-const redis = new Redis({ url: process.env.UPSTASH_URL!, token: process.env.UPSTASH_TOKEN! });
-const supabase = createClient(process.env.SUPABASE_URL!, process.env.SUPABASE_KEY!);
-
-export async function POST(req: NextRequest) {
-  // 1. Authenticate user
-  const user = await authenticateUser(req);
-  if (!user) {
-    return Response.json({ error: 'Unauthorized' }, { status: 401 });
-  }
-
-  // 2. Parse request
-  const { agentId, message } = await req.json();
-
-  // 3. Verify agent ownership
-  const { data: agent } = await supabase
-    .from('agents')
-    .select('id, user_id, state')
-    .eq('id', agentId)
-    .single();
-
-  if (!agent || agent.user_id !== user.id) {
-    return Response.json({ error: 'Agent not found' }, { status: 404 });
-  }
-
-  // 4. Create task and enqueue
-  const taskId = crypto.randomUUID();
-
-  await redis.lpush('agent:messages', JSON.stringify({
-    taskId,
-    agentId,
-    userId: user.id,
-    message,
-    timestamp: Date.now(),
-  }));
-
-  // 5. Update agent state
-  await supabase
-    .from('agents')
-    .update({ state: 'QUEUED', last_activity: new Date() })
-    .eq('id', agentId);
-
-  // 6. Return task ID for polling/streaming
-  return Response.json({
-    taskId,
-    streamUrl: `/api/stream/${taskId}`,
-  });
-}
-```
-
-### 2. SSE Stream Endpoint
-
-```typescript
-// app/api/stream/[taskId]/route.ts
-import { Redis } from '@upstash/redis';
-
-const redis = new Redis({ url: process.env.UPSTASH_URL!, token: process.env.UPSTASH_TOKEN! });
-
-export async function GET(
-  req: NextRequest,
-  { params }: { params: { taskId: string } }
-) {
-  const { taskId } = params;
-
-  // Create SSE stream
-  const stream = new ReadableStream({
-    async start(controller) {
-      const encoder = new TextEncoder();
-
-      // Subscribe to Redis PubSub
-      const subscriber = redis.duplicate();
-      await subscriber.subscribe(`task:${taskId}:events`, (message) => {
-        const event = JSON.parse(message);
-        controller.enqueue(encoder.encode(`data: ${JSON.stringify(event)}\n\n`));
-
-        if (event.kind === 'done' || event.kind === 'error') {
-          controller.close();
-          subscriber.unsubscribe();
-        }
-      });
-
-      // Timeout after 5 minutes
-      setTimeout(() => {
-        controller.close();
-        subscriber.unsubscribe();
-      }, 5 * 60 * 1000);
-    },
-  });
-
-  return new Response(stream, {
-    headers: {
-      'Content-Type': 'text/event-stream',
-      'Cache-Control': 'no-cache',
-      'Connection': 'keep-alive',
-    },
-  });
-}
-```
-
-### 3. Worker Service
-
-```typescript
-// worker/index.ts
-import { Agent, AgentPool } from '@anthropic/kode-sdk';
-import { Redis } from 'ioredis';
-import { PostgresStore } from './postgres-store';
-import { AgentScheduler } from './scheduler';
-
-const redis = new Redis(process.env.REDIS_URL!);
-const store = new PostgresStore(process.env.DATABASE_URL!);
-
-// Scheduler manages agent lifecycle
-const scheduler = new AgentScheduler({
-  maxActiveAgents: 100,  // Per worker
-  idleTimeout: 5 * 60 * 1000,  // 5 minutes
-  store,
-});
-
-// Process message queue
-async function processMessages() {
-  while (true) {
-    // Blocking pop from queue
-    const result = await redis.brpop('agent:messages', 30);
-
-    if (!result) continue;
-
-    const task = JSON.parse(result[1]);
-
-    try {
-      // Get or resume agent
-      const agent = await scheduler.getOrResume(task.agentId);
-
-      // Subscribe to events and forward to Redis PubSub
-      agent.subscribeProgress({ kinds: ['text_chunk', 'tool:start', 'tool:complete', 'done'] }, async (event) => {
-        await redis.publish(`task:${task.taskId}:events`, JSON.stringify(event));
-      });
-
-      // Process message
-      await agent.chat(task.message);
-
-      // Publish completion
-      await redis.publish(`task:${task.taskId}:events`, JSON.stringify({
-        kind: 'done',
-        taskId: task.taskId,
-      }));
-
-    } catch (error) {
-      // Publish error
-      await redis.publish(`task:${task.taskId}:events`, JSON.stringify({
-        kind: 'error',
-        taskId: task.taskId,
-        error: error.message,
-      }));
-    }
-  }
-}
-
-// Start worker
-processMessages().catch(console.error);
-```
-
-### 4. Agent Scheduler
-
-```typescript
-// worker/scheduler.ts
-import { Agent } from '@anthropic/kode-sdk';
-import { LRUCache } from 'lru-cache';
-
-export class AgentScheduler {
-  private active: LRUCache<string, Agent>;
-  private store: PostgresStore;
-  private config: SchedulerConfig;
-
-  constructor(config: SchedulerConfig) {
-    this.config = config;
-    this.store = config.store;
-
-    this.active = new LRUCache({
-      max: config.maxActiveAgents,
-      dispose: (agent, agentId) => {
-        // Auto-hibernate when evicted
-        this.hibernate(agentId, agent);
-      },
-      ttl: config.idleTimeout,
-    });
-  }
-
-  async getOrResume(agentId: string): Promise<Agent> {
-    // Check active cache
-    if (this.active.has(agentId)) {
-      return this.active.get(agentId)!;
-    }
-
-    // Acquire distributed lock
-    const lockId = await this.store.acquireLock(agentId, 60000);
-    if (!lockId) {
-      throw new Error('Agent is being processed by another worker');
-    }
-
-    try {
-      // Resume from database
-      const agent = await Agent.resume(agentId, this.getConfig(agentId), this.getDeps());
-
-      // Cache in active pool
-      this.active.set(agentId, agent);
-
-      // Setup idle tracking
-      agent.onIdle(() => {
-        this.active.delete(agentId);  // Triggers dispose -> hibernate
-      });
-
-      return agent;
-
-    } finally {
-      await this.store.releaseLock(agentId, lockId);
-    }
-  }
-
-  private async hibernate(agentId: string, agent: Agent): Promise<void> {
-    try {
-      await agent.persistInfo();
-      await this.store.updateAgentState(agentId, 'HIBERNATED');
-      console.log(`Hibernated agent: ${agentId}`);
-    } catch (error) {
-      console.error(`Failed to hibernate ${agentId}:`, error);
-    }
-  }
-}
-```
-
-### 5. PostgreSQL Store
-
-```typescript
-// worker/postgres-store.ts
-import { Pool } from 'pg';
-import { Store, Message, AgentInfo, ToolCallRecord } from '@anthropic/kode-sdk';
-
-export class PostgresStore implements Store {
-  private pool: Pool;
-
-  constructor(connectionString: string) {
-    this.pool = new Pool({
-      connectionString,
-      max: 20,
-      idleTimeoutMillis: 30000,
-    });
-  }
-
-  // Distributed lock using PostgreSQL Advisory Locks
-  async acquireLock(agentId: string, ttlMs: number): Promise<string | null> {
-    const lockKey = this.hashAgentId(agentId);
-    const client = await this.pool.connect();
-
-    try {
-      const result = await client.query(
-        'SELECT pg_try_advisory_lock($1) as acquired',
-        [lockKey]
-      );
-
-      if (result.rows[0].acquired) {
-        const lockId = crypto.randomUUID();
-
-        // Set expiry using a separate table
-        await client.query(
-          `INSERT INTO agent_locks (agent_id, lock_id, expires_at)
-           VALUES ($1, $2, NOW() + interval '${ttlMs} milliseconds')
-           ON CONFLICT (agent_id) DO UPDATE SET lock_id = $2, expires_at = NOW() + interval '${ttlMs} milliseconds'`,
-          [agentId, lockId]
-        );
-
-        return lockId;
-      }
-
-      return null;
-    } finally {
-      client.release();
-    }
-  }
-
-  async releaseLock(agentId: string, lockId: string): Promise<void> {
-    const lockKey = this.hashAgentId(agentId);
-    const client = await this.pool.connect();
-
-    try {
-      // Verify lock ownership
-      const result = await client.query(
-        'SELECT lock_id FROM agent_locks WHERE agent_id = $1',
-        [agentId]
-      );
-
-      if (result.rows[0]?.lock_id === lockId) {
-        await client.query('SELECT pg_advisory_unlock($1)', [lockKey]);
-        await client.query('DELETE FROM agent_locks WHERE agent_id = $1', [agentId]);
-      }
-    } finally {
-      client.release();
-    }
-  }
-
-  // Messages stored as JSONB
-  async saveMessages(agentId: string, messages: Message[]): Promise<void> {
-    await this.pool.query(
-      `INSERT INTO agent_messages (agent_id, messages, updated_at)
-       VALUES ($1, $2, NOW())
-       ON CONFLICT (agent_id) DO UPDATE SET messages = $2, updated_at = NOW()`,
-      [agentId, JSON.stringify(messages)]
-    );
-  }
-
-  async loadMessages(agentId: string): Promise<Message[]> {
-    const result = await this.pool.query(
-      'SELECT messages FROM agent_messages WHERE agent_id = $1',
-      [agentId]
-    );
-    return result.rows[0]?.messages || [];
-  }
-
-  // ... implement other Store methods
-}
-```
-
----
-
-## Database Schema
-
-```sql
--- Agents table
-CREATE TABLE agents (
-  id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
-  user_id UUID NOT NULL REFERENCES users(id),
-  name TEXT NOT NULL,
-  template_id TEXT NOT NULL,
-  state TEXT DEFAULT 'READY',
-  config JSONB DEFAULT '{}',
-  created_at TIMESTAMPTZ DEFAULT NOW(),
-  updated_at TIMESTAMPTZ DEFAULT NOW(),
-  last_activity TIMESTAMPTZ DEFAULT NOW()
-);
-
-CREATE INDEX idx_agents_user ON agents(user_id);
-CREATE INDEX idx_agents_state ON agents(state);
-
--- Messages (one row per agent, JSONB array)
-CREATE TABLE agent_messages (
-  agent_id UUID PRIMARY KEY REFERENCES agents(id) ON DELETE CASCADE,
-  messages JSONB NOT NULL DEFAULT '[]',
-  version INTEGER DEFAULT 1,
-  updated_at TIMESTAMPTZ DEFAULT NOW()
-);
-
--- Tool call records
-CREATE TABLE tool_calls (
-  id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
-  agent_id UUID NOT NULL REFERENCES agents(id) ON DELETE CASCADE,
-  name TEXT NOT NULL,
-  input JSONB,
-  result JSONB,
-  state TEXT DEFAULT 'PENDING',
-  started_at TIMESTAMPTZ,
-  completed_at TIMESTAMPTZ
-);
-
-CREATE INDEX idx_tool_calls_agent ON tool_calls(agent_id);
-
--- Checkpoints (for fork)
-CREATE TABLE checkpoints (
-  id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
-  agent_id UUID NOT NULL REFERENCES agents(id) ON DELETE CASCADE,
-  parent_checkpoint_id UUID REFERENCES checkpoints(id),
-  snapshot JSONB NOT NULL,
-  tags TEXT[] DEFAULT '{}',
-  created_at TIMESTAMPTZ DEFAULT NOW()
-);
-
-CREATE INDEX idx_checkpoints_agent ON checkpoints(agent_id);
-
--- Distributed locks
-CREATE TABLE agent_locks (
-  agent_id UUID PRIMARY KEY,
-  lock_id UUID NOT NULL,
-  worker_id TEXT,
-  expires_at TIMESTAMPTZ NOT NULL
-);
-
--- Row Level Security
-ALTER TABLE agents ENABLE ROW LEVEL SECURITY;
-
-CREATE POLICY "Users can only access own agents" ON agents
-  FOR ALL USING (auth.uid() = user_id);
-
-ALTER TABLE agent_messages ENABLE ROW LEVEL SECURITY;
-
-CREATE POLICY "Users can only access own agent messages" ON agent_messages
-  FOR ALL USING (
-    agent_id IN (SELECT id FROM agents WHERE user_id = auth.uid())
-  );
-```
-
----
-
-## Handling Special Scenarios
-
-### Agent Hibernation (Inactive Users)
-
-```typescript
-// Cron job: Check for idle agents every 5 minutes
-async function hibernateIdleAgents() {
-  const idleThreshold = new Date(Date.now() - 30 * 60 * 1000);  // 30 minutes
-
-  const { data: idleAgents } = await supabase
-    .from('agents')
-    .select('id')
-    .eq('state', 'ACTIVE')
-    .lt('last_activity', idleThreshold.toISOString());
-
-  for (const agent of idleAgents || []) {
-    await redis.lpush('agent:commands', JSON.stringify({
-      command: 'hibernate',
-      agentId: agent.id,
-    }));
-  }
-}
-```
-
-### Server Crash Recovery
-
-```typescript
-// On worker startup
-async function recoverFromCrash() {
-  // Find agents that were being processed by this worker
-  const { data: orphanedAgents } = await supabase
-    .from('agents')
-    .select('id')
-    .eq('state', 'PROCESSING')
-    .eq('worker_id', WORKER_ID);
-
-  for (const agent of orphanedAgents || []) {
-    console.log(`Recovering agent: ${agent.id}`);
-
-    // Resume with crash strategy
-    const recovered = await Agent.resume(agent.id, config, deps, {
-      strategy: 'crash',  // Auto-seal incomplete tool calls
-    });
-
-    // Re-queue for processing
-    await redis.lpush('agent:messages', JSON.stringify({
-      taskId: `recovery-${agent.id}`,
-      agentId: agent.id,
-      message: null,  // No new message, just recover
-      isRecovery: true,
-    }));
-  }
-}
-
-// Call on startup
-recoverFromCrash();
-```
-
-### Fork Multiple Agents
-
-```typescript
-// API endpoint for forking
-export async function POST(req: NextRequest) {
-  const { agentId, checkpointId, count = 1 } = await req.json();
-
-  // Validate: max 10 forks at once
-  if (count > 10) {
-    return Response.json({ error: 'Max 10 forks at once' }, { status: 400 });
-  }
-
-  const forkIds: string[] = [];
-
-  for (let i = 0; i < count; i++) {
-    const forkId = `${agentId}-fork-${Date.now()}-${i}`;
-
-    await redis.lpush('agent:commands', JSON.stringify({
-      command: 'fork',
-      agentId,
-      checkpointId,
-      forkId,
-    }));
-
-    forkIds.push(forkId);
-  }
-
-  return Response.json({ forkIds });
-}
-
-// Worker handles fork command
-async function handleForkCommand(command: ForkCommand) {
-  const parent = await scheduler.getOrResume(command.agentId);
-
-  const forked = await parent.fork(command.checkpointId);
-
-  // Store forked agent
-  await supabase.from('agents').insert({
-    id: command.forkId,
-    user_id: parent.userId,
-    parent_agent_id: command.agentId,
-    // ... copy other fields
-  });
-}
-```
-
-### Membership Expiry
-
-```typescript
-// Webhook from payment provider
-export async function POST(req: NextRequest) {
-  const event = await req.json();
-
-  if (event.type === 'subscription.cancelled') {
-    const userId = event.data.user_id;
-
-    // Pause all user's agents
-    await supabase
-      .from('agents')
-      .update({ state: 'MEMBERSHIP_PAUSED' })
-      .eq('user_id', userId);
-
-    // Hibernate any active agents
-    const { data: activeAgents } = await supabase
-      .from('agents')
-      .select('id')
-      .eq('user_id', userId)
-      .eq('state', 'ACTIVE');
-
-    for (const agent of activeAgents || []) {
-      await redis.lpush('agent:commands', JSON.stringify({
-        command: 'hibernate',
-        agentId: agent.id,
-        reason: 'membership_expired',
-      }));
-    }
-  }
-
-  if (event.type === 'subscription.renewed') {
-    const userId = event.data.user_id;
-
-    // Unpause all agents
-    await supabase
-      .from('agents')
-      .update({ state: 'HIBERNATED' })
-      .eq('user_id', userId)
-      .eq('state', 'MEMBERSHIP_PAUSED');
-  }
-}
-```
-
----
-
-## Performance Considerations
-
-### Message Storage Optimization
-
-```typescript
-// Instead of storing full messages array
-// Use append-only log with periodic compaction
-
-class OptimizedMessageStore {
-  async appendMessage(agentId: string, message: Message) {
-    // Append to log table (fast)
-    await this.pool.query(
-      `INSERT INTO message_log (agent_id, seq, message)
-       VALUES ($1, nextval('message_seq'), $2)`,
-      [agentId, JSON.stringify(message)]
-    );
-
-    // Increment message count
-    await this.pool.query(
-      `UPDATE agents SET message_count = message_count + 1 WHERE id = $1`,
-      [agentId]
-    );
-  }
-
-  async loadMessages(agentId: string, limit = 100): Promise<Message[]> {
-    // Load latest messages (pagination)
-    const result = await this.pool.query(
-      `SELECT message FROM message_log
-       WHERE agent_id = $1
-       ORDER BY seq DESC
-       LIMIT $2`,
-      [agentId, limit]
-    );
-
-    return result.rows.reverse().map(r => r.message);
-  }
-}
-```
-
-### Fork Optimization (Copy-on-Write)
-
-```typescript
-// Fork without copying all messages
-async function forkAgentCOW(agentId: string, checkpointId: string): Promise<string> {
-  const forkId = generateForkId();
-
-  // Copy only metadata, reference same message log
-  await this.pool.query(
-    `INSERT INTO agents (id, user_id, template_id, config, fork_base_checkpoint_id)
-     SELECT $1, user_id, template_id, config, $2
-     FROM agents WHERE id = $3`,
-    [forkId, checkpointId, agentId]
-  );
-
-  // New messages go to fork's own log
-  // Old messages read from checkpoint reference
-
-  return forkId;
-}
-```
-
----
-
-## Deployment Checklist
-
-- [ ] API layer deployed to Vercel/Cloudflare
-- [ ] Workers deployed to Railway/Render/Fly.io
-- [ ] PostgreSQL (Supabase) configured with RLS
-- [ ] Redis (Upstash) for queues and pub/sub
-- [ ] S3/R2 for file attachments
-- [ ] Monitoring (Sentry, DataDog, etc.)
-- [ ] Rate limiting configured
-- [ ] Graceful shutdown handlers
-- [ ] Health check endpoints
-- [ ] Auto-scaling rules for workers
-
----
-
-## Cost Estimation
-
-| Component | ~10K Users | ~100K Users |
-|-----------|------------|-------------|
-| Vercel (API) | $20/mo | $100/mo |
-| Railway (Workers) | $50/mo | $500/mo |
-| Supabase (PostgreSQL) | $25/mo | $100/mo |
-| Upstash (Redis) | $10/mo | $50/mo |
-| **Total** | **~$100/mo** | **~$750/mo** |
-
-*Excludes LLM API costs*
-
----
-
-## Summary
-
-Building a large-scale ToC application with KODE SDK requires:
-
-1. **Separate concerns**: Stateless API + Stateful workers
-2. **Queue-based communication**: Decouple request handling from agent execution
-3. **Distributed store**: PostgreSQL for persistence, Redis for real-time
-4. **Agent scheduling**: LRU cache for active agents, hibernate inactive
-5. **Crash recovery**: WAL + checkpoint for resilience
-
-KODE SDK provides the agent runtime kernel. You build the platform around it.
diff --git a/docs/scenarios/web-backend.md b/docs/scenarios/web-backend.md
deleted file mode 100644
index 3b81000..0000000
--- a/docs/scenarios/web-backend.md
+++ /dev/null
@@ -1,444 +0,0 @@
-# Scenario: Web Backend (Self-Hosted)
-
-> Deploy KODE SDK on your own servers for small to medium web applications.
-
----
-
-## When to Use This Pattern
-
-| Criteria | Threshold |
-|----------|-----------|
-| Concurrent users | < 1,000 |
-| Concurrent agents | < 100 |
-| Infrastructure | Single server / small cluster |
-| Complexity | Moderate |
-
-**Compatibility: 80%** - Need to add HTTP layer and user isolation.
-
----
-
-## Architecture
-
-```
-+------------------------------------------------------------------+
-|                       Your Server                                 |
-+------------------------------------------------------------------+
-|                                                                   |
-|   +------------------+     +------------------+                   |
-|   |   HTTP Layer     |     |   KODE SDK       |                   |
-|   |   (Express/Hono) |---->|   AgentPool      |                   |
-|   +------------------+     +------------------+                   |
-|                                   |                               |
-|   +------------------+     +------v------+                        |
-|   |   Auth Layer     |     |    Store    |                        |
-|   |   (Passport/etc) |     | (Redis/PG)  |                        |
-|   +------------------+     +-------------+                        |
-|                                                                   |
-+------------------------------------------------------------------+
-```
-
----
-
-## Express.js Integration
-
-```typescript
-// server.ts
-import express from 'express';
-import { Agent, AgentPool, AnthropicProvider, LocalSandbox } from '@anthropic/kode-sdk';
-import { RedisStore } from './redis-store';  // Custom implementation
-
-const app = express();
-const store = new RedisStore(process.env.REDIS_URL!);
-const pool = new AgentPool({ store, maxAgents: 100 });
-
-app.use(express.json());
-
-// Middleware: Auth
-app.use(async (req, res, next) => {
-  const token = req.headers.authorization?.replace('Bearer ', '');
-  req.user = await verifyToken(token);
-  next();
-});
-
-// Create agent for user
-app.post('/api/agents', async (req, res) => {
-  const { name, systemPrompt } = req.body;
-  const agentId = `${req.user.id}-${Date.now()}`;
-
-  const agent = await pool.create(agentId, {
-    template: { systemPrompt },
-    config: {
-      metadata: { userId: req.user.id, name },
-    },
-  }, {
-    modelProvider: new AnthropicProvider(process.env.ANTHROPIC_API_KEY!),
-    sandbox: new LocalSandbox({ workDir: `/tmp/agents/${agentId}` }),
-  });
-
-  res.json({ agentId, name });
-});
-
-// List user's agents
-app.get('/api/agents', async (req, res) => {
-  const agents = await store.listAgentsByUser(req.user.id);
-  res.json(agents);
-});
-
-// Chat with agent
-app.post('/api/agents/:agentId/chat', async (req, res) => {
-  const { agentId } = req.params;
-  const { message } = req.body;
-
-  // Verify ownership
-  const info = await store.loadInfo(agentId);
-  if (info?.metadata?.userId !== req.user.id) {
-    return res.status(403).json({ error: 'Forbidden' });
-  }
-
-  // Get or resume agent
-  let agent = pool.get(agentId);
-  if (!agent) {
-    agent = await pool.resume(agentId, {
-      template: { systemPrompt: info.systemPrompt },
-    }, {
-      modelProvider: new AnthropicProvider(process.env.ANTHROPIC_API_KEY!),
-      sandbox: new LocalSandbox({ workDir: `/tmp/agents/${agentId}` }),
-    });
-  }
-
-  // Stream response via SSE
-  res.setHeader('Content-Type', 'text/event-stream');
-  res.setHeader('Cache-Control', 'no-cache');
-  res.setHeader('Connection', 'keep-alive');
-
-  agent.subscribeProgress({ kinds: ['text_chunk', 'tool:start', 'tool:complete', 'done'] }, (event) => {
-    res.write(`data: ${JSON.stringify(event)}\n\n`);
-
-    if (event.kind === 'done') {
-      res.end();
-    }
-  });
-
-  await agent.chat(message);
-});
-
-// Graceful shutdown
-process.on('SIGTERM', async () => {
-  console.log('Shutting down...');
-  for (const [id, agent] of pool.agents) {
-    await agent.persistInfo();
-  }
-  process.exit(0);
-});
-
-app.listen(3000, () => console.log('Server running on :3000'));
-```
-
----
-
-## Hono (Edge-Compatible)
-
-```typescript
-// server.ts
-import { Hono } from 'hono';
-import { serve } from '@hono/node-server';
-import { streamSSE } from 'hono/streaming';
-
-const app = new Hono();
-
-app.post('/api/agents/:agentId/chat', async (c) => {
-  const agentId = c.req.param('agentId');
-  const { message } = await c.req.json();
-
-  const agent = await getOrResumeAgent(agentId);
-
-  return streamSSE(c, async (stream) => {
-    agent.subscribeProgress({ kinds: ['text_chunk', 'done'] }, async (event) => {
-      await stream.writeSSE({ data: JSON.stringify(event) });
-
-      if (event.kind === 'done') {
-        await stream.close();
-      }
-    });
-
-    await agent.chat(message);
-  });
-});
-
-serve(app, { port: 3000 });
-```
-
----
-
-## User Isolation
-
-### Per-User Agent Namespace
-
-```typescript
-// Prefix all agent IDs with user ID
-function getAgentId(userId: string, localId: string): string {
-  return `user:${userId}:agent:${localId}`;
-}
-
-// List only user's agents
-async function listUserAgents(userId: string): Promise<AgentInfo[]> {
-  const allAgents = await store.listAgents();
-  return allAgents.filter(a => a.metadata?.userId === userId);
-}
-```
-
-### Per-User Sandbox Isolation
-
-```typescript
-// Each user gets isolated workspace
-function getUserSandbox(userId: string, agentId: string): LocalSandbox {
-  const workDir = path.join('/data/workspaces', userId, agentId);
-
-  return new LocalSandbox({
-    workDir,
-    allowedPaths: [workDir],  // Restrict to user's directory only
-    env: {
-      USER_ID: userId,
-      AGENT_ID: agentId,
-    },
-  });
-}
-```
-
----
-
-## Redis Store Implementation
-
-```typescript
-// redis-store.ts
-import Redis from 'ioredis';
-import { Store, Message, AgentInfo, ToolCallRecord } from '@anthropic/kode-sdk';
-
-export class RedisStore implements Store {
-  private redis: Redis;
-
-  constructor(url: string) {
-    this.redis = new Redis(url);
-  }
-
-  async saveMessages(agentId: string, messages: Message[]): Promise<void> {
-    await this.redis.set(`agent:${agentId}:messages`, JSON.stringify(messages));
-  }
-
-  async loadMessages(agentId: string): Promise<Message[]> {
-    const data = await this.redis.get(`agent:${agentId}:messages`);
-    return data ? JSON.parse(data) : [];
-  }
-
-  async saveInfo(agentId: string, info: AgentInfo): Promise<void> {
-    await this.redis.set(`agent:${agentId}:info`, JSON.stringify(info));
-    // Add to user's agent list
-    if (info.metadata?.userId) {
-      await this.redis.sadd(`user:${info.metadata.userId}:agents`, agentId);
-    }
-  }
-
-  async loadInfo(agentId: string): Promise<AgentInfo | undefined> {
-    const data = await this.redis.get(`agent:${agentId}:info`);
-    return data ? JSON.parse(data) : undefined;
-  }
-
-  async listAgentsByUser(userId: string): Promise<AgentInfo[]> {
-    const agentIds = await this.redis.smembers(`user:${userId}:agents`);
-    const infos = await Promise.all(agentIds.map(id => this.loadInfo(id)));
-    return infos.filter(Boolean) as AgentInfo[];
-  }
-
-  async deleteAgent(agentId: string): Promise<void> {
-    const info = await this.loadInfo(agentId);
-    if (info?.metadata?.userId) {
-      await this.redis.srem(`user:${info.metadata.userId}:agents`, agentId);
-    }
-    await this.redis.del(
-      `agent:${agentId}:messages`,
-      `agent:${agentId}:info`,
-      `agent:${agentId}:tools`,
-    );
-  }
-}
-```
-
----
-
-## Rate Limiting
-
-```typescript
-import rateLimit from 'express-rate-limit';
-
-// Global rate limit
-app.use(rateLimit({
-  windowMs: 60 * 1000,  // 1 minute
-  max: 60,  // 60 requests per minute
-}));
-
-// Per-user rate limit for chat
-const chatLimiter = rateLimit({
-  windowMs: 60 * 1000,
-  max: 20,  // 20 chat requests per minute per user
-  keyGenerator: (req) => req.user?.id || req.ip,
-});
-
-app.post('/api/agents/:agentId/chat', chatLimiter, async (req, res) => {
-  // ...
-});
-
-// Token-based rate limiting
-const tokenTracker = new Map<string, number>();
-
-agent.subscribeMonitor((event) => {
-  if (event.kind === 'token_usage') {
-    const userId = agent.metadata?.userId;
-    const current = tokenTracker.get(userId) || 0;
-    tokenTracker.set(userId, current + event.totalTokens);
-
-    if (current + event.totalTokens > DAILY_TOKEN_LIMIT) {
-      agent.stop();
-      throw new Error('Daily token limit exceeded');
-    }
-  }
-});
-```
-
----
-
-## Health Checks
-
-```typescript
-// Health check endpoint
-app.get('/health', async (req, res) => {
-  const checks = {
-    redis: await checkRedis(),
-    agents: pool.agents.size,
-    memory: process.memoryUsage(),
-  };
-
-  const healthy = checks.redis;
-  res.status(healthy ? 200 : 503).json(checks);
-});
-
-async function checkRedis(): Promise<boolean> {
-  try {
-    await redis.ping();
-    return true;
-  } catch {
-    return false;
-  }
-}
-
-// Kubernetes readiness probe
-app.get('/ready', (req, res) => {
-  res.sendStatus(200);
-});
-
-// Kubernetes liveness probe
-app.get('/live', (req, res) => {
-  res.sendStatus(200);
-});
-```
-
----
-
-## Docker Deployment
-
-```dockerfile
-# Dockerfile
-FROM node:20-alpine
-
-WORKDIR /app
-
-COPY package*.json ./
-RUN npm ci --production
-
-COPY dist ./dist
-
-ENV NODE_ENV=production
-
-EXPOSE 3000
-
-CMD ["node", "dist/server.js"]
-```
-
-```yaml
-# docker-compose.yml
-version: '3.8'
-
-services:
-  api:
-    build: .
-    ports:
-      - "3000:3000"
-    environment:
-      - REDIS_URL=redis://redis:6379
-      - ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY}
-    depends_on:
-      - redis
-    deploy:
-      replicas: 2
-      restart_policy:
-        condition: on-failure
-
-  redis:
-    image: redis:7-alpine
-    volumes:
-      - redis-data:/data
-
-volumes:
-  redis-data:
-```
-
----
-
-## Scaling to Multiple Instances
-
-When running multiple server instances:
-
-### 1. Use Redis for Session Affinity
-
-```typescript
-// Store which server handles which agent
-await redis.set(`agent:${agentId}:server`, SERVER_ID, 'EX', 3600);
-
-// Check before resuming
-const currentServer = await redis.get(`agent:${agentId}:server`);
-if (currentServer && currentServer !== SERVER_ID) {
-  // Agent is on another server, redirect or wait
-}
-```
-
-### 2. Distributed Locking
-
-```typescript
-import Redlock from 'redlock';
-
-const redlock = new Redlock([redis]);
-
-app.post('/api/agents/:agentId/chat', async (req, res) => {
-  const lock = await redlock.acquire([`lock:agent:${agentId}`], 30000);
-
-  try {
-    // Only one server can process this agent at a time
-    const agent = await getOrResumeAgent(agentId);
-    await agent.chat(message);
-  } finally {
-    await lock.release();
-  }
-});
-```
-
----
-
-## Migration Path to Large Scale
-
-When you outgrow single-server deployment:
-
-1. **Add message queue** - Decouple API from processing
-2. **Separate workers** - Run agents in dedicated processes
-3. **Use PostgreSQL** - Replace Redis for primary storage
-4. **Add agent scheduler** - Manage agent lifecycle
-
-See [Large-Scale ToC Guide](./large-scale-toc.md) for the full architecture.
diff --git a/docs/simplified-tools.md b/docs/simplified-tools.md
deleted file mode 100644
index faddd72..0000000
--- a/docs/simplified-tools.md
+++ /dev/null
@@ -1,613 +0,0 @@
-# 简化的工具定义 API
-
-## 概述
-
-Kode SDK v2.7 引入了全新的工具定义 API，大幅简化了开发体验：
-
-- ✅ **自动 Schema 生成**：从类型定义自动生成 JSON Schema，无需手动编写
-- ✅ **简化的属性标记**：用 `readonly`/`noEffect` 替代复杂的 `access`/`mutates`/`safe`
-- ✅ **自定义事件支持**：工具内可发射自定义事件到 monitor 通道
-- ✅ **向后兼容**：完全兼容现有的 `ToolInstance` 接口
-
-## 快速开始
-
-### 旧方式（v2.6 及之前）
-
-```typescript
-import { ToolInstance } from '@kode/sdk';
-
-const weatherTool: ToolInstance = {
-  name: 'get_weather',
-  description: 'Get weather information',
-
-  // ❌ 手动编写 JSON Schema - 繁琐且易错
-  input_schema: {
-    type: 'object',
-    properties: {
-      city: { type: 'string', description: 'City name' },
-      units: {
-        type: 'string',
-        description: 'Temperature units',
-        enum: ['celsius', 'fahrenheit']
-      }
-    },
-    required: ['city']
-  },
-
-  async exec(args, ctx) {
-    return { temperature: 22, condition: 'sunny' };
-  },
-
-  toDescriptor() {
-    return {
-      source: 'registered',
-      name: 'get_weather',
-      registryId: 'get_weather',
-      metadata: {
-        access: 'read',    // ❌ 复杂的三字段系统
-        mutates: false,
-        safe: true
-      }
-    };
-  }
-};
-```
-
-### 新方式（v2.7+）
-
-```typescript
-import { defineTool } from '@kode/sdk';
-
-const weatherTool = defineTool({
-  name: 'get_weather',
-  description: 'Get weather information',
-
-  // ✅ 简洁的参数定义 - 自动生成 Schema
-  params: {
-    city: {
-      type: 'string',
-      description: 'City name'
-    },
-    units: {
-      type: 'string',
-      description: 'Temperature units',
-      enum: ['celsius', 'fahrenheit'],
-      required: false,
-      default: 'celsius'
-    }
-  },
-
-  // ✅ 简化的属性标记
-  attributes: {
-    readonly: true,   // 只读工具
-    noEffect: true    // 无副作用，可安全重试
-  },
-
-  async exec(args, ctx) {
-    // ✅ 自定义事件
-    ctx.emit('weather_fetched', { city: args.city });
-
-    return { temperature: 22, condition: 'sunny' };
-  }
-});
-```
-
-## 核心 API
-
-### `defineTool()` - 定义单个工具（推荐）
-
-```typescript
-import { defineTool, EnhancedToolContext } from '@kode/sdk';
-
-const myTool = defineTool({
-  // 基本信息
-  name: 'tool_name',
-  description: 'Tool description',
-
-  // 参数定义（自动生成 schema）
-  params: {
-    param1: { type: 'string', description: '...' },
-    param2: { type: 'number', required: false, default: 10 }
-  },
-
-  // 工具属性
-  attributes: {
-    readonly: true,   // 可选，默认 false
-    noEffect: true    // 可选，默认 false
-  },
-
-  // Prompt 说明书（可选）
-  prompt: 'Usage instructions for the model...',
-
-  // 执行函数
-  async exec(args, ctx: EnhancedToolContext) {
-    // 发射自定义事件
-    ctx.emit('custom_event', { data: 'value' });
-
-    return result;
-  }
-});
-```
-
-### `defineTools()` - 批量定义
-
-```typescript
-import { defineTools } from '@kode/sdk';
-
-const calculatorTools = defineTools([
-  {
-    name: 'add',
-    description: 'Add two numbers',
-    params: {
-      a: { type: 'number' },
-      b: { type: 'number' }
-    },
-    attributes: { readonly: true, noEffect: true },
-    async exec(args, ctx) {
-      return args.a + args.b;
-    }
-  },
-  {
-    name: 'multiply',
-    description: 'Multiply two numbers',
-    params: {
-      a: { type: 'number' },
-      b: { type: 'number' }
-    },
-    attributes: { readonly: true, noEffect: true },
-    async exec(args, ctx) {
-      return args.a * args.b;
-    }
-  }
-]);
-```
-
-### `@tool` 装饰器（实验性）
-
-需要在 `tsconfig.json` 启用 `experimentalDecorators`：
-
-```json
-{
-  "compilerOptions": {
-    "experimentalDecorators": true
-  }
-}
-```
-
-```typescript
-import { tool, extractTools } from '@kode/sdk';
-
-class MyToolset {
-  @tool({
-    description: 'Query database',
-    params: {
-      sql: { type: 'string' },
-      limit: { type: 'number', required: false, default: 100 }
-    },
-    attributes: { readonly: true }
-  })
-  async query(args: { sql: string; limit?: number }, ctx) {
-    return await db.query(args.sql, { limit: args.limit });
-  }
-
-  @tool({
-    description: 'Insert data',
-    params: {
-      table: { type: 'string' },
-      data: { type: 'object' }
-    }
-  })
-  async insert(args, ctx) {
-    return await db.insert(args.table, args.data);
-  }
-}
-
-// 提取工具
-const tools = extractTools(new MyToolset());
-```
-
-## 参数定义详解
-
-### 基础类型
-
-```typescript
-params: {
-  str: { type: 'string', description: 'A string' },
-  num: { type: 'number', description: 'A number' },
-  bool: { type: 'boolean', description: 'A boolean' },
-
-  // 可选参数
-  optional: { type: 'string', required: false },
-
-  // 默认值
-  withDefault: { type: 'number', default: 42 },
-
-  // 枚举
-  choice: {
-    type: 'string',
-    enum: ['option1', 'option2', 'option3']
-  }
-}
-```
-
-### 复杂类型
-
-```typescript
-params: {
-  // 数组
-  tags: {
-    type: 'array',
-    description: 'List of tags',
-    items: { type: 'string' }
-  },
-
-  // 嵌套对象
-  profile: {
-    type: 'object',
-    description: 'User profile',
-    properties: {
-      email: { type: 'string' },
-      age: { type: 'number', required: false },
-      roles: {
-        type: 'array',
-        items: { type: 'string' }
-      }
-    }
-  }
-}
-```
-
-### 兼容老方式
-
-如果需要更精细的 Schema 控制，仍可直接提供 `input_schema`：
-
-```typescript
-defineTool({
-  name: 'advanced_tool',
-  description: 'Advanced tool',
-
-  // 直接提供 JSON Schema
-  input_schema: {
-    type: 'object',
-    properties: {
-      data: {
-        type: 'string',
-        pattern: '^[A-Z]{3}$',
-        minLength: 3,
-        maxLength: 3
-      }
-    },
-    required: ['data']
-  },
-
-  async exec(args, ctx) {
-    // ...
-  }
-});
-```
-
-## 工具属性
-
-### `readonly` - 只读工具
-
-表示工具不修改任何状态（文件、数据库、外部 API）：
-
-```typescript
-attributes: {
-  readonly: true
-}
-
-// 等价于旧方式的：
-metadata: {
-  access: 'read',
-  mutates: false
-}
-```
-
-**用途**：
-- `readonly` 权限模式会自动放行只读工具
-- 适用于查询、读取、计算等操作
-
-### `noEffect` - 无副作用
-
-表示工具可以安全重试，多次执行结果相同：
-
-```typescript
-attributes: {
-  noEffect: true
-}
-
-// 等价于旧方式的：
-metadata: {
-  safe: true
-}
-```
-
-**用途**：
-- Resume 时可安全重新执行
-- 适用于幂等操作（GET 请求、纯计算等）
-
-### 默认行为
-
-不设置 `attributes` 时，工具被视为：
-- 非只读（可能写入）
-- 有副作用（不可重试）
-
-```typescript
-// 写入工具 - 无需设置 attributes
-defineTool({
-  name: 'create_file',
-  description: 'Create a file',
-  params: { path: { type: 'string' } },
-  async exec(args, ctx) {
-    await ctx.sandbox.fs.write(args.path, 'content');
-    return { ok: true };
-  }
-});
-```
-
-## 自定义事件
-
-### 基本用法
-
-```typescript
-defineTool({
-  name: 'process_data',
-  description: 'Process data',
-  params: { input: { type: 'string' } },
-
-  async exec(args, ctx: EnhancedToolContext) {
-    // 发射处理开始事件
-    ctx.emit('processing_started', { input: args.input });
-
-    const result = await heavyComputation(args.input);
-
-    // 发射处理完成事件
-    ctx.emit('processing_completed', {
-      result,
-      duration: 1234
-    });
-
-    return result;
-  }
-});
-```
-
-### 监听自定义事件
-
-```typescript
-agent.on('tool_custom_event', (event) => {
-  console.log(`[${event.toolName}] ${event.eventType}:`, event.data);
-
-  // 示例输出：
-  // [process_data] processing_started: { input: 'hello' }
-  // [process_data] processing_completed: { result: {...}, duration: 1234 }
-});
-```
-
-### 事件结构
-
-自定义事件会自动包装为 `MonitorToolCustomEvent`：
-
-```typescript
-interface MonitorToolCustomEvent {
-  channel: 'monitor';
-  type: 'tool_custom_event';
-  toolName: string;        // 工具名称
-  eventType: string;       // 自定义事件类型
-  data?: any;              // 事件数据
-  timestamp: number;       // 时间戳
-  bookmark?: Bookmark;
-}
-```
-
-### 实际应用场景
-
-```typescript
-// 示例：带进度报告的长时间工具
-defineTool({
-  name: 'batch_process',
-  description: 'Process items in batch',
-  params: {
-    items: { type: 'array', items: { type: 'string' } }
-  },
-
-  async exec(args, ctx: EnhancedToolContext) {
-    const total = args.items.length;
-    const results = [];
-
-    for (let i = 0; i < args.items.length; i++) {
-      const item = args.items[i];
-
-      // 报告进度
-      ctx.emit('batch_progress', {
-        current: i + 1,
-        total,
-        percentage: Math.round(((i + 1) / total) * 100)
-      });
-
-      const result = await processItem(item);
-      results.push(result);
-
-      // 检查超时信号
-      if (ctx.signal?.aborted) {
-        throw new Error('Operation aborted');
-      }
-    }
-
-    ctx.emit('batch_completed', { count: results.length });
-
-    return results;
-  }
-});
-```
-
-## 完整示例
-
-```typescript
-import { Agent, defineTool, defineTools } from '@kode/sdk';
-
-// 定义工具
-const weatherTool = defineTool({
-  name: 'get_weather',
-  description: 'Get weather for a city',
-  params: {
-    city: { type: 'string' },
-    units: { type: 'string', enum: ['C', 'F'], required: false, default: 'C' }
-  },
-  attributes: { readonly: true, noEffect: true },
-  prompt: 'Use this to fetch weather. Always specify city name.',
-
-  async exec(args, ctx) {
-    ctx.emit('weather_request', { city: args.city });
-    const data = await fetch(`/api/weather?city=${args.city}`);
-    ctx.emit('weather_response', { city: args.city, temp: data.temp });
-    return data;
-  }
-});
-
-const calculatorTools = defineTools([
-  {
-    name: 'add',
-    description: 'Add numbers',
-    params: {
-      a: { type: 'number' },
-      b: { type: 'number' }
-    },
-    attributes: { readonly: true, noEffect: true },
-    async exec(args, ctx) {
-      return args.a + args.b;
-    }
-  }
-]);
-
-// 创建 Agent
-const agent = await Agent.create({
-  agentId: 'my-agent',
-  templateId: 'default',
-  provider: { apiKey: process.env.ANTHROPIC_API_KEY! },
-  tools: [weatherTool, ...calculatorTools]
-});
-
-// 监听自定义事件
-agent.on('tool_custom_event', (event) => {
-  if (event.eventType === 'weather_request') {
-    console.log(`Fetching weather for ${event.data.city}...`);
-  }
-  if (event.eventType === 'weather_response') {
-    console.log(`Weather: ${event.data.temp}°`);
-  }
-});
-
-// 使用
-await agent.chat('What is the weather in Tokyo?');
-```
-
-## 迁移指南
-
-### 从旧 API 迁移
-
-#### 1. 转换基本工具
-
-**旧方式：**
-```typescript
-const tool: ToolInstance = {
-  name: 'my_tool',
-  description: 'Does something',
-  input_schema: {
-    type: 'object',
-    properties: {
-      input: { type: 'string' }
-    },
-    required: ['input']
-  },
-  async exec(args, ctx) { return args.input; },
-  toDescriptor() { /* ... */ }
-};
-```
-
-**新方式：**
-```typescript
-const tool = defineTool({
-  name: 'my_tool',
-  description: 'Does something',
-  params: {
-    input: { type: 'string' }
-  },
-  async exec(args, ctx) { return args.input; }
-});
-```
-
-#### 2. 转换 metadata
-
-| 旧方式 | 新方式 |
-|--------|--------|
-| `{ access: 'read', mutates: false }` | `{ readonly: true }` |
-| `{ access: 'write', mutates: true }` | （默认，无需设置） |
-| `{ safe: true }` | `{ noEffect: true }` |
-
-#### 3. 添加自定义事件
-
-```typescript
-// 旧方式 - 无法发射事件
-async exec(args, ctx: ToolContext) {
-  // 只能返回结果
-  return result;
-}
-
-// 新方式 - 可以发射事件
-async exec(args, ctx: EnhancedToolContext) {
-  ctx.emit('event_name', { data: 'value' });
-  return result;
-}
-```
-
-## 常见问题
-
-### Q: 必须使用新 API 吗？
-
-A: 不，旧的 `ToolInstance` 接口完全兼容。新 API 是可选的增强功能。
-
-### Q: `readonly` 和 `noEffect` 有什么区别？
-
-A:
-- `readonly`: 工具不修改任何状态（文件、数据库等）
-- `noEffect`: 工具可以安全重试，多次执行结果相同
-
-一个只读工具通常也是无副作用的，但反之不一定成立。
-
-### Q: 自定义事件会被持久化吗？
-
-A: 是的，自定义事件作为 `MonitorToolCustomEvent` 被完整持久化到 WAL，Resume 时可恢复。
-
-### Q: 装饰器方式稳定吗？
-
-A: 装饰器方式是实验性功能，需要 `experimentalDecorators`。推荐使用 `defineTool()` 函数式 API。
-
-### Q: 如何混用新旧 API？
-
-A: 可以自由混用，Agent 接受任何 `ToolInstance`：
-
-```typescript
-const agent = await Agent.create({
-  tools: [
-    oldStyleTool,           // 旧方式
-    defineTool({ ... }),    // 新方式
-    new FsRead(),           // 内置工具
-  ]
-});
-```
-
-## 最佳实践
-
-1. **优先使用 `defineTool()`**：最简洁、类型安全
-2. **合理设置 `attributes`**：帮助权限系统正确判断
-3. **善用自定义事件**：提供工具执行的可观测性
-4. **复杂 Schema 仍用 `input_schema`**：需要 `pattern`、`minLength` 等高级约束时
-5. **批量定义用 `defineTools()`**：保持代码整洁
-
-## 参考
-
-- 示例代码：`examples/tooling/simplified-tools.ts`
-- 类型定义：`src/tools/define.ts`
-- 事件系统：`docs/events.md`
diff --git a/docs/skills.md b/docs/skills.md
deleted file mode 100644
index dae73b3..0000000
--- a/docs/skills.md
+++ /dev/null
@@ -1,457 +0,0 @@
-# Skills 系统
-
-KODE SDK 提供了一个完整的Skills系统，支持模块化、可重用的能力单元，使Agent能够动态加载和执行特定技能。
-
-## 核心特性
-
-- **热重载 (Hot Reload)**：Skills代码修改后自动重新加载，无需重启Agent
-- **元数据注入**：自动将技能描述注入到系统提示，提升AI理解
-- **沙箱隔离**：每个技能有独立的文件系统空间
-- **操作队列**：确保技能更新的原子性
-- **白名单机制**：支持选择性加载特定技能
-- **中文友好**：支持中文名称和描述
-
-## Skills 目录结构
-
-```
-skills/
-├── skill-name/              # 技能目录
-│   ├── SKILL.md            # 技能定义（必需）
-│   ├── metadata.json       # 技能元数据（可选）
-│   ├── references/         # 参考资料
-│   ├── scripts/            # 可执行脚本
-│   └── assets/             # 静态资源
-└── .archived/              # 已归档技能
-    └── archived-skill/
-```
-
-### SKILL.md 格式
-
-```markdown
-<!-- skill: skill-name -->
-<!-- version: 1.0.0 -->
-<!-- author: Your Name -->
-
-# 技能名称
-
-简短描述技能的功能。
-
-## 使用场景
-
-- 场景1
-- 场景2
-
-## 使用指南
-
-使用此技能的详细说明...
-```
-
-### metadata.json 格式
-
-```json
-{
-  "name": "skill-name",
-  "description": "技能描述",
-  "version": "1.0.0",
-  "author": "作者",
-  "baseDir": "/path/to/skill"
-}
-```
-
-## Agent运行时使用 (SkillsManager)
-
-SkillsManager是Agent在运行时使用的技能管理器，支持热更新和动态加载。
-
-### 基本用法
-
-```typescript
-import { SkillsManager } from '@kode/sdk';
-
-// 创建Skills管理器
-const skillsManager = new SkillsManager(
-  './skills',  // 技能目录路径
-  ['skill1', 'skill2']  // 可选：白名单
-);
-
-// 扫描所有技能
-const skills = await skillsManager.getSkillsMetadata();
-console.log(`Found ${skills.length} skills`);
-
-// 加载特定技能内容
-const skillContent = await skillsManager.loadSkillContent('skill-name');
-if (skillContent) {
-  console.log('Metadata:', skillContent.metadata);
-  console.log('Content:', skillContent.content);
-  console.log('References:', skillContent.references);
-  console.log('Scripts:', skillContent.scripts);
-}
-```
-
-### 热更新机制
-
-SkillsManager每次调用都会重新扫描文件系统，确保数据最新：
-
-```typescript
-// 每次调用都会重新扫描
-await skillsManager.getSkillsMetadata();  // 扫描1
-// ... 修改文件 ...
-await skillsManager.getSkillsMetadata();  // 扫描2，获取最新数据
-```
-
-### 白名单过滤
-
-通过白名单机制，可以限制Agent只加载特定技能：
-
-```typescript
-// 只加载白名单中的技能
-const manager = new SkillsManager('./skills', ['allowed-skill-1', 'allowed-skill-2']);
-
-const skills = await manager.getSkillsMetadata();
-// 只返回白名单中的技能
-```
-
-### 环境变量配置
-
-可以通过环境变量配置技能目录：
-
-```bash
-export SKILLS_DIR=/path/to/skills
-```
-
-```typescript
-// 自动使用 SKILLS_DIR 环境变量
-const manager = new SkillsManager();
-```
-
-## 技能管理 (SkillsManagementManager)
-
-SkillsManagementManager提供技能的CRUD操作，包括创建、更新、归档等。
-
-### 基本操作
-
-```typescript
-import { SkillsManagementManager } from '@kode/sdk';
-
-const manager = new SkillsManagementManager('./skills');
-
-// 列出所有在线技能
-const skills = await manager.listSkills();
-
-// 获取技能详细信息
-const skillDetail = await manager.getSkillInfo('skill-name');
-
-// 创建新技能
-await manager.createSkill('new-skill', {
-  description: '新技能描述',
-  content: '# 新技能\n\n详细内容...'
-});
-
-// 更新技能
-await manager.updateSkill('skill-name', {
-  content: '# 更新后的内容'
-});
-
-// 删除技能（移动到归档）
-await manager.deleteSkill('skill-name');
-
-// 列出已归档技能
-const archived = await manager.listArchivedSkills();
-
-// 恢复已归档技能
-await manager.restoreSkill('archived-skill');
-```
-
-### 文件操作
-
-```typescript
-// 获取技能文件树
-const files = await manager.getSkillFileTree('skill-name');
-
-// 读取技能文件
-const content = await manager.readSkillFile('skill-name', 'SKILL.md');
-
-// 写入技能文件
-await manager.writeSkillFile('skill-name', 'references/doc.md', '内容');
-
-// 删除技能文件
-await manager.deleteSkillFile('skill-name', 'references/old-doc.md');
-
-// 上传文件到技能目录
-await manager.uploadSkillFile('skill-name', 'assets/image.png', fileBuffer);
-```
-
-## Agent集成
-
-### 注册Skills工具
-
-```typescript
-import { Agent, builtin } from '@kode/sdk';
-import { createSkillsTool } from '@kode/sdk';
-import { SkillsManager } from '@kode/sdk';
-
-const deps = createDependencies();
-
-// 创建Skills管理器
-const skillsManager = new SkillsManager('./skills');
-
-// 注册Skills工具
-const skillsTool = createSkillsTool(skillsManager);
-deps.toolRegistry.register('skills', () => skillsTool);
-
-// 创建Agent
-const agent = await Agent.create({
-  templateId: 'my-agent',
-  tools: ['skills', 'fs_read', 'fs_write'],
-}, deps);
-```
-
-### 元数据自动注入
-
-Agent会自动收集所有工具的prompt并注入到系统提示中：
-
-```typescript
-// Agent内部自动执行
-const toolPrompts = this.tools
-  .map(tool => tool.descriptor.prompt)
-  .filter(Boolean);
-
-const manual = `\n\n### Tools Manual\n\n${toolPrompts.join('\n\n')}`;
-this.template.systemPrompt += manual;
-
-// 触发 Monitor 事件
-this.events.emitMonitor({
-  channel: 'monitor',
-  type: 'tool_manual_updated',
-  tools: this.tools.map(t => t.descriptor.name),
-  timestamp: Date.now()
-});
-```
-
-### Skills工具使用
-
-Agent可以通过`skills`工具动态加载技能：
-
-```
-用户: 我需要处理代码格式化
-
-Agent: 我来加载代码格式化技能。
-
-[调用 skills 工具，action=load, skill_name=code-formatter]
-
-Agent: 已加载代码格式化技能。现在我可以帮你格式化代码了。
-```
-
-## 沙箱文件隔离
-
-每个技能的文件操作都在独立的沙箱环境中进行：
-
-```typescript
-// SandboxFileManager 确保技能文件隔离
-const sandboxFileManager = new SandboxFileManager(sandboxFactory);
-
-// 技能文件读写都在沙箱中
-await sandboxFileManager.readFile(skillPath, 'SKILL.md');
-await sandboxFileManager.writeFile(skillPath, 'references/doc.md', content);
-```
-
-## 操作队列
-
-SkillsManagementManager使用操作队列确保更新的原子性：
-
-```typescript
-// OperationQueue 确保操作顺序
-await operationQueue.enqueue({
-  type: OperationType.Update,
-  skillName,
-  data: updateData,
-});
-
-// 同一技能的更新会排队执行
-await operationQueue.enqueue({
-  type: OperationType.Update,
-  skillName,
-  data: anotherUpdateData,  // 等待上一个更新完成
-});
-```
-
-## 最佳实践
-
-### 1. 技能设计原则
-
-- **单一职责**：每个技能只做一件事
-- **可组合**：技能之间可以互相调用
-- **文档完整**：提供清晰的使用说明
-- **版本控制**：使用语义化版本号
-
-### 2. 热更新利用
-
-```typescript
-// 定期刷新技能列表
-setInterval(async () => {
-  const skills = await skillsManager.getSkillsMetadata();
-  console.log('Skills updated:', skills.length);
-}, 60000);  // 每分钟刷新
-```
-
-### 3. 白名单管理
-
-```typescript
-// 生产环境使用白名单
-const allowedSkills = ['safe-skill-1', 'safe-skill-2'];
-const manager = new SkillsManager('./skills', allowedSkills);
-
-// 开发环境加载所有技能
-const devManager = new SkillsManager('./skills');
-```
-
-### 4. 错误处理
-
-```typescript
-// 处理技能加载失败
-const content = await skillsManager.loadSkillContent('skill-name');
-if (!content) {
-  console.error('Skill not found or failed to load');
-  // 降级处理
-}
-```
-
-## 高级特性
-
-### 1. 技能归档
-
-不再使用的技能可以归档而不是删除：
-
-```typescript
-// 归档技能
-await manager.deleteSkill('old-skill');  // 移动到 .archived/
-
-// 查看已归档技能
-const archived = await manager.listArchivedSkills();
-
-// 恢复技能
-await manager.restoreSkill('old-skill');  // 从 .archived/ 移回
-```
-
-### 2. 技能依赖
-
-技能可以引用其他技能的资源：
-
-```markdown
-# Main Skill
-
-参见 [参考文档](references/shared-knowledge.md) 了解更多。
-
-使用脚本：
-- `scripts/setup.sh` - 环境配置
-- `scripts/deploy.sh` - 部署脚本
-```
-
-### 3. 动态技能加载
-
-Agent可以根据需要动态加载技能：
-
-```
-用户: 我需要分析日志
-
-Agent: [检测到需要日志分析技能]
-[调用 skills 工具加载 log-analyzer]
-[使用日志分析技能处理任务]
-```
-
-## 监控与调试
-
-### Monitor事件
-
-```typescript
-// 监听技能工具调用
-agent.on('tool_executed', (event) => {
-  if (event.call.name === 'skills') {
-    console.log('Skill loaded:', event.call.input.skill_name);
-  }
-});
-
-// 监听工具说明书更新
-agent.on('tool_manual_updated', (event) => {
-  console.log('Tools manual updated:', event.tools);
-});
-```
-
-### 日志输出
-
-Skills系统会输出详细的日志信息：
-
-```
-[SkillsManager] Initialized with skills directory: ./skills
-[SkillsManager] Scanned 5 skill(s)
-[SkillsManagementManager] Created skill: new-skill
-[SandboxFileManager] Reading file: skills/new-skill/SKILL.md
-```
-
-## 性能优化
-
-### 1. 缓存策略
-
-```typescript
-// 首次扫描会缓存元数据
-await skillsManager.getSkillsMetadata();  // 扫描文件系统
-
-// 后续调用使用缓存（如果文件未变化）
-await skillsManager.getSkillsMetadata();  // 快速返回
-```
-
-### 2. 按需加载
-
-```typescript
-// 只加载需要的技能
-const content = await skillsManager.loadSkillContent('specific-skill');
-// 而不是加载所有技能
-```
-
-### 3. 并发扫描
-
-```typescript
-// 并发扫描多个技能目录
-const [skills1, skills2] = await Promise.all([
-  manager1.getSkillsMetadata(),
-  manager2.getSkillsMetadata(),
-]);
-```
-
-## 故障排除
-
-### 常见问题
-
-1. **技能未找到**
-   - 检查技能目录路径是否正确
-   - 确认SKILL.md文件存在
-   - 检查白名单配置
-
-2. **热更新不生效**
-   - 确认文件保存成功
-   - 检查文件系统权限
-   - 查看日志确认扫描时间
-
-3. **沙箱权限错误**
-   - 检查沙箱工作目录配置
-   - 确认文件路径在允许范围内
-   - 查看沙箱日志
-
-### 调试技巧
-
-```typescript
-// 启用详细日志
-process.env.LOG_LEVEL = 'debug';
-
-// 检查技能元数据
-console.log(JSON.stringify(skills, null, 2));
-
-// 验证技能目录
-const fs = require('fs');
-console.log(fs.readdirSync('./skills'));
-```
-
-## 相关文档
-
-- [`tools.md`](./tools.md)：工具系统详解
-- [`api.md`](./api.md)：API参考
-- [`events.md`](./events.md)：事件系统
diff --git a/docs/tools.md b/docs/tools.md
deleted file mode 100644
index a7a913b..0000000
--- a/docs/tools.md
+++ /dev/null
@@ -1,290 +0,0 @@
-# 工具体系与治理
-
-Kode SDK 提供一组内置工具，并允许通过 ToolRegistry 注册自定义/MCP 工具。所有工具在设计上遵循以下规范：
-
-- **Prompt 说明书**：每个工具都提供详细 Prompt，引导模型安全使用。
-- **结构化返回**：工具返回 JSON 结构（例如 `fs_read` 返回 `content/offset/limit/truncated`）。
-- **FilePool 集成**：文件类工具自动调用 FilePool 校验与记录，防止新鲜度冲突。
-- **工具状态审计**：ToolCallRecord 记录审批、耗时、错误信息，Resume 时完整恢复。
-
-> **🆕 v2.7 新增**：简化的工具定义 API，自动生成 Schema、简化 metadata、支持自定义事件。
-> 详见 [simplified-tools.md](./simplified-tools.md) 或 `examples/tooling/simplified-tools.ts`
-
-## 文件系统工具
-
-| 名称 | 说明 | 返回字段 |
-| --- | --- | --- |
-| `fs_read` | 读取文件片段 | `{ path, offset, limit, truncated, content }` |
-| `fs_write` | 创建/覆写文件，写前校验新鲜度 | `{ ok, path, bytes, length }` |
-| `fs_edit` | 精确替换文本（支持 `replace_all`） | `{ ok, path, replacements, length }` |
-| `fs_glob` | 使用 glob 模式匹配文件 | `{ ok, pattern, cwd, matches, truncated }` |
-| `fs_grep` | 在文件/通配符集合中搜索文本/正则 | `{ ok, pattern, path, matches[] }` |
-| `fs_multi_edit` | 批量编辑多个文件 | `{ ok, results[{ path, status, replacements, message? }] }` |
-
-### FilePool 说明
-
-- `recordRead` / `recordEdit`：记录最近读取/写入时间，用于冲突检测。
-- `validateWrite`：写入前校验文件是否在此 Agent 读取后被外部修改。
-- `watchFiles`：自动监听文件变更，触发 `monitor.file_changed` 事件，并通过 `agent.remind` 提醒。
-
-## Bash 工具
-
-- `bash_run`：支持前台/后台执行，可通过 Hook 或 `permission.mode='approval'` 控制敏感命令。
-- `bash_logs`：读取后台命令输出。
-- `bash_kill`：终止后台命令。
-
-### 推荐策略
-
-```typescript
-const agent = await Agent.create({
-  templateId: 'secure-runner',
-  modelConfig: { provider: 'anthropic', model: 'claude-3-5-sonnet-20241022', apiKey: process.env.ANTHROPIC_API_KEY! },
-  sandbox: { kind: 'local', workDir: './workspace', enforceBoundary: true },
-  overrides: {
-    hooks: {
-      preToolUse(call) {
-        if (call.name === 'bash_run' && !/^git /.test(call.args.cmd)) {
-          return { decision: 'ask', meta: { reason: '非白名单命令' } };
-        }
-        return undefined;
-      },
-    },
-  },
-}, deps);
-```
-
-## Todo 工具
-
-- `todo_read`：返回 Todo 列表。
-- `todo_write`：写入完整 Todo 列表（校验 ID 唯一、进行中 <=1）。结合 `TodoManager` 自动提醒与事件。
-
-## Task（子代理）
-
-- `task_run`：根据模板池派发子 Agent，支持 `subagent_type`、`context`、`model_name` 参数。
-- 模板可以通过 `runtime.subagents` 限制深度与可选模板。
-
-## Skills 工具
-
-- `skills`：加载特定技能的详细内容（包含指令、references、scripts、assets）。
-  - **参数**：
-    - `action`: 操作类型（目前仅支持 `load`）
-    - `skill_name`: 技能名称（当action=load时必需）
-  - **返回**：
-    ```typescript
-    {
-      ok: true,
-      data: {
-        name: string,           // 技能名称
-        description: string,    // 技能描述
-        content: string,        // SKILL.md 内容
-        base_dir: string,       // 技能基础目录
-        references: string[],   // 参考文档列表
-        scripts: string[],      // 可用脚本列表
-        assets: string[]        // 资源文件列表
-      }
-    }
-    ```
-
-### Skills 系统特性
-
-- **热重载 (Hot Reload)**：Skills代码修改后自动重新加载，无需重启Agent
-- **元数据注入**：自动将技能描述注入到系统提示，提升AI理解
-- **沙箱隔离**：每个技能有独立的文件系统空间
-- **白名单机制**：支持选择性加载特定技能
-- **中文友好**：支持中文名称和描述
-
-### 使用示例
-
-```typescript
-import { createSkillsTool } from '@kode/sdk';
-import { SkillsManager } from '@kode/sdk';
-
-// 创建Skills管理器
-const skillsManager = new SkillsManager('./skills');
-
-// 注册Skills工具
-const skillsTool = createSkillsTool(skillsManager);
-deps.toolRegistry.register('skills', () => skillsTool);
-```
-
-Agent使用示例：
-```
-用户: 我需要代码格式化帮助
-
-Agent: [调用 skills 工具，action=load, skill_name=code-formatter]
-已加载代码格式化技能，现在我可以帮你格式化代码了。
-```
-
-更多详情请参考 [`docs/skills.md`](./skills.md)。
-
-## 工具注册与 resume 支持
-
-```typescript
-const registry = new ToolRegistry();
-
-registry.register('greet', () => ({
-  name: 'greet',
-  description: '向指定对象问好',
-  input_schema: { type: 'object', properties: { name: { type: 'string' } }, required: ['name'] },
-  prompt: 'Use this tool to greet teammates by name.',
-  async exec(args) {
-    return `Hello, ${args.name}!`;
-  },
-  toDescriptor() {
-    return { source: 'registered', name: 'greet', registryId: 'greet' };
-  },
-}));
-```
-
-Resume 会根据 `ToolDescriptor` 自动重建工具；若工具依赖外部资源，请在 `exec` 里自行注入。
-
-## MCP / 自定义驱动
-
-- 可以在 ToolRegistry 注册 MCP loader，将 `registryId` 指向 MCP 服务。
-- 配合 TemplateRegistry 指定哪些模板启用 MCP 工具，Resume 时即可正常恢复。
-
-更多示例可参考 `examples/tooling/fs-playground.ts`、`examples/u5-sub-agent.ts`。
-
-## 工具超时与 AbortSignal 最佳实践
-
-### 超时配置
-
-默认工具执行超时为 **60 秒**，可通过 Agent 配置自定义：
-
-```typescript
-const agent = await Agent.create({
-  // ...
-  metadata: {
-    toolTimeoutMs: 120000, // 2 分钟
-  }
-});
-```
-
-### 必须处理 AbortSignal
-
-所有自定义工具的 `exec()` 方法都会收到 `context.signal`，**必须**在耗时操作中检查：
-
-```typescript
-export class MyLongRunningTool implements ToolInstance {
-  async exec(args: any, context: ToolContext) {
-    // ✅ 正确：在长时间操作前检查
-    if (context.signal?.aborted) {
-      throw new Error('Operation aborted');
-    }
-
-    // ✅ 正确：将 signal 传递给底层 API
-    const response = await fetch(url, { signal: context.signal });
-
-    // ✅ 正确：在循环中定期检查
-    for (const item of items) {
-      if (context.signal?.aborted) {
-        throw new Error('Operation aborted');
-      }
-      await processItem(item);
-    }
-
-    return result;
-  }
-}
-```
-
-**错误示例**（不响应 signal）：
-
-```typescript
-// ❌ 错误：忽略 signal，超时后仍会继续执行
-export class BadTool implements ToolInstance {
-  async exec(args: any, context: ToolContext) {
-    // 长时间操作，完全不检查 signal
-    for (let i = 0; i < 10000; i++) {
-      await heavyComputation();
-    }
-    return result;
-  }
-}
-```
-
-### 超时事件监听
-
-可以监听工具超时事件以进行告警或降级处理：
-
-```typescript
-agent.on('error', (event) => {
-  if (event.phase === 'tool' && event.message.includes('aborted')) {
-    console.log('Tool execution timed out:', event.detail);
-    // 发送告警、记录日志等
-  }
-});
-```
-
-### CPU 密集型任务的超时处理
-
-对于纯计算任务（无 I/O），需要主动在循环中检查：
-
-```typescript
-export class CPUIntensiveTool implements ToolInstance {
-  async exec(args: any, context: ToolContext) {
-    const result = [];
-
-    for (let i = 0; i < args.iterations; i++) {
-      // 每 100 次迭代检查一次 signal
-      if (i % 100 === 0 && context.signal?.aborted) {
-        throw new Error('Computation aborted');
-      }
-
-      result.push(this.compute(i));
-    }
-
-    return result;
-  }
-}
-```
-
-### 超时恢复策略
-
-工具超时后，Agent 会：
-1. 发送 `abort` 信号
-2. 标记工具调用为 `FAILED` 状态
-3. 生成 `tool_result` 包含超时信息
-4. 继续下一轮 `runStep`
-
-Resume 时，超时的工具调用会被自动封口（Auto-Seal），不会重新执行。
-
-### 测试工具超时
-
-```typescript
-// tests/tool-timeout.test.ts
-import { Agent } from '@kode/sdk';
-
-const slowTool = {
-  name: 'slow_tool',
-  description: 'A tool that takes too long',
-  input_schema: { type: 'object', properties: {} },
-  async exec(args: any, context: ToolContext) {
-    // 模拟长时间操作
-    await new Promise(resolve => setTimeout(resolve, 180000)); // 3 分钟
-    return 'done';
-  }
-};
-
-// 设置短超时时间进行测试
-const agent = await Agent.create({
-  // ...
-  metadata: { toolTimeoutMs: 5000 }, // 5 秒超时
-});
-
-agent.registerTool(slowTool);
-
-// 预期：工具会在 5 秒后超时
-const result = await agent.chat('Please use slow_tool');
-console.assert(result.status === 'ok'); // Agent 继续运行
-```
-
-### 工具超时最佳实践总结
-
-1. ✅ **始终检查 `context.signal?.aborted`**
-2. ✅ **将 signal 传递给支持 AbortSignal 的 API（fetch、axios 等）**
-3. ✅ **在循环中定期检查（建议每 100 次迭代或每秒）**
-4. ✅ **设置合理的超时时间（根据工具复杂度）**
-5. ✅ **监听超时事件进行告警**
-6. ❌ **不要忽略 signal**
-7. ❌ **不要依赖工具内部的超时机制（应由 Agent 统一管理）**
diff --git a/docs/zh-CN/advanced/architecture.md b/docs/zh-CN/advanced/architecture.md
new file mode 100644
index 0000000..5758d40
--- /dev/null
+++ b/docs/zh-CN/advanced/architecture.md
@@ -0,0 +1,359 @@
+# 架构指南
+
+> 深入了解 KODE SDK 的心智模型、设计决策和运行时特性。
+
+---
+
+## 目录
+
+1. [心智模型](#心智模型)
+2. [核心架构](#核心架构)
+3. [运行时特性](#运行时特性)
+4. [决策框架](#决策框架)
+
+---
+
+## 心智模型
+
+### KODE SDK 是什么
+
+```
+将 KODE SDK 类比为：
+
++------------------+     +------------------+     +------------------+
+|       V8         |     |     SQLite       |     |    KODE SDK      |
+|  JS 运行时       |     |  数据库引擎      |     |  Agent 运行时    |
++------------------+     +------------------+     +------------------+
+        |                        |                        |
+        v                        v                        v
++------------------+     +------------------+     +------------------+
+|    Express.js    |     |     Prisma       |     |   你的应用       |
+|  Web 框架        |     |       ORM        |     | (CLI/桌面/Web)   |
++------------------+     +------------------+     +------------------+
+        |                        |                        |
+        v                        v                        v
++------------------+     +------------------+     +------------------+
+|      Vercel      |     |   PlanetScale    |     |   你的基础设施   |
+|  云平台          |     |  云数据库        |     | (K8s/EC2/本地)   |
++------------------+     +------------------+     +------------------+
+```
+
+**KODE SDK 是引擎，不是平台。**
+
+它提供：
+- Agent 生命周期管理（创建、运行、暂停、恢复、分叉）
+- 状态持久化（通过可插拔的 Store 接口）
+- 工具执行与权限治理
+- 事件流用于可观测性
+
+它不提供：
+- HTTP 路由或 API 框架
+- 用户认证或授权
+- 多租户或资源隔离
+- 水平扩展或负载均衡
+
+### 单一职责
+
+```
+                     KODE SDK 的职责
+                           |
+                           v
+    +----------------------------------------------+
+    |                                              |
+    |   "保持这个 Agent 运行，从崩溃中恢复，      |
+    |    让它可以分叉，并通过事件告诉我发生了什么" |
+    |                                              |
+    +----------------------------------------------+
+                           |
+                           v
+                     你的应用的职责
+                           |
+                           v
+    +----------------------------------------------+
+    |                                              |
+    |   "处理用户，路由请求，管理权限，           |
+    |    扩展基础设施，与我的系统集成"            |
+    |                                              |
+    +----------------------------------------------+
+```
+
+---
+
+## 核心架构
+
+### 组件概览
+
+```
++------------------------------------------------------------------+
+|                         Agent 实例                                |
++------------------------------------------------------------------+
+|                                                                   |
+|  +------------------+  +------------------+  +------------------+ |
+|  |  MessageQueue    |  | ContextManager   |  |   ToolRunner     | |
+|  |  (用户输入)      |  | (Token 管理)     |  | (并行执行)       | |
+|  +--------+---------+  +--------+---------+  +--------+---------+ |
+|           |                     |                     |           |
+|           +---------------------+---------------------+           |
+|                                 |                                 |
+|                    +------------v------------+                    |
+|                    |    BreakpointManager    |                    |
+|                    |   (8 阶段状态跟踪)      |                    |
+|                    +------------+------------+                    |
+|                                 |                                 |
+|  +------------------+  +--------v---------+  +------------------+ |
+|  | PermissionManager|  |     EventBus     |  |   TodoManager    | |
+|  | (审批流程)       |  | (三通道事件)     |  | (任务跟踪)       | |
+|  +------------------+  +------------------+  +------------------+ |
+|                                                                   |
++----------------------------------+--------------------------------+
+                                   |
+                    +--------------+--------------+
+                    |              |              |
+           +--------v------+ +----v----+ +-------v-------+
+           |     Store     | | Sandbox | | ModelProvider |
+           | (持久化)      | | (执行)  | | (LLM 调用)    |
+           +---------------+ +---------+ +---------------+
+```
+
+### 关键类和接口
+
+| 组件 | 类 | 描述 |
+|-----------|-------|-------------|
+| Agent | `Agent` | 管理对话和工具执行的核心协调器 |
+| Pool | `AgentPool` | 管理多个 Agent 实例的生命周期 |
+| Room | `Room` | 多 Agent 消息传递和协作 |
+| Store | `Store`, `JSONStore`, `SqliteStore`, `PostgresStore` | 持久化后端 |
+| Sandbox | `LocalSandbox` | 隔离的执行环境 |
+| Provider | `AnthropicProvider`, `OpenAIProvider`, `GeminiProvider` | LLM API 适配器 |
+| Events | `EventBus` | 三通道事件分发 |
+| Hooks | `HookManager` | 执行前/后拦截 |
+
+### 数据流
+
+```
+用户消息
+     |
+     v
++----+----+     +-----------+     +------------+
+| Message |---->|  Context  |---->|   Model    |
+|  Queue  |     |  Manager  |     |  Provider  |
++---------+     +-----------+     +-----+------+
+                                        |
+                              +---------+---------+
+                              |                   |
+                         文本响应            工具调用
+                              |                   |
+                              v                   v
+                    +---------+------+    +------+-------+
+                    |    EventBus    |    |  ToolRunner  |
+                    | (text_chunk)   |    | (并行执行)   |
+                    +----------------+    +------+-------+
+                                                 |
+                              +------------------+------------------+
+                              |                  |                  |
+                         权限检查            执行              结果处理
+                              |           (Sandbox)                |
+                              v                  v                  v
+                    +--------------------+  +---------+  +------------------+
+                    | PermissionManager  |  | Sandbox |  |    EventBus      |
+                    | (Control 通道)     |  | (exec)  |  | (tool:end)       |
+                    +--------------------+  +---------+  +------------------+
+```
+
+### 断点状态机
+
+`BreakpointManager` 跟踪 8 个状态用于崩溃恢复：
+
+```
+Agent 执行流程：
+
+  READY -> PRE_MODEL -> STREAMING_MODEL -> TOOL_PENDING -> AWAITING_APPROVAL
+    |         |              |                 |                |
+    +-------- WAL 保护状态 --+-- 等待审批 -----+
+                                                                |
+                        +---------------------------------------+
+                        |
+                        v
+            PRE_TOOL -> TOOL_EXECUTING -> POST_TOOL -> READY
+                |             |              |
+                +---- 工具执行 -------------+
+
+崩溃恢复：从最后一个安全断点恢复，自动封印未完成的工具调用
+```
+
+**BreakpointState 值**（来自 `src/core/types.ts:69`）：
+- `READY` - Agent 空闲，等待输入
+- `PRE_MODEL` - 即将调用 LLM
+- `STREAMING_MODEL` - 接收 LLM 响应
+- `TOOL_PENDING` - 工具调用已解析，等待执行
+- `AWAITING_APPROVAL` - 等待权限决策
+- `PRE_TOOL` - 即将执行工具
+- `TOOL_EXECUTING` - 工具运行中
+- `POST_TOOL` - 工具完成，处理结果
+
+### 三通道事件系统
+
+```
++-------------+     +-------------+     +---------------+
+|  Progress   |     |   Control   |     |    Monitor    |
++-------------+     +-------------+     +---------------+
+| text_chunk  |     | permission  |     | state_changed |
+| tool:start  |     | _required   |     | token_usage   |
+| tool:end    |     | permission  |     | tool_executed |
+| done        |     | _decided    |     | error         |
++-------------+     +-------------+     +---------------+
+      |                   |                   |
+      v                   v                   v
+   你的 UI         审批服务            可观测性
+```
+
+**使用模式：**
+
+```typescript
+// Progress: 实时流式输出用于 UI
+for await (const envelope of agent.subscribe(['progress'])) {
+  if (envelope.event.type === 'text_chunk') {
+    process.stdout.write(envelope.event.delta);
+  }
+}
+
+// Control: 审批工作流
+agent.on('permission_required', async (event) => {
+  await event.respond('allow');
+});
+
+// Monitor: 可观测性
+agent.on('token_usage', (event) => {
+  console.log('Tokens:', event.totalTokens);
+});
+```
+
+---
+
+## 运行时特性
+
+### 内存模型
+
+```
+Agent 内存占用（典型值）：
+
++---------------------------+
+|     Agent 实例            |
++---------------------------+
+| messages[]: 10KB - 2MB    |  <-- 随对话增长
+| toolRecords: 1KB - 100KB  |  <-- 随工具使用增长
+| eventTimeline: 5KB - 500KB|  <-- 缓存最近事件
+| mediaCache: 0 - 10MB      |  <-- 如果涉及图片/文件
+| baseObjects: ~50KB        |  <-- 固定开销
++---------------------------+
+
+典型范围：每个 Agent 100KB - 5MB
+AgentPool (50 个 Agent)：5MB - 250MB
+```
+
+### I/O 模式
+
+```
+每个 Agent 步骤：
+
++-------------------+     +-------------------+     +-------------------+
+| persistMessages() |     | persistToolRecs() |     | emitEvents()      |
+| ~20-50ms (SSD)    |     | ~5-10ms           |     | ~1-5ms (缓冲)     |
++-------------------+     +-------------------+     +-------------------+
+
+每步总计：30-70ms I/O 开销
+
+大规模（100 个并发 Agent）：
+- JSONStore 存在顺序瓶颈
+- 需要 SqliteStore/PostgresStore 支持并行写入
+```
+
+---
+
+## 决策框架
+
+### 何时使用 KODE SDK
+
+```
++------------------+
+|    决策树        |
++------------------+
+         |
+         v
++------------------+
+| 单用户/          |----是---> 直接使用（CLI/桌面）
+| 本地机器？       |
++--------+---------+
+         | 否
+         v
++----------------------+
+| < 100 并发用户？     |----是---> 单服务器（AgentPool）
++--------+-------------+
+         | 否
+         v
++----------------------+
+| 可以运行长进程？     |----是---> Worker 微服务模式
++--------+-------------+
+         | 否
+         v
++----------------------+
+| 只能 Serverless？    |----是---> 混合模式（API + Workers）
++----------------------+
+```
+
+### 平台兼容性矩阵
+
+| 平台 | 兼容性 | 备注 |
+|----------|------------|-------|
+| Node.js | 100% | 主要目标 |
+| Bun | 95% | 需要少量调整 |
+| Deno | 80% | 需要权限标志 |
+| Electron | 90% | 在主进程中使用 |
+| VSCode Extension | 85% | 需要 workspace.fs 集成 |
+| Vercel Functions | 20% | 仅 API 层，不适合 Agent |
+| Cloudflare Workers | 5% | 不兼容 |
+| 浏览器 | 10% | 无 fs/process，非常受限 |
+
+### Store 选择指南
+
+| Store | 使用场景 | 吞吐量 | 扩展性 |
+|-------|----------|------------|---------|
+| `JSONStore` | 开发、CLI | 低 | 单节点 |
+| `SqliteStore` | 桌面应用、小型服务器 | 中 | 单节点 |
+| `PostgresStore` | 生产环境、多节点 | 高 | 多节点 |
+
+**Store 接口层级**（来自 `src/infra/store/types.ts`）：
+
+```
+Store（基础）
+  └── QueryableStore（添加查询方法）
+        └── ExtendedStore（添加健康检查、指标、分布式锁）
+```
+
+---
+
+## 总结
+
+### 核心原则
+
+1. **KODE SDK 是运行时内核** - 它管理 Agent 生命周期，而不是应用基础设施
+
+2. **Agent 是有状态的** - 它们需要持久化存储和长时间运行的进程
+
+3. **通过架构扩展** - 使用 Worker 模式进行大规模部署
+
+4. **Store 可插拔** - 为你的基础设施实现自定义 Store
+
+### 快速参考
+
+| 场景 | 模式 | Store | 规模 |
+|----------|---------|-------|-------|
+| CLI 工具 | 单进程 | JSONStore | 1 用户 |
+| 桌面应用 | 单进程 | SqliteStore | 1 用户 |
+| 内部工具 | 单服务器 | SqliteStore/PostgresStore | ~100 用户 |
+| SaaS 产品 | Worker 微服务 | PostgresStore | 10K+ 用户 |
+| Serverless 应用 | 混合 | 外部 DB | 视情况 |
+
+---
+
+*另请参阅：[生产部署](./production.md) | [数据库指南](../guides/database.md)*
diff --git a/docs/zh-CN/advanced/multi-agent.md b/docs/zh-CN/advanced/multi-agent.md
new file mode 100644
index 0000000..57cd1c0
--- /dev/null
+++ b/docs/zh-CN/advanced/multi-agent.md
@@ -0,0 +1,452 @@
+# 多 Agent 系统
+
+本指南介绍如何使用 KODE SDK 的协调原语构建多 Agent 系统：AgentPool、Room 和 task_run。
+
+---
+
+## 概览
+
+| 组件 | 用途 |
+|------|------|
+| `AgentPool` | 使用共享依赖管理多个 Agent 实例 |
+| `Room` | 使用 @提及 协调 Agent 之间的通信 |
+| `task_run` | 将子任务委派给专业 Agent |
+
+---
+
+## AgentPool
+
+管理多个 Agent 实例的生命周期操作。
+
+### 基本用法
+
+```typescript
+import { AgentPool } from '@shareai-lab/kode-sdk';
+
+const pool = new AgentPool({
+  dependencies: deps,
+  maxAgents: 50,  // 默认：50
+});
+
+// 创建 agents
+const agent1 = await pool.create('agent-1', {
+  templateId: 'researcher',
+  modelConfig: { provider: 'anthropic', apiKey: process.env.ANTHROPIC_API_KEY! },
+});
+
+const agent2 = await pool.create('agent-2', {
+  templateId: 'coder',
+  modelConfig: { provider: 'anthropic', apiKey: process.env.ANTHROPIC_API_KEY! },
+});
+
+// 通过 ID 获取 agent
+const agent = pool.get('agent-1');
+
+// 列出所有 agents
+const agentIds = pool.list(); // ['agent-1', 'agent-2']
+
+// 使用前缀过滤
+const researchers = pool.list({ prefix: 'researcher-' });
+```
+
+### AgentPool API
+
+```typescript
+class AgentPool {
+  constructor(opts: AgentPoolOptions);
+
+  // 创建新 agent
+  async create(agentId: string, config: AgentConfig): Promise<Agent>;
+
+  // 获取已有 agent
+  get(agentId: string): Agent | undefined;
+
+  // 列出 agent ID
+  list(opts?: { prefix?: string }): string[];
+
+  // 获取 agent 状态
+  async status(agentId: string): Promise<AgentStatus | undefined>;
+
+  // 分叉 agent
+  async fork(agentId: string, snapshotSel?: SnapshotId | { at?: string }): Promise<Agent>;
+
+  // 从存储恢复
+  async resume(agentId: string, config: AgentConfig, opts?: {
+    autoRun?: boolean;
+    strategy?: 'crash' | 'manual';
+  }): Promise<Agent>;
+
+  // 销毁 agent
+  async destroy(agentId: string): Promise<void>;
+}
+```
+
+---
+
+## Room
+
+使用广播和定向消息协调 Agent 之间的通信。
+
+### 基本用法
+
+```typescript
+import { AgentPool, Room } from '@shareai-lab/kode-sdk';
+
+const pool = new AgentPool({ dependencies: deps });
+const room = new Room(pool);
+
+// 创建并加入 agents
+const alice = await pool.create('alice', config);
+const bob = await pool.create('bob', config);
+const charlie = await pool.create('charlie', config);
+
+room.join('Alice', 'alice');
+room.join('Bob', 'bob');
+room.join('Charlie', 'charlie');
+
+// 广播给所有人（发送者除外）
+await room.say('Alice', 'Hello everyone!');
+// Bob 和 Charlie 收到："[from:Alice] Hello everyone!"
+
+// 使用 @提及 定向消息
+await room.say('Alice', '@Bob What do you think about this?');
+// 只有 Bob 收到："[from:Alice] @Bob What do you think about this?"
+
+// 多个提及
+await room.say('Alice', '@Bob @Charlie Please review.');
+// Bob 和 Charlie 都收到消息
+
+// 离开房间
+room.leave('Charlie');
+
+// 获取当前成员
+const members = room.getMembers();
+// [{ name: 'Alice', agentId: 'alice' }, { name: 'Bob', agentId: 'bob' }]
+```
+
+### Room API
+
+```typescript
+class Room {
+  constructor(pool: AgentPool);
+
+  // 加入房间
+  join(name: string, agentId: string): void;
+
+  // 离开房间
+  leave(name: string): void;
+
+  // 发送消息（广播或定向）
+  async say(from: string, text: string): Promise<void>;
+
+  // 获取成员
+  getMembers(): RoomMember[];
+}
+
+interface RoomMember {
+  name: string;
+  agentId: string;
+}
+```
+
+---
+
+## task_run 工具
+
+将任务委派给专业子 Agent。
+
+### 设置
+
+```typescript
+import { createTaskRunTool, AgentTemplate } from '@shareai-lab/kode-sdk';
+
+// 定义可用模板
+const templates: AgentTemplate[] = [
+  {
+    id: 'researcher',
+    whenToUse: '研究和收集信息',
+    tools: ['fs_read', 'fs_glob', 'fs_grep'],
+  },
+  {
+    id: 'coder',
+    whenToUse: '编写和修改代码',
+    tools: ['fs_read', 'fs_write', 'fs_edit', 'bash_run'],
+  },
+  {
+    id: 'reviewer',
+    whenToUse: '审查代码并提供反馈',
+    tools: ['fs_read', 'fs_glob', 'fs_grep'],
+  },
+];
+
+// 创建 task_run 工具
+const taskRunTool = createTaskRunTool(templates);
+
+// 注册
+deps.toolRegistry.register('task_run', () => taskRunTool);
+```
+
+### task_run 工作原理
+
+当 Agent 调用 `task_run` 时：
+
+1. Agent 指定 `description`、`agentTemplateId`、`prompt` 和可选的 `context`
+2. SDK 使用指定模板创建子 Agent
+3. 子 Agent 处理任务
+4. 结果返回给父 Agent
+
+**工具参数：**
+
+```typescript
+interface TaskRunParams {
+  description: string;      // 简短任务描述（3-5 词）
+  prompt: string;           // 详细指令
+  agentTemplateId: string;  // 使用的模板 ID
+  context?: string;         // 额外上下文
+}
+```
+
+**工具结果：**
+
+```typescript
+interface TaskRunResult {
+  status: 'ok' | 'paused';
+  template: string;
+  text?: string;
+  permissionIds?: string[];
+}
+```
+
+### 子 Agent 配置
+
+在模板中配置子 agent 行为：
+
+```typescript
+const template: AgentTemplateDefinition = {
+  id: 'coordinator',
+  systemPrompt: '你负责协调专家之间的任务...',
+  tools: ['task_run', 'fs_read'],
+  runtime: {
+    subagents: {
+      depth: 2,           // 最大嵌套深度
+      templates: ['researcher', 'coder'],  // 允许的模板
+      inheritConfig: true,
+      overrides: {
+        permission: { mode: 'auto' },
+      },
+    },
+  },
+};
+```
+
+---
+
+## 模式
+
+### 协调者模式
+
+一个 Agent 协调多个专家。
+
+```typescript
+// 协调者模板
+const coordinatorTemplate: AgentTemplateDefinition = {
+  id: 'coordinator',
+  systemPrompt: `你是项目协调者。分解复杂任务并委派给专家：
+- 使用 'researcher' 进行信息收集
+- 使用 'coder' 进行实现
+- 使用 'reviewer' 进行代码审查
+
+协调工作并综合结果。`,
+  tools: ['task_run', 'fs_read', 'fs_write'],
+  runtime: {
+    subagents: {
+      depth: 1,
+      templates: ['researcher', 'coder', 'reviewer'],
+    },
+  },
+};
+
+// 使用
+const coordinator = await Agent.create({
+  templateId: 'coordinator',
+  ...
+}, deps);
+
+await coordinator.send('实现一个用户认证系统');
+// 协调者将委派：
+// 1. researcher: "研究认证最佳实践"
+// 2. coder: "实现认证模块"
+// 3. reviewer: "审查认证实现"
+```
+
+### 流水线模式
+
+按顺序链接 Agent。
+
+```typescript
+async function pipeline(input: string) {
+  // 步骤 1：研究
+  const researcher = await pool.create('researcher-1', {
+    templateId: 'researcher',
+    ...
+  });
+  const research = await researcher.send(`研究：${input}`);
+
+  // 步骤 2：实现
+  const coder = await pool.create('coder-1', {
+    templateId: 'coder',
+    ...
+  });
+  const implementation = await coder.send(`
+    基于此研究：
+    ${research}
+
+    实现解决方案。
+  `);
+
+  // 步骤 3：审查
+  const reviewer = await pool.create('reviewer-1', {
+    templateId: 'reviewer',
+    ...
+  });
+  const review = await reviewer.send(`
+    审查此实现：
+    ${implementation}
+  `);
+
+  return { research, implementation, review };
+}
+```
+
+### 辩论模式
+
+多个 Agent 讨论一个话题。
+
+```typescript
+const room = new Room(pool);
+
+// 创建辩论者
+const alice = await pool.create('alice', {
+  templateId: 'debater',
+  metadata: { position: 'pro' },
+  ...
+});
+const bob = await pool.create('bob', {
+  templateId: 'debater',
+  metadata: { position: 'con' },
+  ...
+});
+
+room.join('Alice', 'alice');
+room.join('Bob', 'bob');
+
+// 开始辩论
+await room.say('Moderator', '话题：我们应该使用微服务吗？');
+
+// 继续辩论轮次
+for (let round = 0; round < 3; round++) {
+  await room.say('Alice', `@Bob [第 ${round + 1} 轮] 这是我的论点...`);
+  await room.say('Bob', `@Alice [第 ${round + 1} 轮] 我的反驳...`);
+}
+```
+
+---
+
+## 最佳实践
+
+### 1. 限制深度
+
+防止无限子 agent 链：
+
+```typescript
+runtime: {
+  subagents: {
+    depth: 2,  // 最大嵌套深度
+  },
+}
+```
+
+### 2. 清晰的模板
+
+每个模板应有清晰的职责：
+
+```typescript
+const templates: AgentTemplate[] = [
+  {
+    id: 'data-analyst',
+    whenToUse: '分析数据模式并生成洞察',
+    tools: ['fs_read', 'fs_glob'],
+  },
+  // 避免职责重叠
+];
+```
+
+### 3. 资源管理
+
+完成后清理 agents：
+
+```typescript
+try {
+  const agent = await pool.create('temp-agent', config);
+  const result = await agent.send(message);
+  return result;
+} finally {
+  await pool.destroy('temp-agent');
+}
+```
+
+### 4. 权限继承
+
+考虑子 agent 的权限设置：
+
+```typescript
+runtime: {
+  subagents: {
+    inheritConfig: true,
+    overrides: {
+      permission: { mode: 'approval' },  // 需要审批
+    },
+  },
+}
+```
+
+---
+
+## 监控多 Agent 系统
+
+### 追踪子 Agent 事件
+
+```typescript
+agent.on('tool_executed', (event) => {
+  if (event.call.name === 'task_run') {
+    console.log('子 agent 完成:', {
+      template: event.call.result?.template,
+      status: event.call.result?.status,
+    });
+  }
+});
+```
+
+### 聚合指标
+
+```typescript
+const allAgentIds = pool.list();
+const stats = await Promise.all(
+  allAgentIds.map(async (id) => {
+    const status = await pool.status(id);
+    return { id, ...status };
+  })
+);
+
+console.log('总 agents:', stats.length);
+console.log('工作中:', stats.filter(s => s.state === 'WORKING').length);
+console.log('已暂停:', stats.filter(s => s.state === 'PAUSED').length);
+```
+
+---
+
+## 参考资料
+
+- [API 参考](../reference/api.md)
+- [事件指南](../guides/events.md)
+- [生产部署](./production.md)
diff --git a/docs/zh-CN/advanced/production.md b/docs/zh-CN/advanced/production.md
new file mode 100644
index 0000000..429b230
--- /dev/null
+++ b/docs/zh-CN/advanced/production.md
@@ -0,0 +1,456 @@
+# 生产部署
+
+本指南介绍 KODE SDK 的生产配置、监控和最佳实践。
+
+---
+
+## 数据库选择
+
+### 开发 vs 生产
+
+| Store | 使用场景 | 特性 |
+|-------|----------|------|
+| `JSONStore` | 开发环境、单机 | 简单文件存储 |
+| `SqliteStore` | 开发环境、中等规模 | QueryableStore + ExtendedStore |
+| `PostgresStore` | 生产环境、多 Worker | 完整 ExtendedStore、分布式锁 |
+
+### PostgreSQL 配置
+
+```typescript
+import { createExtendedStore } from '@shareai-lab/kode-sdk';
+
+const store = await createExtendedStore({
+  type: 'postgres',
+  connection: {
+    host: process.env.PG_HOST!,
+    port: 5432,
+    database: 'kode_agents',
+    user: process.env.PG_USER!,
+    password: process.env.PG_PASSWORD!,
+    ssl: { rejectUnauthorized: true },
+
+    // 连接池设置
+    max: 20,                       // 连接池大小
+    idleTimeoutMillis: 30000,      // 空闲连接超时
+    connectionTimeoutMillis: 5000, // 连接超时
+  },
+  fileStoreBaseDir: '/data/kode-files',
+});
+```
+
+---
+
+## 健康检查
+
+ExtendedStore 提供内置健康检查能力。
+
+### 健康检查 API
+
+```typescript
+const health = await store.healthCheck();
+
+// 响应：
+// {
+//   healthy: true,
+//   database: { connected: true, latencyMs: 5 },
+//   fileSystem: { writable: true },
+//   checkedAt: 1706000000000
+// }
+```
+
+### HTTP 健康端点
+
+```typescript
+import express from 'express';
+
+const app = express();
+
+app.get('/health', async (req, res) => {
+  const status = await store.healthCheck();
+  res.status(status.healthy ? 200 : 503).json(status);
+});
+
+// Kubernetes 就绪探针
+app.get('/ready', async (req, res) => {
+  const status = await store.healthCheck();
+  res.status(status.healthy ? 200 : 503).send();
+});
+```
+
+### 数据一致性检查
+
+```typescript
+const consistency = await store.checkConsistency(agentId);
+
+if (!consistency.consistent) {
+  console.error('一致性问题:', consistency.issues);
+}
+```
+
+---
+
+## 指标与监控
+
+### Store 指标
+
+```typescript
+const metrics = await store.getMetrics();
+
+// {
+//   operations: { saves: 1234, loads: 5678, queries: 910, deletes: 11 },
+//   performance: { avgLatencyMs: 15.5, maxLatencyMs: 250, minLatencyMs: 2 },
+//   storage: { totalAgents: 100, totalMessages: 50000, dbSizeBytes: 104857600 },
+//   collectedAt: 1706000000000
+// }
+```
+
+### Prometheus 集成
+
+```typescript
+import { register, Gauge, Histogram } from 'prom-client';
+
+const agentCount = new Gauge({ name: 'kode_agents_total', help: 'Agent 总数' });
+const toolLatency = new Histogram({
+  name: 'kode_tool_duration_seconds',
+  help: '工具执行耗时',
+  buckets: [0.1, 0.5, 1, 2, 5, 10],
+});
+
+agent.on('tool_executed', (event) => {
+  if (event.call.durationMs) {
+    toolLatency.observe(event.call.durationMs / 1000);
+  }
+});
+
+app.get('/metrics', async (req, res) => {
+  res.set('Content-Type', register.contentType);
+  res.send(await register.metrics());
+});
+```
+
+---
+
+## 重试策略
+
+### 内置重试配置
+
+```typescript
+import { withRetry, DEFAULT_RETRY_CONFIG } from '@shareai-lab/kode-sdk/provider';
+
+// 默认配置: { maxRetries: 3, baseDelayMs: 1000, maxDelayMs: 60000, jitterFactor: 0.2 }
+
+const result = await withRetry(
+  () => callExternalAPI(),
+  { maxRetries: 5, baseDelayMs: 500, provider: 'myservice' },
+  (error, attempt, delay) => console.log(`重试 ${attempt} 等待 ${delay}ms`)
+);
+```
+
+### 可重试错误
+
+| 错误类型 | 可重试 | 说明 |
+|----------|--------|------|
+| `RateLimitError` | 是 | 遵循 `retry-after` 头 |
+| `TimeoutError` | 是 | 请求超时 |
+| `ServiceUnavailableError` | 是 | 5xx 服务器错误 |
+| `AuthenticationError` | 否 | 无效凭证 |
+| `QuotaExceededError` | 否 | 账单限额 |
+
+---
+
+## 分布式锁
+
+### 使用 Agent 锁
+
+```typescript
+const release = await store.acquireAgentLock(agentId, 30000);
+
+try {
+  const agent = await Agent.resumeFromStore(agentId, deps);
+  await agent.send('处理此任务');
+} finally {
+  await release();
+}
+```
+
+- **SQLite**: 内存锁（仅单进程有效）
+- **PostgreSQL**: 数据库级咨询锁（多 Worker 安全）
+
+---
+
+## 优雅关闭
+
+```typescript
+async function gracefulShutdown() {
+  // 1. 停止接受新请求
+  server.close();
+
+  // 2. 中断运行中的 Agent
+  for (const agentId of pool.list()) {
+    const agent = pool.get(agentId);
+    if (agent) await agent.interrupt();
+  }
+
+  // 3. 关闭数据库连接
+  await store.close();
+
+  process.exit(0);
+}
+
+process.on('SIGTERM', gracefulShutdown);
+process.on('SIGINT', gracefulShutdown);
+```
+
+---
+
+## 日志与成本管理
+
+### Logger 接口
+
+```typescript
+const config: DebugConfig = {
+  verbose: false,
+  logTokenUsage: true,
+  logCache: true,
+  logRetries: true,
+  redactSensitive: true,
+};
+```
+
+### 成本限制
+
+```typescript
+let sessionCost = 0;
+const COST_LIMIT = 10.0;
+
+agent.on('token_usage', (event) => {
+  const cost = (event.inputTokens * 0.003 + event.outputTokens * 0.015) / 1000;
+  sessionCost += cost;
+
+  if (sessionCost > COST_LIMIT) {
+    agent.interrupt();
+  }
+});
+```
+
+---
+
+## 安全最佳实践
+
+```typescript
+// 权限配置
+const agent = await Agent.create({
+  permission: {
+    mode: 'approval',
+    requireApprovalTools: ['bash_run', 'fs_write'],
+    allowTools: ['fs_read', 'fs_glob'],
+  },
+}, deps);
+
+// 沙箱边界
+const sandbox = new LocalSandbox({
+  workDir: '/app/workspace',
+  enforceBoundary: true,
+  allowPaths: ['/app/workspace', '/tmp'],
+});
+```
+
+---
+
+## 部署清单
+
+- [ ] 生产环境使用 PostgreSQL
+- [ ] 配置连接池
+- [ ] 设置健康检查端点
+- [ ] 配置指标收集
+- [ ] 实现优雅关闭
+- [ ] 使用环境变量存储密钥
+- [ ] 启用数据库 SSL 连接
+- [ ] 设置沙箱边界
+
+---
+
+## 部署模式
+
+### 决策树
+
+```
++------------------+
+|    决策树        |
++------------------+
+         |
+         v
++----------------------+
+| 单用户/本地机器？    |----是---> 模式 1: 单进程
++--------+-------------+
+         | 否
+         v
++----------------------+
+| < 100 并发用户？     |----是---> 模式 2: 单服务器
++--------+-------------+
+         | 否
+         v
++----------------------+
+| 可以运行长进程？     |----是---> 模式 3: Worker 微服务
++--------+-------------+
+         | 否
+         v
++----------------------+
+| 只能 Serverless？    |----是---> 模式 4: 混合架构
++--------+-------------+
+```
+
+### 模式 1: 单进程（CLI/桌面）
+
+**适用于：** CLI 工具、Electron 应用、VSCode 扩展
+
+```typescript
+import { Agent, AgentPool, JSONStore } from '@shareai-lab/kode-sdk';
+import * as path from 'path';
+import * as os from 'os';
+
+const store = new JSONStore(path.join(os.homedir(), '.my-agent'));
+const pool = new AgentPool({ dependencies: { store, ... } });
+
+// 恢复或创建
+const agent = pool.get('main') ?? await pool.create('main', { templateId: 'cli-assistant' });
+
+// 交互循环
+for await (const line of readline) {
+  await agent.send(line);
+  for await (const env of agent.subscribe(['progress'])) {
+    if (env.event.type === 'text_chunk') process.stdout.write(env.event.delta);
+    if (env.event.type === 'done') break;
+  }
+}
+```
+
+### 模式 2: 单服务器
+
+**适用于：** 内部工具、小型团队（<100 并发用户）
+
+```typescript
+import { Hono } from 'hono';
+import { AgentPool, SqliteStore } from '@shareai-lab/kode-sdk';
+
+const app = new Hono();
+const store = new SqliteStore('./agents.db', './data');
+const pool = new AgentPool({ dependencies: { store, ... }, maxAgents: 50 });
+
+app.post('/api/agents/:id/message', async (c) => {
+  const { id } = c.req.param();
+  const { message } = await c.req.json();
+
+  let agent = pool.get(id);
+  if (!agent) {
+    const exists = await store.exists(id);
+    agent = exists
+      ? await pool.resume(id, getConfig())
+      : await pool.create(id, getConfig());
+  }
+
+  return c.json(await agent.complete(message));
+});
+```
+
+### 模式 3: Worker 微服务
+
+**适用于：** 生产 SaaS、1000+ 并发用户
+
+```
+┌─────────────────────────────────────────────────────────────────┐
+│                        负载均衡器                               │
+└────────────────────────────┬────────────────────────────────────┘
+                             │
+         ┌───────────────────┼───────────────────┐
+         │                   │                   │
+┌────────▼────────┐ ┌────────▼────────┐ ┌────────▼────────┐
+│   API 服务器 1  │ │   API 服务器 2  │ │   API 服务器 N  │
+│   (无状态)      │ │   (无状态)      │ │   (无状态)      │
+└────────┬────────┘ └────────┬────────┘ └────────┬────────┘
+         │                   │                   │
+         └───────────────────┼───────────────────┘
+                             │
+                    ┌────────▼────────┐
+                    │   任务队列      │
+                    │   (BullMQ)      │
+                    └────────┬────────┘
+                             │
+         ┌───────────────────┼───────────────────┐
+         │                   │                   │
+┌────────▼────────┐ ┌────────▼────────┐ ┌────────▼────────┐
+│   Worker 1      │ │   Worker 2      │ │   Worker N      │
+│  AgentPool(50)  │ │  AgentPool(50)  │ │  AgentPool(50)  │
+└─────────────────┘ └─────────────────┘ └─────────────────┘
+```
+
+详细实现请参阅英文文档 [Production Deployment](../../en/advanced/production.md)。
+
+---
+
+## 扩展策略
+
+### 策略 1: 垂直扩展
+
+**适用于：** 每进程 ~100 个并发 Agent
+
+```typescript
+const pool = new AgentPool({
+  maxAgents: 100,  // 从默认 50 增加
+  dependencies: { store: new SqliteStore('./agents.db', './data'), ... },
+});
+```
+
+### 策略 2: Agent 分片
+
+**适用于：** 100-1000 个并发 Agent
+
+使用一致性哈希将 Agent 路由到特定 Worker。
+
+### 策略 3: LRU 调度
+
+**适用于：** 1000+ 总 Agent，但同时活跃数量有限
+
+```typescript
+class AgentScheduler {
+  private active: LRUCache<string, Agent>;
+
+  async get(agentId: string): Promise<Agent> {
+    if (this.active.has(agentId)) {
+      return this.active.get(agentId)!;
+    }
+    // 从存储恢复
+    const agent = await Agent.resume(agentId, config, deps);
+    this.active.set(agentId, agent);  // LRU 淘汰处理休眠
+    return agent;
+  }
+}
+```
+
+---
+
+## 容量规划
+
+| 部署方式 | Agent/进程 | 内存/Agent | 并发用户 |
+|----------|------------|------------|----------|
+| CLI | 1 | 10-100 MB | 1 |
+| 桌面应用 | 5-10 | 50-200 MB | 1 |
+| 单服务器 | 50 | 2-10 MB | 50-100 |
+| Worker 集群 (10 节点) | 500 | 2-10 MB | 500-1000 |
+| Worker 集群 (50 节点) | 2500 | 2-10 MB | 2500-5000 |
+
+**每个 Agent 内存估算：**
+- 基础对象：~50 KB
+- 消息历史 (100 条消息)：~500 KB - 5 MB
+- 工具调用记录：~50-500 KB
+- 事件时间线：~100 KB - 1 MB
+- **典型总计：1-10 MB**
+
+---
+
+## 参考资料
+
+- [架构指南](./architecture.md)
+- [数据库指南](../guides/database.md)
+- [错误处理](../guides/error-handling.md)
+- [事件指南](../guides/events.md)
diff --git a/docs/zh-CN/examples/playbooks.md b/docs/zh-CN/examples/playbooks.md
new file mode 100644
index 0000000..ee0025d
--- /dev/null
+++ b/docs/zh-CN/examples/playbooks.md
@@ -0,0 +1,188 @@
+# Playbooks：典型场景脚本
+
+本页从实践角度拆解最常见的使用场景，给出心智地图、关键 API、示例文件以及注意事项。示例代码位于 `examples/` 目录，可直接 `ts-node` 运行。
+
+---
+
+## 1. 协作收件箱（事件驱动 UI）
+
+- **目标**：持续运行的单 Agent，UI 通过 Progress 流展示文本/工具进度，Monitor 做轻量告警。
+- **示例**：`examples/01-agent-inbox.ts`
+- **运行**：`npm run example:agent-inbox`
+- **关键步骤**：
+  1. `Agent.create` + `agent.subscribe(['progress'])` 推送文本增量。
+  2. 使用 `bookmark` / `cursor` 做断点续播。
+  3. `agent.on('tool_executed')` / `agent.on('error')` 将治理事件写入日志或监控。
+  4. `agent.todoManager` 自动提醒，UI 可展示 Todo 面板。
+- **注意事项**：
+  - 建议将 Progress 流通过 SSE/WebSocket 暴露给前端。
+  - 若 UI 需要思考过程，可在模板 metadata 中开启 `exposeThinking`。
+
+```typescript
+// 基本事件订阅
+for await (const envelope of agent.subscribe(['progress'])) {
+  if (envelope.event.type === 'text_chunk') {
+    process.stdout.write(envelope.event.delta);
+  }
+  if (envelope.event.type === 'done') {
+    break;
+  }
+}
+```
+
+---
+
+## 2. 工具审批 & 治理
+
+- **目标**：对敏感工具（如 `bash_run`、数据库写入）进行审批；结合 Hook 实现策略守卫。
+- **示例**：`examples/02-approval-control.ts`
+- **运行**：`npm run example:approval`
+- **关键步骤**：
+  1. 模板中配置 `permission`（如 `mode: 'approval'` + `requireApprovalTools`）。
+  2. 订阅 `agent.on('permission_required')`，将审批任务推送到业务系统。
+  3. 审批 UI 调用 `agent.decide(id, 'allow' | 'deny', note)`。
+  4. 结合 `HookManager` 的 `preToolUse` / `postToolUse` 做更细粒度的策略（如路径守卫、结果截断）。
+- **注意事项**：
+  - 审批过程中 Agent 处于 `AWAITING_APPROVAL` 断点，恢复后 SDK 自动处理。
+  - 拒绝工具会自动写入 `tool_result`，UI 可以提示用户重试策略。
+
+```typescript
+// 权限配置
+const template = {
+  id: 'secure-runner',
+  permission: {
+    mode: 'approval',
+    requireApprovalTools: ['bash_run'],
+  },
+  // Hook 做额外守卫
+  hooks: {
+    preToolUse(call) {
+      if (call.name === 'bash_run' && /rm -rf|sudo/.test(call.args.cmd)) {
+        return { decision: 'deny', reason: '命令命中禁用关键字' };
+      }
+    },
+  },
+};
+
+// 审批处理
+agent.on('permission_required', async (event) => {
+  const decision = await getApprovalFromAdmin(event.call);
+  await event.respond(decision, { note: '管理员批准' });
+});
+```
+
+---
+
+## 3. 多 Agent 小组协作
+
+- **目标**：一个 Planner 调度多个 Specialist，所有 Agent 长驻且可随时分叉。
+- **示例**：`examples/03-room-collab.ts`
+- **运行**：`npm run example:room`
+- **关键步骤**：
+  1. 使用单例 `AgentPool` 管理 Agent 生命周期（`create` / `resume` / `fork`）。
+  2. 通过 `Room` 实现广播/点名消息；消息带 `[from:name]` 模式进行协作。
+  3. 子 Agent 通过 `task_run` 工具或显式 `pool.create` 拉起。
+  4. 利用 `agent.snapshot()` + `agent.fork()` 在 Safe-Fork-Point 分叉出新任务。
+- **注意事项**：
+  - 模板的 `runtime.subagents` 可限制可分派模板与深度。
+  - 需要持久化 lineage（SDK 默认写入 metadata），便于审计和回放。
+  - 如果不监控外部文件，可在模板中关闭 `watchFiles`。
+
+```typescript
+const pool = new AgentPool({ dependencies: deps, maxAgents: 10 });
+const room = new Room(pool);
+
+const planner = await pool.create('agt-planner', { templateId: 'planner', ... });
+const dev = await pool.create('agt-dev', { templateId: 'executor', ... });
+
+room.join('planner', planner.agentId);
+room.join('dev', dev.agentId);
+
+// 广播到 Room
+await room.say('planner', 'Hi team, let us audit the repository. @dev 请负责执行。');
+await room.say('dev', '收到，开始处理。');
+```
+
+---
+
+## 4. 调度与系统提醒
+
+- **目标**：让 Agent 在长时运行中定期执行任务、监控文件变更、发送系统提醒。
+- **示例**：`examples/04-scheduler-watch.ts`
+- **运行**：`npm run example:scheduler`
+- **关键步骤**：
+  1. `const scheduler = agent.schedule(); scheduler.everySteps(N, callback)` 注册步数触发。
+  2. 使用 `agent.remind(text, options)` 发送系统级提醒（走 Monitor，不污染 Progress）。
+  3. FilePool 默认会监听写入文件，`monitor.file_changed` 触发后可结合 `scheduler.notifyExternalTrigger` 做自动响应。
+  4. Todo 结合 `remindIntervalSteps` 做定期回顾。
+- **注意事项**：
+  - 调度任务应保持幂等，遵循事件驱动思想。
+  - 对高频任务可结合外部 Cron，在触发时调用 `scheduler.notifyExternalTrigger`。
+
+---
+
+## 5. 数据库持久化
+
+- **目标**：将 Agent 状态持久化到 SQLite 或 PostgreSQL，用于生产部署。
+- **示例**：`examples/db-sqlite.ts`、`examples/db-postgres.ts`
+- **关键步骤**：
+  1. 使用 `createExtendedStore` 工厂函数创建 Store。
+  2. 将 Store 传递给 Agent 依赖。
+  3. 使用 Query API 进行会话管理和分析。
+
+```typescript
+import { createExtendedStore, SqliteStore } from '@shareai-lab/kode-sdk';
+
+// 创建 SQLite Store
+const store = (await createExtendedStore({
+  type: 'sqlite',
+  dbPath: './data/agents.db',
+  fileStoreBaseDir: './data/files',
+})) as SqliteStore;
+
+// 与 Agent 一起使用
+const agent = await Agent.create(
+  { templateId: 'my-agent', ... },
+  { store, ... }
+);
+
+// Query API
+const sessions = await store.querySessions({ limit: 10 });
+const stats = await store.aggregateStats(agent.agentId);
+```
+
+---
+
+## 6. 组合拳：审批 + 协作 + 调度
+
+- **场景**：代码审查机器人，Planner 负责拆分任务并分配到不同 Specialist，工具操作需审批，定时提醒确保 SLA。
+- **实现路径**：
+  1. **Planner 模板**：具备 `task_run` 工具与调度 Hook，每日早晨自动巡检。
+  2. **Specialist 模板**：聚焦 `fs_*` + `todo_*` 工具，审批策略只对 `bash_run` 开启。
+  3. **统一审批服务**：监听全部 Agent 的 Control 事件，打通企业 IM / 审批流。
+  4. **Room 协作**：Planner 将任务以 `@executor` 形式投递，执行完成再 @planner 汇报。
+  5. **SLA 监控**：Monitor 事件进入 observability pipeline（Prometheus / ELK / Datadog）。
+  6. **调度提醒**：使用 Scheduler 定期检查待办或外部系统信号。
+
+---
+
+## 常用组合 API 速查
+
+| 分类 | API |
+|------|-----|
+| 事件 | `agent.subscribe(['progress'])`、`agent.on('error', handler)`、`agent.on('tool_executed', handler)` |
+| 审批 | `permission_required` → `event.respond()` / `agent.decide()` |
+| 多 Agent | `new AgentPool({ dependencies, maxAgents })`、`const room = new Room(pool)` |
+| 分叉 | `const snapshot = await agent.snapshot(); const fork = await agent.fork(snapshot);` |
+| 调度 | `agent.schedule().everySteps(10, ...)`、`scheduler.notifyExternalTrigger(...)` |
+| Todo | `agent.getTodos()` / `agent.setTodos()` / `todo_read` / `todo_write` |
+| 数据库 | `createExtendedStore({ type: 'sqlite', ... })`、`store.querySessions()` |
+
+---
+
+## 参考资料
+
+- [快速上手](../getting-started/quickstart.md)
+- [事件指南](../guides/events.md)
+- [多 Agent 系统](../advanced/multi-agent.md)
+- [数据库指南](../guides/database.md)
diff --git a/docs/zh-CN/getting-started/concepts.md b/docs/zh-CN/getting-started/concepts.md
new file mode 100644
index 0000000..d9c6a7c
--- /dev/null
+++ b/docs/zh-CN/getting-started/concepts.md
@@ -0,0 +1,285 @@
+# 核心概念
+
+## 什么是 KODE SDK？
+
+KODE SDK 是一个 **Agent 运行时内核** — 它管理 AI Agent 的完整生命周期，包括状态持久化、崩溃恢复和工具执行。
+
+可以把它类比为 **JavaScript 的 V8**，但是针对 AI Agent：
+
+```
++------------------+     +------------------+
+|       V8         |     |    KODE SDK      |
+|  JS 运行时       |     |  Agent 运行时    |
++------------------+     +------------------+
+        |                        |
+        v                        v
++------------------+     +------------------+
+|    Express.js    |     |   你的应用       |
+|  Web 框架        |     | (CLI/桌面/Web)   |
++------------------+     +------------------+
+```
+
+**KODE SDK 提供：**
+- Agent 生命周期管理（创建、运行、暂停、恢复、分叉）
+- 带崩溃恢复的状态持久化（WAL 保护）
+- 带权限治理的工具执行
+- 三通道事件系统用于可观测性
+
+**KODE SDK 不提供：**
+- HTTP 路由或 API 框架
+- 用户认证或授权
+- 多租户或资源隔离
+- 水平扩展（这部分由你来架构）
+
+> 深入了解架构，请参阅 [架构指南](../advanced/architecture.md)
+
+---
+
+## Agent
+
+Agent 是管理与 LLM 模型对话的核心实体。
+
+```typescript
+// 设置依赖
+const templates = new AgentTemplateRegistry();
+templates.register({
+  id: 'assistant',
+  systemPrompt: '你是一个乐于助人的助手。',
+  tools: ['fs_read', 'fs_write'],  // 可选：工具名称
+});
+
+// 创建 Agent
+const agent = await Agent.create(
+  { templateId: 'assistant' },
+  { store, templateRegistry: templates, toolRegistry: tools, sandboxFactory, modelFactory }
+);
+```
+
+核心能力：
+- **发送消息**：`agent.send('...')` 或 `agent.send(contentBlocks)`
+- **订阅事件**：`agent.subscribe(['progress'])` 或 `agent.on('event_type', callback)`
+- **从存储恢复**：`Agent.resume(agentId, config, deps)` 或 `Agent.resumeFromStore(agentId, deps)`
+- **分叉对话**：`agent.fork()`
+
+## 三通道事件系统
+
+KODE SDK 将事件分为三个通道，实现清晰的架构分离：
+
+### Progress 通道
+
+用于 UI 展示的实时流数据。使用 `subscribe()`：
+
+```typescript
+for await (const envelope of agent.subscribe(['progress'])) {
+  switch (envelope.event.type) {
+    case 'text_chunk':      // 模型输出的文本片段
+      process.stdout.write(envelope.event.delta);
+      break;
+    case 'tool:start':      // 工具开始执行
+    case 'tool:end':        // 工具执行完成
+    case 'done':            // 响应完成
+  }
+}
+```
+
+### Control 通道
+
+需要人工或系统决策的审批请求。使用 `on()`：
+
+```typescript
+agent.on('permission_required', async (event) => {
+  // 批准或拒绝工具执行
+  await event.respond('allow');  // 或 event.respond('deny', { note: '原因' })
+});
+```
+
+### Monitor 通道
+
+审计和可观测性事件。使用 `on()`：
+
+```typescript
+agent.on('tool_executed', (event) => {
+  console.log('工具:', event.call.name, '耗时:', event.call.durationMs);
+});
+
+agent.on('token_usage', (event) => {
+  console.log('Token:', event.totalTokens);
+});
+
+agent.on('error', (event) => {
+  console.error('错误:', event.message);
+});
+```
+
+## 工具 (Tools)
+
+工具扩展 Agent 的能力。KODE 提供内置工具并支持自定义工具。
+
+### 内置工具
+
+| 类别 | 工具 |
+|------|------|
+| 文件系统 | `fs_read`, `fs_write`, `fs_edit`, `fs_glob`, `fs_grep` |
+| Shell | `bash_run`, `bash_logs`, `bash_kill` |
+| 任务管理 | `todo_read`, `todo_write` |
+
+### 自定义工具
+
+```typescript
+import { defineTool } from '@shareai-lab/kode-sdk';
+
+const weatherTool = defineTool({
+  name: 'get_weather',
+  description: '获取城市天气',
+  params: {
+    city: { type: 'string', description: '城市名称' }
+  },
+  attributes: { readonly: true },
+  async exec(args, ctx) {
+    return { temp: 22, condition: '晴天' };
+  }
+});
+```
+
+## Store（存储）
+
+Agent 状态的持久化后端。
+
+| Store 类型 | 使用场景 |
+|------------|----------|
+| `JSONStore` | 开发环境、单实例 |
+| `SqliteStore` | 生产环境、单机部署 |
+| `PostgresStore` | 生产环境、多实例部署 |
+
+```typescript
+// JSONStore（默认）
+const store = new JSONStore('./.kode');
+
+// SQLite
+const store = new SqliteStore('./agents.db', './data');
+
+// PostgreSQL
+const store = new PostgresStore(connectionConfig, './data');
+
+// 工厂函数
+const store = await createExtendedStore({
+  type: 'sqlite',
+  dbPath: './agents.db',
+  fileStoreBaseDir: './data'
+});
+```
+
+## Sandbox（沙箱）
+
+工具执行的隔离环境。
+
+```typescript
+const agent = await Agent.create({
+  // ...
+  sandbox: {
+    kind: 'local',
+    workDir: './workspace',
+    enforceBoundary: true,  // 限制文件访问在 workDir 内
+  }
+}, deps);
+```
+
+## Provider（模型提供者）
+
+模型 Provider 适配器。KODE 内部使用 Anthropic 风格的消息格式。
+
+```typescript
+// Anthropic
+const provider = new AnthropicProvider(apiKey, modelId);
+
+// OpenAI
+const provider = new OpenAIProvider(apiKey, modelId);
+
+// Gemini
+const provider = new GeminiProvider(apiKey, modelId);
+```
+
+## Resume（恢复）与 Fork（分叉）
+
+### Resume（恢复）
+
+从崩溃恢复或稍后继续：
+
+```typescript
+// 恢复已有 Agent
+const agent = await Agent.resume(agentId, config, deps);
+
+// 恢复或创建新的
+const exists = await store.exists(agentId);
+const agent = exists
+  ? await Agent.resume(agentId, config, deps)
+  : await Agent.create(config, deps);
+```
+
+### Fork（分叉）
+
+在检查点处分叉对话：
+
+```typescript
+// 创建快照
+const snapshotId = await agent.snapshot('before-risky-operation');
+
+// 从快照分叉
+const forkedAgent = await agent.fork(snapshotId);
+
+// 每个 Agent 独立继续
+await forkedAgent.send('尝试另一种方案');
+```
+
+## 多模态内容
+
+KODE SDK 支持多模态输入，包括图像、PDF 文件和音频：
+
+```typescript
+import { ContentBlock } from '@shareai-lab/kode-sdk';
+
+// 发送带图片的文本
+const content: ContentBlock[] = [
+  { type: 'text', text: '这张图片里有什么？' },
+  { type: 'image', base64: imageBase64, mime_type: 'image/png' }
+];
+
+await agent.send(content);
+```
+
+配置多模态行为：
+
+```typescript
+const agent = await Agent.create({
+  templateId: 'vision-assistant',
+  multimodalContinuation: 'history',      // 在历史中保留多模态内容
+  multimodalRetention: { keepRecent: 3 }, // 保留最近 3 条多模态消息
+}, deps);
+```
+
+## 扩展思维
+
+开启扩展思维后，模型可以在回答前对复杂问题进行"思考"：
+
+```typescript
+const agent = await Agent.create({
+  templateId: 'reasoning-assistant',
+  exposeThinking: true,   // 向 Progress 通道发出思维事件
+  retainThinking: true,   // 在消息历史中持久化思维
+}, deps);
+
+// 监听思维事件
+for await (const envelope of agent.subscribe(['progress'])) {
+  if (envelope.event.type === 'think_chunk') {
+    console.log('[思考]', envelope.event.delta);
+  }
+}
+```
+
+## 下一步
+
+- [事件系统](../guides/events.md) - 深入了解事件系统
+- [工具系统](../guides/tools.md) - 内置和自定义工具
+- [数据库存储](../guides/database.md) - 持久化选项
+- [多模态指南](../guides/multimodal.md) - 图像、PDF 和音频
+- [扩展思维指南](../guides/thinking.md) - 扩展思维和推理
diff --git a/docs/zh-CN/getting-started/installation.md b/docs/zh-CN/getting-started/installation.md
new file mode 100644
index 0000000..f1da0ad
--- /dev/null
+++ b/docs/zh-CN/getting-started/installation.md
@@ -0,0 +1,111 @@
+# 安装配置
+
+## 环境要求
+
+- **Node.js**: >= 18.0.0
+- **npm** 或 **pnpm** 或 **yarn**
+
+## 安装
+
+```bash
+npm install @shareai-lab/kode-sdk
+```
+
+或使用 pnpm/yarn：
+
+```bash
+pnpm add @shareai-lab/kode-sdk
+yarn add @shareai-lab/kode-sdk
+```
+
+## 环境变量配置
+
+KODE SDK 使用环境变量配置 API 密钥和模型。
+
+### Anthropic（默认）
+
+<!-- tabs:start -->
+#### **Linux / macOS**
+```bash
+export ANTHROPIC_API_KEY=sk-ant-...
+export ANTHROPIC_MODEL_ID=claude-sonnet-4-20250514  # 可选
+export ANTHROPIC_BASE_URL=https://api.anthropic.com  # 可选
+```
+
+#### **Windows (PowerShell)**
+```powershell
+$env:ANTHROPIC_API_KEY="sk-ant-..."
+$env:ANTHROPIC_MODEL_ID="claude-sonnet-4-20250514"  # 可选
+$env:ANTHROPIC_BASE_URL="https://api.anthropic.com"  # 可选
+```
+
+#### **Windows (CMD)**
+```cmd
+set ANTHROPIC_API_KEY=sk-ant-...
+set ANTHROPIC_MODEL_ID=claude-sonnet-4-20250514
+```
+<!-- tabs:end -->
+
+### OpenAI
+
+<!-- tabs:start -->
+#### **Linux / macOS**
+```bash
+export OPENAI_API_KEY=sk-...
+export OPENAI_MODEL_ID=gpt-4o  # 可选
+```
+
+#### **Windows (PowerShell)**
+```powershell
+$env:OPENAI_API_KEY="sk-..."
+$env:OPENAI_MODEL_ID="gpt-4o"  # 可选
+```
+<!-- tabs:end -->
+
+### Google Gemini
+
+<!-- tabs:start -->
+#### **Linux / macOS**
+```bash
+export GOOGLE_API_KEY=...
+export GEMINI_MODEL_ID=gemini-2.0-flash  # 可选
+```
+
+#### **Windows (PowerShell)**
+```powershell
+$env:GOOGLE_API_KEY="..."
+$env:GEMINI_MODEL_ID="gemini-2.0-flash"  # 可选
+```
+<!-- tabs:end -->
+
+## 使用 .env 文件
+
+在项目根目录创建 `.env` 文件：
+
+```bash
+# .env
+ANTHROPIC_API_KEY=sk-ant-...
+ANTHROPIC_MODEL_ID=claude-sonnet-4-20250514
+```
+
+在代码中加载：
+
+```typescript
+import 'dotenv/config';
+// 或
+import { config } from 'dotenv';
+config();
+```
+
+## 验证安装
+
+```typescript
+import { Agent, AnthropicProvider, JSONStore } from '@shareai-lab/kode-sdk';
+
+console.log('KODE SDK 安装成功！');
+```
+
+## 下一步
+
+- [快速上手](./quickstart.md) - 创建第一个 Agent
+- [核心概念](./concepts.md) - 理解核心概念
diff --git a/docs/zh-CN/getting-started/quickstart.md b/docs/zh-CN/getting-started/quickstart.md
new file mode 100644
index 0000000..efcf9de
--- /dev/null
+++ b/docs/zh-CN/getting-started/quickstart.md
@@ -0,0 +1,198 @@
+# 快速上手
+
+5 分钟创建你的第一个 Agent。
+
+## 前置条件
+
+- 完成 [安装配置](./installation.md)
+- 设置 `ANTHROPIC_API_KEY` 环境变量
+
+## 第一步：设置依赖
+
+KODE SDK 使用依赖注入模式。首先创建所需的依赖：
+
+```typescript
+import {
+  Agent,
+  AnthropicProvider,
+  JSONStore,
+  AgentTemplateRegistry,
+  ToolRegistry,
+  SandboxFactory,
+} from '@shareai-lab/kode-sdk';
+
+// 创建依赖
+const store = new JSONStore('./.kode');
+const templates = new AgentTemplateRegistry();
+const tools = new ToolRegistry();
+const sandboxFactory = new SandboxFactory();
+
+// 创建 Provider
+const provider = new AnthropicProvider(
+  process.env.ANTHROPIC_API_KEY!,
+  process.env.ANTHROPIC_MODEL_ID  // 可选，不设置则使用默认值
+);
+
+// 注册模板
+templates.register({
+  id: 'assistant',
+  systemPrompt: '你是一个乐于助人的助手。',
+});
+```
+
+## 第二步：创建 Agent
+
+```typescript
+const agent = await Agent.create(
+  { templateId: 'assistant' },
+  {
+    store,
+    templateRegistry: templates,
+    toolRegistry: tools,
+    sandboxFactory,
+    modelFactory: () => provider,
+  }
+);
+```
+
+## 第三步：订阅事件
+
+```typescript
+// 使用 subscribe() 订阅 Progress 事件（文本流）。注意：for await 会持续等待事件，实际代码中应将其放入独立的异步任务，再调用 send()（见下方完整示例）
+for await (const envelope of agent.subscribe(['progress'])) {
+  switch (envelope.event.type) {
+    case 'text_chunk':
+      process.stdout.write(envelope.event.delta);
+      break;
+    case 'done':
+      console.log('\n--- 消息完成 ---');
+      break;
+  }
+  if (envelope.event.type === 'done') break;
+}
+
+// 使用 on() 订阅 Control 事件
+agent.on('permission_required', async (event) => {
+  console.log(`工具 ${event.call.name} 需要审批`);
+  // 演示用：自动批准
+  await event.respond('allow');
+});
+```
+
+## 第四步：发送消息
+
+```typescript
+await agent.send('你好！你能帮我做些什么？');
+```
+
+## 完整示例
+
+```typescript
+// getting-started.ts
+import 'dotenv/config';
+import {
+  Agent,
+  AnthropicProvider,
+  JSONStore,
+  AgentTemplateRegistry,
+  ToolRegistry,
+  SandboxFactory,
+} from '@shareai-lab/kode-sdk';
+
+async function main() {
+  const provider = new AnthropicProvider(
+    process.env.ANTHROPIC_API_KEY!,
+    process.env.ANTHROPIC_MODEL_ID
+  );
+
+  // 设置依赖
+  const store = new JSONStore('./.kode');
+  const templates = new AgentTemplateRegistry();
+  const tools = new ToolRegistry();
+  const sandboxFactory = new SandboxFactory();
+
+  templates.register({
+    id: 'assistant',
+    systemPrompt: '你是一个乐于助人的助手。',
+  });
+
+  const agent = await Agent.create(
+    { templateId: 'assistant' },
+    { store, templateRegistry: templates, toolRegistry: tools, sandboxFactory, modelFactory: () => provider }
+  );
+
+  // 使用异步迭代器订阅 progress
+  const progressTask = (async () => {
+    for await (const envelope of agent.subscribe(['progress'])) {
+      if (envelope.event.type === 'text_chunk') {
+        process.stdout.write(envelope.event.delta);
+      }
+      if (envelope.event.type === 'done') break;
+    }
+  })();
+
+  await agent.send('你好！');
+  await progressTask;
+  console.log('\n');
+}
+
+main().catch(console.error);
+```
+
+运行：
+
+```bash
+npx ts-node getting-started.ts
+```
+
+## 使用内置工具
+
+通过注册的方式添加文件系统和 Bash 工具：
+
+```typescript
+import {
+  Agent,
+  AnthropicProvider,
+  JSONStore,
+  AgentTemplateRegistry,
+  ToolRegistry,
+  SandboxFactory,
+  builtin,
+} from '@shareai-lab/kode-sdk';
+
+const store = new JSONStore('./.kode');
+const templates = new AgentTemplateRegistry();
+const tools = new ToolRegistry();
+const sandboxFactory = new SandboxFactory();
+
+// 注册内置工具
+for (const tool of builtin.fs()) {
+  tools.register(tool.name, () => tool);
+}
+for (const tool of builtin.bash()) {
+  tools.register(tool.name, () => tool);
+}
+for (const tool of builtin.todo()) {
+  tools.register(tool.name, () => tool);
+}
+
+// 注册模板并指定工具名称
+templates.register({
+  id: 'coding-assistant',
+  systemPrompt: '你是一个编程助手。',
+  tools: ['fs_read', 'fs_write', 'fs_edit', 'fs_glob', 'fs_grep', 'bash_run', 'todo_read', 'todo_write'],
+});
+
+const provider = new AnthropicProvider(process.env.ANTHROPIC_API_KEY!);
+
+const agent = await Agent.create(
+  { templateId: 'coding-assistant' },
+  { store, templateRegistry: templates, toolRegistry: tools, sandboxFactory, modelFactory: () => provider }
+);
+```
+
+## 下一步
+
+- [核心概念](./concepts.md) - 理解 Agent、事件、工具
+- [事件系统](../guides/events.md) - 掌握三通道系统
+- [工具系统](../guides/tools.md) - 学习内置和自定义工具
diff --git a/docs/zh-CN/guides/database.md b/docs/zh-CN/guides/database.md
new file mode 100644
index 0000000..7573694
--- /dev/null
+++ b/docs/zh-CN/guides/database.md
@@ -0,0 +1,472 @@
+# 数据库持久化指南
+
+KODE SDK 支持 SQLite 和 PostgreSQL 作为持久化后端，提供高性能的查询、聚合和分析能力。
+
+---
+
+## 支持的后端
+
+| 后端 | 使用场景 | 特性 |
+|------|----------|------|
+| SQLite | 开发、单实例 | 零配置、文件存储 |
+| PostgreSQL | 生产、多实例 | 并发写入、JSONB 查询 |
+
+---
+
+## 环境变量配置
+
+<!-- tabs:start -->
+#### **Linux / macOS**
+```bash
+# SQLite
+export KODE_STORE_TYPE=sqlite
+export KODE_SQLITE_PATH=./data/agents.db
+export KODE_STORE_PATH=./data/store
+
+# PostgreSQL
+export KODE_STORE_TYPE=postgres
+export POSTGRES_HOST=localhost
+export POSTGRES_PORT=5432
+export POSTGRES_DB=kode_agents
+export POSTGRES_USER=kode
+export POSTGRES_PASSWORD=your_password
+```
+
+#### **Windows (PowerShell)**
+```powershell
+# SQLite
+$env:KODE_STORE_TYPE="sqlite"
+$env:KODE_SQLITE_PATH="./data/agents.db"
+$env:KODE_STORE_PATH="./data/store"
+
+# PostgreSQL
+$env:KODE_STORE_TYPE="postgres"
+$env:POSTGRES_HOST="localhost"
+$env:POSTGRES_PORT="5432"
+$env:POSTGRES_DB="kode_agents"
+$env:POSTGRES_USER="kode"
+$env:POSTGRES_PASSWORD="your_password"
+```
+
+#### **Windows (CMD)**
+```cmd
+set KODE_STORE_TYPE=sqlite
+set KODE_SQLITE_PATH=./data/agents.db
+set KODE_STORE_PATH=./data/store
+```
+<!-- tabs:end -->
+
+---
+
+## 快速开始
+
+### 使用工厂函数（推荐）
+
+```typescript
+import { createExtendedStore } from '@shareai-lab/kode-sdk';
+
+// 根据 KODE_STORE_TYPE 自动选择后端
+const store = await createExtendedStore();
+
+// 或显式指定后端
+const sqliteStore = await createExtendedStore({
+  type: 'sqlite',
+  dbPath: './data/agents.db',
+  fileStoreBaseDir: './data/store',
+});
+
+const postgresStore = await createExtendedStore({
+  type: 'postgres',
+  connection: {
+    host: process.env.POSTGRES_HOST ?? 'localhost',
+    port: parseInt(process.env.POSTGRES_PORT ?? '5432'),
+    database: process.env.POSTGRES_DB ?? 'kode_agents',
+    user: process.env.POSTGRES_USER ?? 'kode',
+    password: process.env.POSTGRES_PASSWORD!,
+  },
+  fileStoreBaseDir: './data/store',
+});
+```
+
+### 直接使用类
+
+```typescript
+import { SqliteStore, PostgresStore } from '@shareai-lab/kode-sdk';
+
+// SQLite
+const sqliteStore = new SqliteStore('./data/agents.db', './data/store');
+
+// PostgreSQL
+const postgresStore = new PostgresStore(
+  {
+    host: 'localhost',
+    port: 5432,
+    database: 'kode_agents',
+    user: 'kode',
+    password: 'password',
+  },
+  './data/store'
+);
+```
+
+### 与 Agent 配合使用
+
+```typescript
+import { Agent, createExtendedStore } from '@shareai-lab/kode-sdk';
+
+const store = await createExtendedStore();
+
+const agent = await Agent.create({
+  provider,
+  store,
+  template: {
+    id: 'assistant',
+    systemPrompt: 'You are a helpful assistant.',
+    tools: [],
+  },
+});
+
+await agent.send('Hello!');
+
+// 完成后关闭数据库
+await store.close();
+```
+
+---
+
+## 查询 API
+
+### 会话查询：`querySessions()`
+
+查询 Agent 会话列表，支持过滤和分页。
+
+```typescript
+interface SessionQueryFilter {
+  templateId?: string;      // 按模板 ID 过滤
+  createdAfter?: Date;      // 创建时间晚于
+  createdBefore?: Date;     // 创建时间早于
+  limit?: number;           // 返回数量限制（默认 100）
+  offset?: number;          // 分页偏移量（默认 0）
+}
+
+const sessions = await store.querySessions({
+  templateId: 'chat-assistant',
+  createdAfter: new Date('2025-01-01'),
+  limit: 20,
+});
+
+sessions.forEach(session => {
+  console.log({
+    agentId: session.agentId,
+    templateId: session.templateId,
+    createdAt: session.createdAt,
+    messageCount: session.messageCount,
+  });
+});
+```
+
+### 消息查询：`queryMessages()`
+
+查询消息记录，支持按角色和内容类型过滤。
+
+```typescript
+interface MessageQueryFilter {
+  agentId?: string;
+  role?: 'user' | 'assistant';
+  contentType?: 'text' | 'tool_use' | 'tool_result';
+  createdAfter?: Date;
+  createdBefore?: Date;
+  limit?: number;
+  offset?: number;
+}
+
+const messages = await store.queryMessages({
+  agentId: 'agt-abc123',
+  role: 'assistant',
+  contentType: 'tool_use',
+  limit: 50,
+});
+```
+
+### 工具调用查询：`queryToolCalls()`
+
+查询工具调用记录，支持按工具名和错误状态过滤。
+
+```typescript
+interface ToolCallQueryFilter {
+  agentId?: string;
+  toolName?: string;        // 按工具名称过滤
+  isError?: boolean;        // 按错误状态过滤
+  hasApproval?: boolean;    // 按审批状态过滤
+  createdAfter?: Date;
+  createdBefore?: Date;
+  limit?: number;
+  offset?: number;
+}
+
+const toolCalls = await store.queryToolCalls({
+  toolName: 'bash_run',
+  isError: true,
+  limit: 10,
+});
+
+toolCalls.forEach(call => {
+  console.log({
+    toolCallId: call.toolCallId,
+    toolName: call.toolName,
+    input: call.input,
+    output: call.output,
+    isError: call.isError,
+    approval: call.approval,
+  });
+});
+```
+
+### 统计聚合：`aggregateStats()`
+
+聚合统计 Agent 的消息数量和工具调用指标。
+
+```typescript
+const stats = await store.aggregateStats('agt-abc123');
+
+console.log({
+  totalMessages: stats.totalMessages,
+  totalToolCalls: stats.totalToolCalls,
+  totalSnapshots: stats.totalSnapshots,
+  toolCallsByState: stats.toolCallsByState,  // { completed: 10, failed: 2, ... }
+});
+
+// 使用 toolCallsByState 计算成功率
+if (stats.toolCallsByState && stats.totalToolCalls > 0) {  // 避免没有工具调用时除以 0 得到 NaN
+  const completed = stats.toolCallsByState['completed'] || 0;
+  const successRate = (completed / stats.totalToolCalls * 100).toFixed(2);
+  console.log(`工具调用成功率: ${successRate}%`);
+}
+```
+
+---
+
+## SQLite vs PostgreSQL
+
+### 对比
+
+| 特性 | SQLite | PostgreSQL |
+|------|--------|------------|
+| **部署** | 单文件，零配置 | 需要数据库服务器 |
+| **并发写入** | 单进程 | 多进程 |
+| **查询性能** | 适合小数据集 | 大数据集优化 |
+| **JSON 支持** | JSON 函数 | JSONB + GIN 索引 |
+| **备份** | 复制文件 | pg_dump/restore |
+| **扩展性** | 单机 | 主从复制、分片 |
+
+### 选择 SQLite 当...
+
+- 单实例部署
+- Agent 数量 < 1000
+- 每日消息量 < 10 万条
+- 快速原型开发
+- 零运维成本需求
+
+### 选择 PostgreSQL 当...
+
+- 多实例部署
+- Agent 数量 > 1000
+- 每日消息量 > 10 万条
+- 复杂查询和分析需求
+- 高可用要求
+
+---
+
+## Docker 快速启动
+
+### PostgreSQL
+
+```bash
+# 开发环境
+docker run --name kode-postgres \
+  -e POSTGRES_PASSWORD=kode123 \
+  -e POSTGRES_DB=kode_agents \
+  -p 5432:5432 \
+  -d postgres:16-alpine
+
+# 生产环境（持久化数据；请将示例密码 kode123 替换为强密码）
+docker run --name kode-postgres \
+  -e POSTGRES_PASSWORD=kode123 \
+  -e POSTGRES_DB=kode_agents \
+  -v /data/postgres:/var/lib/postgresql/data \
+  -p 5432:5432 \
+  -d postgres:16-alpine
+```
+
+---
+
+## 性能优化
+
+### 使用分页
+
+```typescript
+// 避免一次加载所有数据
+const PAGE_SIZE = 100;
+let offset = 0;
+
+while (true) {
+  const messages = await store.queryMessages({
+    agentId,
+    limit: PAGE_SIZE,
+    offset,
+  });
+
+  if (messages.length === 0) break;
+  processMessages(messages);
+  offset += PAGE_SIZE;
+}
+```
+
+### 使用时间过滤
+
+```typescript
+// 限制到最近数据
+const messages = await store.queryMessages({
+  agentId,
+  createdAfter: new Date(Date.now() - 7 * 24 * 60 * 60 * 1000), // 最近 7 天
+});
+```
+
+### PostgreSQL 连接池配置
+
+```typescript
+const store = await createExtendedStore({
+  type: 'postgres',
+  connection: {
+    host: 'localhost',
+    port: 5432,
+    database: 'kode_agents',
+    user: 'kode',
+    password: 'password',
+    max: 20,                    // 最大连接数
+    idleTimeoutMillis: 30000,   // 空闲连接超时
+    connectionTimeoutMillis: 2000,
+  },
+  fileStoreBaseDir: './data/store',
+});
+```
+
+---
+
+## 备份
+
+### SQLite
+
+```bash
+# 在线备份（推荐）
+sqlite3 agents.db ".backup agents.db.backup"
+
+# 导出 SQL
+sqlite3 agents.db .dump > agents.sql
+```
+
+### PostgreSQL
+
+```bash
+# 逻辑备份
+pg_dump -h localhost -U kode -d kode_agents > backup.sql
+
+# 压缩备份
+pg_dump -h localhost -U kode -d kode_agents | gzip > backup.sql.gz
+
+# 定时备份（cron）
+0 2 * * * pg_dump -h localhost -U kode -d kode_agents | gzip > /backup/kode_$(date +\%Y\%m\%d).sql.gz
+```
+
+---
+
+## 故障排查
+
+### SQLite：数据库锁定
+
+```
+Error: SQLITE_BUSY: database is locked
+```
+
+**解决方案**：启用 WAL 模式并设置 `busy_timeout`（以下示例为 better-sqlite3 风格的 API）
+
+```typescript
+const db = new Database('./agents.db');
+db.pragma('journal_mode = WAL');
+db.pragma('busy_timeout = 5000');
+```
+
+### PostgreSQL：连接被拒绝
+
+```
+Error: connect ECONNREFUSED 127.0.0.1:5432
+```
+
+**检查清单**：
+1. 检查 PostgreSQL 是否运行：`pg_isready -h localhost -p 5432`
+2. 检查防火墙设置
+3. 验证 `pg_hba.conf` 允许连接
+4. 验证 postgresql.conf 中 `listen_addresses = '*'`
+
+### PostgreSQL：连接数过多
+
+```
+Error: sorry, too many clients already
+```
+
+**解决方案**：优化连接池
+
+```typescript
+const store = await createExtendedStore({
+  type: 'postgres',
+  connection: {
+    ...config,
+    max: 10,                    // 减少单实例连接数
+    idleTimeoutMillis: 10000,   // 更快释放空闲连接
+  },
+  fileStoreBaseDir: './data/store',
+});
+```
+
+---
+
+## 常见问题
+
+**Q: 可以从 JSONStore 迁移到数据库吗？**
+
+A: 可以，但目前需要手动迁移。未来版本会提供迁移工具。
+
+**Q: 数据库存储会影响性能吗？**
+
+A: 不会。对于常规操作（create、send、resume），性能与 JSONStore 相当。
+
+**Q: 可以混用 SQLite 和 PostgreSQL 吗？**
+
+A: 可以。`ExtendedStore` 接口抽象了底层实现：
+
+```typescript
+const store = process.env.NODE_ENV === 'production'
+  ? await createExtendedStore({ type: 'postgres', ... })
+  : await createExtendedStore({ type: 'sqlite', ... });
+```
+
+**Q: 如何删除旧数据？**
+
+```typescript
+// 删除指定 Agent
+await store.delete(agentId);
+
+// 批量删除旧 Agent（querySessions 默认最多返回 100 条，数据量大时需循环执行直到结果为空）
+const sessions = await store.querySessions({
+  createdBefore: new Date(Date.now() - 90 * 24 * 60 * 60 * 1000), // 90 天前
+});
+for (const session of sessions) {
+  await store.delete(session.agentId);
+}
+```
+
+---
+
+## 参考资料
+
+- Store 接口：[API 参考](../reference/api.md#store)
diff --git a/docs/zh-CN/guides/error-handling.md b/docs/zh-CN/guides/error-handling.md
new file mode 100644
index 0000000..3073def
--- /dev/null
+++ b/docs/zh-CN/guides/error-handling.md
@@ -0,0 +1,309 @@
+# 错误处理指南
+
+KODE SDK 实现了完整的错误处理机制，遵循三个核心原则：
+
+1. **模型感知错误** - 所有错误信息对模型可见且可操作
+2. **程序永不崩溃** - 多层错误捕获，确保系统稳定运行
+3. **完整可观测性** - 所有错误触发事件，方便监控和调试
+
+---
+
+## 错误类型
+
+| 错误类型 | 标识 | 可重试 | 典型场景 |
+|---------|------|--------|---------|
+| `validation` | `_validationError: true` | 否 | 参数类型错误、必填参数缺失 |
+| `runtime` | `_thrownError: true` | 是 | 文件不存在、权限不足、网络错误 |
+| `logical` | 工具返回 `{ok: false}` | 是 | 文件内容不匹配、命令执行失败 |
+| `aborted` | 超时/中断 | 否 | 工具执行超时、用户中断 |
+| `exception` | 未预期异常 | 是 | 系统异常、未知错误 |
+
+---
+
+## 错误流转
+
+```
+工具执行
+  ├─ 参数验证失败 → {ok: false, error: ..., _validationError: true}
+  ├─ 执行抛异常 → {ok: false, error: ..., _thrownError: true}
+  ├─ 返回 {ok: false} → 保持原样（逻辑错误）
+  └─ 正常返回 → 保持原样
+     ↓
+Agent 处理
+  ├─ 识别错误类型：validation | runtime | logical | aborted | exception
+  ├─ 判断可重试性：validation、aborted 不可重试，其他可重试
+  ├─ 生成智能建议：基于错误类型和工具名称
+  ├─ 发出 tool:error 事件（ProgressEvent - 用户可见）
+  └─ 发出 error 事件（MonitorEvent - 监控系统）
+     ↓
+返回给模型
+  └─ {
+       ok: false,
+       error: "具体错误信息",
+       errorType: "错误类型",
+       retryable: true/false,
+       recommendations: ["建议1", "建议2", ...]
+     }
+```
+
+---
+
+## 监听错误
+
+### Progress 事件（用户层）
+
+```typescript
+// 监听工具错误用于 UI
+agent.on('tool:error', (event) => {
+  console.log('工具错误:', event.error);
+  console.log('工具状态:', event.call.state);
+  // 显示 UI 通知
+});
+
+// 使用流
+for await (const envelope of agent.stream(input)) {
+  if (envelope.event.type === 'tool:error') {
+    showNotification({
+      type: 'error',
+      message: envelope.event.error,
+    });
+  }
+}
+```
+
+### Monitor 事件（系统层）
+
+```typescript
+// 监听所有错误
+agent.on('error', (event) => {
+  if (event.phase === 'tool') {
+    const { errorType, retryable } = event.detail || {};
+
+    // 记录到日志系统
+    logger.warn('Tool Error', {
+      message: event.message,
+      errorType,
+      retryable,
+      severity: event.severity,
+      timestamp: Date.now(),
+    });
+
+    // 发送告警
+    if (event.severity === 'error') {
+      alerting.send('工具执行失败', event);
+    }
+  }
+});
+```
+
+---
+
+## 模型自我调整
+
+### 场景：文件不存在
+
+**工具返回：**
+```json
+{
+  "ok": false,
+  "error": "File not found: /src/utils/helper.ts",
+  "errorType": "logical",
+  "retryable": true,
+  "recommendations": [
+    "确认文件路径是否正确",
+    "使用 fs_glob 搜索文件",
+    "检查文件是否被外部修改"
+  ]
+}
+```
+
+**模型分析：**
+1. `errorType: "logical"` - 不是参数问题，是文件确实不存在
+2. `retryable: true` - 可以尝试其他方案
+3. 建议提到"确认文件路径"
+
+**模型调整策略：**
+```
+1. 使用 fs_glob("src/**/*.ts") 查找所有 ts 文件
+2. 使用 fs_grep("helper", "src/**/*.ts") 搜索包含 helper 的文件
+3. 找到正确的文件路径后继续操作
+```
+
+### 场景：参数验证错误
+
+**工具返回：**
+```json
+{
+  "ok": false,
+  "error": "Invalid parameters: path is required",
+  "errorType": "validation",
+  "retryable": false,
+  "recommendations": [
+    "检查工具参数是否符合 schema 要求",
+    "确认所有必填参数已提供",
+    "检查参数类型是否正确"
+  ]
+}
+```
+
+**模型调整策略：**
+```
+1. 检查工具调用，发现缺少 path 参数
+2. 补充必要的 path 参数
+3. 重新调用工具
+```
+
+---
+
+## 多层防护机制
+
+```
+第1层：工具执行层 (tool.ts)
+  └─ try-catch 捕获所有异常 → {ok: false, _thrownError: true}
+
+第2层：Agent调用层 (agent.ts)
+  └─ try-catch 捕获调用异常 → errorType: 'exception'
+
+第3层：参数验证层
+  └─ safeParse 避免验证异常 → {ok: false, _validationError: true}
+
+第4层：Hook执行层
+  └─ Hook失败不影响主流程 → 记录错误继续执行
+```
+
+### 错误隔离原则
+
+- 单个工具错误 ≠ Agent 崩溃
+- Agent 错误 ≠ 系统崩溃
+- 工具间完全隔离
+- 所有错误可追踪
+
+---
+
+## 最佳实践
+
+### 工具开发者
+
+```typescript
+// ✅ 推荐：使用 {ok: false} 返回预期的业务错误
+if (!fileExists) {
+  return {
+    ok: false,
+    error: '文件未找到',
+    recommendations: ['检查文件路径', '使用 fs_glob 搜索文件'],
+  };
+}
+
+// ❌ 避免：抛出异常表示业务错误
+throw new Error('文件未找到');  // 应该只用于意外异常
+```
+
+### 应用开发者
+
+```typescript
+// 监听错误并做 UI 提示
+agent.on('tool:error', (event) => {
+  showNotification({
+    type: 'error',
+    message: event.error,
+    action: event.call.state === 'FAILED' ? 'retry' : null,
+  });
+});
+
+// 智能重试逻辑
+if (result.status === 'paused' && result.permissionIds?.length) {
+  // 有 pending 权限，等待用户决策
+} else if (lastError?.retryable && retryCount < 3) {
+  // 可重试错误，自动重试
+  await agent.send('请根据建议调整后重试');
+}
+```
+
+### 系统运维
+
+```typescript
+// 错误统计和分析
+const errorStats = {
+  validation: 0,
+  runtime: 0,
+  logical: 0,
+  aborted: 0,
+  exception: 0,
+};
+
+agent.on('error', (event) => {
+  if (event.phase === 'tool') {
+    const type = event.detail?.errorType || 'unknown';
+    errorStats[type] = (errorStats[type] ?? 0) + 1;  // 'unknown' 等未预置的类型也能正确计数
+
+    // 定期分析错误模式
+    if (errorStats.validation > 100) {
+      alert('参数验证错误过多，请检查工具 schema 配置');
+    }
+  }
+});
+```
+
+---
+
+## 错误事件类型
+
+### ProgressToolErrorEvent
+
+```typescript
+interface ProgressToolErrorEvent {
+  channel: 'progress';
+  type: 'tool:error';
+  call: ToolCallSnapshot;  // 工具调用快照
+  error: string;           // 错误信息
+  bookmark?: Bookmark;
+}
+```
+
+### MonitorErrorEvent
+
+```typescript
+interface MonitorErrorEvent {
+  channel: 'monitor';
+  type: 'error';
+  severity: 'warn' | 'error';
+  phase: 'model' | 'tool' | 'sandbox' | 'system';
+  message: string;
+  detail?: {
+    errorType?: string;
+    retryable?: boolean;
+    [key: string]: any;
+  };
+}
+```
+
+---
+
+## 总结
+
+错误处理机制提供：
+
+**模型智能感知**
+- 错误类型明确（validation/runtime/logical/aborted/exception）
+- 可重试性清晰（retryable: true/false）
+- 建议具体可操作（根据工具和错误类型定制）
+
+**系统稳定性**
+- 工具层 try-catch 兜底
+- Agent层 try-catch 保护
+- 参数验证 safeParse
+- Hook执行隔离
+
+**完整可观测性**
+- Progress 事件（tool:error）- 用户可见
+- Monitor 事件（error）- 系统记录
+- 工具记录（ToolCallRecord）- 完整审计
+- 事件时间线（EventBus）- 可回溯
+
+---
+
+## 参考资料
+
+- [事件系统指南](./events.md)
+- [工具系统指南](./tools.md)
+- [Resume/Fork 指南](./resume-fork.md)
diff --git a/docs/events.md b/docs/zh-CN/guides/events.md
similarity index 87%
rename from docs/events.md
rename to docs/zh-CN/guides/events.md
index 16f505b..a60e79d 100644
--- a/docs/events.md
+++ b/docs/zh-CN/guides/events.md
@@ -1,6 +1,6 @@
-# 事件驱动指南
+# 事件系统指南
 
-KODE SDK 的核心理念是“默认只推必要事件，其余一律走回调”。为此我们将交互拆成三条独立通道：
+KODE SDK 的核心理念是"默认只推必要事件，其余一律走回调"。为此我们将交互拆成三条独立通道：
 
 ```
 Progress  → 数据面（UI 渲染）
@@ -17,7 +17,7 @@ Monitor   → 治理面（审计/告警）
 Progress 负责所有对用户可见的数据流：文本增量、工具生命周期、最终完成信号。事件均按时间序列推送，可用 `cursor`/`bookmark` 做断点续播。
 
 | 事件 | 说明 |
-| --- | --- |
+|------|------|
 | `think_chunk_start / think_chunk / think_chunk_end` | 模型思考阶段（可通过模板 metadata 开启 `exposeThinking`）。|
 | `text_chunk_start / text_chunk / text_chunk_end` | 文本增量与最终分段。|
 | `tool:start / tool:error / tool:end` | 工具执行生命周期；`tool:end` 始终发送（即使失败）。|
@@ -47,7 +47,7 @@ for await (const envelope of agent.subscribe(['progress'], { since: lastBookmark
 - 使用 **SSE/WebSocket** 将 Progress 推送到前端。
 - 保存 `bookmark` / `cursor`，断线后以 `since` 续播。
 - UI 只负责展示；业务判断（审批、治理）放到 Control/Monitor 或 Hook。
-- 需要展示“思考过程”时开启 `exposeThinking`，否则保持默认关闭降低噪音。
+- 需要展示"思考过程"时开启 `exposeThinking`，否则保持默认关闭降低噪音。
 
 **常见陷阱**
 
@@ -58,10 +58,10 @@ for await (const envelope of agent.subscribe(['progress'], { since: lastBookmark
 
 ## Control：审批面
 
-Control 专门处理“需要人类决策”的瞬间。事件数量极少但重要，通常会被持久化到审批系统。
+Control 专门处理"需要人类决策"的瞬间。事件数量极少但重要，通常会被持久化到审批系统。
 
 | 事件 | 说明 |
-| --- | --- |
+|------|------|
 | `permission_required` | 工具执行需审批，包含 `call` 快照与 `respond(decision, opts?)` 回调。|
 | `permission_decided` | 审批结果广播，包含 `callId`、`decision`、`decidedBy`、`note`。|
 
@@ -88,7 +88,7 @@ agent.on('permission_required', async (event) => {
 **常见陷阱**
 
 - 忘记处理 `permission_required` 导致工具一直卡在 `AWAITING_APPROVAL`。
-- 审批回调抛错：`agent.decide` 只能调用一次，重复调用会报 “Permission not pending”。
+- 审批回调抛错：`agent.decide` 只能调用一次，重复调用会报 "Permission not pending"。
 
 ---
 
@@ -97,7 +97,7 @@ agent.on('permission_required', async (event) => {
 Monitor 面向平台治理、审计、告警。默认只在必要时推送，适合写入日志与指标系统。
 
 | 事件 | 说明 |
-| --- | --- |
+|------|------|
 | `state_changed` | Agent 状态切换（READY / WORKING / PAUSED）。|
 | `tool_executed` | 工具执行完成，含耗时、审批、审计信息。|
 | `error` | 分类错误（`phase: model/tool/system`），附详细上下文。|
@@ -148,12 +148,12 @@ agent.on('error', (event) => {
 const stream = agent.subscribe(['progress', 'monitor']);
 const iterator = stream[Symbol.asyncIterator]();
 
-// Back-end governance
+// 后台治理
 const off = agent.on('tool_executed', handler);
 // 在适当时机调用 off() 解除绑定
 ```
 
-> 默认约定：UI 订阅 Progress；审批系统监听 Control；治理/监控消费 Monitor。其余场景尽量通过 Hook 或内置事件完成，避免自定义轮询。
+> **默认约定**：UI 订阅 Progress；审批系统监听 Control；治理/监控消费 Monitor。其余场景尽量通过 Hook 或内置事件完成，避免自定义轮询。
 
 ---
 
@@ -163,4 +163,4 @@ const off = agent.on('tool_executed', handler);
 - 使用 `agent.status()` 查看 `lastSfpIndex`、`cursor`、`state`，定位卡顿问题。
 - 结合 `EventBus.getTimeline()`（内部 API）或 Store 事件日志进行回放。
 
-掌握三通道心智后，就能轻松构建“像同事一样协作”的 Agent 体验。
+掌握三通道心智后，就能轻松构建"像同事一样协作"的 Agent 体验。
diff --git a/docs/zh-CN/guides/multimodal.md b/docs/zh-CN/guides/multimodal.md
new file mode 100644
index 0000000..a0ed5d5
--- /dev/null
+++ b/docs/zh-CN/guides/multimodal.md
@@ -0,0 +1,323 @@
+# 多模态内容指南
+
+KODE SDK 支持多模态输入，包括图像、音频和文件（PDF）。本指南介绍如何向 LLM 模型发送多模态内容以及管理多模态历史记录。
+
+---
+
+## 支持的内容类型
+
+| 类型 | Block 类型 | 支持的 Provider |
+|------|------------|-----------------|
+| 图片 | `image` | Anthropic, OpenAI, Gemini, GLM, Minimax |
+| PDF 文件 | `file` | Anthropic, OpenAI (Responses API), Gemini |
+| 音频 | `audio` | OpenAI, Gemini |
+
+---
+
+## 发送多模态内容
+
+### 图片输入
+
+使用 `ContentBlock[]` 配合 `agent.send()` 发送图片：
+
+```typescript
+import { Agent, ContentBlock } from '@shareai-lab/kode-sdk';
+import * as fs from 'fs';
+
+// 读取图片为 base64
+const imageBuffer = fs.readFileSync('./image.png');
+const base64 = imageBuffer.toString('base64');
+
+// 构建内容块
+const content: ContentBlock[] = [
+  { type: 'text', text: '这张图片中有哪些动物？' },
+  { type: 'image', base64, mime_type: 'image/png' }
+];
+
+// 发送给 agent
+const response = await agent.send(content);
+```
+
+### 基于 URL 的图片
+
+也可以使用 URL 代替 base64：
+
+```typescript
+const content: ContentBlock[] = [
+  { type: 'text', text: '描述这张图片。' },
+  { type: 'image', url: 'https://example.com/image.jpg' }
+];
+
+const response = await agent.send(content);
+```
+
+### PDF 文件输入
+
+```typescript
+const pdfBuffer = fs.readFileSync('./document.pdf');
+const base64 = pdfBuffer.toString('base64');
+
+const content: ContentBlock[] = [
+  { type: 'text', text: '从这个 PDF 中提取主要内容。' },
+  { type: 'file', base64, mime_type: 'application/pdf', filename: 'document.pdf' }
+];
+
+const response = await agent.send(content);
+```
+
+---
+
+## 多模态配置
+
+### Agent 配置
+
+创建 Agent 时配置多模态行为：
+
+```typescript
+const agent = await Agent.create({
+  templateId: 'multimodal-assistant',
+  // 在对话历史中保留多模态内容
+  multimodalContinuation: 'history',
+  // 压缩上下文时保留最近 3 条多模态消息
+  multimodalRetention: { keepRecent: 3 },
+}, deps);
+```
+
+| 选项 | 类型 | 默认值 | 描述 |
+|------|------|--------|------|
+| `multimodalContinuation` | `'history'` | `'history'` | 在对话历史中保留多模态内容 |
+| `multimodalRetention.keepRecent` | `number` | `3` | 上下文压缩时保留的最近多模态消息数量 |
+
+### Provider 配置
+
+在模型配置中配置多模态选项：
+
+```typescript
+const provider = new AnthropicProvider(
+  process.env.ANTHROPIC_API_KEY!,
+  'claude-sonnet-4-20250514',
+  undefined, // baseUrl
+  undefined, // proxyUrl
+  {
+    multimodal: {
+      mode: 'url+base64',           // 同时允许 URL 和 base64
+      maxBase64Bytes: 20_000_000,   // base64 最大 20MB
+      allowMimeTypes: [             // 允许的 MIME 类型
+        'image/jpeg',
+        'image/png',
+        'image/gif',
+        'image/webp',
+        'application/pdf',
+      ],
+    },
+  }
+);
+```
+
+| 选项 | 类型 | 默认值 | 描述 |
+|------|------|--------|------|
+| `mode` | `'url'` \| `'url+base64'` | `'url'` | 允许的内容来源：仅 URL，或同时允许 URL 和 base64 |
+| `maxBase64Bytes` | `number` | `20000000` | base64 内容最大尺寸 |
+| `allowMimeTypes` | `string[]` | 常见图片 + PDF 类型 | 允许的 MIME 类型 |
+
+---
+
+## 支持的 MIME 类型
+
+### 图片
+
+| MIME 类型 | 扩展名 | 备注 |
+|-----------|--------|------|
+| `image/jpeg` | `.jpg`, `.jpeg` | 所有 Provider |
+| `image/png` | `.png` | 所有 Provider |
+| `image/webp` | `.webp` | 所有 Provider |
+| `image/gif` | `.gif` | Gemini 不支持 |
+
+### 文档
+
+| MIME 类型 | 扩展名 | 备注 |
+|-----------|--------|------|
+| `application/pdf` | `.pdf` | Anthropic, OpenAI (Responses API), Gemini |
+
+---
+
+## Provider 特定说明
+
+### Anthropic
+
+- 支持图片和 PDF 文件
+- 使用 `files-api-2025-04-14` beta 进行文件上传
+- Base64 图片直接嵌入消息
+
+```typescript
+const provider = new AnthropicProvider(apiKey, model, baseUrl, proxyUrl, {
+  beta: {
+    filesApi: true,  // 启用 Files API
+  },
+  multimodal: {
+    mode: 'url+base64',
+  },
+});
+```
+
+### OpenAI
+
+- 图片：Chat Completions API 支持
+- PDF/文件：需要 Responses API（Provider 选项 `api: 'responses'`）
+
+```typescript
+const provider = new OpenAIProvider(apiKey, model, baseUrl, proxyUrl, {
+  api: 'responses',  // PDF 支持必需
+  multimodal: {
+    mode: 'url+base64',
+  },
+});
+```
+
+### Gemini
+
+- 支持图片和 PDF 文件
+- 不支持 GIF 格式
+- 使用 `mediaResolution` 选项控制图片质量
+
+```typescript
+const provider = new GeminiProvider(apiKey, model, baseUrl, proxyUrl, {
+  mediaResolution: 'high',  // 'low' | 'medium' | 'high'
+  multimodal: {
+    mode: 'url+base64',
+  },
+});
+```
+
+---
+
+## 最佳实践
+
+### 1. 使用适当的图片尺寸
+
+大图片会增加 token 使用量和延迟。发送前请调整图片大小：
+
+```typescript
+// 建议：保持图片在 1MB 以下以获得最佳性能
+const maxBytes = 1024 * 1024; // 1MB
+
+function validateImageSize(base64: string): boolean {
+  const bytes = Math.ceil(base64.length * 3 / 4);
+  return bytes <= maxBytes;
+}
+```
+
+### 2. 处理多模态上下文保留
+
+对于包含大量图片的长对话，配置保留策略以避免上下文溢出：
+
+```typescript
+const agent = await Agent.create({
+  templateId: 'vision-assistant',
+  multimodalRetention: { keepRecent: 2 },  // 仅保留最近 2 张图片
+  context: {
+    maxTokens: 100_000,
+    compressToTokens: 60_000,
+  },
+}, deps);
+```
+
+### 3. 验证 MIME 类型
+
+发送前始终验证 MIME 类型：
+
+```typescript
+const ALLOWED_IMAGE_TYPES = ['image/jpeg', 'image/png', 'image/webp'];
+
+function getImageMimeType(filename: string): string {
+  const ext = filename.toLowerCase().split('.').pop();
+  const mimeMap: Record<string, string> = {
+    jpg: 'image/jpeg',
+    jpeg: 'image/jpeg',
+    png: 'image/png',
+    webp: 'image/webp',
+  };
+  const mimeType = mimeMap[ext!];
+  if (!mimeType || !ALLOWED_IMAGE_TYPES.includes(mimeType)) {
+    throw new Error(`不支持的图片类型: ${ext}`);
+  }
+  return mimeType;
+}
+```
+
+---
+
+## 错误处理
+
+常见多模态错误：
+
+| 错误 | 原因 | 解决方案 |
+|------|------|----------|
+| `MultimodalValidationError: Base64 is not allowed` | `mode` 仅设置为 `'url'` | 设置 `mode: 'url+base64'` |
+| `MultimodalValidationError: base64 payload too large` | 超过 `maxBase64Bytes` | 调整图片大小或增加限制 |
+| `MultimodalValidationError: mime_type not allowed` | MIME 类型不在允许列表中 | 添加到 `allowMimeTypes` |
+| `MultimodalValidationError: Missing url/file_id/base64` | 未提供内容源 | 提供 `url`、`file_id` 或 `base64` |
+
+---
+
+## 完整示例
+
+```typescript
+import { Agent, AnthropicProvider, JSONStore, ContentBlock } from '@shareai-lab/kode-sdk';
+import * as fs from 'fs';
+
+async function analyzeImage() {
+  const provider = new AnthropicProvider(
+    process.env.ANTHROPIC_API_KEY!,
+    'claude-sonnet-4-20250514',
+    undefined,
+    undefined,
+    {
+      multimodal: {
+        mode: 'url+base64',
+        maxBase64Bytes: 10_000_000,
+      },
+    }
+  );
+
+  const store = new JSONStore('./.kode');
+
+  const agent = await Agent.create({
+    templateId: 'vision-assistant',
+    multimodalContinuation: 'history',
+    multimodalRetention: { keepRecent: 3 },
+  }, {
+    store,
+    templateRegistry,
+    toolRegistry,
+    sandboxFactory,
+    modelFactory: () => provider,
+  });
+
+  // 读取并发送图片
+  const imageBuffer = fs.readFileSync('./photo.jpg');
+  const base64 = imageBuffer.toString('base64');
+
+  const content: ContentBlock[] = [
+    { type: 'text', text: '这张照片中有哪些物体？' },
+    { type: 'image', base64, mime_type: 'image/jpeg' }
+  ];
+
+  const progressTask = (async () => {  // 先在后台订阅，再发送消息，避免循环阻塞 send
+    for await (const envelope of agent.subscribe(['progress'])) {
+      if (envelope.event.type === 'text_chunk') process.stdout.write(envelope.event.delta);
+      if (envelope.event.type === 'done') break;
+    }
+  })();
+  await agent.send(content);
+  await progressTask;
+}
+```
+
+---
+
+## 参考资料
+
+- [Provider 指南](./providers.md) - Provider 特定配置
+- [事件指南](./events.md) - Progress 事件处理
+- [API 参考](../reference/api.md) - ContentBlock 类型
diff --git a/docs/zh-CN/guides/providers.md b/docs/zh-CN/guides/providers.md
new file mode 100644
index 0000000..454819c
--- /dev/null
+++ b/docs/zh-CN/guides/providers.md
@@ -0,0 +1,373 @@
+# Provider 配置指南
+
+KODE SDK 提供三个内置 Provider 实现，支持所有符合对应 API 协议的模型服务。
+
+---
+
+## 内置 Provider
+
+| Provider | API 协议 | 兼容服务 |
+|----------|----------|----------|
+| `AnthropicProvider` | Anthropic Messages API | Anthropic、兼容服务 |
+| `OpenAIProvider` | OpenAI Chat/Responses API | OpenAI、DeepSeek、GLM、Qwen、Minimax、OpenRouter 等 |
+| `GeminiProvider` | Google Generative AI API | Google Gemini |
+
+> **说明**：只要服务的 API 协议兼容，即可使用对应的 Provider。例如 DeepSeek、GLM、Qwen 等都使用 OpenAI 兼容 API，可通过 `OpenAIProvider` 配置 `baseURL` 使用。
+
+---
+
+## 环境变量配置
+
+<!-- tabs:start -->
+#### **Linux / macOS**
+```bash
+export ANTHROPIC_API_KEY=sk-ant-...
+export ANTHROPIC_BASE_URL=https://api.anthropic.com  # 可选
+export OPENAI_API_KEY=sk-...
+export OPENAI_BASE_URL=https://api.openai.com/v1  # 可选
+export GOOGLE_API_KEY=...
+```
+
+#### **Windows (PowerShell)**
+```powershell
+$env:ANTHROPIC_API_KEY="sk-ant-..."
+$env:ANTHROPIC_BASE_URL="https://api.anthropic.com"  # 可选
+$env:OPENAI_API_KEY="sk-..."
+$env:OPENAI_BASE_URL="https://api.openai.com/v1"  # 可选
+$env:GOOGLE_API_KEY="..."
+```
+<!-- tabs:end -->
+
+---
+
+## AnthropicProvider
+
+用于 Anthropic Claude 系列模型及兼容 Anthropic API 的服务。
+
+### 基本配置
+
+```typescript
+import { AnthropicProvider } from '@shareai-lab/kode-sdk';
+
+const provider = new AnthropicProvider(
+  process.env.ANTHROPIC_API_KEY!,
+  'claude-sonnet-4-5-20250929',  // 任意支持的模型 ID
+  process.env.ANTHROPIC_BASE_URL  // 可选，默认 https://api.anthropic.com
+);
+```
+
+### 启用扩展思维
+
+```typescript
+const provider = new AnthropicProvider(
+  process.env.ANTHROPIC_API_KEY!,
+  'claude-sonnet-4-5-20250929',
+  undefined,
+  undefined,
+  {
+    extraBody: {
+      thinking: {
+        type: 'enabled',
+        budget_tokens: 10000,  // 最小 1024
+      },
+    },
+  }
+);
+```
+
+### 启用缓存
+
+```typescript
+const provider = new AnthropicProvider(
+  process.env.ANTHROPIC_API_KEY!,
+  'claude-sonnet-4-5-20250929',
+  undefined,
+  undefined,
+  {
+    cache: {
+      breakpoints: 4,  // 1-4 个缓存断点
+      defaultTtl: '1h', // '5m' 或 '1h'
+    },
+    beta: {
+      extendedCacheTtl: true,
+    },
+  }
+);
+```
+
+### 示例模型
+
+以下为常用模型示例，实际支持所有 Anthropic API 兼容的模型：
+
+| 模型 | 说明 |
+|------|------|
+| `claude-sonnet-4-5-20250929` | Claude 4.5 Sonnet（推荐） |
+| `claude-opus-4-5-20251101` | Claude 4.5 Opus |
+| `claude-haiku-4-5-20251015` | Claude 4.5 Haiku（快速低成本） |
+
+---
+
+## OpenAIProvider
+
+用于 OpenAI 及所有兼容 OpenAI API 的服务（DeepSeek、GLM、Qwen、Minimax、OpenRouter 等）。
+
+### 基本配置
+
+```typescript
+import { OpenAIProvider } from '@shareai-lab/kode-sdk';
+
+// OpenAI 官方
+const provider = new OpenAIProvider(
+  process.env.OPENAI_API_KEY!,
+  'gpt-5-2025-08-07',  // 任意支持的模型 ID
+  process.env.OPENAI_BASE_URL  // 可选，默认 https://api.openai.com/v1
+);
+```
+
+### 使用 DeepSeek
+
+```typescript
+const provider = new OpenAIProvider(
+  process.env.DEEPSEEK_API_KEY!,
+  'deepseek-chat',
+  'https://api.deepseek.com/v1'
+);
+
+// DeepSeek 推理模型
+const reasonerProvider = new OpenAIProvider(
+  process.env.DEEPSEEK_API_KEY!,
+  'deepseek-reasoner',
+  'https://api.deepseek.com/v1',
+  undefined,
+  {
+    reasoning: {
+      fieldName: 'reasoning_content',
+      stripFromHistory: true,
+    },
+  }
+);
+```
+
+### 使用 GLM (智谱)
+
+```typescript
+const provider = new OpenAIProvider(
+  process.env.GLM_API_KEY!,
+  'glm-4-plus',
+  'https://open.bigmodel.cn/api/paas/v4'
+);
+```
+
+### 使用 Qwen (通义千问)
+
+```typescript
+const provider = new OpenAIProvider(
+  process.env.QWEN_API_KEY!,
+  'qwen-plus',
+  'https://dashscope.aliyuncs.com/compatible-mode/v1'
+);
+```
+
+### 使用 Minimax
+
+```typescript
+const provider = new OpenAIProvider(
+  process.env.MINIMAX_API_KEY!,
+  'abab6.5s-chat',
+  'https://api.minimax.chat/v1'
+);
+```
+
+### 使用 OpenRouter
+
+```typescript
+const provider = new OpenAIProvider(
+  process.env.OPENROUTER_API_KEY!,
+  'anthropic/claude-sonnet-4.5',  // OpenRouter 模型格式
+  'https://openrouter.ai/api/v1'
+);
+```
+
+### 启用推理 (o4 模型)
+
+```typescript
+const provider = new OpenAIProvider(
+  process.env.OPENAI_API_KEY!,
+  'o4-mini',
+  undefined,
+  undefined,
+  {
+    api: 'responses',
+    responses: {
+      reasoning: {
+        effort: 'medium',  // 'none' | 'minimal' | 'low' | 'medium' | 'high' | 'xhigh'
+      },
+    },
+  }
+);
+```
+
+### 示例模型
+
+以下为常用模型示例，实际支持所有 OpenAI API 兼容的模型：
+
+| 服务 | 模型示例 |
+|------|----------|
+| OpenAI | `gpt-5.2-pro-2025-12-11`, `gpt-5-2025-08-07`, `o4-mini-2025-04-16` |
+| DeepSeek | `deepseek-chat`, `deepseek-reasoner` |
+| GLM | `glm-4-plus`, `glm-4-flash` |
+| Qwen | `qwen-plus`, `qwen-turbo` |
+| OpenRouter | `anthropic/claude-sonnet-4.5`, `openai/gpt-5` |
+
+---
+
+## GeminiProvider
+
+用于 Google Gemini 系列模型。
+
+### 基本配置
+
+```typescript
+import { GeminiProvider } from '@shareai-lab/kode-sdk';
+
+const provider = new GeminiProvider(
+  process.env.GOOGLE_API_KEY!,
+  'gemini-3-flash'  // 任意支持的模型 ID
+);
+```
+
+### 启用 Thinking
+
+```typescript
+const provider = new GeminiProvider(
+  process.env.GOOGLE_API_KEY!,
+  'gemini-2.5-pro',
+  undefined,
+  undefined,
+  {
+    thinking: {
+      level: 'medium',  // 'minimal' | 'low' | 'medium' | 'high'
+      includeThoughts: true,
+    },
+  }
+);
+```
+
+### 示例模型
+
+以下为常用模型示例，实际支持所有 Gemini API 兼容的模型：
+
+| 模型 | 说明 |
+|------|------|
+| `gemini-3-flash` | Gemini 3 Flash（最新，推荐） |
+| `gemini-2.5-pro` | Gemini 2.5 Pro（稳定版，支持 thinking） |
+| `gemini-2.5-flash` | Gemini 2.5 Flash（稳定版） |
+
+---
+
+## 与 Agent 配合使用
+
+### Provider 工厂模式
+
+```typescript
+import { Agent, AnthropicProvider } from '@shareai-lab/kode-sdk';
+
+const agent = await Agent.create(
+  {
+    templateId: 'default',
+    sandbox: { kind: 'local', workDir: './workspace' },
+  },
+  {
+    store,
+    templateRegistry,
+    toolRegistry,
+    sandboxFactory,
+    modelFactory: () => new AnthropicProvider(
+      process.env.ANTHROPIC_API_KEY!,
+      process.env.ANTHROPIC_MODEL_ID ?? 'claude-sonnet-4-5-20250929'
+    ),
+  }
+);
+```
+
+### 动态 Provider 选择
+
+```typescript
+function createProvider(providerName: string) {
+  switch (providerName) {
+    case 'anthropic':
+      return new AnthropicProvider(
+        process.env.ANTHROPIC_API_KEY!,
+        process.env.ANTHROPIC_MODEL_ID ?? 'claude-sonnet-4-5-20250929'
+      );
+    case 'openai':
+      return new OpenAIProvider(
+        process.env.OPENAI_API_KEY!,
+        process.env.OPENAI_MODEL_ID ?? 'gpt-5-2025-08-07'
+      );
+    case 'deepseek':
+      return new OpenAIProvider(
+        process.env.DEEPSEEK_API_KEY!,
+        'deepseek-chat',
+        'https://api.deepseek.com/v1'
+      );
+    case 'gemini':
+      return new GeminiProvider(
+        process.env.GOOGLE_API_KEY!,
+        process.env.GEMINI_MODEL_ID ?? 'gemini-3-flash'
+      );
+    default:
+      throw new Error(`未知 provider: ${providerName}`);
+  }
+}
+```
+
+---
+
+## 代理配置
+
+所有 Provider 都支持代理配置：
+
+```typescript
+const provider = new AnthropicProvider(
+  process.env.ANTHROPIC_API_KEY!,
+  'claude-sonnet-4-5-20250929',
+  undefined,  // baseUrl
+  process.env.HTTPS_PROXY  // proxyUrl
+);
+```
+
+---
+
+## 错误处理
+
+```typescript
+try {
+  await agent.send('你好');
+} catch (error) {
+  if (error.message.includes('rate limit')) {
+    // 速率限制，等待后重试
+  } else if (error.message.includes('authentication')) {
+    // API 密钥无效
+  }
+}
+```
+
+---
+
+## 最佳实践
+
+1. **使用环境变量** 存储 API 密钥和 baseURL
+2. **设置合理的超时时间** 根据预期响应时间
+3. **启用缓存** 用于重复提示词（Anthropic、Gemini）
+4. **处理速率限制** 使用指数退避
+
+---
+
+## 参考资料
+
+- [Anthropic API 文档](https://docs.anthropic.com/)
+- [OpenAI API 文档](https://platform.openai.com/docs/)
+- [Google AI 文档](https://ai.google.dev/docs)
+- [DeepSeek API 文档](https://platform.deepseek.com/docs)
+- [OpenRouter 文档](https://openrouter.ai/docs)
diff --git a/docs/zh-CN/guides/resume-fork.md b/docs/zh-CN/guides/resume-fork.md
new file mode 100644
index 0000000..aeb6412
--- /dev/null
+++ b/docs/zh-CN/guides/resume-fork.md
@@ -0,0 +1,239 @@
+# Resume / Fork 指南
+
+长时运行的 Agent 必须具备"随时恢复、可分叉、可审计"的能力。KODE SDK 在内核层实现了统一的持久化协议（消息、工具调用、Todo、事件、断点、Lineage）。
+
+---
+
+## 关键概念
+
+| 概念 | 说明 |
+|------|------|
+| **Metadata** | 序列化模板、工具描述符、权限、Todo、沙箱配置、断点、lineage 等信息 |
+| **Safe-Fork-Point (SFP)** | 每次用户消息或工具结果都会形成可恢复节点，用于 snapshot/fork |
+| **BreakpointState** | 标记当前执行阶段（`READY` → `PRE_MODEL` → ... → `POST_TOOL`） |
+| **Auto-Seal** | 当崩溃发生在工具执行阶段，Resume 会自动封口并落下 `tool_result` |
+
+---
+
+## Resume 方式
+
+### 方式一：显式配置
+
+```typescript
+import { Agent } from '@shareai-lab/kode-sdk';
+
+const agent = await Agent.resume('agt-demo', {
+  templateId: 'repo-assistant',
+  modelConfig: {
+    provider: 'anthropic',
+    model: process.env.ANTHROPIC_MODEL_ID ?? 'claude-sonnet-4-20250514',
+    apiKey: process.env.ANTHROPIC_API_KEY!,
+  },
+  sandbox: { kind: 'local', workDir: './workspace', enforceBoundary: true },
+}, deps, {
+  strategy: 'crash',  // 自动封口未完成工具
+  autoRun: true,      // 恢复后继续处理队列
+});
+```
+
+### 方式二：从 Store 恢复（推荐）
+
+```typescript
+const agent = await Agent.resumeFromStore('agt-demo', deps, {
+  overrides: {
+    modelConfig: {
+      provider: 'anthropic',
+      model: process.env.ANTHROPIC_MODEL_ID ?? 'claude-sonnet-4-20250514',
+      apiKey: process.env.ANTHROPIC_API_KEY!,
+    },
+  },
+});
+```
+
+### Resume 选项
+
+| 选项 | 取值 | 说明 |
+|------|------|------|
+| `strategy` | `'manual'` \| `'crash'` | `crash` 会自动封口未完成工具 |
+| `autoRun` | `boolean` | 恢复后立即继续处理消息队列 |
+| `overrides` | `Partial<AgentConfig>` | 对 metadata 进行覆盖（模型升级、权限调整等） |
+
+> **重要**：Resume 后**必须**重新绑定事件监听（Control/Monitor 回调不会自动恢复）。
+
+---
+
+## SDK vs 业务方的职责分界
+
+| 能力 | SDK | 业务方 |
+|------|-----|--------|
+| 模板、工具、沙箱恢复 | 自动重建 | 无需处理 |
+| 消息、工具记录、Todo、Lineage | 自动加载 | 无需处理 |
+| FilePool 监听 | 自动恢复 | 无需处理 |
+| Hooks | 自动重新注册 | 无需处理 |
+| Control/Monitor 监听 | 不处理 | Resume 后需重新绑定 |
+| 审批流程、告警 | 不处理 | 结合业务系统处理 |
+| 依赖单例管理 | 不处理 | 确保 `store`/`registry` 全局复用 |
+
+---
+
+## 快照与分叉
+
+### 创建快照
+
+```typescript
+// 在当前点创建快照
+const bookmarkId = await agent.snapshot('pre-release-audit');
+```
+
+### 分叉 Agent
+
+```typescript
+// 从快照分叉
+const forked = await agent.fork(bookmarkId);
+
+// 从最新点分叉
+const forked2 = await agent.fork();
+
+// 使用分叉的 Agent
+await forked.send('这是一个基于原对话分叉出的新任务。');
+```
+
+- `snapshot(label?)` 返回 `SnapshotId`（默认为 `sfp-{index}`）
+- `fork(sel?)` 创建新 Agent：继承工具/权限/lineage，把消息复制到新 Store 命名空间
+- 分叉后的 Agent 需要独立绑定事件
+
+---
+
+## 自动封口机制
+
+当崩溃发生在以下阶段，Resume 会自动写入补偿性的 `tool_result`：
+
+| 阶段 | 封口信息 | 推荐处理 |
+|------|---------|---------|
+| `PENDING` | 工具尚未执行 | 验证参数后重新触发 |
+| `APPROVAL_REQUIRED` | 等待审批 | 再次触发审批或手动完成 |
+| `APPROVED` | 准备执行 | 确认输入仍然有效后重试 |
+| `EXECUTING` | 执行中断 | 检查副作用，必要时人工确认 |
+
+封口会触发：
+
+- `monitor.agent_resumed`：包含 `sealed` 列表与 `strategy`
+- `progress.tool:end`：补上一条失败的 `tool_result`，附带 `recommendations`
+
+---
+
+## Resume 后重新绑定事件
+
+```typescript
+const agent = await Agent.resumeFromStore('agt-demo', deps);
+
+// 重新绑定 Control/Monitor 事件监听
+agent.on('tool_executed', (event) => {
+  console.log('工具执行:', event.call.name);
+});
+
+agent.on('error', (event) => {
+  console.error('错误:', event.message);
+});
+
+agent.on('permission_required', async (event) => {
+  await event.respond('allow');
+});
+
+// 对于 Progress 事件，使用 subscribe()
+const progressSubscription = (async () => {
+  for await (const envelope of agent.subscribe(['progress'])) {
+    if (envelope.event.type === 'text_chunk') {
+      process.stdout.write(envelope.event.delta);
+    }
+    if (envelope.event.type === 'done') break;
+  }
+})();
+
+// 继续处理
+await agent.run();
+await progressSubscription;
+```
+
+---
+
+## 多实例 / Serverless 最佳实践
+
+1. **依赖单例**：在模块级创建 `AgentDependencies`，避免多个实例写入同一 Store 目录
+
+2. **事件重绑**：每次 `resume` 后立刻绑定事件
+
+3. **并发控制**：同一个 AgentId 最好只在单实例中运行，可通过外部锁或队列保证
+
+4. **持久化目录**：`JSONStore` 适用于单机/共享磁盘环境。分布式部署请实现自定义 Store（如 S3 + DynamoDB）
+
+5. **可观测性**：监听 `monitor.state_changed` 与 `monitor.error`，在异常时迅速定位
+
+---
+
+## 故障排查
+
+| 现象 | 排查方向 |
+|------|---------|
+| Resume 报 `AGENT_NOT_FOUND` | Store 目录缺失或未持久化。确认 `store.baseDir` 是否正确挂载 |
+| Resume 报 `TEMPLATE_NOT_FOUND` | 启动时未注册模板；确保模板 ID 与 metadata 中一致 |
+| 工具缺失 | ToolRegistry 未注册对应名称；内置工具需手动注册 |
+| FilePool 未恢复 | 自定义 Sandbox 未实现 `watchFiles`；可关闭 watch 或补齐实现 |
+| 事件监听失效 | Resume 后未重新调用 `agent.on(...)` 绑定 |
+
+---
+
+## 完整 Resume 示例
+
+```typescript
+import { Agent, createExtendedStore } from '@shareai-lab/kode-sdk';
+
+async function resumeAgent(agentId: string) {
+  const store = await createExtendedStore();
+  const deps = createDependencies({ store });
+
+  // 检查 Agent 是否存在
+  const exists = await store.exists(agentId);
+  if (!exists) {
+    throw new Error(`Agent ${agentId} 不存在`);
+  }
+
+  // 从 store 恢复
+  const agent = await Agent.resumeFromStore(agentId, deps, {
+    strategy: 'crash',
+    autoRun: false,
+  });
+
+  // 重新绑定 Monitor 事件监听（on() 仅支持 Control/Monitor 事件）
+  agent.on('tool_executed', (e) => console.log('工具:', e.call.name));
+  agent.on('agent_resumed', (e) => {
+    if (e.sealed.length > 0) {
+      console.log('自动封口的工具:', e.sealed);
+    }
+  });
+  agent.on('error', (e) => console.error('错误:', e.message));
+
+  // 对于 Progress 事件，使用 subscribe()
+  const progressTask = (async () => {
+    for await (const env of agent.subscribe(['progress'])) {
+      if (env.event.type === 'text_chunk') {
+        process.stdout.write(env.event.delta);
+      }
+      if (env.event.type === 'done') break;
+    }
+  })();
+
+  // 继续处理
+  await agent.run();
+  await progressTask;
+  return agent;
+}
+```
+
+---
+
+## 参考资料
+
+- [事件系统指南](./events.md)
+- [错误处理指南](./error-handling.md)
+- [数据库指南](./database.md)
diff --git a/docs/zh-CN/guides/skills.md b/docs/zh-CN/guides/skills.md
new file mode 100644
index 0000000..58f1eb2
--- /dev/null
+++ b/docs/zh-CN/guides/skills.md
@@ -0,0 +1,329 @@
+# Skills 系统指南
+
+KODE SDK 提供完整的 Skills 系统，支持模块化、可重用的能力单元，使 Agent 能够动态加载和执行特定技能。
+
+---
+
+## 核心特性
+
+| 特性 | 说明 |
+|------|------|
+| **热更新** | Skills 代码修改后自动重新加载 |
+| **元数据注入** | 自动将技能描述注入到系统提示 |
+| **沙箱隔离** | 每个技能有独立的文件系统空间 |
+| **白名单机制** | 选择性加载特定技能 |
+
+---
+
+## 目录结构
+
+```
+skills/
+├── skill-name/              # 技能目录
+│   ├── SKILL.md            # 技能定义（必需）
+│   ├── metadata.json       # 技能元数据（可选）
+│   ├── references/         # 参考资料
+│   ├── scripts/            # 可执行脚本
+│   └── assets/             # 静态资源
+└── .archived/              # 已归档技能
+    └── archived-skill/
+```
+
+### SKILL.md 格式
+
+```markdown
+<!-- skill: skill-name -->
+<!-- version: 1.0.0 -->
+<!-- author: Your Name -->
+
+# 技能名称
+
+简短描述技能的功能。
+
+## 使用场景
+
+- 场景1
+- 场景2
+
+## 使用指南
+
+使用此技能的详细说明...
+```
+
+### metadata.json 格式
+
+```json
+{
+  "name": "skill-name",
+  "description": "技能描述",
+  "version": "1.0.0",
+  "author": "作者",
+  "baseDir": "/path/to/skill"
+}
+```
+
+---
+
+## 环境变量配置
+
+<!-- tabs:start -->
+#### **Linux / macOS**
+```bash
+export SKILLS_DIR=/path/to/skills
+```
+
+#### **Windows (PowerShell)**
+```powershell
+$env:SKILLS_DIR="/path/to/skills"
+```
+
+#### **Windows (CMD)**
+```cmd
+set SKILLS_DIR=/path/to/skills
+```
+<!-- tabs:end -->
+
+---
+
+## SkillsManager（Agent 运行时）
+
+SkillsManager 是 Agent 在运行时使用的技能管理器，支持热更新和动态加载。
+
+### 基本用法
+
+```typescript
+import { SkillsManager } from '@shareai-lab/kode-sdk';
+
+// 创建 Skills 管理器
+const skillsManager = new SkillsManager(
+  './skills',           // 技能目录路径
+  ['skill1', 'skill2']  // 可选：白名单
+);
+
+// 扫描所有技能
+const skills = await skillsManager.getSkillsMetadata();
+console.log(`Found ${skills.length} skills`);
+
+// 加载特定技能内容
+const skillContent = await skillsManager.loadSkillContent('skill-name');
+if (skillContent) {
+  console.log('Metadata:', skillContent.metadata);
+  console.log('Content:', skillContent.content);
+  console.log('References:', skillContent.references);
+  console.log('Scripts:', skillContent.scripts);
+}
+```
+
+### 热更新机制
+
+SkillsManager 每次调用都会重新扫描文件系统，确保数据最新：
+
+```typescript
+await skillsManager.getSkillsMetadata();  // 扫描1
+// ... 修改文件 ...
+await skillsManager.getSkillsMetadata();  // 扫描2，获取最新数据
+```
+
+### 白名单过滤
+
+通过白名单机制，可以限制 Agent 只加载特定技能：
+
+```typescript
+// 只加载白名单中的技能
+const manager = new SkillsManager('./skills', ['allowed-skill-1', 'allowed-skill-2']);
+const skills = await manager.getSkillsMetadata();
+// 只返回白名单中的技能
+```
+
+---
+
+## SkillsManagementManager（CRUD 操作）
+
+SkillsManagementManager 提供技能的 CRUD 操作，包括创建、更新、归档等。
+
+### 基本操作
+
+```typescript
+import { SkillsManagementManager } from '@shareai-lab/kode-sdk';
+
+const manager = new SkillsManagementManager('./skills');
+
+// 列出所有在线技能
+const skills = await manager.listSkills();
+
+// 获取技能详细信息
+const skillDetail = await manager.getSkillInfo('skill-name');
+
+// 创建新技能
+await manager.createSkill('new-skill', {
+  description: '新技能描述',
+  content: '# 新技能\n\n详细内容...'
+});
+
+// 更新技能
+await manager.updateSkill('skill-name', {
+  content: '# 更新后的内容'
+});
+
+// 删除技能（移动到归档）
+await manager.deleteSkill('skill-name');
+
+// 列出已归档技能
+const archived = await manager.listArchivedSkills();
+
+// 恢复已归档技能
+await manager.restoreSkill('archived-skill');
+```
+
+### 文件操作
+
+```typescript
+// 获取技能文件树
+const files = await manager.getSkillFileTree('skill-name');
+
+// 读取技能文件
+const content = await manager.readSkillFile('skill-name', 'SKILL.md');
+
+// 写入技能文件
+await manager.writeSkillFile('skill-name', 'references/doc.md', '内容');
+
+// 删除技能文件
+await manager.deleteSkillFile('skill-name', 'references/old-doc.md');
+
+// 上传文件到技能目录
+await manager.uploadSkillFile('skill-name', 'assets/image.png', fileBuffer);
+```
+
+---
+
+## Agent 集成
+
+### 注册 Skills 工具
+
+```typescript
+import { Agent, createSkillsTool, SkillsManager } from '@shareai-lab/kode-sdk';
+
+const deps = createDependencies();
+
+// 创建 Skills 管理器
+const skillsManager = new SkillsManager('./skills');
+
+// 注册 Skills 工具
+const skillsTool = createSkillsTool(skillsManager);
+deps.toolRegistry.register('skills', () => skillsTool);
+
+// 创建 Agent
+const agent = await Agent.create({
+  templateId: 'my-agent',
+  tools: ['skills', 'fs_read', 'fs_write'],
+}, deps);
+```
+
+### Skills 工具使用
+
+Agent 可以通过 `skills` 工具动态加载技能：
+
+```
+用户: 我需要处理代码格式化
+
+Agent: 我来加载代码格式化技能。
+
+[调用 skills 工具，action=load, skill_name=code-formatter]
+
+Agent: 已加载代码格式化技能。现在我可以帮你格式化代码了。
+```
+
+---
+
+## 最佳实践
+
+### 1. 技能设计原则
+
+- **单一职责**：每个技能只做一件事
+- **可组合**：技能之间可以互相调用
+- **文档完整**：提供清晰的使用说明
+- **版本控制**：使用语义化版本号
+
+### 2. 白名单管理
+
+```typescript
+// 生产环境使用白名单
+const allowedSkills = ['safe-skill-1', 'safe-skill-2'];
+const manager = new SkillsManager('./skills', allowedSkills);
+
+// 开发环境加载所有技能
+const devManager = new SkillsManager('./skills');
+```
+
+### 3. 错误处理
+
+```typescript
+const content = await skillsManager.loadSkillContent('skill-name');
+if (!content) {
+  console.error('技能未找到或加载失败');
+  // 降级处理
+}
+```
+
+---
+
+## 监控
+
+### Monitor 事件
+
+```typescript
+// 监听技能工具调用
+agent.on('tool_executed', (event) => {
+  if (event.call.name === 'skills') {
+    console.log('加载技能:', event.call.input.skill_name);
+  }
+});
+
+// 监听工具说明书更新
+agent.on('tool_manual_updated', (event) => {
+  console.log('工具说明书更新:', event.tools);
+});
+```
+
+---
+
+## 故障排除
+
+### 常见问题
+
+**技能未找到**
+- 检查技能目录路径是否正确
+- 确认 SKILL.md 文件存在
+- 检查白名单配置
+
+**热更新不生效**
+- 确认文件保存成功
+- 检查文件系统权限
+- 查看日志确认扫描时间
+
+**沙箱权限错误**
+- 检查沙箱工作目录配置
+- 确认文件路径在允许范围内
+- 查看沙箱日志
+
+### 调试技巧
+
+```typescript
+// 启用详细日志
+process.env.LOG_LEVEL = 'debug';
+
+// 检查技能元数据
+console.log(JSON.stringify(skills, null, 2));
+
+// 验证技能目录
+const fs = require('fs');
+console.log(fs.readdirSync('./skills'));
+```
+
+---
+
+## 参考资料
+
+- [工具系统指南](./tools.md)
+- [事件系统指南](./events.md)
+- [API 参考](../reference/api.md)
diff --git a/docs/zh-CN/guides/thinking.md b/docs/zh-CN/guides/thinking.md
new file mode 100644
index 0000000..ad211e9
--- /dev/null
+++ b/docs/zh-CN/guides/thinking.md
@@ -0,0 +1,463 @@
+# 扩展思维指南
+
+KODE SDK 支持各种 LLM Provider 的扩展思维（也称为推理或思维链）功能。本指南介绍如何启用、配置和使用思维功能，包括交错思维。
+
+---
+
+## 概述
+
+扩展思维允许模型在提供最终答案之前逐步"思考"复杂问题。不同的 Provider 实现方式不同：
+
+| Provider | 功能名称 | 实现方式 |
+|----------|----------|----------|
+| Anthropic | Extended Thinking | `thinking` 块 + budget tokens |
+| OpenAI | Reasoning | `reasoning_effort` 参数 |
+| Gemini | Thinking | `thinkingLevel` 参数 |
+| DeepSeek | Deep Think | `reasoning_content` 字段 |
+| GLM | Thinking | `reasoning_content` 字段 |
+| Minimax | Reasoning | `reasoning_details` 字段 |
+
+---
+
+## Agent 配置
+
+### 启用思维暴露
+
+创建 Agent 时配置思维暴露：
+
+```typescript
+const agent = await Agent.create({
+  templateId: 'reasoning-assistant',
+  // 将思维事件暴露到 Progress 通道
+  exposeThinking: true,
+  // 在消息历史中保留思维块
+  retainThinking: true,
+}, deps);
+```
+
+| 选项 | 类型 | 默认值 | 描述 |
+|------|------|--------|------|
+| `exposeThinking` | `boolean` | `false` | 发出 `think_chunk_start`、`think_chunk`、`think_chunk_end` 事件 |
+| `retainThinking` | `boolean` | `false` | 在消息历史中持久化推理块 |
+
+---
+
+## Provider 配置
+
+### Anthropic 扩展思维
+
+```typescript
+const provider = new AnthropicProvider(
+  process.env.ANTHROPIC_API_KEY!,
+  'claude-sonnet-4-20250514',
+  undefined,
+  undefined,
+  {
+    // 启用扩展思维
+    extraBody: {
+      thinking: {
+        type: 'enabled',
+        budget_tokens: 10000,  // 最小 1024
+      },
+    },
+    // 如何在历史中传输推理
+    reasoningTransport: 'provider',  // 'provider' | 'text' | 'omit'
+    // 启用交错思维 beta
+    beta: {
+      interleavedThinking: true,  // interleaved-thinking-2025-05-14
+    },
+  }
+);
+```
+
+### OpenAI Reasoning
+
+```typescript
+const provider = new OpenAIProvider(
+  process.env.OPENAI_API_KEY!,
+  'o3-mini',
+  undefined,
+  undefined,
+  {
+    api: 'responses',  // Responses API 用于推理
+    responses: {
+      reasoning: {
+        effort: 'medium',  // 'none' | 'minimal' | 'low' | 'medium' | 'high' | 'xhigh'
+      },
+    },
+    reasoningTransport: 'text',
+  }
+);
+```
+
+### Gemini Thinking
+
+```typescript
+const provider = new GeminiProvider(
+  process.env.GOOGLE_API_KEY!,
+  'gemini-2.5-pro',
+  undefined,
+  undefined,
+  {
+    thinking: {
+      level: 'medium',  // 'minimal' | 'low' | 'medium' | 'high'
+      includeThoughts: true,
+    },
+    reasoningTransport: 'text',
+  }
+);
+```
+
+### DeepSeek / GLM / Qwen
+
+这些 Provider 使用 OpenAI 兼容 API 并带有自定义推理字段：
+
+```typescript
+// DeepSeek
+const provider = new OpenAIProvider(
+  process.env.DEEPSEEK_API_KEY!,
+  'deepseek-reasoner',
+  'https://api.deepseek.com/v1',
+  undefined,
+  {
+    reasoning: {
+      fieldName: 'reasoning_content',
+      stripFromHistory: true,  // DeepSeek 必需
+    },
+    reasoningTransport: 'text',
+  }
+);
+
+// GLM
+const glmProvider = new OpenAIProvider(
+  process.env.GLM_API_KEY!,
+  'glm-zero-preview',
+  process.env.GLM_BASE_URL!,
+  undefined,
+  {
+    reasoning: {
+      fieldName: 'reasoning_content',
+      requestParams: {
+        thinking: { type: 'enabled', clear_thinking: false },
+      },
+    },
+    reasoningTransport: 'provider',
+  }
+);
+```
+
+---
+
+## 推理传输
+
+`reasoningTransport` 选项控制思维内容在消息历史中的处理方式：
+
+| 值 | 行为 | 使用场景 |
+|----|------|----------|
+| `'provider'` | 保持为原生 `reasoning` 块 | 完整思维保留，多轮连续性 |
+| `'text'` | 包装在 `<think></think>` 标签中 | 跨 Provider 兼容性 |
+| `'omit'` | 从历史中移除 | 节省 token，隐私保护 |
+
+```typescript
+// Provider 原生格式
+const providerConfig = {
+  reasoningTransport: 'provider',  // { type: 'reasoning', reasoning: '...' }
+};
+
+// 文本格式
+const textConfig = {
+  reasoningTransport: 'text',  // { type: 'text', text: '<think>...</think>' }
+};
+
+// 从历史中省略
+const omitConfig = {
+  reasoningTransport: 'omit',  // 思维块被移除
+};
+```
+
+---
+
+## 交错思维
+
+交错思维允许模型在工具调用之间进行思考，实现更复杂的推理：
+
+```
+用户: 搜索 X，然后总结
+模型: <thinking> 让我先搜索 X... </thinking>
+模型: [tool_use: search_tool]
+[tool_result]
+模型: <thinking> 得到结果了，现在我应该总结... </thinking>
+模型: [tool_use: summarize_tool]
+[tool_result]
+模型: <thinking> 综合所有内容... </thinking>
+模型: 这是总结...
+```
+
+### 启用交错思维
+
+```typescript
+// Anthropic 交错思维
+const provider = new AnthropicProvider(
+  process.env.ANTHROPIC_API_KEY!,
+  'claude-sonnet-4-20250514',
+  undefined,
+  undefined,
+  {
+    extraBody: {
+      thinking: { type: 'enabled', budget_tokens: 10000 },
+    },
+    beta: {
+      interleavedThinking: true,
+    },
+    reasoningTransport: 'provider',
+  }
+);
+
+const agent = await Agent.create({
+  templateId: 'reasoning-agent',
+  exposeThinking: true,
+  retainThinking: true,
+}, deps);
+```
+
+---
+
+## 思维事件
+
+当 `exposeThinking: true` 时，思维事件会发送到 Progress 通道：
+
+```typescript
+progressLoop: for await (const envelope of agent.subscribe(['progress'])) {
+  switch (envelope.event.type) {
+    case 'think_chunk_start':
+      // 思维块开始
+      console.log('[思考中...]');
+      break;
+
+    case 'think_chunk':
+      // 思维内容增量
+      process.stdout.write(envelope.event.delta);
+      break;
+
+    case 'think_chunk_end':
+      // 思维块结束
+      console.log('[/思考]');
+      break;
+
+    case 'tool:start':
+      console.log(`[工具: ${envelope.event.call.name}]`);
+      break;
+
+    case 'text_chunk':
+      process.stdout.write(envelope.event.delta);
+      break;
+
+    case 'done':
+      break progressLoop;  // 退出 for await 循环（普通 break 只会跳出 switch）
+  }
+}
+```
+
+### 事件序列
+
+典型的交错思维序列：
+
+```
+think_chunk_start -> think_chunk (x N) -> think_chunk_end
+  -> tool:start -> tool:end
+think_chunk_start -> think_chunk (x N) -> think_chunk_end
+  -> tool:start -> tool:end
+think_chunk_start -> think_chunk (x N) -> think_chunk_end
+  -> text_chunk_start -> text_chunk (x N) -> text_chunk_end
+  -> done
+```
+
+---
+
+## ThinkingOptions
+
+通过 `CompletionOptions.thinking` 配置思维：
+
+```typescript
+interface ThinkingOptions {
+  enabled?: boolean;          // 启用思维模式
+  budgetTokens?: number;      // Token 预算（Anthropic, Gemini 2.5）
+  effort?: 'none' | 'minimal' | 'low' | 'medium' | 'high' | 'xhigh';  // OpenAI
+  level?: 'minimal' | 'low' | 'medium' | 'high';  // Gemini 3.x
+}
+```
+
+---
+
+## 最佳实践
+
+### 1. 选择适当的预算
+
+更高的预算 = 更深入的思考，但更慢且更昂贵：
+
+```typescript
+// 快速任务：较低预算
+const quickThinking = { type: 'enabled', budget_tokens: 2000 };
+
+// 复杂推理：较高预算
+const deepThinking = { type: 'enabled', budget_tokens: 16000 };
+```
+
+### 2. 多轮推理使用 `retainThinking`
+
+对于需要推理连续性的对话：
+
+```typescript
+const agent = await Agent.create({
+  templateId: 'analyst',
+  exposeThinking: true,
+  retainThinking: true,  // 保留推理以提供上下文
+}, deps);
+```
+
+### 3. 剥离思维以节省 Token
+
+如果思维仅用于单轮且不需要保留在历史中：
+
+```typescript
+const provider = new AnthropicProvider(apiKey, model, undefined, undefined, {
+  reasoningTransport: 'omit',  // 不持久化思维
+  extraBody: {
+    thinking: { type: 'enabled', budget_tokens: 5000 },
+  },
+});
+
+const agent = await Agent.create({
+  templateId: 'solver',
+  exposeThinking: true,   // 向用户展示思维
+  retainThinking: false,  // 不持久化
+}, deps);
+```
+
+### 4. 提示交错思维
+
+鼓励模型在步骤之间进行思考：
+
+```typescript
+const prompt = `
+我需要分析这些数据。请：
+1. 首先，使用 fetch_data 工具获取数据
+2. 思考你观察到的模式
+3. 使用 analyze_tool 运行分析
+4. 思考其含义
+5. 提供你的结论
+
+在每个步骤之间仔细思考。
+`;
+
+await agent.send(prompt);
+```
+
+---
+
+## 完整示例
+
+```typescript
+import {
+  Agent,
+  AnthropicProvider,
+  JSONStore,
+  defineTool,
+} from '@shareai-lab/kode-sdk';
+
+// 定义工具
+const searchTool = defineTool({
+  name: 'search',
+  description: '搜索信息',
+  params: {
+    query: { type: 'string', description: '搜索查询' }
+  },
+  async exec(args) {
+    return { results: `关于 ${args.query} 的结果` };
+  }
+});
+
+async function reasoningAgent() {
+  // 配置带扩展思维的 provider
+  const provider = new AnthropicProvider(
+    process.env.ANTHROPIC_API_KEY!,
+    'claude-sonnet-4-20250514',
+    undefined,
+    undefined,
+    {
+      extraBody: {
+        thinking: { type: 'enabled', budget_tokens: 10000 },
+      },
+      beta: {
+        interleavedThinking: true,
+      },
+      reasoningTransport: 'provider',
+    }
+  );
+
+  const store = new JSONStore('./.kode');
+
+  // 创建启用思维的 agent
+  const agent = await Agent.create({
+    templateId: 'reasoning-assistant',
+    exposeThinking: true,
+    retainThinking: true,
+  }, {
+    store,
+    templateRegistry,
+    toolRegistry,
+    sandboxFactory,
+    modelFactory: () => provider,
+  });
+
+  // 监听进度事件
+  const progressTask = (async () => {
+    for await (const envelope of agent.subscribe(['progress'])) {
+      const event = envelope.event;
+
+      if (event.type === 'think_chunk_start') {
+        process.stdout.write('\n[思考] ');
+      } else if (event.type === 'think_chunk') {
+        process.stdout.write(event.delta);
+      } else if (event.type === 'think_chunk_end') {
+        process.stdout.write(' [/思考]\n');
+      } else if (event.type === 'tool:start') {
+        console.log(`\n[工具: ${event.call.name}]`);
+      } else if (event.type === 'text_chunk') {
+        process.stdout.write(event.delta);
+      } else if (event.type === 'done') {
+        break;
+      }
+    }
+  })();
+
+  // 发送需要推理的任务
+  await agent.send(`
+    使用 search 工具研究"机器学习趋势"，
+    然后提供深入的分析。逐步思考。
+  `);
+
+  await progressTask;
+}
+```
+
+---
+
+## 故障排查
+
+| 问题 | 原因 | 解决方案 |
+|------|------|----------|
+| 无思维事件 | `exposeThinking: false` | 设置 `exposeThinking: true` |
+| 思维未保留 | `retainThinking: false` | 设置 `retainThinking: true` |
+| 思维从历史中剥离 | `reasoningTransport: 'omit'` | 使用 `'provider'` 或 `'text'` |
+| 工具无交错 | Beta 未启用 | 启用 `beta.interleavedThinking` |
+| "Thinking signature invalid" 错误 | 修改了思维块 | 不要修改推理内容 |
+
+---
+
+## 参考资料
+
+- [Provider 指南](./providers.md) - Provider 特定的思维配置
+- [事件指南](./events.md) - Progress 事件处理
+- [工具指南](./tools.md) - 工具集成
+- [API 参考](../reference/api.md) - ThinkingOptions 接口
diff --git a/docs/zh-CN/guides/tools.md b/docs/zh-CN/guides/tools.md
new file mode 100644
index 0000000..5ebd40e
--- /dev/null
+++ b/docs/zh-CN/guides/tools.md
@@ -0,0 +1,526 @@
+# 工具系统指南
+
+KODE SDK 提供完整的工具系统，包含内置工具、自定义工具定义 API 和 MCP 集成。所有工具遵循以下规范：
+
+- **Prompt 说明书**：每个工具都提供详细 Prompt，引导模型安全使用
+- **结构化返回**：工具返回 JSON 结构（例如 `fs_read` 返回 `{ path, offset, limit, truncated, content }`）
+- **FilePool 集成**：文件类工具自动通过 FilePool 校验与记录，防止新鲜度冲突
+- **审计追踪**：ToolCallRecord 记录审批、耗时、错误信息，Resume 时完整恢复
+
+---
+
+## 内置工具
+
+### 文件系统工具
+
+| 工具 | 说明 | 返回字段 |
+|------|------|----------|
+| `fs_read` | 读取文件片段 | `{ path, offset, limit, truncated, content }` |
+| `fs_write` | 创建/覆写文件，写前校验新鲜度 | `{ ok, path, bytes, length }` |
+| `fs_edit` | 精确替换文本（支持 `replace_all`） | `{ ok, path, replacements, length }` |
+| `fs_glob` | 使用 glob 模式匹配文件 | `{ ok, pattern, cwd, matches, truncated }` |
+| `fs_grep` | 在文件/通配符集合中搜索文本/正则 | `{ ok, pattern, path, matches[] }` |
+| `fs_multi_edit` | 批量编辑多个文件 | `{ ok, results[{ path, status, replacements, message? }] }` |
+
+#### FilePool 说明
+
+- `recordRead` / `recordEdit`：记录最近读取/写入时间，用于冲突检测
+- `validateWrite`：写入前校验文件是否在此 Agent 读取后被外部修改
+- `watchFiles`：自动监听文件变更，触发 `monitor.file_changed` 事件
+
+### Bash 工具
+
+- `bash_run`：支持前台/后台执行，可通过 Hook 或 `permission.mode='approval'` 控制敏感命令
+- `bash_logs`：读取后台命令输出
+- `bash_kill`：终止后台命令
+
+**推荐安全策略：**
+
+```typescript
+const agent = await Agent.create({
+  templateId: 'secure-runner',
+  sandbox: { kind: 'local', workDir: './workspace', enforceBoundary: true },
+  overrides: {
+    hooks: {
+      preToolUse(call) {
+        if (call.name === 'bash_run' && !/^git /.test(call.args.cmd)) {
+          return { decision: 'ask', meta: { reason: '非白名单命令' } };
+        }
+        return undefined;
+      },
+    },
+  },
+}, deps);
+```
+
+### Todo 工具
+
+- `todo_read`：返回 Todo 列表
+- `todo_write`：写入完整 Todo 列表（校验 ID 唯一、进行中 <=1）。结合 `TodoManager` 自动提醒与事件
+
+### Task（子代理）
+
+- `task_run`：根据模板池派发子 Agent，支持 `subagent_type`、`context`、`model_name` 参数
+- 模板可以通过 `runtime.subagents` 限制深度与可选模板
+
+### Skills 工具
+
+- `skills`：加载特定技能的详细内容（包含指令、references、scripts、assets）
+  - **参数**：
+    - `action`：操作类型（目前仅支持 `load`）
+    - `skill_name`：技能名称（当 action=load 时必需）
+  - **返回**：
+    ```typescript
+    {
+      ok: true,
+      data: {
+        name: string,           // 技能名称
+        description: string,    // 技能描述
+        content: string,        // SKILL.md 内容
+        base_dir: string,       // 技能基础目录
+        references: string[],   // 参考文档列表
+        scripts: string[],      // 可用脚本列表
+        assets: string[]        // 资源文件列表
+      }
+    }
+    ```
+
+完整的 Skills 系统文档详见 [skills.md](./skills.md)。
+
+---
+
+## 定义自定义工具
+
+### 使用 `defineTool()` 快速开始（推荐）
+
+简化 API（v2.7+）从参数定义自动生成 JSON Schema：
+
+```typescript
+import { defineTool } from '@shareai-lab/kode-sdk';
+
+const weatherTool = defineTool({
+  name: 'get_weather',
+  description: '获取天气信息',
+
+  // 简洁的参数定义 - 自动生成 Schema
+  params: {
+    city: {
+      type: 'string',
+      description: '城市名称'
+    },
+    units: {
+      type: 'string',
+      description: '温度单位',
+      enum: ['celsius', 'fahrenheit'],
+      required: false,
+      default: 'celsius'
+    }
+  },
+
+  // 简化的属性标记
+  attributes: {
+    readonly: true,   // 只读工具
+    noEffect: true    // 无副作用，可安全重试
+  },
+
+  async exec(args, ctx) {
+    // 自定义事件
+    ctx.emit('weather_fetched', { city: args.city });
+    return { temperature: 22, condition: 'sunny' };
+  }
+});
+```
+
+### 使用 `defineTools()` 批量定义
+
+```typescript
+import { defineTools } from '@shareai-lab/kode-sdk';
+
+const calculatorTools = defineTools([
+  {
+    name: 'add',
+    description: '两数相加',
+    params: {
+      a: { type: 'number' },
+      b: { type: 'number' }
+    },
+    attributes: { readonly: true, noEffect: true },
+    async exec(args, ctx) {
+      return args.a + args.b;
+    }
+  },
+  {
+    name: 'multiply',
+    description: '两数相乘',
+    params: {
+      a: { type: 'number' },
+      b: { type: 'number' }
+    },
+    attributes: { readonly: true, noEffect: true },
+    async exec(args, ctx) {
+      return args.a * args.b;
+    }
+  }
+]);
+```
+
+### 传统 ToolInstance 接口
+
+需要精细控制时，使用经典接口：
+
+```typescript
+const registry = new ToolRegistry();
+
+registry.register('greet', () => ({
+  name: 'greet',
+  description: '向指定对象问好',
+  input_schema: {
+    type: 'object',
+    properties: { name: { type: 'string' } },
+    required: ['name']
+  },
+  prompt: 'Use this tool to greet teammates by name.',
+  async exec(args) {
+    return `Hello, ${args.name}!`;
+  },
+  toDescriptor() {
+    return { source: 'registered', name: 'greet', registryId: 'greet' };
+  },
+}));
+```
+
+---
+
+## 参数定义
+
+### 基础类型
+
+```typescript
+params: {
+  str: { type: 'string', description: '字符串' },
+  num: { type: 'number', description: '数字' },
+  bool: { type: 'boolean', description: '布尔值' },
+
+  // 可选参数
+  optional: { type: 'string', required: false },
+
+  // 默认值
+  withDefault: { type: 'number', default: 42 },
+
+  // 枚举
+  choice: {
+    type: 'string',
+    enum: ['option1', 'option2', 'option3']
+  }
+}
+```
+
+### 复杂类型
+
+```typescript
+params: {
+  // 数组
+  tags: {
+    type: 'array',
+    description: '标签列表',
+    items: { type: 'string' }
+  },
+
+  // 嵌套对象
+  profile: {
+    type: 'object',
+    description: '用户配置',
+    properties: {
+      email: { type: 'string' },
+      age: { type: 'number', required: false },
+      roles: {
+        type: 'array',
+        items: { type: 'string' }
+      }
+    }
+  }
+}
+```
+
+### 直接使用 JSON Schema（高级）
+
+需要 `pattern`、`minLength` 等约束时，直接使用 `input_schema`：
+
+```typescript
+defineTool({
+  name: 'advanced_tool',
+  description: '高级工具',
+  input_schema: {
+    type: 'object',
+    properties: {
+      data: {
+        type: 'string',
+        pattern: '^[A-Z]{3}$',
+        minLength: 3,
+        maxLength: 3
+      }
+    },
+    required: ['data']
+  },
+  async exec(args, ctx) {
+    // ...
+  }
+});
+```
+
+---
+
+## 工具属性
+
+### `readonly` - 只读工具
+
+表示工具不修改任何状态（文件、数据库、外部 API）：
+
+```typescript
+attributes: {
+  readonly: true
+}
+```
+
+**用途**：
+- `readonly` 权限模式会自动放行只读工具
+- 适用于查询、读取、计算等操作
+
+### `noEffect` - 无副作用
+
+表示工具可以安全重试，多次执行结果相同：
+
+```typescript
+attributes: {
+  noEffect: true
+}
+```
+
+**用途**：
+- Resume 时可安全重新执行
+- 适用于幂等操作（GET 请求、纯计算等）
+
+### 默认行为
+
+不设置 `attributes` 时，工具被视为：
+- 非只读（可能写入）
+- 有副作用（不可重试）
+
+---
+
+## 自定义事件
+
+### 基本用法
+
+```typescript
+defineTool({
+  name: 'process_data',
+  description: '处理数据',
+  params: { input: { type: 'string' } },
+
+  async exec(args, ctx: EnhancedToolContext) {
+    ctx.emit('processing_started', { input: args.input });
+    const result = await heavyComputation(args.input);
+    ctx.emit('processing_completed', { result, duration: 1234 });
+    return result;
+  }
+});
+```
+
+### 监听自定义事件
+
+```typescript
+agent.on('tool_custom_event', (event) => {
+  console.log(`[${event.toolName}] ${event.eventType}:`, event.data);
+});
+```
+
+### 事件结构
+
+```typescript
+interface MonitorToolCustomEvent {
+  channel: 'monitor';
+  type: 'tool_custom_event';
+  toolName: string;        // 工具名称
+  eventType: string;       // 自定义事件类型
+  data?: any;              // 事件数据
+  timestamp: number;
+  bookmark?: Bookmark;
+}
+```
+
+---
+
+## 工具超时与 AbortSignal
+
+### 超时配置
+
+默认工具执行超时为 **60 秒**，可通过 Agent 配置自定义：
+
+```typescript
+const agent = await Agent.create({
+  templateId: 'my-assistant',
+  metadata: {
+    toolTimeoutMs: 120000, // 2 分钟
+  }
+}, deps);
+```
+
+### 处理 AbortSignal（必须）
+
+所有自定义工具的 `exec()` 方法都会收到 `context.signal`，**必须**在耗时操作中检查：
+
+```typescript
+export class MyLongRunningTool implements ToolInstance {
+  async exec(args: any, context: ToolContext) {
+    // 在长时间操作前检查
+    if (context.signal?.aborted) {
+      throw new Error('Operation aborted');
+    }
+
+    // 将 signal 传递给底层 API
+    const response = await fetch(url, { signal: context.signal });
+
+    // 在循环中定期检查
+    for (const item of items) {
+      if (context.signal?.aborted) {
+        throw new Error('Operation aborted');
+      }
+      await processItem(item);
+    }
+
+    return result;
+  }
+}
+```
+
+### CPU 密集型任务
+
+对于纯计算任务（无 I/O），需要主动在循环中检查：
+
+```typescript
+for (let i = 0; i < args.iterations; i++) {
+  // 每 100 次迭代检查一次
+  if (i % 100 === 0 && context.signal?.aborted) {
+    throw new Error('Computation aborted');
+  }
+  result.push(this.compute(i));
+}
+```
+
+### 超时恢复策略
+
+工具超时后，Agent 会：
+1. 发送 `abort` 信号
+2. 标记工具调用为 `FAILED` 状态
+3. 生成 `tool_result` 包含超时信息
+4. 继续下一轮 `runStep`
+
+Resume 时，超时的工具调用会被自动封口（Auto-Seal），不会重新执行。
+
+---
+
+## MCP 集成
+
+在 ToolRegistry 注册 MCP loader，将 `registryId` 指向 MCP 服务：
+
+```typescript
+const registry = new ToolRegistry();
+
+// 注册 MCP 工具加载器
+registry.registerMCPLoader('my-mcp-server', async () => {
+  const client = await connectToMCPServer('my-mcp-server');
+  return client.getTools();
+});
+```
+
+配合 TemplateRegistry 指定哪些模板启用 MCP 工具，Resume 时即可正常恢复。
+
+---
+
+## 最佳实践
+
+1. **始终检查 `context.signal?.aborted`** - 在长时间操作中
+2. **将 signal 传递给支持 AbortSignal 的 API**（fetch、axios 等）
+3. **设置合理的 `attributes`** - 帮助权限系统正确判断
+4. **善用自定义事件** - 提供工具执行的可观测性
+5. **优先使用 `defineTool()`** - 代码更简洁、类型安全
+6. **仅在需要高级约束时使用 `input_schema`**
+7. **监听超时事件进行告警**
+
+```typescript
+agent.on('error', (event) => {
+  if (event.phase === 'tool' && event.message.includes('aborted')) {
+    console.log('Tool execution timed out:', event.detail);
+  }
+});
+```
+
+---
+
+## 从旧 API 迁移
+
+### Metadata 映射
+
+| 旧方式 | 新方式 |
+|--------|--------|
+| `{ access: 'read', mutates: false }` | `{ readonly: true }` |
+| `{ access: 'write', mutates: true }` | （默认，无需设置） |
+| `{ safe: true }` | `{ noEffect: true }` |
+
+### 添加自定义事件
+
+```typescript
+// 旧方式 - 无法发射事件
+async exec(args, ctx: ToolContext) {
+  return result;
+}
+
+// 新方式 - 可以发射事件
+async exec(args, ctx: EnhancedToolContext) {
+  ctx.emit('event_name', { data: 'value' });
+  return result;
+}
+```
+
+---
+
+## 常见问题
+
+**Q: 必须使用新 API 吗？**
+
+A: 不，旧的 `ToolInstance` 接口完全兼容。新 API 是可选的增强功能。
+
+**Q: `readonly` 和 `noEffect` 有什么区别？**
+
+A:
+- `readonly`：工具不修改任何状态（文件、数据库等）
+- `noEffect`：工具可以安全重试，多次执行结果相同
+
+一个只读工具通常也是无副作用的，但反之不一定成立。
+
+**Q: 自定义事件会被持久化吗？**
+
+A: 是的，自定义事件作为 `MonitorToolCustomEvent` 被完整持久化到 WAL，Resume 时可恢复。
+
+**Q: 可以混用新旧 API 吗？**
+
+A: 可以自由混用，Agent 接受任何 `ToolInstance`：
+
+```typescript
+const agent = await Agent.create({
+  tools: [
+    oldStyleTool,           // 旧方式
+    defineTool({ ... }),    // 新方式
+    new FsRead(),           // 内置工具
+  ]
+});
+```
+
+---
+
+## 参考
+
+- 示例代码：`examples/tooling/simplified-tools.ts`
+- 类型定义：`src/tools/define.ts`
+- 事件系统：[events.md](./events.md)
diff --git a/docs/zh-CN/reference/api.md b/docs/zh-CN/reference/api.md
new file mode 100644
index 0000000..58c6dd2
--- /dev/null
+++ b/docs/zh-CN/reference/api.md
@@ -0,0 +1,691 @@
+# API 参考
+
+本文档提供 KODE SDK v2.7.0 的完整 API 参考。
+
+---
+
+## Agent
+
+创建和管理 AI Agent 的核心类。
+
+### 静态方法
+
+#### `Agent.create(config, deps)`
+
+创建新的 Agent 实例。
+
+```typescript
+static async create(config: AgentConfig, deps: AgentDependencies): Promise<Agent>
+```
+
+**参数：**
+- `config: AgentConfig` - Agent 配置
+- `deps: AgentDependencies` - 必需的依赖项
+
+**示例：**
+```typescript
+const agent = await Agent.create({
+  templateId: 'assistant',
+  modelConfig: {
+    provider: 'anthropic',
+    apiKey: process.env.ANTHROPIC_API_KEY!,
+  },
+  sandbox: { kind: 'local', workDir: './workspace' },
+}, deps);
+```
+
+#### `Agent.resume(agentId, config, deps, opts?)`
+
+从存储恢复已有的 Agent。
+
+```typescript
+static async resume(
+  agentId: string,
+  config: AgentConfig,
+  deps: AgentDependencies,
+  opts?: { autoRun?: boolean; strategy?: ResumeStrategy }
+): Promise<Agent>
+```
+
+**参数：**
+- `agentId: string` - 要恢复的 Agent ID
+- `config: AgentConfig` - Agent 配置
+- `deps: AgentDependencies` - 必需的依赖项
+- `opts.autoRun?: boolean` - 恢复后继续处理（默认：false）
+- `opts.strategy?: ResumeStrategy` - `'crash'`（自动封口）或 `'manual'`
+
+#### `Agent.resumeFromStore(agentId, deps, opts?)`
+
+使用存储中的元数据恢复 Agent（推荐）。
+
+```typescript
+static async resumeFromStore(
+  agentId: string,
+  deps: AgentDependencies,
+  opts?: { overrides?: Partial<AgentConfig>; autoRun?: boolean; strategy?: ResumeStrategy }
+): Promise<Agent>
+```
+
+### 实例方法
+
+#### `agent.send(message, options?)`
+
+发送消息并返回文本响应。
+
+```typescript
+async send(message: string | ContentBlock[], options?: SendOptions): Promise<string>
+```
+
+#### `agent.chat(input, opts?)`
+
+发送消息并返回带状态的结构化结果。
+
+```typescript
+async chat(input: string | ContentBlock[], opts?: StreamOptions): Promise<CompleteResult>
+```
+
+**返回：**
+```typescript
+interface CompleteResult {
+  status: 'ok' | 'paused';
+  text?: string;
+  last?: Bookmark;
+  permissionIds?: string[];
+}
+```
+
+#### `agent.complete(input, opts?)`
+
+`chat()` 的别名。
+
+#### `agent.decide(permissionId, decision, note?)`
+
+响应权限请求。
+
+```typescript
+async decide(permissionId: string, decision: 'allow' | 'deny', note?: string): Promise<void>
+```
+
+#### `agent.interrupt(opts?)`
+
+中断当前处理。
+
+```typescript
+async interrupt(opts?: { note?: string }): Promise<void>
+```
+
+#### `agent.snapshot(label?)`
+
+在当前 Safe-Fork-Point 创建快照。
+
+```typescript
+async snapshot(label?: string): Promise<SnapshotId>
+```
+
+#### `agent.fork(sel?)`
+
+从快照创建分叉的 Agent。
+
+```typescript
+async fork(sel?: SnapshotId | { at?: string }): Promise<Agent>
+```
+
+#### `agent.status()`
+
+返回当前 Agent 状态。
+
+```typescript
+async status(): Promise<AgentStatus>
+```
+
+**返回：**
+```typescript
+interface AgentStatus {
+  agentId: string;
+  state: AgentRuntimeState;  // 'READY' | 'WORKING' | 'PAUSED'
+  stepCount: number;
+  lastSfpIndex: number;
+  lastBookmark?: Bookmark;
+  cursor: number;
+  breakpoint: BreakpointState;
+}
+```
+
+#### `agent.info()`
+
+返回 Agent 元数据。
+
+```typescript
+async info(): Promise<AgentInfo>
+```
+
+#### `agent.setTodos(todos)`
+
+设置完整的 Todo 列表。
+
+```typescript
+async setTodos(todos: TodoInput[]): Promise<void>
+```
+
+#### `agent.updateTodo(todo)`
+
+更新单个 Todo 项。
+
+```typescript
+async updateTodo(todo: TodoInput): Promise<void>
+```
+
+#### `agent.deleteTodo(id)`
+
+删除 Todo 项。
+
+```typescript
+async deleteTodo(id: string): Promise<void>
+```
+
+#### `agent.on(event, handler)`
+
+订阅 Control 和 Monitor 事件。返回取消订阅函数。
+
+```typescript
+on<T extends ControlEvent['type'] | MonitorEvent['type']>(
+  event: T,
+  handler: (evt: any) => void
+): () => void
+```
+
+**支持的事件：**
+- Control: `'permission_required'`, `'permission_decided'`
+- Monitor: `'state_changed'`, `'step_complete'`, `'error'`, `'token_usage'`, `'tool_executed'`, `'agent_resumed'`, `'todo_changed'`, `'file_changed'` 等（完整列表见 [事件参考](./events-reference.md)）
+
+**示例：**
+```typescript
+// Monitor 事件
+const unsubscribe = agent.on('tool_executed', (event) => {
+  console.log(`工具 ${event.call.name} 已执行`);
+});
+
+agent.on('error', (event) => {
+  console.error('错误:', event.message);
+});
+
+// Control 事件
+agent.on('permission_required', (event) => {
+  console.log(`需要权限: ${event.call.name}`);
+});
+
+// 完成后取消订阅
+unsubscribe();
+```
+
+> **注意：** 对于 Progress 事件（`text_chunk`、`tool:start`、`done` 等），请使用 `agent.subscribe(['progress'])`。
+
+---
+
+## AgentConfig
+
+创建 Agent 的配置。
+
+```typescript
+interface AgentConfig {
+  agentId?: string;                    // 不提供则自动生成
+  templateId: string;                  // 必需：模板 ID
+  templateVersion?: string;            // 可选：模板版本
+  model?: ModelProvider;               // 直接提供模型实例
+  modelConfig?: ModelConfig;           // 或模型配置
+  sandbox?: Sandbox | SandboxConfig;   // 沙箱实例或配置
+  tools?: string[];                    // 要启用的工具名称
+  exposeThinking?: boolean;            // 发送思考事件
+  retainThinking?: boolean;            // 在消息历史中保留思考
+  overrides?: {
+    permission?: PermissionConfig;
+    todo?: TodoConfig;
+    subagents?: SubAgentConfig;
+    hooks?: Hooks;
+  };
+  context?: ContextManagerOptions;
+  metadata?: Record<string, any>;
+}
+```
+
+---
+
+## AgentDependencies
+
+创建 Agent 所需的依赖项。
+
+```typescript
+interface AgentDependencies {
+  store: Store;                        // 存储后端
+  templateRegistry: AgentTemplateRegistry;
+  sandboxFactory: SandboxFactory;
+  toolRegistry: ToolRegistry;
+  modelFactory?: ModelFactory;         // 可选的模型创建工厂
+  skillsManager?: SkillsManager;       // 可选的技能管理器
+}
+```
+
+---
+
+## Store
+
+Agent 数据持久化接口。
+
+### 核心方法
+
+```typescript
+interface Store {
+  // 消息
+  saveMessages(agentId: string, messages: Message[]): Promise<void>;
+  loadMessages(agentId: string): Promise<Message[]>;
+
+  // 工具记录
+  saveToolCallRecords(agentId: string, records: ToolCallRecord[]): Promise<void>;
+  loadToolCallRecords(agentId: string): Promise<ToolCallRecord[]>;
+
+  // Todo
+  saveTodos(agentId: string, snapshot: TodoSnapshot): Promise<void>;
+  loadTodos(agentId: string): Promise<TodoSnapshot | undefined>;
+
+  // 事件
+  appendEvent(agentId: string, timeline: Timeline): Promise<void>;
+  readEvents(agentId: string, opts?: { since?: Bookmark; channel?: AgentChannel }): AsyncIterable<Timeline>;
+
+  // 快照
+  saveSnapshot(agentId: string, snapshot: Snapshot): Promise<void>;
+  loadSnapshot(agentId: string, snapshotId: string): Promise<Snapshot | undefined>;
+  listSnapshots(agentId: string): Promise<Snapshot[]>;
+
+  // 元数据
+  saveInfo(agentId: string, info: AgentInfo): Promise<void>;
+  loadInfo(agentId: string): Promise<AgentInfo | undefined>;
+
+  // 生命周期
+  exists(agentId: string): Promise<boolean>;
+  delete(agentId: string): Promise<void>;
+  list(prefix?: string): Promise<string[]>;
+}
+```
+
+### Store 实现
+
+| 类 | 说明 |
+|---|------|
+| `JSONStore` | 基于文件的存储（默认）|
+| `SqliteStore` | SQLite 数据库存储 |
+| `PostgresStore` | PostgreSQL 数据库存储 |
+
+### 工厂函数
+
+```typescript
+import { createExtendedStore } from '@shareai-lab/kode-sdk';
+
+// SQLite
+const store = await createExtendedStore({
+  type: 'sqlite',
+  dbPath: './data/agents.db',
+  fileStoreBaseDir: './data/store',
+});
+
+// PostgreSQL
+const store = await createExtendedStore({
+  type: 'postgres',
+  connection: {
+    host: 'localhost',
+    port: 5432,
+    database: 'kode_agents',
+    user: 'kode',
+    password: 'password',
+  },
+  fileStoreBaseDir: './data/store',
+});
+```
+
+---
+
+## QueryableStore
+
+带查询能力的扩展 Store 接口。
+
+```typescript
+interface QueryableStore extends Store {
+  querySessions(filters: SessionFilters): Promise<SessionInfo[]>;
+  queryMessages(filters: MessageFilters): Promise<Message[]>;
+  queryToolCalls(filters: ToolCallFilters): Promise<ToolCallRecord[]>;
+  aggregateStats(agentId: string): Promise<AgentStats>;
+}
+```
+
+### SessionFilters
+
+```typescript
+interface SessionFilters {
+  agentId?: string;
+  templateId?: string;
+  userId?: string;
+  startDate?: number;      // Unix 时间戳（毫秒）
+  endDate?: number;
+  limit?: number;
+  offset?: number;
+  sortBy?: 'created_at' | 'updated_at' | 'message_count';
+  sortOrder?: 'asc' | 'desc';
+}
+```
+
+### MessageFilters
+
+```typescript
+interface MessageFilters {
+  agentId?: string;
+  role?: 'user' | 'assistant' | 'system';
+  startDate?: number;
+  endDate?: number;
+  limit?: number;
+  offset?: number;
+}
+```
+
+### ToolCallFilters
+
+```typescript
+interface ToolCallFilters {
+  agentId?: string;
+  toolName?: string;
+  state?: ToolCallState;
+  startDate?: number;
+  endDate?: number;
+  limit?: number;
+  offset?: number;
+}
+```
+
+---
+
+## ExtendedStore
+
+带高级功能的 Store。
+
+```typescript
+interface ExtendedStore extends QueryableStore {
+  healthCheck(): Promise<StoreHealthStatus>;
+  checkConsistency(agentId: string): Promise<ConsistencyCheckResult>;
+  getMetrics(): Promise<StoreMetrics>;
+  acquireAgentLock(agentId: string, timeoutMs?: number): Promise<LockReleaseFn>;
+  batchFork(agentId: string, count: number): Promise<string[]>;
+  close(): Promise<void>;
+}
+```
+
+---
+
+## ToolRegistry
+
+工具工厂注册表。
+
+```typescript
+class ToolRegistry {
+  register(id: string, factory: ToolFactory): void;
+  has(id: string): boolean;
+  create(id: string, config?: Record<string, any>): ToolInstance;
+  list(): string[];
+}
+```
+
+### ToolInstance
+
+```typescript
+interface ToolInstance {
+  name: string;
+  description: string;
+  input_schema: any;                   // JSON Schema
+  hooks?: Hooks;
+  prompt?: string | ((ctx: ToolContext) => string | Promise<string>);
+  exec(args: any, ctx: ToolContext): Promise<any>;
+  toDescriptor(): ToolDescriptor;
+}
+```
+
+### defineTool()
+
+创建工具的简化 API。
+
+```typescript
+import { defineTool } from '@shareai-lab/kode-sdk';
+
+const myTool = defineTool({
+  name: 'my_tool',
+  description: '做一些有用的事情',
+  params: {
+    input: { type: 'string', description: '输入值' },
+    count: { type: 'number', required: false, default: 1 },
+  },
+  attributes: {
+    readonly: true,
+    noEffect: true,
+  },
+  async exec(args, ctx) {
+    ctx.emit('custom_event', { data: 'value' });
+    return { result: args.input };
+  },
+});
+```
+
+---
+
+## AgentTemplateRegistry
+
+Agent 模板注册表。
+
+```typescript
+class AgentTemplateRegistry {
+  register(template: AgentTemplateDefinition): void;
+  bulkRegister(templates: AgentTemplateDefinition[]): void;
+  has(id: string): boolean;
+  get(id: string): AgentTemplateDefinition;
+  list(): string[];
+}
+```
+
+### AgentTemplateDefinition
+
+```typescript
+interface AgentTemplateDefinition {
+  id: string;                          // 必需：唯一标识符
+  name?: string;                       // 显示名称
+  desc?: string;                       // 描述
+  version?: string;                    // 模板版本
+  systemPrompt: string;                // 必需：系统提示词
+  model?: string;                      // 默认模型
+  sandbox?: Record<string, any>;       // 沙箱配置
+  tools?: '*' | string[];              // '*' 表示全部，或指定工具
+  permission?: PermissionConfig;       // 权限配置
+  runtime?: TemplateRuntimeConfig;     // 运行时选项
+  hooks?: Hooks;                       // Hook 函数
+  metadata?: Record<string, any>;      // 自定义元数据
+}
+```
+
+---
+
+## AgentPool
+
+管理多个 Agent 实例。
+
+```typescript
+class AgentPool {
+  constructor(opts: AgentPoolOptions);
+
+  async create(agentId: string, config: AgentConfig): Promise<Agent>;
+  get(agentId: string): Agent | undefined;
+  list(opts?: { prefix?: string }): string[];
+  async status(agentId: string): Promise<AgentStatus | undefined>;
+  async fork(agentId: string, snapshotSel?: SnapshotId | { at?: string }): Promise<Agent>;
+  async resume(agentId: string, config: AgentConfig, opts?: { autoRun?: boolean; strategy?: ResumeStrategy }): Promise<Agent>;
+  async destroy(agentId: string): Promise<void>;
+}
+```
+
+---
+
+## Room
+
+多 Agent 协作空间。
+
+```typescript
+class Room {
+  constructor(pool: AgentPool);
+
+  join(name: string, agentId: string): void;
+  leave(name: string): void;
+  async say(from: string, text: string): Promise<void>;
+  getMembers(): RoomMember[];
+}
+```
+
+**示例：**
+```typescript
+const pool = new AgentPool({ dependencies: deps });
+const room = new Room(pool);
+
+// 创建并加入 agents
+const agent1 = await pool.create('agent-1', config);
+const agent2 = await pool.create('agent-2', config);
+
+room.join('Alice', 'agent-1');
+room.join('Bob', 'agent-2');
+
+// 广播消息
+await room.say('Alice', 'Hello everyone!');
+
+// 定向消息
+await room.say('Alice', '@Bob What do you think?');
+```
+
+---
+
+## Providers
+
+### AnthropicProvider
+
+```typescript
+import { AnthropicProvider } from '@shareai-lab/kode-sdk';
+
+const provider = new AnthropicProvider(
+  process.env.ANTHROPIC_API_KEY!,
+  process.env.ANTHROPIC_MODEL_ID ?? 'claude-sonnet-4-20250514',
+  {
+    thinking: { enabled: true, budgetTokens: 10000 },
+    cache: { breakpoints: 4 },
+  }
+);
+```
+
+### OpenAIProvider
+
+```typescript
+import { OpenAIProvider } from '@shareai-lab/kode-sdk';
+
+const provider = new OpenAIProvider(
+  process.env.OPENAI_API_KEY!,
+  process.env.OPENAI_MODEL_ID ?? 'gpt-4o',
+  {
+    api: 'responses',
+    responses: { reasoning: { effort: 'medium' } },
+  }
+);
+```
+
+### GeminiProvider
+
+```typescript
+import { GeminiProvider } from '@shareai-lab/kode-sdk';
+
+const provider = new GeminiProvider(
+  process.env.GOOGLE_API_KEY!,
+  process.env.GEMINI_MODEL_ID ?? 'gemini-2.0-flash',
+  {
+    thinking: { level: 'medium', includeThoughts: true },
+  }
+);
+```
+
+---
+
+## 内置工具
+
+| 工具 | 说明 |
+|------|------|
+| `fs_read` | 读取文件内容 |
+| `fs_write` | 创建/覆写文件 |
+| `fs_edit` | 编辑文件（替换）|
+| `fs_glob` | 使用 glob 模式匹配文件 |
+| `fs_grep` | 在文件中搜索文本/正则 |
+| `fs_multi_edit` | 批量编辑多个文件 |
+| `bash_run` | 执行 shell 命令 |
+| `bash_logs` | 读取后台命令输出 |
+| `bash_kill` | 终止后台命令 |
+| `todo_read` | 读取 Todo 列表 |
+| `todo_write` | 写入 Todo 列表 |
+| `task_run` | 派发子 Agent |
+| `skills` | 加载技能 |
+
+### 注册内置工具
+
+```typescript
+import { builtin, ToolRegistry } from '@shareai-lab/kode-sdk';
+
+const registry = new ToolRegistry();
+
+// builtin 是一个包含方法的对象，每个方法返回 ToolInstance[]
+for (const tool of [...builtin.fs(), ...builtin.bash(), ...builtin.todo()]) {
+  registry.register(tool.name, () => tool);
+}
+
+// 或分组注册特定工具
+builtin.fs().forEach(tool => registry.register(tool.name, () => tool));
+builtin.bash().forEach(tool => registry.register(tool.name, () => tool));
+builtin.todo().forEach(tool => registry.register(tool.name, () => tool));
+```
+
+**可用的 builtin 分组：**
+- `builtin.fs()` - 文件系统工具：`fs_read`, `fs_write`, `fs_edit`, `fs_glob`, `fs_grep`, `fs_multi_edit`
+- `builtin.bash()` - Shell 工具：`bash_run`, `bash_logs`, `bash_kill`
+- `builtin.todo()` - Todo 工具：`todo_read`, `todo_write`
+- `builtin.task(templates)` - 子 Agent 工具：`task_run`（需要提供模板）
+
+---
+
+## SkillsManager
+
+在 Agent 运行时管理技能。
+
+```typescript
+class SkillsManager {
+  constructor(skillsDir: string, whitelist?: string[]);
+
+  async getSkillsMetadata(): Promise<SkillMetadata[]>;
+  async loadSkillContent(skillName: string): Promise<SkillContent | null>;
+}
+```
+
+---
+
+## 工具函数
+
+### generateAgentId()
+
+生成唯一的 Agent ID。
+
+```typescript
+import { generateAgentId } from '@shareai-lab/kode-sdk';
+
+const agentId = generateAgentId(); // 例如 'agt-abc123xyz'
+```
+
+---
+
+## 参考资料
+
+- [类型参考](./types.md)
+- [事件参考](./events-reference.md)
+- [事件系统指南](../guides/events.md)
diff --git a/docs/zh-CN/reference/events-reference.md b/docs/zh-CN/reference/events-reference.md
new file mode 100644
index 0000000..7c08b4f
--- /dev/null
+++ b/docs/zh-CN/reference/events-reference.md
@@ -0,0 +1,576 @@
+# 事件参考
+
+KODE SDK 所有事件的完整参考，按通道组织。
+
+---
+
+## 事件通道
+
+| 通道 | 用途 | 订阅者 |
+|------|------|--------|
+| `progress` | 流式输出（文本、工具调用）| 用户界面 |
+| `control` | 权限请求和决策 | 业务逻辑 |
+| `monitor` | 系统可观测性 | 监控/日志 |
+
+---
+
+## Progress 事件
+
+用于向用户流式输出的事件。
+
+### ProgressTextChunkStartEvent
+
+文本流开始时发出。
+
+```typescript
+interface ProgressTextChunkStartEvent {
+  channel: 'progress';
+  type: 'text_chunk_start';
+  step: number;
+  bookmark?: Bookmark;
+}
+```
+
+### ProgressTextChunkEvent
+
+流式传输时每个文本块发出。
+
+```typescript
+interface ProgressTextChunkEvent {
+  channel: 'progress';
+  type: 'text_chunk';
+  step: number;
+  delta: string;           // 文本块内容
+  bookmark?: Bookmark;
+}
+```
+
+### ProgressTextChunkEndEvent
+
+文本流完成时发出。
+
+```typescript
+interface ProgressTextChunkEndEvent {
+  channel: 'progress';
+  type: 'text_chunk_end';
+  step: number;
+  text: string;            // 完整文本
+  bookmark?: Bookmark;
+}
+```
+
+### ProgressThinkChunkStartEvent
+
+思考/推理流开始时发出。
+
+```typescript
+interface ProgressThinkChunkStartEvent {
+  channel: 'progress';
+  type: 'think_chunk_start';
+  step: number;
+  bookmark?: Bookmark;
+}
+```
+
+### ProgressThinkChunkEvent
+
+每个思考块发出。
+
+```typescript
+interface ProgressThinkChunkEvent {
+  channel: 'progress';
+  type: 'think_chunk';
+  step: number;
+  delta: string;           // 思考块内容
+  bookmark?: Bookmark;
+}
+```
+
+### ProgressThinkChunkEndEvent
+
+思考流完成时发出。
+
+```typescript
+interface ProgressThinkChunkEndEvent {
+  channel: 'progress';
+  type: 'think_chunk_end';
+  step: number;
+  bookmark?: Bookmark;
+}
+```
+
+### ProgressToolStartEvent
+
+工具执行开始时发出。
+
+```typescript
+interface ProgressToolStartEvent {
+  channel: 'progress';
+  type: 'tool:start';
+  call: ToolCallSnapshot;
+  bookmark?: Bookmark;
+}
+```
+
+### ProgressToolEndEvent
+
+工具执行完成时发出。
+
+```typescript
+interface ProgressToolEndEvent {
+  channel: 'progress';
+  type: 'tool:end';
+  call: ToolCallSnapshot;
+  bookmark?: Bookmark;
+}
+```
+
+### ProgressToolErrorEvent
+
+工具执行失败时发出。
+
+```typescript
+interface ProgressToolErrorEvent {
+  channel: 'progress';
+  type: 'tool:error';
+  call: ToolCallSnapshot;
+  error: string;
+  bookmark?: Bookmark;
+}
+```
+
+### ProgressDoneEvent
+
+处理完成时发出。
+
+```typescript
+interface ProgressDoneEvent {
+  channel: 'progress';
+  type: 'done';
+  step: number;
+  reason: 'completed' | 'interrupted';
+  bookmark?: Bookmark;
+}
+```
+
+---
+
+## Control 事件
+
+用于权限处理的事件。
+
+### ControlPermissionRequiredEvent
+
+工具调用需要审批时发出。
+
+```typescript
+interface ControlPermissionRequiredEvent {
+  channel: 'control';
+  type: 'permission_required';
+  call: ToolCallSnapshot;
+  respond(decision: 'allow' | 'deny', opts?: { note?: string }): Promise<void>;
+  bookmark?: Bookmark;
+}
+```
+
+**用法：**
+```typescript
+agent.on('permission_required', async (event) => {
+  // 审查工具调用
+  console.log('工具:', event.call.name);
+  console.log('输入:', event.call.inputPreview);
+
+  // 做出决策
+  await event.respond('allow', { note: '管理员批准' });
+});
+```
+
+### ControlPermissionDecidedEvent
+
+权限决策完成时发出。
+
+```typescript
+interface ControlPermissionDecidedEvent {
+  channel: 'control';
+  type: 'permission_decided';
+  callId: string;
+  decision: 'allow' | 'deny';
+  decidedBy: string;
+  note?: string;
+  bookmark?: Bookmark;
+}
+```
+
+---
+
+## Monitor 事件
+
+用于系统可观测性的事件。
+
+### MonitorStateChangedEvent
+
+Agent 状态变化时发出。
+
+```typescript
+interface MonitorStateChangedEvent {
+  channel: 'monitor';
+  type: 'state_changed';
+  state: AgentRuntimeState;   // 'READY' | 'WORKING' | 'PAUSED'
+  bookmark?: Bookmark;
+}
+```
+
+### MonitorStepCompleteEvent
+
+处理步骤完成时发出。
+
+```typescript
+interface MonitorStepCompleteEvent {
+  channel: 'monitor';
+  type: 'step_complete';
+  step: number;
+  durationMs?: number;
+  bookmark: Bookmark;
+}
+```
+
+### MonitorErrorEvent
+
+发生错误时发出。
+
+```typescript
+interface MonitorErrorEvent {
+  channel: 'monitor';
+  type: 'error';
+  severity: 'info' | 'warn' | 'error';
+  phase: 'model' | 'tool' | 'system' | 'lifecycle';
+  message: string;
+  detail?: any;
+  bookmark?: Bookmark;
+}
+```
+
+### MonitorTokenUsageEvent
+
+Token 使用统计。
+
+```typescript
+interface MonitorTokenUsageEvent {
+  channel: 'monitor';
+  type: 'token_usage';
+  inputTokens: number;
+  outputTokens: number;
+  totalTokens: number;
+  bookmark?: Bookmark;
+}
+```
+
+### MonitorToolExecutedEvent
+
+工具执行完成时发出。
+
+```typescript
+interface MonitorToolExecutedEvent {
+  channel: 'monitor';
+  type: 'tool_executed';
+  call: ToolCallSnapshot;
+  bookmark?: Bookmark;
+}
+```
+
+### MonitorAgentResumedEvent
+
+Agent 从存储恢复时发出。
+
+```typescript
+interface MonitorAgentResumedEvent {
+  channel: 'monitor';
+  type: 'agent_resumed';
+  strategy: 'crash' | 'manual';
+  sealed: ToolCallSnapshot[];    // 自动封口的工具调用
+  bookmark?: Bookmark;
+}
+```
+
+### MonitorBreakpointChangedEvent
+
+断点状态变化时发出。
+
+```typescript
+interface MonitorBreakpointChangedEvent {
+  channel: 'monitor';
+  type: 'breakpoint_changed';
+  previous: BreakpointState;
+  current: BreakpointState;
+  timestamp: number;
+  bookmark?: Bookmark;
+}
+```
+
+### MonitorTodoChangedEvent
+
+Todo 列表变化时发出。
+
+```typescript
+interface MonitorTodoChangedEvent {
+  channel: 'monitor';
+  type: 'todo_changed';
+  current: TodoItem[];
+  previous: TodoItem[];
+  bookmark?: Bookmark;
+}
+```
+
+### MonitorTodoReminderEvent
+
+Todo 提醒触发时发出。
+
+```typescript
+interface MonitorTodoReminderEvent {
+  channel: 'monitor';
+  type: 'todo_reminder';
+  todos: TodoItem[];
+  reason: string;
+  bookmark?: Bookmark;
+}
+```
+
+### MonitorFileChangedEvent
+
+监听的文件变化时发出。
+
+```typescript
+interface MonitorFileChangedEvent {
+  channel: 'monitor';
+  type: 'file_changed';
+  path: string;
+  mtime: number;
+  bookmark?: Bookmark;
+}
+```
+
+### MonitorReminderSentEvent
+
+向模型发送提醒时发出。
+
+```typescript
+interface MonitorReminderSentEvent {
+  channel: 'monitor';
+  type: 'reminder_sent';
+  category: 'file' | 'todo' | 'security' | 'performance' | 'general';
+  content: string;
+  bookmark?: Bookmark;
+}
+```
+
+### MonitorContextCompressionEvent
+
+上下文压缩期间发出。
+
+```typescript
+interface MonitorContextCompressionEvent {
+  channel: 'monitor';
+  type: 'context_compression';
+  phase: 'start' | 'end';
+  summary?: string;
+  ratio?: number;
+  bookmark?: Bookmark;
+}
+```
+
+### MonitorSchedulerTriggeredEvent
+
+定时任务触发时发出。
+
+```typescript
+interface MonitorSchedulerTriggeredEvent {
+  channel: 'monitor';
+  type: 'scheduler_triggered';
+  taskId: string;
+  spec: string;
+  kind: 'steps' | 'time' | 'cron';
+  triggeredAt: number;
+  bookmark?: Bookmark;
+}
+```
+
+### MonitorToolManualUpdatedEvent
+
+工具说明书更新时发出。
+
+```typescript
+interface MonitorToolManualUpdatedEvent {
+  channel: 'monitor';
+  type: 'tool_manual_updated';
+  tools: string[];
+  timestamp: number;
+  bookmark?: Bookmark;
+}
+```
+
+### MonitorSkillsMetadataUpdatedEvent
+
+技能元数据更新时发出。
+
+```typescript
+interface MonitorSkillsMetadataUpdatedEvent {
+  channel: 'monitor';
+  type: 'skills_metadata_updated';
+  skills: string[];
+  timestamp: number;
+  bookmark?: Bookmark;
+}
+```
+
+### MonitorToolCustomEvent
+
+工具发出的自定义事件。
+
+```typescript
+interface MonitorToolCustomEvent {
+  channel: 'monitor';
+  type: 'tool_custom_event';
+  toolName: string;
+  eventType: string;
+  data?: any;
+  timestamp: number;
+  bookmark?: Bookmark;
+}
+```
+
+---
+
+## 订阅事件
+
+### 使用 `agent.on()` (仅 Control/Monitor)
+
+`agent.on()` 仅支持 Control 和 Monitor 事件。
+
+```typescript
+// Control 事件
+agent.on('permission_required', async (event) => {
+  console.log('需要权限:', event.call.name);
+  await event.respond('allow');
+});
+
+agent.on('permission_decided', (event) => {
+  console.log(`决定: ${event.decision} 由 ${event.decidedBy}`);
+});
+
+// Monitor 事件
+agent.on('error', (event) => {
+  console.error(`[${event.severity}] ${event.message}`);
+});
+
+agent.on('token_usage', (event) => {
+  console.log(`Tokens: ${event.totalTokens}`);
+});
+
+agent.on('tool_executed', (event) => {
+  console.log(`工具 ${event.call.name} 已执行`);
+});
+
+agent.on('state_changed', (event) => {
+  console.log(`状态: ${event.state}`);
+});
+```
+
+### 使用 `agent.subscribe()` (所有通道)
+
+`agent.subscribe()` 可按通道订阅事件。Progress 事件无法通过 `agent.on()` 订阅，请使用 `agent.subscribe()`：
+
+```typescript
+for await (const envelope of agent.subscribe(['progress'])) {
+  const { event } = envelope;
+
+  switch (event.type) {
+    case 'text_chunk':
+      process.stdout.write(event.delta);
+      break;
+    case 'tool:start':
+      console.log('工具:', event.call.name);
+      break;
+    case 'done':
+      console.log('完成');
+      break;
+  }
+}
+```
+
+### 使用 `agent.stream()` 异步迭代器
+
+```typescript
+for await (const envelope of agent.stream('Hello')) {
+  const { event } = envelope;
+
+  switch (event.type) {
+    case 'text_chunk':
+      process.stdout.write(event.delta);
+      break;
+    case 'tool:start':
+      console.log('工具:', event.call.name);
+      break;
+    case 'done':
+      console.log('完成');
+      break;
+  }
+}
+```
+
+---
+
+## 事件类型联合
+
+### ProgressEvent
+
+```typescript
+type ProgressEvent =
+  | ProgressThinkChunkStartEvent
+  | ProgressThinkChunkEvent
+  | ProgressThinkChunkEndEvent
+  | ProgressTextChunkStartEvent
+  | ProgressTextChunkEvent
+  | ProgressTextChunkEndEvent
+  | ProgressToolStartEvent
+  | ProgressToolEndEvent
+  | ProgressToolErrorEvent
+  | ProgressDoneEvent;
+```
+
+### ControlEvent
+
+```typescript
+type ControlEvent =
+  | ControlPermissionRequiredEvent
+  | ControlPermissionDecidedEvent;
+```
+
+### MonitorEvent
+
+```typescript
+type MonitorEvent =
+  | MonitorStateChangedEvent
+  | MonitorStepCompleteEvent
+  | MonitorErrorEvent
+  | MonitorTokenUsageEvent
+  | MonitorToolExecutedEvent
+  | MonitorAgentResumedEvent
+  | MonitorTodoChangedEvent
+  | MonitorTodoReminderEvent
+  | MonitorFileChangedEvent
+  | MonitorReminderSentEvent
+  | MonitorContextCompressionEvent
+  | MonitorSchedulerTriggeredEvent
+  | MonitorBreakpointChangedEvent
+  | MonitorToolManualUpdatedEvent
+  | MonitorSkillsMetadataUpdatedEvent
+  | MonitorToolCustomEvent;
+```
+
+---
+
+## 参考资料
+
+- [事件系统指南](../guides/events.md)
+- [API 参考](./api.md)
+- [类型参考](./types.md)
diff --git a/docs/zh-CN/reference/types.md b/docs/zh-CN/reference/types.md
new file mode 100644
index 0000000..6a67b63
--- /dev/null
+++ b/docs/zh-CN/reference/types.md
@@ -0,0 +1,483 @@
+# 类型参考
+
+本文档提供 KODE SDK 导出的所有 TypeScript 类型参考。
+
+---
+
+## 消息类型
+
+### MessageRole
+
+```typescript
+type MessageRole = 'user' | 'assistant' | 'system';
+```
+
+### Message
+
+```typescript
+interface Message {
+  role: MessageRole;
+  content: ContentBlock[];
+  metadata?: MessageMetadata;
+}
+```
+
+### MessageMetadata
+
+```typescript
+interface MessageMetadata {
+  content_blocks?: ContentBlock[];
+  transport?: 'provider' | 'text' | 'omit';
+}
+```
+
+---
+
+## 内容块
+
+### ContentBlock
+
+所有内容块类型的联合类型。
+
+```typescript
+type ContentBlock =
+  | { type: 'text'; text: string }
+  | { type: 'image_url'; image_url: { url: string } }
+  | { type: 'tool_use'; id: string; name: string; input: any; meta?: Record<string, any> }
+  | { type: 'tool_result'; tool_use_id: string; content: any; is_error?: boolean }
+  | ReasoningContentBlock
+  | ImageContentBlock
+  | AudioContentBlock
+  | FileContentBlock;
+```
+
+### ReasoningContentBlock
+
+```typescript
+type ReasoningContentBlock = {
+  type: 'reasoning';
+  reasoning: string;
+  meta?: Record<string, any>;
+};
+```
+
+### ImageContentBlock
+
+```typescript
+type ImageContentBlock = {
+  type: 'image';
+  url?: string;
+  file_id?: string;
+  base64?: string;
+  mime_type?: string;
+  meta?: Record<string, any>;
+};
+```
+
+### AudioContentBlock
+
+```typescript
+type AudioContentBlock = {
+  type: 'audio';
+  url?: string;
+  file_id?: string;
+  base64?: string;
+  mime_type?: string;
+  meta?: Record<string, any>;
+};
+```
+
+### FileContentBlock
+
+```typescript
+type FileContentBlock = {
+  type: 'file';
+  url?: string;
+  file_id?: string;
+  filename?: string;
+  base64?: string;
+  mime_type?: string;
+  meta?: Record<string, any>;
+};
+```
+
+---
+
+## Agent 状态类型
+
+### AgentRuntimeState
+
+```typescript
+type AgentRuntimeState = 'READY' | 'WORKING' | 'PAUSED';
+```
+
+| 状态 | 说明 |
+|------|------|
+| `READY` | Agent 空闲，准备接收消息 |
+| `WORKING` | Agent 正在处理消息 |
+| `PAUSED` | Agent 暂停，等待权限决策 |
+
+### BreakpointState
+
+```typescript
+type BreakpointState =
+  | 'READY'
+  | 'PRE_MODEL'
+  | 'STREAMING_MODEL'
+  | 'TOOL_PENDING'
+  | 'AWAITING_APPROVAL'
+  | 'PRE_TOOL'
+  | 'TOOL_EXECUTING'
+  | 'POST_TOOL';
+```
+
+### AgentStatus
+
+```typescript
+interface AgentStatus {
+  agentId: string;
+  state: AgentRuntimeState;
+  stepCount: number;
+  lastSfpIndex: number;
+  lastBookmark?: Bookmark;
+  cursor: number;
+  breakpoint: BreakpointState;
+}
+```
+
+### AgentInfo
+
+```typescript
+interface AgentInfo {
+  agentId: string;
+  templateId: string;
+  createdAt: string;
+  lineage: string[];
+  configVersion: string;
+  messageCount: number;
+  lastSfpIndex: number;
+  lastBookmark?: Bookmark;
+  breakpoint?: BreakpointState;
+  metadata?: Record<string, any>;
+}
+```
+
+---
+
+## 工具调用类型
+
+### ToolCallState
+
+```typescript
+type ToolCallState =
+  | 'PENDING'
+  | 'APPROVAL_REQUIRED'
+  | 'APPROVED'
+  | 'EXECUTING'
+  | 'COMPLETED'
+  | 'FAILED'
+  | 'DENIED'
+  | 'SEALED';
+```
+
+| 状态 | 说明 |
+|------|------|
+| `PENDING` | 收到工具调用，尚未处理 |
+| `APPROVAL_REQUIRED` | 等待用户审批 |
+| `APPROVED` | 已批准，准备执行 |
+| `EXECUTING` | 正在执行 |
+| `COMPLETED` | 执行成功完成 |
+| `FAILED` | 执行失败 |
+| `DENIED` | 用户拒绝了工具调用 |
+| `SEALED` | Resume 时自动封口 |
+
+### ToolCallRecord
+
+```typescript
+interface ToolCallRecord {
+  id: string;
+  name: string;
+  input: any;
+  state: ToolCallState;
+  approval: ToolCallApproval;
+  result?: any;
+  error?: string;
+  isError?: boolean;
+  startedAt?: number;
+  completedAt?: number;
+  durationMs?: number;
+  createdAt: number;
+  updatedAt: number;
+  auditTrail: ToolCallAuditEntry[];
+}
+```
+
+### ToolCallSnapshot
+
+```typescript
+type ToolCallSnapshot = Pick<
+  ToolCallRecord,
+  'id' | 'name' | 'state' | 'approval' | 'result' | 'error' | 'isError' | 'durationMs' | 'startedAt' | 'completedAt'
+> & {
+  inputPreview?: any;
+  auditTrail?: ToolCallAuditEntry[];
+};
+```
+
+### ToolCallApproval
+
+```typescript
+interface ToolCallApproval {
+  required: boolean;
+  decision?: 'allow' | 'deny';
+  decidedBy?: string;
+  decidedAt?: number;
+  note?: string;
+  meta?: Record<string, any>;
+}
+```
+
+### ToolCallAuditEntry
+
+```typescript
+interface ToolCallAuditEntry {
+  state: ToolCallState;
+  timestamp: number;
+  note?: string;
+}
+```
+
+### ToolOutcome
+
+```typescript
+interface ToolOutcome {
+  id: string;
+  name: string;
+  ok: boolean;
+  content: any;
+  durationMs?: number;
+}
+```
+
+### ToolCall
+
+```typescript
+interface ToolCall {
+  id: string;
+  name: string;
+  args: any;
+  agentId: string;
+}
+```
+
+### ToolContext
+
+```typescript
+interface ToolContext {
+  agentId: string;
+  sandbox: Sandbox;
+  agent: any;
+  services?: Record<string, any>;
+  signal?: AbortSignal;
+  emit?: (eventType: string, data?: any) => void;
+}
+```
+
+---
+
+## 事件类型
+
+### Bookmark
+
+```typescript
+interface Bookmark {
+  seq: number;
+  timestamp: number;
+}
+```
+
+### AgentChannel
+
+```typescript
+type AgentChannel = 'progress' | 'control' | 'monitor';
+```
+
+### AgentEvent
+
+```typescript
+type AgentEvent = ProgressEvent | ControlEvent | MonitorEvent;
+```
+
+### AgentEventEnvelope
+
+```typescript
+interface AgentEventEnvelope<T extends AgentEvent = AgentEvent> {
+  cursor: number;
+  bookmark: Bookmark;
+  event: T;
+}
+```
+
+### Timeline
+
+```typescript
+interface Timeline {
+  cursor: number;
+  bookmark: Bookmark;
+  event: AgentEvent;
+}
+```
+
+---
+
+## 快照类型
+
+### SnapshotId
+
+```typescript
+type SnapshotId = string;
+```
+
+### Snapshot
+
+```typescript
+interface Snapshot {
+  id: SnapshotId;
+  messages: Message[];
+  lastSfpIndex: number;
+  lastBookmark: Bookmark;
+  createdAt: string;
+  metadata?: Record<string, any>;
+}
+```
+
+---
+
+## Hook 类型
+
+### HookDecision
+
+```typescript
+type HookDecision =
+  | { decision: 'ask'; meta?: any }
+  | { decision: 'deny'; reason?: string; toolResult?: any }
+  | { result: any }
+  | void;
+```
+
+### PostHookResult
+
+```typescript
+type PostHookResult =
+  | void
+  | { update: Partial<ToolOutcome> }
+  | { replace: ToolOutcome };
+```
+
+---
+
+## 配置类型
+
+### PermissionConfig
+
+```typescript
+interface PermissionConfig {
+  mode: PermissionDecisionMode;
+  requireApprovalTools?: string[];
+  allowTools?: string[];
+  denyTools?: string[];
+  metadata?: Record<string, any>;
+}
+```
+
+### PermissionDecisionMode
+
+```typescript
+type PermissionDecisionMode = 'auto' | 'approval' | 'readonly' | (string & {});
+```
+
+| 模式 | 说明 |
+|------|------|
+| `auto` | 自动允许所有工具调用 |
+| `approval` | 所有工具调用都需要审批 |
+| `readonly` | 允许只读工具，其他需要审批 |
+
+### SubAgentConfig
+
+```typescript
+interface SubAgentConfig {
+  templates?: string[];
+  depth: number;
+  inheritConfig?: boolean;
+  overrides?: {
+    permission?: PermissionConfig;
+    todo?: TodoConfig;
+  };
+}
+```
+
+### TodoConfig
+
+```typescript
+interface TodoConfig {
+  enabled: boolean;
+  remindIntervalSteps?: number;
+  storagePath?: string;
+  reminderOnStart?: boolean;
+}
+```
+
+### SandboxConfig
+
+```typescript
+interface SandboxConfig {
+  kind: SandboxKind;
+  workDir?: string;
+  enforceBoundary?: boolean;
+  allowPaths?: string[];
+  watchFiles?: boolean;
+  [key: string]: any;
+}
+```
+
+### SandboxKind
+
+```typescript
+type SandboxKind = 'local' | 'docker' | 'remote';
+```
+
+---
+
+## Resume 类型
+
+### ResumeStrategy
+
+```typescript
+type ResumeStrategy = 'crash' | 'manual';
+```
+
+| 策略 | 说明 |
+|------|------|
+| `crash` | 自动封口未完成工具，发出 `agent_resumed` 事件 |
+| `manual` | 保持未完成工具不变，手动处理 |
+
+---
+
+## 提醒类型
+
+### ReminderOptions
+
+```typescript
+interface ReminderOptions {
+  skipStandardEnding?: boolean;
+  priority?: 'low' | 'medium' | 'high';
+  category?: 'file' | 'todo' | 'security' | 'performance' | 'general';
+}
+```
+
+---
+
+## 参考资料
+
+- [API 参考](./api.md)
+- [事件参考](./events-reference.md)
diff --git a/examples/05-openrouter-complete.ts b/examples/05-openrouter-complete.ts
index 1e7d1e2..9b1d75e 100644
--- a/examples/05-openrouter-complete.ts
+++ b/examples/05-openrouter-complete.ts
@@ -1,11 +1,15 @@
 import './shared/load-env';
 
-import { OpenRouterProvider, Message } from '../src';
+import { OpenAIProvider, Message } from '../src';
 
+/**
+ * OpenRouter uses an OpenAI-compatible API, so we use OpenAIProvider with
+ * the OpenRouter base URL (https://openrouter.ai/api/v1).
+ */
 async function main() {
   const apiKey = process.env.OPENROUTER_API_KEY;
   const modelId = process.env.OPENROUTER_MODEL_ID;
-  const baseUrl = process.env.OPENROUTER_BASE_URL;
+  const baseUrl = process.env.OPENROUTER_BASE_URL || 'https://openrouter.ai/api/v1';
 
   if (!apiKey) {
     throw new Error('Missing OPENROUTER_API_KEY');
@@ -14,7 +18,8 @@ async function main() {
     throw new Error('Missing OPENROUTER_MODEL_ID (e.g. openai/gpt-4.1-mini, anthropic/claude-3.5-sonnet)');
   }
 
-  const provider = new OpenRouterProvider(apiKey, modelId, baseUrl);
+  // OpenRouter is OpenAI-compatible, use OpenAIProvider with custom baseUrl
+  const provider = new OpenAIProvider(apiKey, modelId, baseUrl);
 
   const messages: Message[] = [
     {
diff --git a/examples/06-openrouter-stream.ts b/examples/06-openrouter-stream.ts
index 3aa6df6..3a4bd5a 100644
--- a/examples/06-openrouter-stream.ts
+++ b/examples/06-openrouter-stream.ts
@@ -1,6 +1,11 @@
 import './shared/load-env';
 
-import { OpenRouterProvider, Message, ModelStreamChunk } from '../src';
+import { OpenAIProvider, Message, ModelStreamChunk } from '../src';
+
+/**
+ * OpenRouter uses an OpenAI-compatible API, so we use OpenAIProvider with
+ * the OpenRouter base URL (https://openrouter.ai/api/v1).
+ */
 
 function chunkToDebugString(chunk: ModelStreamChunk): string {
   if (chunk.type === 'content_block_start') {
@@ -36,12 +41,13 @@ function chunkToDebugString(chunk: ModelStreamChunk): string {
 async function main() {
   const apiKey = process.env.OPENROUTER_API_KEY;
   const modelId = process.env.OPENROUTER_MODEL_ID;
-  const baseUrl = process.env.OPENROUTER_BASE_URL;
+  const baseUrl = process.env.OPENROUTER_BASE_URL || 'https://openrouter.ai/api/v1';
 
   if (!apiKey) throw new Error('Missing OPENROUTER_API_KEY');
   if (!modelId) throw new Error('Missing OPENROUTER_MODEL_ID (e.g. openai/gpt-4.1-mini)');
 
-  const provider = new OpenRouterProvider(apiKey, modelId, baseUrl);
+  // OpenRouter is OpenAI-compatible, use OpenAIProvider with custom baseUrl
+  const provider = new OpenAIProvider(apiKey, modelId, baseUrl);
 
   const messages: Message[] = [
     {
diff --git a/examples/db-postgres.ts b/examples/db-postgres.ts
new file mode 100644
index 0000000..b575faf
--- /dev/null
+++ b/examples/db-postgres.ts
@@ -0,0 +1,228 @@
+/**
+ * PostgreSQL Database Store Example
+ *
+ * Demonstrates:
+ * 1. Using createExtendedStore factory function to create PostgreSQL Store
+ * 2. Connection pool configuration
+ * 3. Query API with JSONB advanced queries
+ * 4. Production environment best practices
+ *
+ * Run: npm run example:db-postgres
+ *
+ * Prerequisites:
+ *   - PostgreSQL database server running
+ *   - Database created (default: kode_agents)
+ *
+ * Environment variables:
+ *   POSTGRES_HOST (default: localhost)
+ *   POSTGRES_PORT (default: 5432)
+ *   POSTGRES_DB (default: kode_agents)
+ *   POSTGRES_USER (default: kode)
+ *   POSTGRES_PASSWORD (required)
+ *   ANTHROPIC_API_KEY (required)
+ *   ANTHROPIC_MODEL_ID (default: claude-sonnet-4-20250514)
+ *
+ * Quick start with Docker:
+ *   docker run --name kode-postgres \
+ *     -e POSTGRES_PASSWORD=kode123 \
+ *     -e POSTGRES_DB=kode_agents \
+ *     -e POSTGRES_USER=kode \
+ *     -p 5432:5432 \
+ *     -d postgres:16-alpine
+ */
+
+import './shared/load-env';
+import * as path from 'path';
+import * as fs from 'fs';
+import {
+  Agent,
+  createExtendedStore,
+  PostgresStore,
+  AnthropicProvider,
+  AgentTemplateRegistry,
+  ToolRegistry,
+  SandboxFactory,
+  builtin,
+} from '../src';
+
+/**
+ * Runs the PostgreSQL store demo end to end: validates the environment,
+ * creates the store, runs one agent turn, exercises the query APIs, and
+ * closes the connection pool.
+ */
+async function main() {
+  console.log('=== PostgreSQL Store Example ===\n');
+
+  // Check for required environment variable
+  if (!process.env.POSTGRES_PASSWORD) {
+    console.log('⚠️  POSTGRES_PASSWORD not set.');
+    console.log('');
+    console.log('To run this example, set the following environment variables:');
+    console.log('  export POSTGRES_PASSWORD=your_password');
+    console.log('  export POSTGRES_HOST=localhost       # optional, default: localhost');
+    console.log('  export POSTGRES_PORT=5432            # optional, default: 5432');
+    console.log('  export POSTGRES_DB=kode_agents       # optional, default: kode_agents');
+    console.log('  export POSTGRES_USER=kode            # optional, default: kode');
+    console.log('');
+    console.log('Quick start with Docker:');
+    console.log('  docker run --name kode-postgres \\');
+    console.log('    -e POSTGRES_PASSWORD=kode123 \\');
+    console.log('    -e POSTGRES_DB=kode_agents \\');
+    console.log('    -e POSTGRES_USER=kode \\');
+    console.log('    -p 5432:5432 \\');
+    console.log('    -d postgres:16-alpine');
+    console.log('');
+    console.log('Then run: POSTGRES_PASSWORD=kode123 npm run example:db-postgres');
+    process.exit(0);
+  }
+
+  // Fail fast with a clear message instead of passing undefined to the provider
+  const anthropicApiKey = process.env.ANTHROPIC_API_KEY;
+  if (!anthropicApiKey) {
+    throw new Error('Missing ANTHROPIC_API_KEY');
+  }
+
+  // Connection configuration
+  const connectionConfig = {
+    host: process.env.POSTGRES_HOST || 'localhost',
+    port: parseInt(process.env.POSTGRES_PORT || '5432', 10),
+    database: process.env.POSTGRES_DB || 'kode_agents',
+    user: process.env.POSTGRES_USER || 'kode',
+    password: process.env.POSTGRES_PASSWORD,
+    // Connection pool settings (production recommendations)
+    max: 20,                      // Maximum connections in pool
+    idleTimeoutMillis: 30000,     // Close idle connections after 30s
+    connectionTimeoutMillis: 5000, // Connection timeout 5s
+  };
+
+  const storePath = path.join(__dirname, '../.data/postgres-store');
+  fs.mkdirSync(storePath, { recursive: true });
+
+  console.log(`Connecting to PostgreSQL at ${connectionConfig.host}:${connectionConfig.port}/${connectionConfig.database}...`);
+
+  // Method 1: Using factory function (recommended)
+  console.log('\n1. Creating PostgreSQL Store using factory function...');
+  let store: PostgresStore;
+  try {
+    store = createExtendedStore({
+      type: 'postgres',
+      connection: connectionConfig,
+      fileStoreBaseDir: storePath,
+    }) as PostgresStore;
+    console.log('   Store created successfully!\n');
+  } catch (error: unknown) {
+    const reason = error instanceof Error ? error.message : String(error);
+    console.error('   Failed to connect to PostgreSQL:', reason);
+    console.log('\n   Make sure PostgreSQL is running and accessible.');
+    process.exit(1);
+  }
+
+  // Method 2: Using class directly (alternative)
+  // const store = new PostgresStore(connectionConfig, storePath);
+
+  // Setup dependencies
+  const templates = new AgentTemplateRegistry();
+  const tools = new ToolRegistry();
+  const sandboxFactory = new SandboxFactory();
+
+  // Register tools
+  for (const tool of [...builtin.fs(), ...builtin.todo()]) {
+    tools.register(tool.name, () => tool);
+  }
+
+  // Register template
+  const modelId = process.env.ANTHROPIC_MODEL_ID || 'claude-sonnet-4-20250514';
+  templates.register({
+    id: 'postgres-demo',
+    systemPrompt: 'You are a helpful assistant. Keep answers concise.',
+    tools: ['fs_read', 'todo_read', 'todo_write'],
+    model: modelId,
+    runtime: { todo: { enabled: true } },
+  });
+
+  // Create provider
+  const provider = new AnthropicProvider(anthropicApiKey, modelId);
+
+  // Create agent
+  console.log('2. Creating Agent...');
+  const agent = await Agent.create(
+    {
+      templateId: 'postgres-demo',
+      sandbox: { kind: 'local', workDir: './workspace', enforceBoundary: true },
+    },
+    {
+      store,
+      templateRegistry: templates,
+      toolRegistry: tools,
+      sandboxFactory,
+      modelFactory: () => provider,
+    }
+  );
+  console.log(`   Agent created: ${agent.agentId}\n`);
+
+  // Subscribe to progress events
+  const progressPromise = (async () => {
+    for await (const envelope of agent.subscribe(['progress'])) {
+      if (envelope.event.type === 'text_chunk') {
+        process.stdout.write(envelope.event.delta);
+      }
+      if (envelope.event.type === 'done') {
+        console.log('\n');
+        break;
+      }
+    }
+  })();
+
+  // Send a message
+  console.log('3. Sending message...');
+  await agent.send('Hello! What is the capital of France? Answer in one sentence.');
+  await progressPromise;
+
+  // Query API demonstration
+  console.log('4. Demonstrating Query APIs...\n');
+
+  // Query sessions
+  console.log('   [querySessions]');
+  const sessions = await store.querySessions({ limit: 5 });
+  console.log(`   Found ${sessions.length} session(s)`);
+  for (const session of sessions) {
+    console.log(`   - ${session.agentId} (template: ${session.templateId})`);
+  }
+  console.log();
+
+  // Query messages
+  console.log('   [queryMessages]');
+  const messages = await store.queryMessages({ agentId: agent.agentId, limit: 10 });
+  console.log(`   Found ${messages.length} message(s) for this agent`);
+  console.log();
+
+  // Query tool calls
+  console.log('   [queryToolCalls]');
+  const toolCalls = await store.queryToolCalls({ agentId: agent.agentId, limit: 10 });
+  console.log(`   Found ${toolCalls.length} tool call(s) for this agent`);
+  console.log();
+
+  // Aggregate stats
+  console.log('   [aggregateStats]');
+  const stats = await store.aggregateStats(agent.agentId);
+  console.log(`   Total messages: ${stats.totalMessages}`);
+  console.log(`   Total tool calls: ${stats.totalToolCalls}`);
+  if (stats.toolCallsByState) {
+    console.log(`   Tool calls by state:`, stats.toolCallsByState);
+  }
+  console.log();
+
+  // Cleanup
+  console.log('5. Closing database connection pool...');
+  await store.close();
+  console.log('   Done!\n');
+
+  console.log('=== Example Complete ===');
+  console.log(`Connected to: ${connectionConfig.host}:${connectionConfig.port}/${connectionConfig.database}`);
+  console.log(`File store: ${storePath}`);
+}
+
+main().catch((error) => {
+  console.error('Error:', error);
+  process.exit(1);
+});
diff --git a/examples/db-sqlite.ts b/examples/db-sqlite.ts
new file mode 100644
index 0000000..1fd6393
--- /dev/null
+++ b/examples/db-sqlite.ts
@@ -0,0 +1,170 @@
+/**
+ * SQLite Database Store Example
+ *
+ * Demonstrates:
+ * 1. Using createExtendedStore factory function to create SQLite Store
+ * 2. Basic Agent creation and conversation
+ * 3. Query API: querySessions, queryMessages, queryToolCalls, aggregateStats
+ * 4. Database cleanup
+ *
+ * Run: npm run example:db-sqlite
+ * No additional setup required - SQLite is file-based.
+ *
+ * Environment variables:
+ *   ANTHROPIC_API_KEY (required)
+ *   ANTHROPIC_MODEL_ID (default: claude-sonnet-4-20250514)
+ */
+
+import './shared/load-env';
+import * as path from 'path';
+import * as fs from 'fs';
+import {
+  Agent,
+  createExtendedStore,
+  SqliteStore,
+  AnthropicProvider,
+  AgentTemplateRegistry,
+  ToolRegistry,
+  SandboxFactory,
+  builtin,
+} from '../src';
+
+/**
+ * Runs the SQLite store demo end to end: checks for the Anthropic API key,
+ * creates the store, runs one agent turn while streaming progress text to
+ * stdout, exercises the query APIs, and closes the database connection.
+ */
+async function main() {
+  console.log('=== SQLite Store Example ===\n');
+
+  // Fail fast with a clear message instead of passing undefined to the provider
+  const anthropicApiKey = process.env.ANTHROPIC_API_KEY;
+  if (!anthropicApiKey) {
+    throw new Error('Missing ANTHROPIC_API_KEY');
+  }
+
+  // Setup paths
+  const dbPath = path.join(__dirname, '../.data/example-sqlite.db');
+  const storePath = path.join(__dirname, '../.data/sqlite-store');
+
+  // Ensure directory exists
+  fs.mkdirSync(path.dirname(dbPath), { recursive: true });
+
+  // Method 1: Using factory function (recommended)
+  console.log('1. Creating SQLite Store using factory function...');
+  const store = createExtendedStore({
+    type: 'sqlite',
+    dbPath,
+    fileStoreBaseDir: storePath,
+  }) as SqliteStore;
+  console.log('   Store created successfully!\n');
+
+  // Method 2: Using class directly (alternative)
+  // const store = new SqliteStore(dbPath, storePath);
+
+  // Setup dependencies
+  const templates = new AgentTemplateRegistry();
+  const tools = new ToolRegistry();
+  const sandboxFactory = new SandboxFactory();
+
+  // Register tools
+  for (const tool of [...builtin.fs(), ...builtin.todo()]) {
+    tools.register(tool.name, () => tool);
+  }
+
+  // Register template
+  const modelId = process.env.ANTHROPIC_MODEL_ID || 'claude-sonnet-4-20250514';
+  templates.register({
+    id: 'sqlite-demo',
+    systemPrompt: 'You are a helpful assistant. Keep answers concise.',
+    tools: ['fs_read', 'todo_read', 'todo_write'],
+    model: modelId,
+    runtime: { todo: { enabled: true } },
+  });
+
+  // Create provider
+  const provider = new AnthropicProvider(anthropicApiKey, modelId);
+
+  // Create agent
+  console.log('2. Creating Agent...');
+  const agent = await Agent.create(
+    {
+      templateId: 'sqlite-demo',
+      sandbox: { kind: 'local', workDir: './workspace', enforceBoundary: true },
+    },
+    {
+      store,
+      templateRegistry: templates,
+      toolRegistry: tools,
+      sandboxFactory,
+      modelFactory: () => provider,
+    }
+  );
+  console.log(`   Agent created: ${agent.agentId}\n`);
+
+  // Subscribe to progress events
+  const progressPromise = (async () => {
+    for await (const envelope of agent.subscribe(['progress'])) {
+      if (envelope.event.type === 'text_chunk') {
+        process.stdout.write(envelope.event.delta);
+      }
+      if (envelope.event.type === 'done') {
+        console.log('\n');
+        break;
+      }
+    }
+  })();
+
+  // Send a message
+  console.log('3. Sending message...');
+  await agent.send('Hello! What is 2 + 2? Answer briefly.');
+  await progressPromise;
+
+  // Query API demonstration
+  console.log('4. Demonstrating Query APIs...\n');
+
+  // Query sessions
+  console.log('   [querySessions]');
+  const sessions = await store.querySessions({ limit: 5 });
+  console.log(`   Found ${sessions.length} session(s)`);
+  for (const session of sessions) {
+    console.log(`   - ${session.agentId} (template: ${session.templateId})`);
+  }
+  console.log();
+
+  // Query messages
+  console.log('   [queryMessages]');
+  const messages = await store.queryMessages({ agentId: agent.agentId, limit: 10 });
+  console.log(`   Found ${messages.length} message(s) for this agent`);
+  console.log();
+
+  // Query tool calls
+  console.log('   [queryToolCalls]');
+  const toolCalls = await store.queryToolCalls({ agentId: agent.agentId, limit: 10 });
+  console.log(`   Found ${toolCalls.length} tool call(s) for this agent`);
+  console.log();
+
+  // Aggregate stats
+  console.log('   [aggregateStats]');
+  const stats = await store.aggregateStats(agent.agentId);
+  console.log(`   Total messages: ${stats.totalMessages}`);
+  console.log(`   Total tool calls: ${stats.totalToolCalls}`);
+  if (stats.toolCallsByState) {
+    console.log(`   Tool calls by state:`, stats.toolCallsByState);
+  }
+  console.log();
+
+  // Cleanup
+  console.log('5. Closing database connection...');
+  await store.close();
+  console.log('   Done!\n');
+
+  console.log('=== Example Complete ===');
+  console.log(`Database file: ${dbPath}`);
+  console.log(`Store directory: ${storePath}`);
+}
+
+main().catch((error) => {
+  console.error('Error:', error);
+  process.exit(1);
+});
diff --git a/examples/tooling/fs-playground.ts b/examples/tooling/fs-playground.ts
index d5e1c95..8e70428 100644
--- a/examples/tooling/fs-playground.ts
+++ b/examples/tooling/fs-playground.ts
@@ -5,22 +5,31 @@ import {
   AnthropicProvider,
   JSONStore,
   SandboxFactory,
-  TemplateRegistry,
+  AgentTemplateRegistry,
   ToolRegistry,
   builtin,
-} from 'kode-sdk';
+} from '../../src';
 
 async function runFsDemo() {
   const store = new JSONStore('./.kode');
-  const templates = new TemplateRegistry();
+  const templates = new AgentTemplateRegistry();
   const tools = new ToolRegistry();
   const sandboxFactory = new SandboxFactory();
 
-  builtin.registerAll(tools);
+  // Register builtin tools
+  for (const tool of builtin.fs()) {
+    tools.register(tool.name, () => tool);
+  }
+  for (const tool of builtin.bash()) {
+    tools.register(tool.name, () => tool);
+  }
+  for (const tool of builtin.todo()) {
+    tools.register(tool.name, () => tool);
+  }
 
   templates.register({
     id: 'fs-demo',
-    desc: 'Filesystem playground',
+    systemPrompt: 'Filesystem playground assistant',
     tools: ['fs_read', 'fs_write', 'fs_edit', 'fs_glob', 'fs_grep', 'fs_multi_edit'],
   });
 
@@ -42,7 +51,7 @@ async function runFsDemo() {
 
   await agent.send('请使用 fs_glob 列出 src/**/*.ts 再用 fs_grep 找到包含 TODO 的文件');
 
-  for await (const event of agent.chatStream('执行上述操作并总结结果')) {
+  for await (const event of agent.stream('执行上述操作并总结结果')) {
     if (event.event.type === 'text_chunk') {
       process.stdout.write(event.event.delta);
     }
diff --git a/package-lock.json b/package-lock.json
index 1d8a384..3e9b25b 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -108,6 +108,7 @@
       "resolved": "https://registry.npmjs.org/zod/-/zod-3.25.76.tgz",
       "integrity": "sha512-gzUt/qt81nXsFGKIFcC3YnfEAx5NkunCfnDlvuBSSFS02bcXu4Lmea0AFIUwbLWxWPx3d9p8S5QoaujKcNQxcQ==",
       "license": "MIT",
+      "peer": true,
       "funding": {
         "url": "https://github.com/sponsors/colinhacks"
       }
@@ -200,6 +201,7 @@
       "integrity": "sha512-N2clP5pJhB2YnZJ3PIHFk5RkygRX5WO/5f0WC08tp0wd+sv0rsJk3MqWn3CbNmT2J505a5336jaQj4ph1AdMug==",
       "dev": true,
       "license": "MIT",
+      "peer": true,
       "dependencies": {
         "undici-types": "~6.21.0"
       }
@@ -726,6 +728,7 @@
       "resolved": "https://registry.npmjs.org/express/-/express-5.2.1.tgz",
       "integrity": "sha512-hIS4idWWai69NezIdRt2xFVofaF4j+6INOpJlVOLDO8zXGpUVEVzIYk12UUi2JzjEzWL3IOAxcTubgz9Po0yXw==",
       "license": "MIT",
+      "peer": true,
       "dependencies": {
         "accepts": "^2.0.0",
         "body-parser": "^2.2.1",
@@ -1333,6 +1336,7 @@
       "resolved": "https://registry.npmmirror.com/pg/-/pg-8.17.2.tgz",
       "integrity": "sha512-vjbKdiBJRqzcYw1fNU5KuHyYvdJ1qpcQg1CeBrHFqV1pWgHeVR6j/+kX0E1AAXfyuLUGY1ICrN2ELKA/z2HWzw==",
       "license": "MIT",
+      "peer": true,
       "dependencies": {
         "pg-connection-string": "^2.10.1",
         "pg-pool": "^3.11.0",
@@ -2060,6 +2064,7 @@
       "integrity": "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw==",
       "dev": true,
       "license": "Apache-2.0",
+      "peer": true,
       "bin": {
         "tsc": "bin/tsc",
         "tsserver": "bin/tsserver"
@@ -2160,6 +2165,7 @@
       "resolved": "https://registry.npmjs.org/zod/-/zod-3.23.8.tgz",
       "integrity": "sha512-XBx9AXhXktjUqnepgTiE5flcKIYWi/rme0Eaj+5Y0lftuGBq+jyRu/md4WnuxqgP1ubdpNCsYEYPxrzVHD8d6g==",
       "license": "MIT",
+      "peer": true,
       "funding": {
         "url": "https://github.com/sponsors/colinhacks"
       }
diff --git a/package.json b/package.json
index 4581ce9..1a21012 100644
--- a/package.json
+++ b/package.json
@@ -24,7 +24,9 @@
     "example:nextjs": "ts-node examples/nextjs-api-route.ts",
     "example:openrouter": "ts-node examples/05-openrouter-complete.ts",
     "example:openrouter-stream": "ts-node examples/06-openrouter-stream.ts",
-    "example:openrouter-agent": "ts-node examples/07-openrouter-agent.ts"
+    "example:openrouter-agent": "ts-node examples/07-openrouter-agent.ts",
+    "example:db-sqlite": "ts-node examples/db-sqlite.ts",
+    "example:db-postgres": "ts-node examples/db-postgres.ts"
   },
   "keywords": [
     "agent",
diff --git a/quickstart.sh b/quickstart.sh
index f41ba9d..0e6e462 100755
--- a/quickstart.sh
+++ b/quickstart.sh
@@ -1,61 +1,74 @@
 #!/bin/bash
 
-# Kode SDK v1.5.1 - Quick Start Script
+# KODE SDK v2.7.0 - Quick Start Script
 
-echo "🚀 Kode SDK v1.5.1 Quick Start"
+echo "KODE SDK v2.7.0 Quick Start"
 echo ""
 
 # Check Node.js version
 if ! command -v node &> /dev/null; then
-    echo "❌ Node.js is not installed. Please install Node.js 18+ first."
+    echo "Node.js is not installed. Please install Node.js 18+ first."
     exit 1
 fi
 
 NODE_VERSION=$(node -v | cut -d'v' -f2 | cut -d'.' -f1)
 if [ "$NODE_VERSION" -lt 18 ]; then
-    echo "❌ Node.js version must be 18 or higher. Current: $(node -v)"
+    echo "Node.js version must be 18 or higher. Current: $(node -v)"
     exit 1
 fi
 
-echo "✅ Node.js $(node -v) detected"
+echo "Node.js $(node -v) detected"
 echo ""
 
 # Install dependencies
-echo "📦 Installing dependencies..."
+echo "Installing dependencies..."
 npm install
 
 # Build the project
-echo "🔨 Building TypeScript..."
+echo "Building TypeScript..."
 npm run build
 
 if [ $? -ne 0 ]; then
-    echo "❌ Build failed. Please check for errors above."
+    echo "Build failed. Please check for errors above."
     exit 1
 fi
 
-echo "✅ Build successful!"
+echo "Build successful!"
 echo ""
 
 # Check for API key
 if [ -z "$ANTHROPIC_API_KEY" ]; then
-    echo "⚠️  Warning: ANTHROPIC_API_KEY environment variable is not set."
-    echo "   Please set it to run examples:"
-    echo "   export ANTHROPIC_API_KEY=your_key_here"
+    echo "Warning: ANTHROPIC_API_KEY environment variable is not set."
+    echo "  Please set it to run examples:"
+    echo "  export ANTHROPIC_API_KEY=your_key_here"
     echo ""
 fi
 
-echo "📚 Available examples:"
-echo "   npm run example:u1  - Next.js backend (send + subscribe)"
-echo "   npm run example:u2  - Permission approval flow"
-echo "   npm run example:u3  - Hook for path guard and result trimming"
-echo "   npm run example:u4  - Scheduler with time and step triggers"
-echo "   npm run example:u5  - Sub-agent task delegation"
-echo "   npm run example:u6  - Room group chat"
-echo "   npm run example:u7  - ChatDev team collaboration"
+echo "Available examples:"
+echo ""
+echo "  Getting Started:"
+echo "    npm run example:getting-started    - Minimal chat example"
+echo ""
+echo "  Providers:"
+echo "    npm run example:openai             - OpenAI provider usage"
+echo "    npm run example:gemini             - Gemini provider usage"
+echo "    npm run example:openrouter         - OpenRouter complete example"
+echo "    npm run example:openrouter-stream  - OpenRouter streaming"
+echo "    npm run example:openrouter-agent   - OpenRouter agent with tools"
+echo ""
+echo "  Features:"
+echo "    npm run example:agent-inbox        - Event-driven inbox"
+echo "    npm run example:approval           - Tool approval workflow"
+echo "    npm run example:room               - Multi-agent collaboration"
+echo "    npm run example:scheduler          - Scheduler with triggers"
+echo "    npm run example:nextjs             - Next.js API route"
+echo ""
+echo "  Database:"
+echo "    npm run example:db-sqlite          - SQLite persistence"
+echo "    npm run example:db-postgres        - PostgreSQL persistence"
 echo ""
 
-echo "📖 Documentation: README.md"
-echo "🔍 Implementation details: IMPLEMENTATION_SUMMARY.md"
+echo "Documentation: docs/en/ or docs/zh-CN/"
 echo ""
 
-echo "✨ Kode SDK is ready! Happy coding! ✨"
+echo "KODE SDK is ready! Happy coding!"
diff --git a/src/index.ts b/src/index.ts
index 1609c8a..e34f123 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -67,7 +67,9 @@ export {
 } from './core/errors';
 
 // Infrastructure
-export { Store, JSONStore } from './infra/store';
+export { Store, JSONStore, createStore, createExtendedStore } from './infra/store';
+export { SqliteStore } from './infra/db/sqlite/sqlite-store';
+export { PostgresStore } from './infra/db/postgres/postgres-store';
 export { Sandbox, LocalSandbox, SandboxKind } from './infra/sandbox';
 export {
   ModelProvider,
diff --git a/tests/unit/core/pool-shutdown.test.ts b/tests/unit/core/pool-shutdown.test.ts
index fd04c66..cc5cd27 100644
--- a/tests/unit/core/pool-shutdown.test.ts
+++ b/tests/unit/core/pool-shutdown.test.ts
@@ -1,163 +1,191 @@
 /**
  * Tests for AgentPool graceful shutdown functionality
  */
-import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest';
-import { AgentPool, GracefulShutdownOptions, ShutdownResult } from '../../../src/core/pool';
-import { Agent } from '../../../src/core/agent';
-import { JSONStore } from '../../../src/infra/store/json-store';
-import * as fs from 'fs';
-import * as path from 'path';
-import * as os from 'os';
-
-describe('AgentPool Graceful Shutdown', () => {
-  let pool: AgentPool;
-  let store: JSONStore;
-  let testDir: string;
-
-  beforeEach(async () => {
-    testDir = path.join(os.tmpdir(), `kode-pool-test-${Date.now()}`);
-    fs.mkdirSync(testDir, { recursive: true });
-    store = new JSONStore(testDir);
-
-    // Mock dependencies
-    const mockProvider = {
-      chat: vi.fn().mockResolvedValue({
-        role: 'assistant',
-        content: [{ type: 'text', text: 'Hello!' }],
-      }),
-      stream: vi.fn(),
-    };
 
-    pool = new AgentPool({
-      dependencies: {
-        store,
-        modelProvider: mockProvider as any,
-        sandbox: { run: vi.fn() } as any,
-      },
-      maxAgents: 10,
-    });
+import path from 'path';
+import fs from 'fs';
+import os from 'os';
+import {
+  AgentPool,
+  JSONStore,
+  SandboxFactory,
+  AgentTemplateRegistry,
+  ToolRegistry,
+  AgentConfig,
+} from '../../../src';
+import { Agent } from '../../../src/core/agent';
+import { MockProvider } from '../../mock-provider';
+import { TestRunner, expect } from '../../helpers/utils';
+
+const runner = new TestRunner('AgentPool Graceful Shutdown');
+
+let pool: AgentPool;
+let store: JSONStore;
+let testDir: string;
+
+/** Test double for Agent: reports a fixed `state` and records whether interrupt() was invoked. */
+function createMockAgent(state: 'READY' | 'WORKING' | 'PAUSED' = 'READY') {
+  const seen = { interrupt: false };
+  const stub = {
+    status: async () => ({ state }),
+    interrupt: async (_opts?: { note?: string }) => {
+      seen.interrupt = true;
+    },
+    get interruptCalled() { return seen.interrupt; }, // live getter: read it after shutdown ran
+  };
+  return stub as unknown as Agent & { interruptCalled: boolean };
+}
+
+/** Per-test fixture: a JSONStore on a fresh temp dir and an AgentPool whose model factory yields a MockProvider. */
+async function setupPool() {
+  // mkdtempSync creates the directory itself with a random suffix, so parallel or same-millisecond runs never share one.
+  testDir = fs.mkdtempSync(path.join(os.tmpdir(), 'kode-pool-test-'));
+  store = new JSONStore(testDir);
+
+  const templates = new AgentTemplateRegistry();
+  const tools = new ToolRegistry();
+  const sandboxFactory = new SandboxFactory();
+  templates.register({
+    id: 'test-agent',
+    systemPrompt: 'Test agent',
   });
 
-  afterEach(async () => {
-    try {
-      fs.rmSync(testDir, { recursive: true, force: true });
-    } catch {
-      // Ignore cleanup errors
-    }
+  pool = new AgentPool({
+    dependencies: {
+      store,
+      templateRegistry: templates,
+      sandboxFactory,
+      toolRegistry: tools,
+      modelFactory: () => new MockProvider([{ text: 'Hello!' }]),
+    },
+    maxAgents: 10,
   });
+}
 
-  describe('gracefulShutdown', () => {
-    it('should return empty result when pool is empty', async () => {
-      const result = await pool.gracefulShutdown();
+/** Best-effort removal of the per-test temp directory; a failed cleanup never fails a test. */
+function cleanupPool() {
+  const removal = { recursive: true, force: true };
+  try {
+    fs.rmSync(testDir, removal);
+  } catch { /* Ignore cleanup errors */ }
+}
 
-      expect(result.completed).toEqual([]);
-      expect(result.interrupted).toEqual([]);
-      expect(result.failed).toEqual([]);
-      expect(result.durationMs).toBeGreaterThanOrEqual(0);
-    });
+runner // single chained registration: the two hooks first, then every .test(...) call below
+  .beforeEach(setupPool) // beforeEach hook: builds a fresh temp-dir store and pool
+  .afterEach(cleanupPool) // afterEach hook: best-effort removal of that temp dir
 
-    it('should save running agents list when saveRunningList is true', async () => {
-      // Create and add an agent to the pool
-      const mockAgent = {
-        status: vi.fn().mockResolvedValue({ state: 'READY' }),
-        interrupt: vi.fn(),
-      } as unknown as Agent;
+  .test('gracefulShutdown - should return empty result when pool is empty', async () => {
+    const result = await pool.gracefulShutdown(); // setupPool adds no agents, so the pool is empty here
 
-      (pool as any).agents.set('test-agent-1', mockAgent);
+    expect.toDeepEqual(result.completed, []);
+    expect.toDeepEqual(result.interrupted, []);
+    expect.toDeepEqual(result.failed, []);
+    expect.toBeGreaterThanOrEqual(result.durationMs, 0); // only checks that the duration is non-negative
+  })
 
-      const result = await pool.gracefulShutdown({ saveRunningList: true });
+  .test('gracefulShutdown - should save running agents list when saveRunningList is true', async () => {
+    const mockAgent = createMockAgent('READY');
+    (pool as any).agents.set('test-agent-1', mockAgent); // inject the stub straight into the pool's agent map (any-cast: not reachable through the typed API)
 
-      expect(result.completed).toContain('test-agent-1');
+    const result = await pool.gracefulShutdown({ saveRunningList: true });
 
-      // Verify running agents list was saved
-      const savedInfo = await store.loadInfo('__pool_meta__');
-      expect(savedInfo).toBeDefined();
-      expect((savedInfo as any).runningAgents.agentIds).toContain('test-agent-1');
-    });
+    expect.toContain(result.completed, 'test-agent-1');
 
-    it('should not save running agents list when saveRunningList is false', async () => {
-      const mockAgent = {
-        status: vi.fn().mockResolvedValue({ state: 'READY' }),
-        interrupt: vi.fn(),
-      } as unknown as Agent;
+    // Verify the running-agents list was persisted under the '__pool_meta__' key
+    const savedInfo = await store.loadInfo('__pool_meta__');
+    expect.toBeTruthy(savedInfo);
+    expect.toContain((savedInfo as any).runningAgents.agentIds, 'test-agent-1');
+  })
 
-      (pool as any).agents.set('test-agent-2', mockAgent);
+  .test('gracefulShutdown - should not save running agents list when saveRunningList is false', async () => {
+    const mockAgent = createMockAgent('READY');
+    (pool as any).agents.set('test-agent-2', mockAgent); // inject the stub straight into the pool's agent map
 
-      await pool.gracefulShutdown({ saveRunningList: false });
+    await pool.gracefulShutdown({ saveRunningList: false });
 
-      // Verify running agents list was NOT saved
-      const savedInfo = await store.loadInfo('__pool_meta__');
-      expect(savedInfo).toBeUndefined();
-    });
+    // Verify that no '__pool_meta__' record was written
+    const savedInfo = await store.loadInfo('__pool_meta__');
+    expect.toBeFalsy(savedInfo);
+  })
 
-    it('should interrupt working agents after timeout', async () => {
-      const interruptMock = vi.fn().mockResolvedValue(undefined);
-      const mockAgent = {
-        status: vi.fn().mockResolvedValue({ state: 'WORKING' }),
-        interrupt: interruptMock,
-      } as unknown as Agent;
+  .test('gracefulShutdown - should interrupt working agents after timeout', async () => {
+    const mockAgent = createMockAgent('WORKING'); // the stub reports WORKING on every status() call, so it never finishes on its own
+    (pool as any).agents.set('working-agent', mockAgent);
 
-      (pool as any).agents.set('working-agent', mockAgent);
+    const result = await pool.gracefulShutdown({
+      timeout: 100, // Very short timeout (presumably milliseconds - TODO confirm against GracefulShutdownOptions)
+      forceInterrupt: true,
+    });
 
-      const result = await pool.gracefulShutdown({
-        timeout: 100, // Very short timeout
-        forceInterrupt: true,
-      });
+    expect.toBeTruthy(mockAgent.interruptCalled); // NOTE(review): the stub does not record interrupt()'s argument, so the { note } value is not asserted here
+    expect.toContain(result.interrupted, 'working-agent');
+  })
 
-      expect(interruptMock).toHaveBeenCalledWith({ note: 'Graceful shutdown timeout' });
-      expect(result.interrupted).toContain('working-agent');
+  .test('resumeFromShutdown - should return empty array when no running agents list exists', async () => {
+    const configFactory = (agentId: string): AgentConfig => ({ // builds a config per agent id, pointing at the 'test-agent' template registered in setupPool
+      agentId,
+      templateId: 'test-agent',
     });
-  });
 
-  describe('resumeFromShutdown', () => {
-    it('should return empty array when no running agents list exists', async () => {
-      const configFactory = (agentId: string) => ({
-        agentId,
-        template: { systemPrompt: 'test' },
-      });
-
-      const resumed = await pool.resumeFromShutdown(configFactory);
+    const resumed = await pool.resumeFromShutdown(configFactory); // nothing was saved to the fresh store beforehand
+
+    expect.toDeepEqual(resumed, []);
+  })
+
+  .test('resumeFromShutdown - should clear running agents list after resume', async () => {
+    // Seed the store by hand with a running-agents record, as if a previous shutdown had saved it
+    await store.saveInfo('__pool_meta__', {
+      agentId: '__pool_meta__',
+      templateId: '__pool_meta__',
+      createdAt: new Date().toISOString(),
+      runningAgents: {
+        agentIds: ['non-existent-agent'],
+        shutdownAt: new Date().toISOString(),
+        version: '1.0.0',
+      },
+    } as any); // any-cast: presumably runningAgents is not part of the typed info record - TODO confirm
 
-      expect(resumed).toEqual([]);
+    const configFactory = (agentId: string): AgentConfig => ({
+      agentId,
+      templateId: 'test-agent',
     });
 
-    it('should clear running agents list after resume', async () => {
-      // Manually save a running agents list
-      await store.saveInfo('__pool_meta__', {
-        agentId: '__pool_meta__',
-        templateId: '__pool_meta__',
-        createdAt: new Date().toISOString(),
-        runningAgents: {
-          agentIds: ['non-existent-agent'],
-          shutdownAt: new Date().toISOString(),
-          version: '1.0.0',
-        },
-      } as any);
-
-      const configFactory = (agentId: string) => ({
-        agentId,
-        template: { systemPrompt: 'test' },
-      });
-
-      // Resume will fail for non-existent agent, but should still clear the list
-      await pool.resumeFromShutdown(configFactory);
-
-      // Verify the list was cleared
-      const savedInfo = await store.loadInfo('__pool_meta__');
-      expect(savedInfo).toBeUndefined();
-    });
-  });
+    // Resuming 'non-existent-agent' is expected to fail, but the saved list should be cleared anyway
+    await pool.resumeFromShutdown(configFactory);
+
+    // Verify the '__pool_meta__' record is gone
+    const savedInfo = await store.loadInfo('__pool_meta__');
+    expect.toBeFalsy(savedInfo);
+  })
+
+  .test('registerShutdownHandlers - should register SIGTERM and SIGINT handlers', async () => {
+    const handlers: Map<string, Function> = new Map(); // event name -> handler captured by the stand-in below
+    const originalOn = process.on.bind(process); // kept so the finally block can put it back
 
-  describe('registerShutdownHandlers', () => {
-    it('should register SIGTERM and SIGINT handlers', () => {
-      const onSpy = vi.spyOn(process, 'on');
+    // Replace process.on with a recorder so no real signal handlers are installed during the test
+    (process as any).on = (event: string, handler: Function) => {
+      handlers.set(event, handler);
+      return process;
+    };
 
+    try { // NOTE(review): only process.on is intercepted; registrations made via process.once would be missed - confirm
       pool.registerShutdownHandlers();
 
-      expect(onSpy).toHaveBeenCalledWith('SIGTERM', expect.any(Function));
-      expect(onSpy).toHaveBeenCalledWith('SIGINT', expect.any(Function));
+      expect.toBeTruthy(handlers.has('SIGTERM'));
+      expect.toBeTruthy(handlers.has('SIGINT'));
+    } finally {
+      // Put process.on back even when an assertion above throws (restored as the bound copy saved earlier)
+      (process as any).on = originalOn;
+    }
+  });
+
+export async function run() { // exported entry point: delegates to runner.run() and resolves with its result
+  return await runner.run();
+}
 
-      onSpy.mockRestore();
-    });
+if (require.main === module) { // true only when this file is executed directly, not when it is imported
+  run().catch((err) => {
+    console.error(err);
+    process.exitCode = 1; // NOTE(review): set only when run() rejects - confirm TestRunner rejects (or sets the exit code itself) on failed tests
   });
-});
+}