diff --git a/src/content/docs/agents/agent-system/api-reference.mdx b/src/content/docs/agents/agent-system/api-reference.mdx new file mode 100644 index 00000000000000..52cdec3cae1c11 --- /dev/null +++ b/src/content/docs/agents/agent-system/api-reference.mdx @@ -0,0 +1,748 @@ +--- +title: API Reference +pcx_content_type: reference +sidebar: + order: 3 +--- + +import { TypeScriptExample } from "~/components"; + +This reference covers the TypeScript/Worker API and the HTTP surface for the Agent System. + +--- + +## Module: `agents/sys` + +Everything in `packages/agents/src/sys` is re-exported from this entrypoint. + +### AgentSystem + + +```ts +class AgentSystem> { + constructor(options: AgentSystemOptions); + + defaults(): AgentSystem; + + addTool(handler: ToolHandler, tags?: string[]): AgentSystem; + + use( + mw: AgentMiddleware, + tags?: string[] + ): AgentSystem; + + addAgent(blueprint: AgentBlueprint>): AgentSystem; + + export(): { + SystemAgent: typeof SystemAgent; + Agency: typeof Agency; + handler: ReturnType; + }; +} +``` + + +Options: + + +```ts +type AgentSystemOptions = { + defaultModel: string; // e.g. "openai:gpt-4.1-mini" + provider?: Provider; // optional custom provider + handlerOptions?: HandlerOptions; +}; +``` + + +- `defaultModel` – Fallback model ID for agents that do not set `blueprint.model`. +- `provider` – Optional `Provider` implementation. If omitted, OpenAI is used via `makeOpenAI(LLM_API_KEY, LLM_API_BASE)`. +- `handlerOptions` – Passed into `createHandler` (refer to [Worker handler](#worker-handler) below). + +`defaults()` registers the stock middleware: + +- `planning`, `filesystem`, `subagents` (all tagged `"default"` plus their own tags) + +`addTool(handler, tags?)`: + +- Registers a **global** tool. It will be available to any blueprint whose `tags` intersect with the tags you pass here. + +`use(middleware, tags?)`: + +- Registers middleware globally with tags equal to the union of `tags` and `middleware.tags`. 
+ +`addAgent(blueprint)`: + +- Registers an `AgentBlueprint` by name in the internal registry. +- These become the static defaults for agencies. + +`export()`: + +- Returns: + - `SystemAgent` – Configured DO class + - `Agency` – DO class for agencies + - `handler` – HTTP handler (use it as the Worker's default export: `export default handler`) + +--- + +### SystemAgent + +Base class: + + +```ts +abstract class SystemAgent extends Agent { + // Persisted state + readonly info: Info; + readonly runState: RunState; + readonly store: Store; + + abstract get blueprint(): AgentBlueprint; + abstract get middleware(): AgentMiddleware[]; + abstract get tools(): Record<string, ToolHandler>; + abstract get systemPrompt(): string; + abstract get model(): string; + abstract get config(): AgentConfig; + abstract get provider(): Provider; + + get messages(): ChatMessage[]; + get mwContext(): MWContext; + get isPaused(): boolean; + get isWaitingSubagents(): boolean; + get isDone(): boolean; + + emit(type: AgentEventType, data: unknown): void; + + // HTTP entry points + protected async onRequest(req: Request): Promise<Response>; + protected abstract onDone(ctx: { + agent: SystemAgent; + final: string; + }): Promise<void>; +} +``` + + +Configured version: + +- `AgentSystem.export()` returns a concrete subclass of `SystemAgent` that: + - Resolves `blueprint`, `middleware`, `tools`, `model`, `config`, `provider` + - Wires events to the `Provider` + - Implements `onRegister` to pull dynamic blueprints from the `Agency` DO + +You typically do not subclass this yourself; you just consume the class exported by `AgentSystem`. 
+ +--- + +### Agent blueprints + + +```ts +type AgentBlueprint> = { + name: string; + description: string; + prompt: string; + tags: string[]; + model?: string; + config?: AgentConfig; +}; +``` + + +- `name` – Identifier used as `agentType` +- `description` – Human-readable description shown in the dashboard +- `prompt` – System prompt used for this agent +- `tags` – Used to select middleware and tools +- `model` – Per-agent model override +- `config` – Configuration blob consumed by middleware + +The blueprint for an agent thread can come from: + +1. Static registration via `AgentSystem.addAgent(...)` +2. Dynamic override in the `Agency` DO via `POST /agency/:id/blueprints` + +--- + +### Middleware + +Middleware type: + + +```ts +interface AgentMiddleware { + name: string; + tags: string[]; + + // Attach extra state to AgentState + state?(ctx: MWContext): Record; + + // Lifecycle hooks + onInit?(ctx: MWContext): Promise; + onTick?(ctx: MWContext): Promise; + beforeModel?(ctx: MWContext, plan: ModelPlanBuilder): Promise; + onModelResult?(ctx: MWContext, res: { message: ChatMessage }): Promise; + + onToolStart?(ctx: MWContext, call: ToolCall): Promise; + onToolResult?(ctx: MWContext, call: ToolCall, result: unknown): Promise; + onToolError?(ctx: MWContext, call: ToolCall, error: Error): Promise; + + onResume?(ctx: MWContext, reason: string, payload: unknown): Promise; + onChildReport?( + ctx: MWContext, + child: { threadId: string; token: string; report?: string } + ): Promise; +} +``` + + +Helper: + + +```ts +function defineMiddleware( + mw: Omit, "__configType"> +): AgentMiddleware; +``` + + +The `MWContext`: + + +```ts +type MWContext = { + provider: Provider; + agent: SystemAgent; + registerTool: (handler: ToolHandler) => void; +}; +``` + + +Use `ctx.registerTool` when your middleware wants to attach tools dynamically (for example, `planning` registers `write_todos` this way). 
+ +--- + +### Tools + +Tool handler type: + + +```ts +type ToolHandler = (( + input: any, + ctx: ToolContext +) => Promise) & { __tool?: ToolMeta }; +``` + + +Where: + + +```ts +type ToolMeta = { + name: string; + description?: string; + parameters?: ToolJsonSchema; +}; + +type ToolContext = { + agent: SystemAgent; + env: typeof env; // Cloudflare worker env binding + callId: string; // tool call ID from the LLM +}; +``` + + +Helpers: + + +```ts +function defineTool(meta: ToolMeta, handler: ToolHandler): ToolHandler; +function getToolMeta(fn: ToolHandler, fallbackName?: string): ToolMeta | null; +``` + + +`defineTool` attaches metadata on `handler.__tool`. This metadata is used: + +- To build the tool definitions passed into the LLM (`ModelRequest.toolDefs`) +- To expose the tools in `AgentState.tools` for the dashboard + +Return value semantics: + +- `string | object` → Becomes a tool result message (`role: "tool"`) attached to that `toolCallId` +- `null` → Means "this tool does not produce a direct message"; used by the `task` tool (subagent spawner) + +--- + +### Built-in middleware and tools + +#### planning + +Exports: + +- `planning: AgentMiddleware` +- Adds `write_todos` tool +- Adds `WRITE_TODOS_SYSTEM_PROMPT` to the system prompt +- Persists todos in a `todos` table +- Exposes `state.todos: Todo[]` + +Schema for `write_todos`: + + +```ts +type Todo = { + content: string; + status: "pending" | "in_progress" | "completed"; +}; +``` + + +#### filesystem + +Exports: + +- `filesystem: AgentMiddleware` + +Registers tools: + +- `ls()` – List file paths in the virtual filesystem +- `read_file({ path, offset?, limit? })` +- `write_file({ path, content })` +- `edit_file({ path, oldString, newString, replaceAll? 
})` + +Also: + +- Creates `files` table in SQLite (path → content) +- Exposes `state.files: Record` +- Tracks `lastReadPaths` in KV and enforces "must read before edit" + +#### subagents + +Exports: + +- `subagents: AgentMiddleware` + +Config type: + + +```ts +type SubagentsConfig = { + subagents?: { + subagents: AgentBlueprint[]; // list of available subagent blueprints + }; +}; +``` + + +Adds: + +- `TASK_SYSTEM_PROMPT` to the system prompt +- `task` tool: + + +```ts +type TaskInput = { + description: string; + subagentType: string; + timeoutMs?: number; +}; +``` + + +Behavior is described in the [Architecture documentation](/agents/agent-system/architecture/). + +It also: + +- Emits `SUBAGENT_SPAWNED` when a child is launched +- Pauses the parent run with `reason: "subagent"` +- Updates `state.subagents: SubagentLink[]` via `Store.listSubagentLinks()` + +#### hitl + +Exports: + +- `hitl: AgentMiddleware` + +Config: + + +```ts +type HitlConfig = { + hitl?: { + tools: string[]; // names of tools that require approval + }; +}; +``` + + +If the model proposes a tool call whose name is in `tools`, the middleware: + +- Sets `runState.status = "paused"` and `reason = "hitl"` +- Emits `RUN_PAUSED` +- The dashboard shows HITL buttons that call `/approve` + +--- + +### Providers + +Exports: + + +```ts +interface Provider { + invoke( + req: ModelRequest, + opts: { signal?: AbortSignal } + ): Promise; + stream( + req: ModelRequest, + onDelta: (chunk: string) => void + ): Promise; +} + +type ModelResult = { + message: ChatMessage; // assistant message (may include toolCalls) + usage?: { promptTokens: number; completionTokens: number; costUsd?: number }; +}; + +function parseModel(modelId: string): string; +function makeOpenAI(apiKey: string, baseUrl?: string): Provider; +function makeAnthropic(baseUrl: string, apiKey: string): Provider; +function makeWorkersAI(ai: unknown): Provider; +``` + + +- `parseModel` lets you pass IDs like `"openai:gpt-4.1-mini"`; it strips the prefix 
before sending to the provider. +- `makeOpenAI` adapts the internal `ModelRequest` and `ChatMessage` format to OpenAI Chat Completions. +- `makeAnthropic` and `makeWorkersAI` are placeholders in the current code (they return a dummy `"Hello, world!"` response). + +`SystemAgent.provider` wraps any `Provider` with event emission: + +- Emits `MODEL_STARTED` before calling the provider +- Emits `MODEL_COMPLETED` after + +--- + +### Worker handler + +Exports from `agents/sys/worker`: + + +```ts +type HandlerOptions = { + baseUrl?: string; // currently unused + secret?: string; // optional shared secret for X-SECRET auth + agentDefinitions?: AgentBlueprint[]; // static blueprints +}; + +type HandlerEnv = { + SYSTEM_AGENT: DurableObjectNamespace; + AGENCY: DurableObjectNamespace; + AGENCY_REGISTRY: KVNamespace; +}; + +function createHandler(opts?: HandlerOptions): { + fetch( + req: Request, + env: HandlerEnv, + ctx: ExecutionContext + ): Promise<Response>; +}; +``` + + +`AgentSystem.export()` calls `createHandler` for you and injects `agentDefinitions` from its internal agent registry if you did not set them. 
+ +HTTP routes handled: + +- `GET /` – Serves `client.html` dashboard +- `GET /agencies` – List agencies from `AGENCY_REGISTRY` +- `POST /agencies` – Create a new Agency DO and store metadata + +Per agency: + +- `GET /agency/:agencyId/blueprints` – Static and dynamic blueprints +- `POST /agency/:agencyId/blueprints` – Write blueprint into Agency DO +- `GET /agency/:agencyId/agents` – List agent threads +- `POST /agency/:agencyId/agents` – Create new agent thread; injects request context (`ThreadRequestContext`) + +Per agent thread: + +- `POST /agency/:agencyId/agent/:agentId/invoke` + - Forwards to DO `/invoke`, injecting `threadId` into the body + +- `GET /agency/:agencyId/agent/:agentId/state` → DO `/state` +- `GET /agency/:agencyId/agent/:agentId/events` → DO `/events` +- `POST /agency/:agencyId/agent/:agentId/approve` → DO `/approve` +- `POST /agency/:agencyId/agent/:agentId/cancel` → DO `/cancel` +- `GET /agency/:agencyId/agent/:agentId/ws` – WebSocket for live events (implemented in the base `Agent` class) + +Auth: + +- If `opts.secret` is set, every request other than `GET /` must include the header `X-SECRET: <secret>`; requests without it receive a `401` response. 
+ +--- + +## Types + +### Messages and threads + + +```ts +type ChatMessage = + | { role: "system" | "user" | "assistant"; content: string } + | { role: "assistant"; toolCalls?: ToolCall[] } + | { role: "tool"; content: string; toolCallId: string }; + +type ToolCall = { + name: string; + args: unknown; + id: string; +}; + +type ThreadRequestContext = { + userAgent?: string; + ip?: string; + referrer?: string; + origin?: string; + cf?: Record; // colo, country, city, region, timezone, postalCode, asOrganization +}; + +type ParentInfo = { + threadId: string; + token: string; +}; + +interface ThreadMetadata { + id: string; + createdAt: string; + request: ThreadRequestContext; + parent?: ParentInfo; + agentType: string; + agencyId: string; +} + +interface InvokeBody { + threadId?: string; + messages?: ChatMessage[]; + files?: Record; + idempotencyKey?: string; + agentType?: string; + parent?: ParentInfo; +} +``` + + +`AgentState` (what `/state` returns): + + +```ts +type AgentState = { + messages: ChatMessage[]; + tools: ToolMeta[]; + thread: ThreadMetadata; + threadId?: string; + parent?: ParentInfo; + agentType?: string; + model?: string; +} & Record; // middleware injects more (todos, files, subagents, ...) +``` + + +### Subagents + + +```ts +type SubagentLinkStatus = "waiting" | "completed" | "canceled"; + +interface SubagentLink { + childThreadId: string; + token: string; + status: SubagentLinkStatus; + createdAt: number; + completedAt?: number; + report?: string; + toolCallId?: string; +} +``` + + +When `subagents` middleware is active, `state.subagents` is a `SubagentLink[]`. 
+ +### Events + + +```ts +enum AgentEventType { + THREAD_CREATED = "thread.created", + REQUEST_ACCEPTED = "request.accepted", + RUN_STARTED = "run.started", + RUN_TICK = "run.tick", + RUN_PAUSED = "run.paused", + RUN_RESUMED = "run.resumed", + RUN_CANCELED = "run.canceled", + AGENT_STARTED = "agent.started", + AGENT_COMPLETED = "agent.completed", + AGENT_ERROR = "agent.error", + CHECKPOINT_SAVED = "checkpoint.saved", + MODEL_STARTED = "model.started", + MODEL_DELTA = "model.delta", + MODEL_COMPLETED = "model.completed", + MIDDLEWARE_BEFORE_MODEL = "middleware.before_model", + MIDDLEWARE_AFTER_MODEL = "middleware.after_model", + TOOL_STARTED = "tool.started", + TOOL_OUTPUT = "tool.output", + TOOL_ERROR = "tool.error", + HITL_INTERRUPT = "hitl.interrupt", + HITL_RESUME = "hitl.resume", + SUBAGENT_SPAWNED = "subagent.spawned", + SUBAGENT_COMPLETED = "subagent.completed" +} + +type AgentEvent = { + threadId: string; + ts: string; + seq?: number; +} & AgentEventData; + +type AgentEventData = + | { type: AgentEventType.THREAD_CREATED; data: { threadId: string } } + | { type: AgentEventType.REQUEST_ACCEPTED; data: { idempotencyKey: string } } + | { type: AgentEventType.RUN_STARTED; data: { runId: string } } + | { type: AgentEventType.RUN_TICK; data: { runId: string; step: number } } + | { + type: AgentEventType.RUN_PAUSED; + data: { + runId: string; + reason: "hitl" | "error" | "exhausted" | "subagent"; + }; + } + | { type: AgentEventType.RUN_RESUMED; data: { runId: string } } + | { type: AgentEventType.RUN_CANCELED; data: { runId: string } } + | { type: AgentEventType.AGENT_STARTED; data: Record } + | { type: AgentEventType.AGENT_COMPLETED; data: { result?: unknown } } + | { + type: AgentEventType.AGENT_ERROR; + data: { error: string; stack?: string }; + } + | { + type: AgentEventType.CHECKPOINT_SAVED; + data: { stateHash: string; size: number }; + } + | { type: AgentEventType.MODEL_STARTED; data: { model: string } } + | { type: AgentEventType.MODEL_DELTA; data: { delta: 
string } } + | { + type: AgentEventType.MODEL_COMPLETED; + data: { usage?: { inputTokens: number; outputTokens: number } }; + } + | { + type: AgentEventType.MIDDLEWARE_BEFORE_MODEL; + data: { middlewareName: string }; + } + | { + type: AgentEventType.MIDDLEWARE_AFTER_MODEL; + data: { middlewareName: string }; + } + | { + type: AgentEventType.TOOL_STARTED; + data: { toolName: string; args: unknown }; + } + | { + type: AgentEventType.TOOL_OUTPUT; + data: { toolName: string; output: unknown }; + } + | { + type: AgentEventType.TOOL_ERROR; + data: { toolName: string; error: string }; + } + | { + type: AgentEventType.HITL_INTERRUPT; + data: { proposedToolCalls: Array<{ toolName: string; args: unknown }> }; + } + | { + type: AgentEventType.HITL_RESUME; + data: { + approved: boolean; + modifiedToolCalls?: Array<{ toolName: string; args: unknown }>; + }; + } + | { type: AgentEventType.SUBAGENT_SPAWNED; data: { childThreadId: string } } + | { + type: AgentEventType.SUBAGENT_COMPLETED; + data: { childThreadId: string; result?: unknown }; + }; +``` + + +`GET /agency/:agencyId/agent/:threadId/events` returns `{ events: AgentEvent[] }`. + +--- + +## SystemAgent HTTP API (per-thread) + +These are internal to the handler, but you can call them directly from another Worker if you have the DO stub. + +- `POST /register` – Thread metadata registration + + Body: `ThreadMetadata`. + +- `POST /invoke` – Start or continue a run + + Body: `InvokeBody`. + + Returns 202 with: + + ```json + { "runId": "uuid", "status": "running" | "paused" | "completed" | ... } + ``` + +- `POST /approve` – HITL approval with body `ApproveBody`: + + +```ts +type ApproveBody = { + approved: boolean; + modifiedToolCalls?: ToolCall[]; +}; +``` + + +- `POST /cancel` – Cancel current run (also propagates to child subagents if any). + +- `GET /state` – Returns `{ state: AgentState, run: RunState }`. + +- `GET /events` – Returns `{ events: AgentEvent[] }`. 
+ +- `POST /child_result` – Internal; used by subagents to report back to parents. + +You usually interact via the higher-level `/agency/...` routes instead of calling these directly. + +--- + +## Putting it together + +A typical integration follows these steps: + +1. Configure an `AgentSystem` with: + - A default LLM model + - A set of tools and middleware + - A bundle of blueprints + +2. Export it and bind DOs and KV in Wrangler. +3. Spin up an **Agency** via `POST /agencies`. +4. Spawn one or more **SystemAgent** threads via `POST /agency/:id/agents`. +5. Talk to threads via: + - `POST /invoke` to send messages + - `GET /state` to inspect current state + - `/ws` to stream events + +6. Let the built-in middleware handle: + - Planning (todo lists) + - Filesystem (files) + - Subagents (`task`) + - Optional HITL + +You can extend the system in two main ways: + +- **New tools**: Use `defineTool` and `system.addTool` or register them from a middleware. +- **New middleware**: Use `defineMiddleware`, register it with `.use()`, and drive the agent via hooks (`beforeModel`, `onModelResult`, `onToolResult`, and so on). + +This covers the full surface area of the Agent System API. diff --git a/src/content/docs/agents/agent-system/architecture.mdx b/src/content/docs/agents/agent-system/architecture.mdx new file mode 100644 index 00000000000000..b8a7ab582240a2 --- /dev/null +++ b/src/content/docs/agents/agent-system/architecture.mdx @@ -0,0 +1,510 @@ +--- +title: Architecture +pcx_content_type: concept +sidebar: + order: 2 +--- + +import { TypeScriptExample } from "~/components"; + +This document provides a mental model for how the Agent System components fit together. + +--- + +## High-level components + +The Agent System consists of three main layers: + +1. **Worker handler** (exported `handler`) + - HTTP interface and dashboard + - Routes API calls to the appropriate Durable Objects + - Provides the HTML UI at `GET /` + +2. 
**Agency** Durable Object (`Agency`) + - Control plane per agency + - Stores and overrides agent blueprints + - Tracks which agent threads exist in this agency + +3. **SystemAgent** Durable Object (`SystemAgent`) + - One instance per agent thread + - Runs the agent loop: + - Stores messages, files, and todos + - Calls the LLM provider + - Executes tools and subagents + - Emits events + +All processing happens inside `SystemAgent` instances. + +--- + +## Agencies and agent threads + +### Agency Durable Object + +The `Agency` Durable Object manages: + +- **Blueprint management** in SQLite: + + ```sql + CREATE TABLE IF NOT EXISTS blueprints ( + name TEXT PRIMARY KEY, + data TEXT NOT NULL, -- JSON AgentBlueprint + updated_at INTEGER NOT NULL + ); + ``` + +- **Agent registry** per agency: + + ```sql + CREATE TABLE IF NOT EXISTS agents ( + id TEXT PRIMARY KEY, + type TEXT NOT NULL, + created_at INTEGER NOT NULL, + metadata TEXT -- JSON + ); + ``` + +Each agency is one Durable Object instance, identified by a Durable Object ID string. Agencies are tracked in Workers KV through `AGENCY_REGISTRY`. + +### Agency HTTP API (via handler) + +The handler routes: + +- `GET /agencies` – List agencies from Workers KV +- `POST /agencies` – Create a new `Agency` Durable Object instance and store its metadata + +Per agency: + +- `GET /agency/:agencyId/blueprints` + - Merges static blueprints (from `AgentSystem.addAgent`) and dynamic ones stored in the Agency's SQLite +- `POST /agency/:agencyId/blueprints` + - Write or override a blueprint in the Agency's SQLite + +Agents within an agency: + +- `GET /agency/:agencyId/agents` – List agent threads +- `POST /agency/:agencyId/agents` + - Creates a new thread: + - Generates an ID + - Writes to `agents` table + - Spawns a `SystemAgent` Durable Object + - Calls `/register` on that Durable Object with `ThreadMetadata` + +The `Agency` Durable Object never calls the LLM; it manages metadata and blueprints only. 
+ +--- + +## SystemAgent: per-thread runtime + +Each `SystemAgent` Durable Object is the agent brain for one thread. + +### Persistent info and run state + +Two key objects are persisted in Workers KV via `PersistedObject`: + +- `info: Info` (thread metadata) + + + ```ts + type Info = { + threadId: string; + agencyId: string; + createdAt: string; + request: ThreadRequestContext; + agentType: string; + parentInfo?: ParentInfo; // if this is a subagent + pendingToolCalls?: ToolCall[]; + blueprint?: AgentBlueprint; + }; + ``` + + +- `runState: RunState` + + + ```ts + type RunState = { + runId: string; + status: "idle" | "registered" | "running" | "paused" | "completed" | "canceled" | "error"; + step: number; + reason?: string; + nextAlarmAt?: number | null; + }; + ``` + + +`PersistedObject` maps object properties to Workers KV keys. To guard against changes that would silently fail to persist, it warns when you mutate a nested object in place instead of reassigning the property. + +### Store: messages, events, files, subagents + +`Store` wraps the Durable Object SQLite and Workers KV and provides the following interfaces: + +- Messages: + + ```sql + CREATE TABLE IF NOT EXISTS messages ( + seq INTEGER PRIMARY KEY AUTOINCREMENT, + role TEXT NOT NULL CHECK(role IN ('user','assistant','tool')), + content TEXT, + tool_call_id TEXT, + tool_calls_json TEXT, + created_at INTEGER NOT NULL + ); + ``` + + - `appendMessages()` records user, assistant, and tool messages. + - `listMessages()` reconstructs `ChatMessage[]` for the agent. + - `appendToolResult()` inserts a single tool message. + +- Events: + + ```sql + CREATE TABLE IF NOT EXISTS events ( + seq INTEGER PRIMARY KEY AUTOINCREMENT, + type TEXT NOT NULL, + data_json TEXT NOT NULL, + ts TEXT NOT NULL + ); + ``` + + - `addEvent()` stores `AgentEvent` objects and returns their `seq`. + - `listEvents()` returns chronological agent events. 
+ +- Files (virtual filesystem): + + ```sql + CREATE TABLE IF NOT EXISTS files ( + path TEXT PRIMARY KEY, + content BLOB, + updated_at INTEGER NOT NULL + ); + ``` + + - `mergeFiles()`, `listFiles()`, `readFile()`, `writeFile()`, `editFile()`. + +- Subagent bookkeeping: + + ```sql + CREATE TABLE IF NOT EXISTS waiting_subagents (...); + CREATE TABLE IF NOT EXISTS subagent_links (...); + ``` + + This tracks: + - Which tool call spawned which child thread + - Which children are still pending + - Completion, cancellation, and reports for each subagent + +- Todos (via `planning` middleware): + + ```sql + CREATE TABLE IF NOT EXISTS todos ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + content TEXT NOT NULL, + status TEXT NOT NULL CHECK(status IN ('pending','in_progress','completed')), + pos INTEGER NOT NULL DEFAULT 0, + updated_at INTEGER NOT NULL + ); + ``` + + Only created when `planning` is enabled. + +### Middleware pipeline + +`SystemAgent` exposes: + + +```ts +abstract get middleware(): AgentMiddleware[]; +abstract get tools(): Record; +abstract get systemPrompt(): string; +abstract get model(): string; +abstract get config(): AgentConfig; +abstract get provider(): Provider; +``` + + +Most of these properties are resolved dynamically from the `AgentSystem` configuration and the current agent blueprint. + +The run loop (`run()`) follows this process: + +1. Emit `RUN_TICK` +2. If there are no pending tool calls and no waiting subagents: + - Build `ModelRequest` via `ModelPlanBuilder` using all middleware + - Call `provider.invoke(req)` + - Let middleware react via `onModelResult` + - Append the assistant message + - Capture any tool calls from the assistant into `info.pendingToolCalls` +3. If the agent paused (HITL or subagent), stop here +4. If the agent looks done (`isDone` checks last assistant message): + - Mark `runState.status = "completed"` + - Emit `AGENT_COMPLETED` + - If this is a subagent, call `/child_result` on the parent and stop +5. 
Otherwise, execute pending tools in batches (`executePendingTools`) +6. Reschedule via Durable Object alarm and repeat + +--- + +## AgentSystem and blueprints + +### AgentSystem + +`AgentSystem` is a configuration-time builder: + + +```ts +const system = new AgentSystem({ + defaultModel: "openai:gpt-4.1-mini", + provider?: Provider, + handlerOptions?: HandlerOptions +}) + .defaults() + .addTool(myTool, ["analytics"]) + .use(myMiddleware, ["my-tag"]) + .addAgent({ + name: "my-agent", + description: "Does things.", + prompt: "You are...", + tags: ["default", "analytics"], + config: { ... } // config consumed by middleware + }); + +const { SystemAgent, Agency, handler } = system.export(); +``` + + +Internally: + +- Tools registered via `.addTool()` go into a `ToolRegistry`. +- Middleware registered via `.use()` go into a `MiddlewareRegistry`. +- Blueprints registered via `.addAgent()` go into an `agentRegistry`. + +The generic type `AgentSystem` grows as you add middleware, so you can get type-safe `config` for your blueprints. + +### Blueprint tags and selection + +Each `AgentBlueprint` has `tags: string[]`. + +At runtime: + +- `SystemAgent.middleware` selects middlewares whose tags intersect with the blueprint tags. +- `SystemAgent.tools` selects tools whose tags intersect with the blueprint tags, plus any tools that middleware registered dynamically with `ctx.registerTool()`. + +For example: + +- `system.defaults()` registers `planning`, `filesystem`, and `subagents` with tag `"default"` (plus their internal tags like `"planning"`, `"fs"`, and `"subagents"`). +- The `"manager-agent"` blueprint has `tags: ["default"]`, so it gets all of them. +- Custom analytics tools are added with tag `"security"` and the `"security-agent"` blueprint uses `tags: ["security"]`, so they appear only there. + +--- + +## Subagents and the task tool + +The `subagents` middleware defines one special tool: `task`. 
+ +### What the task tool does + +When the model calls: + + +```json +{ + "name": "task", + "args": { + "description": "Analyze top IPs in this window...", + "subagentType": "security-agent" + } +} +``` + + +The handler: + +1. Generates a `token` and a `childThreadId`. +2. Emits `SUBAGENT_SPAWNED`. +3. Creates a stub for the child `SystemAgent` Durable Object. +4. Calls `POST /register` on the child with: + + + ```ts + { + id: childThreadId, + createdAt: nowISO, + agentType: subagentType, + request: ctx.agent.info.request, // propagate request context + parent: { + threadId: parentThreadId, + token + } + } + ``` + + +5. Calls `POST /invoke` on the child with a single user message equal to `description`. +6. Registers the waiting subagent in the parent SQLite (token, childThreadId, toolCallId). +7. Pauses the parent run: + + + ```ts + runState.status = "paused"; + runState.reason = "subagent"; + ``` + + +8. Returns `null` from the tool, so no immediate tool-result message is added. + +The child runs independently until it completes. + +### How results return + +When a subagent completes: + +1. `SystemAgent.run()` in the child sets status `completed` and emits `AGENT_COMPLETED`. +2. Because the child has `parentInfo`, it also: + - Computes the final text output (`final`) + - Calls `POST /child_result` on the parent thread with `{ token, childThreadId, report: final }` + +On the parent: + +- `childResult()`: + - Pops the waiting subagent via token and childThreadId + - Appends a tool message on the waiting tool call ID with the `report` + - Marks the subagent link as `completed` in SQLite + - Emits `SUBAGENT_COMPLETED` + - If no waiting subagents remain: + - Moves `runState.status` back to `"running"` + - Emits `RUN_RESUMED` + - Schedules the agent again + +From the agent perspective, it appears as one tool call that took multiple ticks and eventually produced a single tool output message. 
+ +--- + +## Human-in-the-loop middleware + +The optional `hitl` middleware allows you to pause runs when certain tools are proposed. + +Configuration: + + +```ts +export type HitlConfig = { + hitl?: { + tools: string[]; // list of tool names that require human approval + }; +}; +``` + + +When enabled on a blueprint: + +1. `hitl.onModelResult()` inspects the last assistant message. +2. If any tool call name is in `config.hitl.tools`: + - It sets `runState.status = "paused"`, `reason = "hitl"`. + - Emits `RUN_PAUSED`. + +The dashboard shows HITL buttons when there are pending tool calls: + +- `POST /agency/:agencyId/agent/:threadId/approve` with: + + + ```ts + { + approved: boolean; + modifiedToolCalls?: ToolCall[]; // optional edits + } + ``` + + +On approve: + +- The parent `SystemAgent.approve()` stores the (possibly modified) tool calls in `info.pendingToolCalls`. +- Emits `HITL_RESUME` and `RUN_RESUMED`. +- Schedules the run again. + +--- + +## Observability and graph + +Every important action emits an `AgentEvent`: + +- `THREAD_CREATED`, `REQUEST_ACCEPTED` +- `RUN_*` (`started`, `tick`, `paused`, `resumed`, `canceled`) +- `AGENT_*` (`started`, `completed`, `error`) +- `MODEL_*` (`started`, `delta`, `completed`) +- `TOOL_*` (`started`, `output`, `error`) +- `SUBAGENT_*` (`spawned`, `completed`) +- `HITL_*` (`interrupt`, `resume`) +- `CHECKPOINT_SAVED` + +`SystemAgent.emit()`: + +- Writes the event to SQLite (`events` table) +- Broadcasts it as JSON over WebSocket to any connected clients + +The dashboard (`client.html`) subscribes to: + +- `GET /agency/:agencyId/agent/:threadId/events` for the history +- `/ws` for live events + +It then reconstructs a graph with: + +- Nodes: ticks, model calls, tools, done/error markers +- Edges: sequential flow and dashed spawn/completion edges between parent and child agents + +You do not have to use the built-in UI, but it is useful for debugging and understanding multi-agent behavior. 
+ +--- + +## Filesystem and planning + +Because `.defaults()` enables both `planning` and `filesystem`, typical agents get: + +- A todo list backed by SQLite (`todos` table) +- A virtual filesystem in SQLite (`files` table) + +These appear in `AgentState`: + +- `state.todos` – current todo list +- `state.files` – map of path to string content + +And as tools: + +- `write_todos(todos: Todo[])` +- `ls()` +- `read_file({ path, offset?, limit? })` +- `write_file({ path, content })` +- `edit_file({ path, oldString, newString, replaceAll? })` + +The `filesystem` middleware also enforces a simple safety rule: + +- You must use `read_file` on a path at least once before you can `edit_file` it. + +It tracks this via a Workers KV entry `lastReadPaths`. + +--- + +## Dashboard UI structure + +The HTML dashboard consists of: + +- Left sidebar: + - Agencies dropdown + - Threads list (root threads and nested subagents) +- Main area: + - **Chat and Todos** tab: + - Chat transcript + - Run status (running, paused, completed, error) + - Message input + - HITL controls + - Todos panel + - Raw state JSON + - **Graph** tab: + - Cytoscape graph of events + - Zoom, fit, and export controls + - **Files** tab: + - List of files in `state.files` + - Markdown and code preview with line numbers and syntax highlighting + +All of this is built on the HTTP and WebSocket endpoints exposed by the `handler`. + +For more information about the API, refer to the [API Reference](/agents/agent-system/api-reference/). 
diff --git a/src/content/docs/agents/agent-system/getting-started.mdx b/src/content/docs/agents/agent-system/getting-started.mdx new file mode 100644 index 00000000000000..b75afbf4b665ff --- /dev/null +++ b/src/content/docs/agents/agent-system/getting-started.mdx @@ -0,0 +1,324 @@ +--- +title: Getting started with Agent System +pcx_content_type: get-started +sidebar: + order: 1 +--- + +import { TypeScriptExample, WranglerConfig, PackageManagers } from "~/components"; + +This guide walks through wiring the Agent System runtime into a Cloudflare Worker, defining an agent, and interacting with it. + +This guide assumes you already know your way around Workers and Wrangler. + +## Add the runtime to your Worker + +In your Worker entrypoint (for example, `src/index.ts`): + + + +```ts +import { AgentSystem } from "agents/sys"; +// (Optional) import custom tools / middleware and add them later + +// Build an AgentSystem with a default LLM model +const system = new AgentSystem({ + defaultModel: "openai:gpt-4.1-mini" +}) + .defaults() // planning + filesystem + subagents + .addAgent({ + name: "manager-agent", + description: "Main agent.", + prompt: "You are a helpful assistant.", + tags: ["default"] // selects which tools/middleware apply + }); + +// Export configured DO classes and HTTP handler +const { SystemAgent, Agency, handler } = system.export(); + +export { SystemAgent, Agency }; +export default handler; +``` + + + +What `.defaults()` does: + +- Registers `planning` middleware (todo list + `write_todos` tool) +- Registers `filesystem` middleware (`ls`, `read_file`, `write_file`, `edit_file`) +- Registers `subagents` middleware (`task` tool for child agents) + +All of those middlewares are tagged with `"default"`, so any agent blueprint that includes `"default"` in its `tags` will use them. 
+ +## Bind Durable Objects and KV + +Wire the `SystemAgent` and `Agency` DOs and the KV registry that stores agencies: + + + +```jsonc +{ + "name": "my-agent-worker", + "main": "src/index.ts", + "compatibility_date": "2025-03-14", + "compatibility_flags": ["nodejs_compat"], + + "durable_objects": { + "bindings": [ + { + "name": "SYSTEM_AGENT", + "class_name": "SystemAgent" + }, + { + "name": "AGENCY", + "class_name": "Agency" + } + ] + }, + + "kv_namespaces": [ + { + "binding": "AGENCY_REGISTRY", + "id": "my-agency-registry-kv-namespace-id" + } + ], + + "migrations": [ + { + "tag": "v1", + "new_sqlite_classes": ["SystemAgent", "Agency"] + } + ] +} +``` + + + +Run the migration once: + + + +## Configure an LLM provider + +By default, `AgentSystem` will use OpenAI's Chat Completions API via `makeOpenAI` if you do not pass a provider explicitly. + +You need: + +- `LLM_API_KEY` – secret (OpenAI API key or gateway key) +- Optional: `LLM_API_BASE` – custom base URL (proxy/gateway) + +Set them as Worker secrets: + + + + +Then `SystemAgent.provider` will: + +- Build an OpenAI provider using `makeOpenAI(apiKey, apiBase)` +- Wrap it to automatically emit `MODEL_STARTED` / `MODEL_COMPLETED` events + +If you want to use a custom provider, you can pass a `Provider` into `new AgentSystem({ provider })`. That provider is a simple interface: + + + +```ts +interface Provider { + invoke( + req: ModelRequest, + opts: { signal?: AbortSignal } + ): Promise; + stream( + req: ModelRequest, + onDelta: (chunk: string) => void + ): Promise; +} +``` + + + +:::note + +Advanced: wiring a provider that depends on `env` requires patching `SystemAgent.provider`, so for now sticking to the OpenAI path is easiest. + +::: + +## Run locally + +Start dev mode: + + + +Then open the Worker URL (default `http://127.0.0.1:8787/`) in a browser. + +You should see the built-in **Agent Dashboard** (`client.html`), which is being served by the exported `handler`. 
+ +## Create an Agency and an Agent + +In the dashboard: + +1. Use the **New Agency** button to create an agency. + + Under the hood this calls: + - `POST /agencies` → creates a new `Agency` DO instance + - The metadata (ID, name, createdAt) is stored in `AGENCY_REGISTRY` KV + +2. Select that agency from the **Agencies** dropdown. + +3. Select **New Thread** to create an agent thread, and pick your agent type (for example, `"manager-agent"` from the example). + + Under the hood this calls: + - `POST /agency/:agencyId/agents` with `{ agentType }` + - The `Agency` DO: + - assigns a new `id` for the agent thread + - stores metadata in its local SQLite + - spawns a `SystemAgent` DO, calling `/register` with `ThreadMetadata` + +4. Select the thread in the sidebar and start sending messages from the chat panel. + +## Talk to an agent programmatically + +If you do not care about the dashboard, you can hit the REST-ish endpoints directly. + +### Create an agency + + + +```ts +const res = await fetch("https://your-worker.example.com/agencies", { + method: "POST", + headers: { "content-type": "application/json" }, + body: JSON.stringify({ name: "Production" }) +}); + +const agency = await res.json(); // { id, name, createdAt } +``` + + + +### List blueprints available in that agency + +Static blueprints from `AgentSystem.addAgent` plus any overrides stored inside the `Agency` DO: + + + +```ts +const res = await fetch( + `https://your-worker.example.com/agency/${agency.id}/blueprints` +); +const { blueprints } = await res.json(); +``` + + + +### Create a new agent thread + + + +```ts +const res = await fetch( + `https://your-worker.example.com/agency/${agency.id}/agents`, + { + method: "POST", + headers: { "content-type": "application/json" }, + body: JSON.stringify({ agentType: "manager-agent" }) + } +); + +const thread: { + id: string; + agentType: string; + createdAt: string; + request: any; + agencyId: string; +} = await res.json(); +``` + + + +### Send a message + + + 
+```ts +await fetch( + `https://your-worker.example.com/agency/${agency.id}/agent/${thread.id}/invoke`, + { + method: "POST", + headers: { "content-type": "application/json" }, + body: JSON.stringify({ + messages: [{ role: "user", content: "Hello, what can you do?" }] + }) + } +); +// HTTP 202, run happens asynchronously inside the DO +``` + + + +### Poll state + + + +```ts +const res = await fetch( + `https://your-worker.example.com/agency/${agency.id}/agent/${thread.id}/state` +); +const { state, run } = await res.json(); + +/* +state: AgentState (messages, tools, thread, todos, files, subagents, ...) +run: RunState (runId, status, step, reason, nextAlarmAt) +*/ +``` + + + +### Listen to events live (WebSocket) + +`client.html` uses a WebSocket at: + +```text +/ws → /agency/:agencyId/agent/:agentId/ws +``` + +The `Agent` base class calls `broadcast()` on the DO whenever an `AgentEvent` is emitted in `SystemAgent.emit`, so you can just reuse that endpoint if you want a custom UI. + +## Security: lock down the handler + +`createHandler` supports a simple shared-secret auth mechanism: + + + +```ts +const system = new AgentSystem({ + defaultModel: "openai:gpt-4.1-mini", + handlerOptions: { + secret: "some-long-random-string" // clients must send X-SECRET header + } +}); +``` + + + +When `secret` is set: + +- All non-`GET /` requests must include `X-SECRET: <secret>` or you get `401` +- This gates both the dashboard and the raw REST API + +Use this if your Worker is directly exposed to the public internet and you do not have another auth layer in front. 
+ +## Example + +Refer to the `examples/deep/` folder in the [cloudflare/agents](https://github.com/cloudflare/agents) repository for a real setup: + +- `AgentSystem` with: + - a **security analytics subagent** blueprint (`security-agent`) + - a **manager** blueprint (`manager-agent`) that orchestrates subagents + +- Custom tools that talk to the Cloudflare Analytics GraphQL API +- A prompt that explains how the manager agent should: + - plan with todos + - spawn analytics subagents via `task` + - read/write `report.md` in the virtual filesystem + +This is a good reference for more complex multi-agent patterns. diff --git a/src/content/docs/agents/agent-system/index.mdx b/src/content/docs/agents/agent-system/index.mdx new file mode 100644 index 00000000000000..c9a0fc7ab46221 --- /dev/null +++ b/src/content/docs/agents/agent-system/index.mdx @@ -0,0 +1,23 @@ +--- +title: Agent System +pcx_content_type: navigation +sidebar: + order: 10 +--- + +The Agent System (`agents/sys`) is a framework for building agent systems with middleware, tools, and blueprints on Cloudflare Workers. 
+ +Agent System provides: + +- **Middleware pipeline**: Extend agent behavior with reusable middleware for planning, filesystem operations, and subagents +- **Tool system**: Define and register tools that agents can use to perform tasks +- **Blueprint management**: Configure agent types with specific capabilities, prompts, and middleware +- **Multi-agent support**: Spawn child agents (subagents) that can work on subtasks independently +- **Built-in dashboard**: Web-based interface for managing agencies, agents, and threads +- **Event system**: Real-time event streaming via WebSocket for monitoring agent execution + +## Get started + +- [Getting started](/agents/agent-system/getting-started/) - Set up your first Agent System +- [Architecture](/agents/agent-system/architecture/) - Understand how Agent System works +- [API reference](/agents/agent-system/api-reference/) - Complete API documentation diff --git a/src/content/docs/agents/sys/api-reference.mdx b/src/content/docs/agents/sys/api-reference.mdx new file mode 100644 index 00000000000000..1586a774cf1e01 --- /dev/null +++ b/src/content/docs/agents/sys/api-reference.mdx @@ -0,0 +1,755 @@ +--- +title: API Reference +pcx_content_type: reference +sidebar: + order: 3 +--- + +import { TypeScriptExample } from "~/components"; + +This is a reference for the TypeScript/Worker API and the HTTP surface. + +## Module: agents/sys + +Everything in `packages/agents/src/sys` is re-exported from this entrypoint. + +### AgentSystem + + + +```ts +class AgentSystem> { + constructor(options: AgentSystemOptions); + + defaults(): AgentSystem; + + addTool(handler: ToolHandler, tags?: string[]): AgentSystem; + + use( + mw: AgentMiddleware, + tags?: string[] + ): AgentSystem; + + addAgent(blueprint: AgentBlueprint>): AgentSystem; + + export(): { + SystemAgent: typeof SystemAgent; + Agency: typeof Agency; + handler: ReturnType; + }; +} +``` + + + +#### Options + + + +```ts +type AgentSystemOptions = { + defaultModel: string; // e.g. 
"openai:gpt-4.1-mini" + provider?: Provider; // optional custom provider + handlerOptions?: HandlerOptions; +}; +``` + + + +- `defaultModel` – fallback model id for agents that do not set `blueprint.model` +- `provider` – optional `Provider` implementation. If omitted, OpenAI is used via `makeOpenAI(LLM_API_KEY, LLM_API_BASE)` +- `handlerOptions` – passed into `createHandler` + +#### Methods + +- `defaults()` – registers the stock middleware: + - `planning`, `filesystem`, `subagents` (all tagged `"default"` plus their own tags) + +- `addTool(handler, tags?)` – registers a global tool. It will be available to any blueprint whose `tags` intersect with the tags you pass here + +- `use(middleware, tags?)` – registers middleware globally with tags = union of `tags` and `middleware.tags` + +- `addAgent(blueprint)` – registers an `AgentBlueprint` by name in the internal registry. These become the static defaults for agencies + +- `export()` – returns: + - `SystemAgent` – configured DO class + - `Agency` – DO class for agencies + - `handler` – HTTP handler (to be `export default handler`) + +### SystemAgent + +Base class: + + + +```ts +abstract class SystemAgent extends Agent { + // Persisted state + readonly info: Info; + readonly runState: RunState; + readonly store: Store; + + abstract get blueprint(): AgentBlueprint; + abstract get middleware(): AgentMiddleware[]; + abstract get tools(): Record; + abstract get systemPrompt(): string; + abstract get model(): string; + abstract get config(): AgentConfig; + abstract get provider(): Provider; + + get messages(): ChatMessage[]; + get mwContext(): MWContext; + get isPaused(): boolean; + get isWaitingSubagents(): boolean; + get isDone(): boolean; + + emit(type: AgentEventType, data: unknown): void; + + // HTTP entry points + protected async onRequest(req: Request): Promise; + protected abstract onDone(ctx: { + agent: SystemAgent; + final: string; + }): Promise; +} +``` + + + +`AgentSystem.export()` returns a concrete subclass 
of `SystemAgent` that: +- Resolves `blueprint`, `middleware`, `tools`, `model`, `config`, `provider` +- Wires events to the `Provider` +- Implements `onRegister` to pull dynamic blueprints from the `Agency` DO + +You typically do not subclass this yourself; you just consume the class exported by `AgentSystem`. + +### Agent blueprints + + + +```ts +type AgentBlueprint> = { + name: string; + description: string; + prompt: string; + tags: string[]; + model?: string; + config?: AgentConfig; +}; +``` + + + +- `name` – identifier used as `agentType` +- `description` – human-readable description shown in the dashboard +- `prompt` – system prompt used for this agent +- `tags` – used to select middleware/tools +- `model` – per-agent model override +- `config` – configuration blob consumed by middleware + +The blueprint for an agent thread can come from: + +1. Static registration via `AgentSystem.addAgent(...)` +2. Dynamic override in the `Agency` DO via `POST /agency/:id/blueprints` + +### Middleware + +Middleware type: + + + +```ts +interface AgentMiddleware { + name: string; + tags: string[]; + + // Attach extra state to AgentState + state?(ctx: MWContext): Record; + + // Lifecycle hooks + onInit?(ctx: MWContext): Promise; + onTick?(ctx: MWContext): Promise; + beforeModel?(ctx: MWContext, plan: ModelPlanBuilder): Promise; + onModelResult?(ctx: MWContext, res: { message: ChatMessage }): Promise; + + onToolStart?(ctx: MWContext, call: ToolCall): Promise; + onToolResult?(ctx: MWContext, call: ToolCall, result: unknown): Promise; + onToolError?(ctx: MWContext, call: ToolCall, error: Error): Promise; + + onResume?(ctx: MWContext, reason: string, payload: unknown): Promise; + onChildReport?( + ctx: MWContext, + child: { threadId: string; token: string; report?: string } + ): Promise; +} +``` + + + +Helper: + + + +```ts +function defineMiddleware( + mw: Omit, "__configType"> +): AgentMiddleware; +``` + + + +The `MWContext`: + + + +```ts +type MWContext = { + provider: Provider; 
+ agent: SystemAgent; + registerTool: (handler: ToolHandler) => void; +}; +``` + + + +Use `ctx.registerTool` when your middleware wants to attach tools dynamically (for example, `planning` registers `write_todos` this way). + +### Tools + +Tool handler type: + + + +```ts +type ToolHandler = (( + input: any, + ctx: ToolContext +) => Promise) & { __tool?: ToolMeta }; +``` + + + +Where: + + + +```ts +type ToolMeta = { + name: string; + description?: string; + parameters?: ToolJsonSchema; +}; + +type ToolContext = { + agent: SystemAgent; + env: typeof env; // Cloudflare worker env binding + callId: string; // tool call ID from the LLM +}; +``` + + + +Helpers: + + + +```ts +function defineTool(meta: ToolMeta, handler: ToolHandler): ToolHandler; +function getToolMeta(fn: ToolHandler, fallbackName?: string): ToolMeta | null; +``` + + + +`defineTool` attaches metadata on `handler.__tool`. This metadata is used: + +- To build the tool definitions passed into the LLM (`ModelRequest.toolDefs`) +- To expose the tools in `AgentState.tools` for the dashboard + +Return value semantics: + +- `string | object` becomes a tool result message (`role: "tool"`) attached to that `toolCallId` +- `null` means this tool does not produce a direct message; used by the `task` tool (subagent spawner) + +### Built-in middleware and tools + +#### planning + +Exports: + +- `planning: AgentMiddleware` +- Adds `write_todos` tool +- Adds `WRITE_TODOS_SYSTEM_PROMPT` to the system prompt +- Persists todos in a `todos` table +- Exposes `state.todos: Todo[]` + +Schema for `write_todos`: + + + +```ts +type Todo = { + content: string; + status: "pending" | "in_progress" | "completed"; +}; +``` + + + +#### filesystem + +Exports: + +- `filesystem: AgentMiddleware` + +Registers tools: + +- `ls()` – list file paths in the virtual filesystem +- `read_file({ path, offset?, limit? })` +- `write_file({ path, content })` +- `edit_file({ path, oldString, newString, replaceAll? 
})` + +Also: + +- Creates `files` table in SQLite (path to content) +- Exposes `state.files: Record` +- Tracks `lastReadPaths` in KV and enforces must read before edit + +#### subagents + +Exports: + +- `subagents: AgentMiddleware` + +Config type: + + + +```ts +type SubagentsConfig = { + subagents?: { + subagents: AgentBlueprint[]; // list of available subagent blueprints + }; +}; +``` + + + +Adds: + +- `TASK_SYSTEM_PROMPT` to the system prompt +- `task` tool: + + + + ```ts + type TaskInput = { + description: string; + subagentType: string; + timeoutMs?: number; + }; + ``` + + + +It also: + +- Emits `SUBAGENT_SPAWNED` when a child is launched +- Pauses the parent run with `reason: "subagent"` +- Updates `state.subagents: SubagentLink[]` via `Store.listSubagentLinks()` + +#### hitl + +Exports: + +- `hitl: AgentMiddleware` + +Config: + + + +```ts +type HitlConfig = { + hitl?: { + tools: string[]; // names of tools that require approval + }; +}; +``` + + + +If the model proposes a tool call whose name is in `tools`, the middleware: + +- Sets `runState.status = "paused"` and `reason = "hitl"` +- Emits `RUN_PAUSED` +- The dashboard shows HITL buttons that call `/approve` + +### Providers + +Exports: + + + +```ts +interface Provider { + invoke( + req: ModelRequest, + opts: { signal?: AbortSignal } + ): Promise; + stream( + req: ModelRequest, + onDelta: (chunk: string) => void + ): Promise; +} + +type ModelResult = { + message: ChatMessage; // assistant message (may include toolCalls) + usage?: { promptTokens: number; completionTokens: number; costUsd?: number }; +}; + +function parseModel(modelId: string): string; +function makeOpenAI(apiKey: string, baseUrl?: string): Provider; +function makeAnthropic(baseUrl: string, apiKey: string): Provider; +function makeWorkersAI(ai: unknown): Provider; +``` + + + +- `parseModel` lets you pass IDs like `"openai:gpt-4.1-mini"`; it strips the prefix before sending to the provider +- `makeOpenAI` adapts the internal `ModelRequest` and 
`ChatMessage` format to OpenAI Chat Completions +- `makeAnthropic` and `makeWorkersAI` are placeholders in the current code (they return a dummy `"Hello, world!"` response) + +`SystemAgent.provider` wraps any `Provider` with event emission: + +- Emits `MODEL_STARTED` before calling the provider +- Emits `MODEL_COMPLETED` after + +### Worker handler + +Exports from `agents/sys/worker`: + + + +```ts +type HandlerOptions = { + baseUrl?: string; // currently unused + secret?: string; // optional shared secret for X-SECRET auth + agentDefinitions?: AgentBlueprint[]; // static blueprints +}; + +type HandlerEnv = { + SYSTEM_AGENT: DurableObjectNamespace; + AGENCY: DurableObjectNamespace; + AGENCY_REGISTRY: KVNamespace; +}; + +function createHandler(opts?: HandlerOptions): { + fetch( + req: Request, + env: HandlerEnv, + ctx: ExecutionContext + ): Promise; +}; +``` + + + +`AgentSystem.export()` calls `createHandler` for you and injects `agentDefinitions` from its internal agent registry if you did not set them. 
+ +#### HTTP routes + +- `GET /` – serves `client.html` dashboard +- `GET /agencies` – list agencies from `AGENCY_REGISTRY` +- `POST /agencies` – create a new Agency DO and store metadata + +Per agency: + +- `GET /agency/:agencyId/blueprints` – static plus dynamic blueprints +- `POST /agency/:agencyId/blueprints` – write blueprint into Agency DO +- `GET /agency/:agencyId/agents` – list agent threads +- `POST /agency/:agencyId/agents` – create new agent thread; injects request context (`ThreadRequestContext`) + +Per agent thread: + +- `POST /agency/:agencyId/agent/:agentId/invoke` – forwards to DO `/invoke`, injecting `threadId` into the body +- `GET /agency/:agencyId/agent/:agentId/state` – returns DO `/state` +- `GET /agency/:agencyId/agent/:agentId/events` – returns DO `/events` +- `POST /agency/:agencyId/agent/:agentId/approve` – forwards to DO `/approve` +- `POST /agency/:agencyId/agent/:agentId/cancel` – forwards to DO `/cancel` +- `GET /agency/:agencyId/agent/:agentId/ws` – WebSocket for live events (implemented in the base `Agent` class) + +Auth: + +- If `opts.secret` is set, all non-`GET /` requests must include `X-SECRET: <secret>` or they get `401` + +## Types + +### Messages and threads + + + +```ts +type ChatMessage = + | { role: "system" | "user" | "assistant"; content: string } + | { role: "assistant"; toolCalls?: ToolCall[] } + | { role: "tool"; content: string; toolCallId: string }; + +type ToolCall = { + name: string; + args: unknown; + id: string; +}; + +type ThreadRequestContext = { + userAgent?: string; + ip?: string; + referrer?: string; + origin?: string; + cf?: Record; // colo, country, city, region, timezone, postalCode, asOrganization +}; + +type ParentInfo = { + threadId: string; + token: string; +}; + +interface ThreadMetadata { + id: string; + createdAt: string; + request: ThreadRequestContext; + parent?: ParentInfo; + agentType: string; + agencyId: string; +} + +interface InvokeBody { + threadId?: string; + messages?: ChatMessage[]; + files?: 
Record; + idempotencyKey?: string; + agentType?: string; + parent?: ParentInfo; +} +``` + + + +`AgentState` (what `/state` returns): + + + +```ts +type AgentState = { + messages: ChatMessage[]; + tools: ToolMeta[]; + thread: ThreadMetadata; + threadId?: string; + parent?: ParentInfo; + agentType?: string; + model?: string; +} & Record; // middleware injects more (todos, files, subagents, ...) +``` + + + +### Subagents + + + +```ts +type SubagentLinkStatus = "waiting" | "completed" | "canceled"; + +interface SubagentLink { + childThreadId: string; + token: string; + status: SubagentLinkStatus; + createdAt: number; + completedAt?: number; + report?: string; + toolCallId?: string; +} +``` + + + +When `subagents` middleware is active, `state.subagents` is a `SubagentLink[]`. + +### Events + + + +```ts +enum AgentEventType { + THREAD_CREATED = "thread.created", + REQUEST_ACCEPTED = "request.accepted", + RUN_STARTED = "run.started", + RUN_TICK = "run.tick", + RUN_PAUSED = "run.paused", + RUN_RESUMED = "run.resumed", + RUN_CANCELED = "run.canceled", + AGENT_STARTED = "agent.started", + AGENT_COMPLETED = "agent.completed", + AGENT_ERROR = "agent.error", + CHECKPOINT_SAVED = "checkpoint.saved", + MODEL_STARTED = "model.started", + MODEL_DELTA = "model.delta", + MODEL_COMPLETED = "model.completed", + MIDDLEWARE_BEFORE_MODEL = "middleware.before_model", + MIDDLEWARE_AFTER_MODEL = "middleware.after_model", + TOOL_STARTED = "tool.started", + TOOL_OUTPUT = "tool.output", + TOOL_ERROR = "tool.error", + HITL_INTERRUPT = "hitl.interrupt", + HITL_RESUME = "hitl.resume", + SUBAGENT_SPAWNED = "subagent.spawned", + SUBAGENT_COMPLETED = "subagent.completed" +} + +type AgentEvent = { + threadId: string; + ts: string; + seq?: number; +} & AgentEventData; + +type AgentEventData = + | { type: AgentEventType.THREAD_CREATED; data: { threadId: string } } + | { type: AgentEventType.REQUEST_ACCEPTED; data: { idempotencyKey: string } } + | { type: AgentEventType.RUN_STARTED; data: { runId: string 
} } + | { type: AgentEventType.RUN_TICK; data: { runId: string; step: number } } + | { + type: AgentEventType.RUN_PAUSED; + data: { + runId: string; + reason: "hitl" | "error" | "exhausted" | "subagent"; + }; + } + | { type: AgentEventType.RUN_RESUMED; data: { runId: string } } + | { type: AgentEventType.RUN_CANCELED; data: { runId: string } } + | { type: AgentEventType.AGENT_STARTED; data: Record } + | { type: AgentEventType.AGENT_COMPLETED; data: { result?: unknown } } + | { + type: AgentEventType.AGENT_ERROR; + data: { error: string; stack?: string }; + } + | { + type: AgentEventType.CHECKPOINT_SAVED; + data: { stateHash: string; size: number }; + } + | { type: AgentEventType.MODEL_STARTED; data: { model: string } } + | { type: AgentEventType.MODEL_DELTA; data: { delta: string } } + | { + type: AgentEventType.MODEL_COMPLETED; + data: { usage?: { inputTokens: number; outputTokens: number } }; + } + | { + type: AgentEventType.MIDDLEWARE_BEFORE_MODEL; + data: { middlewareName: string }; + } + | { + type: AgentEventType.MIDDLEWARE_AFTER_MODEL; + data: { middlewareName: string }; + } + | { + type: AgentEventType.TOOL_STARTED; + data: { toolName: string; args: unknown }; + } + | { + type: AgentEventType.TOOL_OUTPUT; + data: { toolName: string; output: unknown }; + } + | { + type: AgentEventType.TOOL_ERROR; + data: { toolName: string; error: string }; + } + | { + type: AgentEventType.HITL_INTERRUPT; + data: { proposedToolCalls: Array<{ toolName: string; args: unknown }> }; + } + | { + type: AgentEventType.HITL_RESUME; + data: { + approved: boolean; + modifiedToolCalls?: Array<{ toolName: string; args: unknown }>; + }; + } + | { type: AgentEventType.SUBAGENT_SPAWNED; data: { childThreadId: string } } + | { + type: AgentEventType.SUBAGENT_COMPLETED; + data: { childThreadId: string; result?: unknown }; + }; +``` + + + +`GET /agency/:agencyId/agent/:threadId/events` returns `{ events: AgentEvent[] }`. 
+ +## SystemAgent HTTP API (per-thread) + +These are internal to the handler, but you might call them directly from another Worker if you have the DO stub. + +- `POST /register` – thread metadata registration + + Body: `ThreadMetadata` + +- `POST /invoke` – start/continue a run + + Body: `InvokeBody` + + Returns 202 with: + + ```text + { "runId": "uuid", "status": "running" | "paused" | "completed" | ... } + ``` + +- `POST /approve` – HITL approval with body `ApproveBody`: + + + + ```ts + type ApproveBody = { + approved: boolean; + modifiedToolCalls?: ToolCall[]; + }; + ``` + + + +- `POST /cancel` – cancel current run (also propagates to child subagents if any) + +- `GET /state` – returns `{ state: AgentState, run: RunState }` + +- `GET /events` – returns `{ events: AgentEvent[] }` + +- `POST /child_result` – internal; used by subagents to report back to parents + +You usually interact via the higher-level `/agency/...` routes instead of calling these directly. + +## Integration workflow + +Typical integration looks like: + +1. Configure an `AgentSystem` with: + - a default LLM model + - a set of tools and middleware + - a bundle of blueprints + +2. Export it and bind Durable Objects and KV in Wrangler + +3. Spin up an Agency via `POST /agencies` + +4. Spawn one or more SystemAgent threads via `POST /agency/:id/agents` + +5. Talk to threads via: + - `POST /invoke` to send messages + - `GET /state` to inspect current state + - `/ws` to stream events + +6. Let the built-in middleware handle: + - planning (todo lists) + - filesystem (files) + - subagents (`task`) + - optional HITL + +You can extend the system in two main ways: + +- **New tools**: use `defineTool` and `system.addTool` or register them from a middleware +- **New middleware**: use `defineMiddleware`, register it with `.use()`, and drive the agent via hooks (`beforeModel`, `onModelResult`, `onToolResult`, etc.) 
diff --git a/src/content/docs/agents/sys/architecture.mdx b/src/content/docs/agents/sys/architecture.mdx new file mode 100644 index 00000000000000..d5c79f6f938925 --- /dev/null +++ b/src/content/docs/agents/sys/architecture.mdx @@ -0,0 +1,479 @@ +--- +title: Architecture +pcx_content_type: concept +sidebar: + order: 2 +--- + +import { TypeScriptExample } from "~/components"; + +This document explains the mental model for how the Agent System components work together. + +## High-level components + +There are three main layers: + +1. **Worker handler** (exported `handler`) + - HTTP surface and dashboard + - Routes API calls to the right Durable Objects + - Provides the HTML UI at `GET /` + +2. **Agency** Durable Object (`Agency`) + - Control plane per agency + - Stores and overrides agent blueprints + - Tracks which agent threads exist in this agency + +3. **SystemAgent** Durable Object (`SystemAgent`) + - One instance per agent thread + - Runs the agent loop: + - stores messages, files, and todos + - calls the LLM provider + - executes tools and subagents + - emits events + +All the interesting work happens inside `SystemAgent` instances. + +## Agencies and agent threads + +### Agency DO + +The `Agency` Durable Object is responsible for: + +- **Blueprint management** in SQLite: + + ```sql + CREATE TABLE IF NOT EXISTS blueprints ( + name TEXT PRIMARY KEY, + data TEXT NOT NULL, -- JSON AgentBlueprint + updated_at INTEGER NOT NULL + ); + ``` + +- **Agent registry** per agency: + + ```sql + CREATE TABLE IF NOT EXISTS agents ( + id TEXT PRIMARY KEY, + type TEXT NOT NULL, + created_at INTEGER NOT NULL, + metadata TEXT -- JSON + ); + ``` + +Each agency is one Durable Object instance, identified by a DO ID string. Agencies are tracked in Workers KV through `AGENCY_REGISTRY`. 
+ +### Agency HTTP API + +The handler routes: + +- `GET /agencies` – list agencies from KV +- `POST /agencies` – create a new `Agency` DO instance and store its metadata + +Per agency: + +- `GET /agency/:agencyId/blueprints` + - Merges static blueprints (from `AgentSystem.addAgent`) and dynamic ones stored in the Agency's SQLite +- `POST /agency/:agencyId/blueprints` + - Write or override a blueprint in the Agency's SQLite + +Agents within an agency: + +- `GET /agency/:agencyId/agents` – list agent threads +- `POST /agency/:agencyId/agents` + - Creates a new thread: + - generates an ID + - writes to `agents` table + - spawns a `SystemAgent` DO + - calls `/register` on that DO with `ThreadMetadata` + +The `Agency` DO never calls the LLM; it just manages metadata and blueprints. + +## SystemAgent: per-thread runtime + +Each `SystemAgent` Durable Object is the actual agent brain for one thread. + +### Persistent info and run state + +Two key objects are persisted in KV via `PersistedObject`: + +- `info: Info` (thread metadata) + + + + ```ts + type Info = { + threadId: string; + agencyId: string; + createdAt: string; + request: ThreadRequestContext; + agentType: string; + parentInfo?: ParentInfo; // if this is a subagent + pendingToolCalls?: ToolCall[]; + blueprint?: AgentBlueprint; + }; + ``` + + + +- `runState: RunState` + + + + ```ts + type RunState = { + runId: string; + status: "idle" | "registered" | "running" | "paused" | "completed" | "canceled" | "error"; + step: number; + reason?: string; + nextAlarmAt?: number | null; + }; + ``` + + + +`PersistedObject` maps object properties to KV keys and adds mutation warnings if you try to mutate nested objects in place instead of reassigning them (to avoid accidental non-persisting changes). 
+ +### Store: messages, events, files, subagents + +`Store` wraps the Durable Object SQLite and KV: + +#### Messages + +```sql +CREATE TABLE IF NOT EXISTS messages ( + seq INTEGER PRIMARY KEY AUTOINCREMENT, + role TEXT NOT NULL CHECK(role IN ('user','assistant','tool')), + content TEXT, + tool_call_id TEXT, + tool_calls_json TEXT, + created_at INTEGER NOT NULL +); +``` + +- `appendMessages()` records user, assistant, and tool messages +- `listMessages()` reconstructs `ChatMessage[]` for the agent +- `appendToolResult()` inserts a single tool message + +#### Events + +```sql +CREATE TABLE IF NOT EXISTS events ( + seq INTEGER PRIMARY KEY AUTOINCREMENT, + type TEXT NOT NULL, + data_json TEXT NOT NULL, + ts TEXT NOT NULL +); +``` + +- `addEvent()` stores `AgentEvent`s and returns their `seq` +- `listEvents()` returns chronological agent events + +#### Files (virtual filesystem) + +```sql +CREATE TABLE IF NOT EXISTS files ( + path TEXT PRIMARY KEY, + content BLOB, + updated_at INTEGER NOT NULL +); +``` + +- `mergeFiles()`, `listFiles()`, `readFile()`, `writeFile()`, `editFile()` + +#### Subagent bookkeeping + +```sql +CREATE TABLE IF NOT EXISTS waiting_subagents (...); +CREATE TABLE IF NOT EXISTS subagent_links (...); +``` + +This tracks: + +- which tool call spawned which child thread +- which children are still pending +- completion, cancel, and reports for each subagent + +#### Todos + +```sql +CREATE TABLE IF NOT EXISTS todos ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + content TEXT NOT NULL, + status TEXT NOT NULL CHECK(status IN ('pending','in_progress','completed')), + pos INTEGER NOT NULL DEFAULT 0, + updated_at INTEGER NOT NULL +); +``` + +Only created when `planning` is enabled. 
+ +### Middleware pipeline + +`SystemAgent` exposes: + + + +```ts +abstract get middleware(): AgentMiddleware[]; +abstract get tools(): Record; +abstract get systemPrompt(): string; +abstract get model(): string; +abstract get config(): AgentConfig; +abstract get provider(): Provider; +``` + + + +Most of those are resolved dynamically from the `AgentSystem` configuration and the current agent blueprint. + +The run loop (`run()`) is: + +1. Emit `RUN_TICK` +2. If there are no pending tool calls and no waiting subagents: + - build `ModelRequest` via `ModelPlanBuilder` using all middleware + - call `provider.invoke(req)` + - let middleware react via `onModelResult` + - append the assistant's message + - capture any tool calls from the assistant into `info.pendingToolCalls` +3. If the agent paused (HITL or subagent), stop here +4. If the agent looks done (`isDone` checks last assistant message): + - mark `runState.status = "completed"` + - emit `AGENT_COMPLETED` + - if this is a subagent, call `/child_result` on the parent and stop +5. Otherwise, execute pending tools in batches (`executePendingTools`) +6. Reschedule via Durable Object alarm and repeat + +## AgentSystem and blueprints + +### AgentSystem + +`AgentSystem` is a config-time builder: + + + +```ts +const system = new AgentSystem({ + defaultModel: "openai:gpt-4.1-mini", + provider?: Provider, + handlerOptions?: HandlerOptions +}) + .defaults() + .addTool(myTool, ["analytics"]) + .use(myMiddleware, ["my-tag"]) + .addAgent({ + name: "my-agent", + description: "Does things.", + prompt: "You are...", + tags: ["default", "analytics"], + config: { ... 
} // config consumed by middleware + }); + +const { SystemAgent, Agency, handler } = system.export(); +``` + + + +Internally: + +- Tools registered via `.addTool()` go into a `ToolRegistry` +- Middleware registered via `.use()` go into a `MiddlewareRegistry` +- Blueprints registered via `.addAgent()` go into an `agentRegistry` + +The generic type `AgentSystem` grows as you add middleware, so you can get type-safe `config` for your blueprints. + +### Blueprint tags and selection + +Each `AgentBlueprint` has `tags: string[]`. + +At runtime: + +- `SystemAgent.middleware` selects middlewares whose tags intersect with the blueprint's tags +- `SystemAgent.tools` selects tools whose tags intersect with the blueprint's tags, plus any tools that middleware registered dynamically with `ctx.registerTool()` + +For example: + +- `system.defaults()` registers `planning`, `filesystem`, `subagents` with tag `"default"` (plus their internal tags like `"planning"`, `"fs"`, `"subagents"`) +- The `"manager-agent"` blueprint has `tags: ["default"]`, so it gets all of them +- Custom analytics tools are added with tag `"security"` and the `"security-agent"` blueprint uses `tags: ["security"]`, so they show up only there + +## Subagents and the task tool + +The `subagents` middleware defines one special tool: `task`. + +### What task does + +When the model calls: + +```json +{ + "name": "task", + "args": { + "description": "Analyze top IPs in this window...", + "subagentType": "security-agent" + } +} +``` + +The handler: + +1. Generates a `token` and a `childThreadId` +2. Emits `SUBAGENT_SPAWNED` +3. Creates a stub for the child `SystemAgent` DO +4. Calls `POST /register` on the child with thread metadata and parent info +5. Calls `POST /invoke` on the child with a single user message = `description` +6. Registers the waiting subagent in the parent's SQLite (token, childThreadId, toolCallId) +7. 
Pauses the parent run: + ```ts + runState.status = "paused"; + runState.reason = "subagent"; + ``` +8. Returns `null` from the tool, so no immediate tool-result message is added + +The child runs independently until it completes. + +### How results come back + +When a subagent completes: + +1. `SystemAgent.run()` in the child sets status `completed` and emits `AGENT_COMPLETED` +2. Because the child has `parentInfo`, it also: + - computes the final text output (`final`) + - calls `POST /child_result` on the parent thread with `{ token, childThreadId, report: final }` + +On the parent: + +- `childResult()`: + - pops the waiting subagent via token and childThreadId + - appends a tool message on the waiting tool call ID with the `report` + - marks the subagent link as `completed` in SQLite + - emits `SUBAGENT_COMPLETED` + - if no waiting subagents remain: + - moves `runState.status` back to `"running"` + - emits `RUN_RESUMED` + - schedules the agent again + +From the agent's perspective, it looks like one tool call that took multiple ticks and eventually produced a single tool output message. + +## HITL (Human-in-the-loop) middleware + +The optional `hitl` middleware lets you pause runs when certain tools are proposed. + +Config shape: + + + +```ts +export type HitlConfig = { + hitl?: { + tools: string[]; // list of tool names that require approval + }; +}; +``` + + + +When enabled on a blueprint: + +1. `hitl.onModelResult()` inspects the last assistant message +2. 
If any tool call's name is in `config.hitl.tools`: + - it sets `runState.status = "paused"`, `reason = "hitl"` + - emits `RUN_PAUSED` + +The dashboard shows HITL buttons when there are pending tool calls: + +- `POST /agency/:agencyId/agent/:threadId/approve` with: + + + + ```ts + { + approved: boolean; + modifiedToolCalls?: ToolCall[]; // optional edits + } + ``` + + + +On approve: + +- The parent `SystemAgent.approve()` stores the (possibly modified) tool calls in `info.pendingToolCalls` +- Emits `HITL_RESUME` and `RUN_RESUMED` +- Schedules the run again + +## Observability and graph + +Every important action emits an `AgentEvent`: + +- `THREAD_CREATED`, `REQUEST_ACCEPTED` +- `RUN_*` (`started`, `tick`, `paused`, `resumed`, `canceled`) +- `AGENT_*` (`started`, `completed`, `error`) +- `MODEL_*` (`started`, `delta`, `completed`) +- `TOOL_*` (`started`, `output`, `error`) +- `SUBAGENT_*` (`spawned`, `completed`) +- `HITL_*` (`interrupt`, `resume`) +- `CHECKPOINT_SAVED` + +`SystemAgent.emit()`: + +- Writes the event to SQLite (`events` table) +- Broadcasts it as JSON over WebSocket to any connected clients + +The dashboard (`client.html`) subscribes to: + +- `GET /agency/:agencyId/agent/:threadId/events` for the history +- `/ws` for live events + +It then reconstructs a graph with: + +- Nodes: ticks, model calls, tools, done/error markers +- Edges: sequential flow and dashed spawn/completion edges between parent and child agents + +## Filesystem and planning + +Because `.defaults()` enables both `planning` and `filesystem`, typical agents get: + +- A todo list backed by SQLite (`todos` table) +- A virtual filesystem in SQLite (`files` table) + +These show up in `AgentState`: + +- `state.todos` – current todo list +- `state.files` – map of path to string content + +And as tools: + +- `write_todos(todos: Todo[])` +- `ls()` +- `read_file({ path, offset?, limit? })` +- `write_file({ path, content })` +- `edit_file({ path, oldString, newString, replaceAll? 
})` + +The `filesystem` middleware also enforces a simple safety rule: + +- You must use `read_file` on a path at least once before you can `edit_file` it + +It tracks this via a KV entry `lastReadPaths`. + +## Dashboard UI structure + +The HTML dashboard includes: + +- Left sidebar: + - Agencies dropdown + - Threads list (root threads and nested subagents) +- Main area: + - **Chat and Todos** tab: + - Chat transcript + - Run status (running, paused, completed, error) + - Message input + - HITL controls + - Todos panel + - Raw state JSON + - **Graph** tab: + - Cytoscape graph of events + - Zoom, fit, and export controls + - **Files** tab: + - List of files in `state.files` + - Markdown and code preview with line numbers and syntax highlighting + +All of this is built on the HTTP and WebSocket endpoints exposed by the `handler`. diff --git a/src/content/docs/agents/sys/getting-started.mdx b/src/content/docs/agents/sys/getting-started.mdx new file mode 100644 index 00000000000000..a43d6226d5dd67 --- /dev/null +++ b/src/content/docs/agents/sys/getting-started.mdx @@ -0,0 +1,324 @@ +--- +title: Getting Started +pcx_content_type: get-started +sidebar: + order: 1 +--- + +import { PackageManagers, TypeScriptExample, WranglerConfig } from "~/components"; + +This guide walks through integrating the Agent System runtime into a Cloudflare Worker, defining an agent, and communicating with it. 
+ +## Add the runtime to your Worker + +In your Worker entrypoint (for example, `src/index.ts`): + + + +```ts +import { AgentSystem } from "agents/sys"; + +// Build an AgentSystem with a default LLM model +const system = new AgentSystem({ + defaultModel: "openai:gpt-4.1-mini" +}) + .defaults() // planning + filesystem + subagents + .addAgent({ + name: "manager-agent", + description: "Main agent.", + prompt: "You are a helpful assistant.", + tags: ["default"] // selects which tools/middleware apply + }); + +// Export configured DO classes and HTTP handler +const { SystemAgent, Agency, handler } = system.export(); + +export { SystemAgent, Agency }; +export default handler; +``` + + + +What `.defaults()` does: + +- Registers `planning` middleware (todo list and `write_todos` tool) +- Registers `filesystem` middleware (`ls`, `read_file`, `write_file`, `edit_file`) +- Registers `subagents` middleware (`task` tool for child agents) + +All of these middlewares are tagged with `"default"`, so any agent blueprint that includes `"default"` in its `tags` will use them. + +## Bind Durable Objects and KV + +Configure the `SystemAgent` and `Agency` Durable Objects and the KV registry that stores agencies: + + + +```jsonc +{ + "name": "my-agent-worker", + "main": "src/index.ts", + "compatibility_date": "2025-03-14", + "compatibility_flags": ["nodejs_compat"], + + "durable_objects": { + "bindings": [ + { + "name": "SYSTEM_AGENT", + "class_name": "SystemAgent" + }, + { + "name": "AGENCY", + "class_name": "Agency" + } + ] + }, + + "kv_namespaces": [ + { + "binding": "AGENCY_REGISTRY", + "id": "my-agency-registry-kv-namespace-id" + } + ], + + "migrations": [ + { + "tag": "v1", + "new_sqlite_classes": ["SystemAgent", "Agency"] + } + ] +} +``` + + + +The migration is applied automatically the first time you deploy the Worker: + + + +```sh +wrangler deploy +``` + +## Configure an LLM provider + +By default, `AgentSystem` will use OpenAI's Chat Completions API via `makeOpenAI` if you do not pass a provider explicitly. 
+ +You need: + +- `LLM_API_KEY` – secret (OpenAI API key or gateway key) +- Optional: `LLM_API_BASE` – custom base URL (proxy/gateway) + +Set them as Worker secrets: + +```sh +wrangler secret put LLM_API_KEY +wrangler secret put LLM_API_BASE # optional +``` + +Then `SystemAgent.provider` will: + +- Build an OpenAI provider using `makeOpenAI(apiKey, apiBase)` +- Wrap it to automatically emit `MODEL_STARTED` and `MODEL_COMPLETED` events + +If you want to use a custom provider, you can pass a `Provider` into `new AgentSystem({ provider })`. That provider is a simple interface: + + + +```ts +interface Provider { + invoke( + req: ModelRequest, + opts: { signal?: AbortSignal } + ): Promise; + stream( + req: ModelRequest, + onDelta: (chunk: string) => void + ): Promise; +} +``` + + + +## Run locally + +Start dev mode: + +```sh +wrangler dev +``` + +Then open the Worker URL (default `http://127.0.0.1:8787/`) in a browser. + +You should see the built-in Agent Dashboard (`client.html`), which is being served by the exported `handler`. + +## Create an Agency and an Agent + +In the dashboard: + +1. Use the **New Agency** button to create an agency. + + Under the hood this calls: + - `POST /agencies` creates a new `Agency` Durable Object instance + - The metadata (ID, name, createdAt) is stored in `AGENCY_REGISTRY` KV + +2. Select that agency from the **Agencies** dropdown. + +3. Select **New Thread** to create an agent thread, and pick your agent type + (for example, `"manager-agent"` from the example). + + Under the hood this calls: + - `POST /agency/:agencyId/agents` with `{ agentType }` + - The `Agency` DO: + - assigns a new `id` for the agent thread + - stores metadata in its local SQLite + - spawns a `SystemAgent` DO, calling `/register` with `ThreadMetadata` + +4. Select the thread in the sidebar and start sending messages from the chat panel. + +## Programmatic API + +If you do not want to use the dashboard, you can interact with the REST endpoints directly. 
+ +### Create an agency + + + +```ts +const res = await fetch("https://your-worker.example.com/agencies", { + method: "POST", + headers: { "content-type": "application/json" }, + body: JSON.stringify({ name: "Production" }) +}); + +const agency = await res.json(); // { id, name, createdAt } +``` + + + +### List blueprints + +Static blueprints from `AgentSystem.addAgent` plus any overrides stored inside the `Agency` DO: + + + +```ts +const res = await fetch( + `https://your-worker.example.com/agency/${agency.id}/blueprints` +); +const { blueprints } = await res.json(); +``` + + + +### Create a new agent thread + + + +```ts +const res = await fetch( + `https://your-worker.example.com/agency/${agency.id}/agents`, + { + method: "POST", + headers: { "content-type": "application/json" }, + body: JSON.stringify({ agentType: "manager-agent" }) + } +); + +const thread: { + id: string; + agentType: string; + createdAt: string; + request: any; + agencyId: string; +} = await res.json(); +``` + + + +### Send a message + + + +```ts +await fetch( + `https://your-worker.example.com/agency/${agency.id}/agent/${thread.id}/invoke`, + { + method: "POST", + headers: { "content-type": "application/json" }, + body: JSON.stringify({ + messages: [{ role: "user", content: "Hello, what can you do?" }] + }) + } +); +// HTTP 202, run happens asynchronously inside the DO +``` + + + +### Poll state + + + +```ts +const res = await fetch( + `https://your-worker.example.com/agency/${agency.id}/agent/${thread.id}/state` +); +const { state, run } = await res.json(); + +/* +state: AgentState (messages, tools, thread, todos, files, subagents, ...) 
+run: RunState (runId, status, step, reason, nextAlarmAt) +*/ +``` + + + +### Listen to events with WebSocket + +`client.html` uses a WebSocket at: + +```text +/ws → /agency/:agencyId/agent/:threadId/ws +``` + +The `Agent` base class calls `broadcast()` on the Durable Object whenever an `AgentEvent` is emitted in `SystemAgent.emit`, so you can reuse that endpoint for a custom UI. + +## Security considerations + +`createHandler` supports a simple shared-secret auth mechanism: + + + +```ts +const system = new AgentSystem({ + defaultModel: "openai:gpt-4.1-mini", + handlerOptions: { + secret: "some-long-random-string" // clients must send X-SECRET header + } +}); +``` + + + +When `secret` is set: + +- Every request except `GET /` must include the header `X-SECRET: <your-secret>`; otherwise it is rejected with `401` +- This gates both the dashboard and the raw REST API + +Use this if your Worker is directly exposed to the public internet and you do not have another auth layer in front. + +## Example implementation + +The [`examples/deep/` folder](https://github.com/cloudflare/agents/tree/main/examples/deep) demonstrates a real setup: + +- `AgentSystem` with: + - A security analytics subagent blueprint (`security-agent`) + - A manager blueprint (`manager-agent`) that orchestrates subagents + +- Custom tools that communicate with the Cloudflare Analytics GraphQL API +- A prompt that explains how the manager agent should: + - Plan with todos + - Spawn analytics subagents via `task` + - Read and write `report.md` in the virtual filesystem + +This is a useful reference for more complex multi-agent patterns. 
diff --git a/src/content/docs/agents/sys/index.mdx b/src/content/docs/agents/sys/index.mdx new file mode 100644 index 00000000000000..6ac93aed3a2175 --- /dev/null +++ b/src/content/docs/agents/sys/index.mdx @@ -0,0 +1,14 @@ +--- +title: Agent System +pcx_content_type: overview +sidebar: + order: 5 + group: + hideIndex: true +--- + +import { DirectoryListing } from "~/components"; + +The Agent System is a framework for building stateful, multi-agent AI systems on Cloudflare Workers. It provides built-in middleware for planning, file management, and hierarchical agent orchestration. + +