diff --git a/.changeset/parallel-tool-calls.md b/.changeset/parallel-tool-calls.md new file mode 100644 index 00000000..0f147a71 --- /dev/null +++ b/.changeset/parallel-tool-calls.md @@ -0,0 +1,32 @@ +--- +"@perstack/core": patch +"@perstack/runtime": patch +"@perstack/api-client": patch +"@perstack/base": patch +"@perstack/tui": patch +"perstack": patch +--- + +Add parallel tool call support and mixed tool call handling + +Features: + +- Process all tool calls from a single LLM response instead of only the first one +- MCP tools execute in parallel using `Promise.all` +- Support mixed tool calls (MCP + Delegate + Interactive in same response) +- Process tools in priority order: MCP → Delegate → Interactive +- Preserve partial results across checkpoint boundaries + +Schema Changes: + +- `Step.toolCall` → `Step.toolCalls` (array) +- `Step.toolResult` → `Step.toolResults` (array) +- Add `Step.pendingToolCalls` for tracking unprocessed tool calls +- Add `Checkpoint.pendingToolCalls` and `Checkpoint.partialToolResults` for resume + +Event Changes: + +- `callTool` → `callTools` +- `resolveToolResult` → `resolveToolResults` +- Add `resumeToolCalls` and `finishAllToolCalls` events + diff --git a/AGENTS.md b/AGENTS.md index abe454c2..78f2c88d 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -416,7 +416,7 @@ Key points: ## Testing - **Unit tests:** Vitest (`*.test.ts` files), run with `pnpm test` -- **E2E tests:** Manual testing by following `E2E.md` — agent should read and execute the procedures +- **E2E tests:** Vitest (`e2e/*.test.ts` files), run with `pnpm test:e2e` - **Coverage:** V8 provider, lcov output ### Unit Test Scope @@ -523,11 +523,11 @@ pnpm build # Build all packages ### E2E Testing (MANDATORY) -After build passes, run E2E tests by following `E2E.md`: +After build passes, run E2E tests: ```bash -pnpm build # Must build first -# Then run E2E tests as documented in E2E.md +pnpm build # Must build first +pnpm test:e2e # Run E2E tests ``` **E2E tests must pass before pushing.** This catches runtime issues that unit tests miss. @@ -599,5 +599,5 @@ pick = ["attemptCompletion", "think"] - [ ] `pnpm check-deps` passes - [ ] `pnpm reset && pnpm test` passes - [ ] `pnpm build` passes -- [ ] E2E tests pass (follow `E2E.md`) +- [ ] `pnpm test:e2e` passes - [ ] Versioning rules in `CONTRIBUTING.md` are followed diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 2c5ab59f..a39eb518 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -78,7 +78,8 @@ pnpm build git checkout -b feature/your-feature # ... edit code ... pnpm changeset -pnpm typecheck && pnpm test +pnpm typecheck && pnpm test && pnpm build +pnpm test:e2e # Run E2E tests git commit -m "feat: your changes" ``` @@ -195,6 +196,7 @@ pnpm changeset pnpm typecheck # Must pass pnpm test # Must pass pnpm build # Must succeed +pnpm test:e2e # Run E2E tests ``` ### 4. Commit and Push @@ -428,8 +430,13 @@ Perstack uses a two-stage release workflow powered by [changesets/action](https: - Updated `CHANGELOG.md` with PR links and author attribution **Stage 2: Publish** -1. Review and merge "Version Packages" PR -2. Release workflow automatically: +1. Review "Version Packages" PR +2. **Run E2E tests locally before merging:** + ```bash + pnpm build && pnpm test:e2e + ``` +3. Merge "Version Packages" PR +4. Release workflow automatically: - Publishes packages to npm - Creates git tags - Creates GitHub Releases @@ -571,6 +578,7 @@ Before requesting review, ensure: - [ ] Changeset created with appropriate version bump - [ ] All tests pass (`pnpm test`) - [ ] Types check across all packages (`pnpm typecheck`) +- [ ] E2E tests pass (`pnpm test:e2e`) - [ ] Documentation updated (README, JSDoc, CHANGELOG via changeset) - [ ] Migration guide included (for breaking changes) - [ ] No unintended version sync issues diff --git a/E2E.md b/E2E.md deleted file mode 100644 index 5cc4f0bb..00000000 --- a/E2E.md +++ /dev/null @@ -1,113 +0,0 @@ -# E2E Testing Guide - -Manual E2E testing procedures for perstack CLI. - -## Prerequisites - -```bash -pnpm build -``` - -## Test Commands - -Use `npx tsx` or `bun` to run the CLI: - -```bash -CLI="npx tsx packages/perstack/dist/bin/cli.js" -``` - -### 1. Help and Version - -```bash -$CLI --help -$CLI --version -$CLI run --help -$CLI publish --help -$CLI unpublish --help -$CLI tag --help -$CLI status --help -``` - -**Expected**: All commands display help/version without errors. - -### 2. Publish Dry Run - -```bash -# Valid expert -$CLI publish tic-tac-toe --dry-run - -# Invalid expert -$CLI publish nonexistent --dry-run -``` - -**Expected**: -- Valid: Outputs JSON payload -- Invalid: Error message with available experts, exit code 1 - -### 3. Argument Validation - -```bash -# Missing required args -$CLI run -$CLI run expertOnly - -# Invalid format (missing version) -$CLI unpublish no-version --force -$CLI tag no-version tag1 -$CLI status no-version available - -# Invalid status value -$CLI status expert@1.0.0 invalid-status - -# Missing tags -$CLI tag expert@1.0.0 -``` - -**Expected**: All return appropriate error messages with exit code 1. - -### 4. Config File Handling - -```bash -# Nonexistent config -$CLI publish tic-tac-toe --dry-run --config nonexistent.toml - -# No config in directory -cd /tmp && $CLI publish tic-tac-toe --dry-run -``` - -**Expected**: Error message indicating config file not found, exit code 1. - -### 5. Run Command Error Handling - -```bash -# Nonexistent expert -$CLI run nonexistent-expert "test query" -``` - -**Expected**: Error message with exit code 1. - -## Quick Test Script - -```bash -#!/bin/bash -set -e -CLI="npx tsx packages/perstack/dist/bin/cli.js" - -echo "=== Help Commands ===" -$CLI --help > /dev/null && echo "OK: --help" -$CLI --version > /dev/null && echo "OK: --version" - -echo "=== Publish Dry Run ===" -$CLI publish tic-tac-toe --dry-run > /dev/null && echo "OK: publish dry-run" -$CLI publish nonexistent --dry-run 2>&1 && exit 1 || echo "OK: publish invalid expert" - -echo "=== Argument Validation ===" -$CLI run 2>&1 && exit 1 || echo "OK: run missing args" -$CLI unpublish no-version --force 2>&1 && exit 1 || echo "OK: unpublish invalid format" -$CLI status expert@1.0.0 invalid-status 2>&1 && exit 1 || echo "OK: status invalid value" - -echo "=== Config Handling ===" -$CLI publish tic-tac-toe --dry-run --config nonexistent.toml 2>&1 && exit 1 || echo "OK: nonexistent config" - -echo "All tests passed!" -``` diff --git a/docs/content/making-experts/testing.mdx b/docs/content/making-experts/testing.mdx index 636e612b..6497df15 100644 --- a/docs/content/making-experts/testing.mdx +++ b/docs/content/making-experts/testing.mdx @@ -70,8 +70,8 @@ import { run } from "@perstack/runtime" const result = await run(params, { // Mock eventListener for assertions eventListener: (event) => { - if (event.type === "callTool") { - expect(event.toolCall.name).toBe("expectedTool") + if (event.type === "callTools") { + expect(event.toolCalls[0].toolName).toBe("expectedTool") } } }) diff --git a/e2e/README.md b/e2e/README.md new file mode 100644 index 00000000..1d430bd5 --- /dev/null +++ b/e2e/README.md @@ -0,0 +1,120 @@ +# E2E Tests + +End-to-end tests for Perstack CLI and runtime. + +## Prerequisites + +```bash +pnpm build +``` + +## Running Tests + +```bash +# Run all E2E tests (parallel execution) +pnpm test:e2e + +# Run specific test file +pnpm test:e2e -- run.test.ts + +# Run tests matching pattern +pnpm test:e2e -- --testNamePattern "publish" +``` + +## Test Structure + +``` +e2e/ +├── lib/ # Test utilities +│ ├── runner.ts # CLI and Expert execution +│ ├── event-parser.ts # Runtime event parsing +│ └── assertions.ts # Custom assertions +├── experts/ # Expert definitions for tests +│ ├── mixed-tools.toml # MCP + Delegate + Interactive +│ ├── parallel-mcp.toml # Parallel MCP calls +│ ├── delegate-chain.toml # Delegation chain +│ └── continue-resume.toml # Continue/resume functionality +├── run.test.ts # CLI run command +├── publish.test.ts # CLI publish command +├── unpublish.test.ts # CLI unpublish command +├── tag.test.ts # CLI tag command +├── status.test.ts # CLI status command +├── mixed-tools.test.ts # Mixed tool calls (MCP + Delegate + Interactive) +├── parallel-mcp.test.ts # Parallel MCP tool execution +├── delegate-chain.test.ts # Expert delegation chain +└── continue-resume.test.ts # --continue-run and --resume-from +``` + +## Test Categories + +### CLI Commands + +Tests for CLI argument validation and error handling. + +| File | Tests | Coverage | +|------|-------|----------| +| run.test.ts | 4 | Missing args, nonexistent expert, invalid config | +| publish.test.ts | 4 | dry-run success, nonexistent expert, config errors | +| unpublish.test.ts | 2 | Missing version, missing --force | +| tag.test.ts | 2 | Missing version, missing tags | +| status.test.ts | 3 | Missing version/status, invalid status | + +### Runtime Features + +Tests for parallel tool calls, delegation, and state management. + +| File | Tests | Coverage | +|------|-------|----------| +| mixed-tools.test.ts | 4 | MCP + Delegate + Interactive in single response | +| parallel-mcp.test.ts | 3 | Parallel MCP tool execution | +| delegate-chain.test.ts | 3 | Multi-level delegation | +| continue-resume.test.ts | 4 | --continue-run, --resume-from | + +## Writing Tests + +### CLI Command Tests + +```typescript +import { describe, expect, it } from "vitest" +import { runCli } from "./lib/runner.js" + +describe("CLI command", () => { + it("should fail with invalid args", async () => { + const result = await runCli(["command", "invalid-arg"]) + expect(result.exitCode).toBe(1) + }) +}) +``` + +### Runtime Tests + +```typescript +import { beforeAll, describe, expect, it } from "vitest" +import { assertEventSequenceContains } from "./lib/assertions.js" +import { type RunResult, runExpert } from "./lib/runner.js" + +describe("Runtime feature", () => { + let result: RunResult + + beforeAll(async () => { + result = await runExpert("expert-key", "query", { + configPath: "./e2e/experts/your-expert.toml", + timeout: 180000, + }) + }, 200000) + + it("should emit expected events", () => { + expect( + assertEventSequenceContains(result.events, ["startRun", "completeRun"]).passed, + ).toBe(true) + }) +}) +``` + +## Notes + +- Tests run in parallel via vitest +- Runtime tests require API keys (set in `.env.local`) +- TUI-based commands (`start`) are excluded from E2E tests +- API-calling tests (actual publish, unpublish) require registry access and are not included + diff --git a/e2e/continue-resume.test.ts b/e2e/continue-resume.test.ts new file mode 100644 index 00000000..a99e85bb --- /dev/null +++ b/e2e/continue-resume.test.ts @@ -0,0 +1,73 @@ +import { describe, expect, it } from "vitest" +import { assertEventSequenceContains } from "./lib/assertions.js" +import { filterEventsByType, getEventSequence } from "./lib/event-parser.js" +import { runExpert } from "./lib/runner.js" + +const CONFIG_PATH = "./e2e/experts/continue-resume.toml" +const TIMEOUT = 180000 + +describe("Continue and Resume From Checkpoint", () => { + it("should stop at interactive tool and get run ID", async () => { + const result = await runExpert("e2e-continue", "Test continue/resume functionality", { + configPath: CONFIG_PATH, + timeout: TIMEOUT, + }) + expect( + assertEventSequenceContains(result.events, [ + "startRun", + "callInteractiveTool", + "stopRunByInteractiveTool", + ]).passed, + ).toBe(true) + expect(result.runId).not.toBeNull() + }, 200000) + + it("should continue run with --continue-run", async () => { + const initialResult = await runExpert("e2e-continue", "Test continue/resume functionality", { + configPath: CONFIG_PATH, + timeout: TIMEOUT, + }) + expect(initialResult.runId).not.toBeNull() + const continueResult = await runExpert("e2e-continue", "User confirmed the test", { + configPath: CONFIG_PATH, + continueRunId: initialResult.runId!, + isInteractiveResult: true, + timeout: TIMEOUT, + }) + expect(assertEventSequenceContains(continueResult.events, ["startRun"]).passed).toBe(true) + expect( + continueResult.events.some( + (e) => + e.type === "startRun" && + (e as { initialCheckpoint?: { status?: string } }).initialCheckpoint?.status === + "stoppedByInteractiveTool", + ), + ).toBe(true) + }, 400000) + + it("should complete after continue", async () => { + const initialResult = await runExpert("e2e-continue", "Test continue/resume functionality", { + configPath: CONFIG_PATH, + timeout: TIMEOUT, + }) + expect(initialResult.runId).not.toBeNull() + const continueResult = await runExpert("e2e-continue", "User confirmed the test", { + configPath: CONFIG_PATH, + continueRunId: initialResult.runId!, + isInteractiveResult: true, + timeout: TIMEOUT, + }) + expect(getEventSequence(continueResult.events)).toContain("completeRun") + }, 400000) + + it("should capture checkpoint for resume", async () => { + const result = await runExpert("e2e-resume", "Test continue/resume functionality", { + configPath: CONFIG_PATH, + timeout: TIMEOUT, + }) + const stopEvent = filterEventsByType(result.events, "stopRunByInteractiveTool")[0] + expect(stopEvent).toBeDefined() + expect((stopEvent as { checkpoint?: { id?: string } }).checkpoint?.id).toBeDefined() + expect(result.runId).not.toBeNull() + }, 200000) +}) diff --git a/e2e/delegate-chain.test.ts b/e2e/delegate-chain.test.ts new file mode 100644 index 00000000..7aa05d03 --- /dev/null +++ b/e2e/delegate-chain.test.ts @@ -0,0 +1,33 @@ +import { beforeAll, describe, expect, it } from "vitest" +import { assertEventSequenceContains } from "./lib/assertions.js" +import { getEventSequence } from "./lib/event-parser.js" +import { type RunResult, runExpert } from "./lib/runner.js" + +describe("Delegate Chain", () => { + let result: RunResult + + beforeAll(async () => { + result = await runExpert( + "e2e-delegate-chain", + "Test delegate chain: process this request through multiple levels", + { configPath: "./e2e/experts/delegate-chain.toml", timeout: 180000 }, + ) + }, 200000) + + it("should delegate through chain", () => { + expect( + assertEventSequenceContains(result.events, ["startRun", "callDelegate", "stopRunByDelegate"]).passed, + ).toBe(true) + }) + + it("should have multiple delegation levels", () => { + const sequence = getEventSequence(result.events) + expect(sequence.filter((e) => e === "callDelegate").length).toBeGreaterThanOrEqual(2) + expect(sequence.filter((e) => e === "stopRunByDelegate").length).toBeGreaterThanOrEqual(2) + }) + + it("should return through chain and complete", () => { + const sequence = getEventSequence(result.events) + expect(sequence.filter((e) => e === "completeRun").length).toBeGreaterThanOrEqual(3) + }) +}) diff --git a/e2e/experts/continue-resume.toml b/e2e/experts/continue-resume.toml new file mode 100644 index 00000000..d08151ca --- /dev/null +++ b/e2e/experts/continue-resume.toml @@ -0,0 +1,64 @@ +model = "claude-sonnet-4-5" +temperature = 0.3 + +[provider] +providerName = "anthropic" + +envPath = [".env", ".env.local"] + +[experts."e2e-continue"] +version = "1.0.0" +description = "E2E test expert for continue functionality" +instruction = """ +You are an E2E test expert that tests run continuation. + +When given a query: +1. First, ask the user for confirmation using askUser +2. After receiving user input, summarize and call attemptCompletion + +This tests the --continue and --continue-run functionality. +""" + +[experts."e2e-continue".skills."user-input"] +type = "interactiveSkill" +description = "User interaction" + +[experts."e2e-continue".skills."user-input".tools.askUser] +name = "askUser" +description = "Ask the user a question" +inputJsonSchema = '{"type":"object","properties":{"question":{"type":"string"}},"required":["question"]}' + +[experts."e2e-continue".skills."@perstack/base"] +type = "mcpStdioSkill" +command = "npx" +packageName = "@perstack/base" +pick = ["attemptCompletion", "think"] + +[experts."e2e-resume"] +version = "1.0.0" +description = "E2E test expert for resume-from functionality" +instruction = """ +You are an E2E test expert that tests checkpoint resumption. + +When given a query: +1. Call think tool to process the query +2. Ask the user for confirmation using askUser +3. After receiving user input, summarize and call attemptCompletion + +This tests the --resume-from functionality with specific checkpoint. +""" + +[experts."e2e-resume".skills."user-input"] +type = "interactiveSkill" +description = "User interaction" + +[experts."e2e-resume".skills."user-input".tools.askUser] +name = "askUser" +description = "Ask the user a question" +inputJsonSchema = '{"type":"object","properties":{"question":{"type":"string"}},"required":["question"]}' + +[experts."e2e-resume".skills."@perstack/base"] +type = "mcpStdioSkill" +command = "npx" +packageName = "@perstack/base" +pick = ["attemptCompletion", "think"] diff --git a/e2e/experts/delegate-chain.toml b/e2e/experts/delegate-chain.toml new file mode 100644 index 00000000..2d6084ee --- /dev/null +++ b/e2e/experts/delegate-chain.toml @@ -0,0 +1,55 @@ +model = "claude-sonnet-4-5" +temperature = 0.3 + +[provider] +providerName = "anthropic" + +envPath = [".env", ".env.local"] + +[experts."e2e-delegate-chain"] +version = "1.0.0" +description = "E2E test expert for delegate chain" +instruction = """ +You are an E2E test expert that tests delegate chain execution. + +When given a test query, delegate to "e2e-delegate-level1" to process the request. +Wait for the delegation result and summarize it, then call attemptCompletion. +""" +delegates = ["e2e-delegate-level1"] + +[experts."e2e-delegate-chain".skills."@perstack/base"] +type = "mcpStdioSkill" +command = "npx" +packageName = "@perstack/base" +pick = ["attemptCompletion", "think"] + +[experts."e2e-delegate-level1"] +version = "1.0.0" +description = "First level delegate expert" +instruction = """ +You are a level 1 delegate expert. +When given a query, delegate to "e2e-delegate-level2" for further processing. +Return the combined result. +""" +delegates = ["e2e-delegate-level2"] + +[experts."e2e-delegate-level1".skills."@perstack/base"] +type = "mcpStdioSkill" +command = "npx" +packageName = "@perstack/base" +pick = ["attemptCompletion"] + +[experts."e2e-delegate-level2"] +version = "1.0.0" +description = "Second level delegate expert" +instruction = """ +You are a level 2 delegate expert. +When given a query, respond with "Level 2 processing complete: [query summary]". +Call attemptCompletion with your response. +""" + +[experts."e2e-delegate-level2".skills."@perstack/base"] +type = "mcpStdioSkill" +command = "npx" +packageName = "@perstack/base" +pick = ["attemptCompletion"] diff --git a/e2e/experts/mixed-tools.toml b/e2e/experts/mixed-tools.toml new file mode 100644 index 00000000..2a6122e2 --- /dev/null +++ b/e2e/experts/mixed-tools.toml @@ -0,0 +1,59 @@ +model = "claude-sonnet-4-5" +temperature = 0.3 + +[provider] +providerName = "anthropic" + +envPath = [".env", ".env.local"] + +[experts."e2e-mixed-tools"] +version = "1.0.0" +description = "E2E test expert for mixed tool calls (MCP + Delegate + Interactive)" +instruction = """ +You are an E2E test expert that tests parallel tool execution with mixed tool types. + +When given a test query, you MUST call ALL THREE tools in a SINGLE response: +1. web_search_exa - Search for information (MCP tool) +2. e2e-helper - Delegate to helper expert (Delegate tool) +3. askUser - Ask user for input (Interactive tool) + +CRITICAL: Make ALL THREE tool calls in ONE response. +The runtime will process them in order: MCP first, then Delegate, then Interactive. +""" +delegates = ["e2e-helper"] + +[experts."e2e-mixed-tools".skills."exa"] +type = "mcpStdioSkill" +description = "Web search" +command = "npx" +args = ["-y", "exa-mcp-server"] +requiredEnv = ["EXA_API_KEY"] + +[experts."e2e-mixed-tools".skills."user-input"] +type = "interactiveSkill" +description = "User interaction" + +[experts."e2e-mixed-tools".skills."user-input".tools.askUser] +name = "askUser" +description = "Ask the user a question" +inputJsonSchema = '{"type":"object","properties":{"question":{"type":"string"}},"required":["question"]}' + +[experts."e2e-mixed-tools".skills."@perstack/base"] +type = "mcpStdioSkill" +command = "npx" +packageName = "@perstack/base" +pick = ["attemptCompletion", "think"] + +[experts."e2e-helper"] +version = "1.0.0" +description = "E2E test helper expert" +instruction = """ +You are a helper expert for E2E testing. +When given a query, respond briefly with "Helper analysis complete." +""" + +[experts."e2e-helper".skills."@perstack/base"] +type = "mcpStdioSkill" +command = "npx" +packageName = "@perstack/base" +pick = ["attemptCompletion"] diff --git a/e2e/experts/parallel-mcp.toml b/e2e/experts/parallel-mcp.toml new file mode 100644 index 00000000..1a07a825 --- /dev/null +++ b/e2e/experts/parallel-mcp.toml @@ -0,0 +1,34 @@ +model = "claude-sonnet-4-5" +temperature = 0.3 + +[provider] +providerName = "anthropic" + +envPath = [".env", ".env.local"] + +[experts."e2e-parallel-mcp"] +version = "1.0.0" +description = "E2E test expert for parallel MCP tool calls" +instruction = """ +You are an E2E test expert that tests parallel MCP tool execution. + +When given a test query, you MUST call MULTIPLE MCP tools in a SINGLE response: +1. web_search_exa - Search for the topic +2. web_search_exa - Search for related information (different query) + +CRITICAL: Make BOTH tool calls in ONE response to test parallel MCP execution. +After getting results, summarize briefly and call attemptCompletion. +""" + +[experts."e2e-parallel-mcp".skills."exa"] +type = "mcpStdioSkill" +description = "Web search" +command = "npx" +args = ["-y", "exa-mcp-server"] +requiredEnv = ["EXA_API_KEY"] + +[experts."e2e-parallel-mcp".skills."@perstack/base"] +type = "mcpStdioSkill" +command = "npx" +packageName = "@perstack/base" +pick = ["attemptCompletion", "think"] diff --git a/e2e/experts/special-tools.toml b/e2e/experts/special-tools.toml new file mode 100644 index 00000000..25914dcb --- /dev/null +++ b/e2e/experts/special-tools.toml @@ -0,0 +1,36 @@ +model = "claude-sonnet-4-5" +temperature = 0.3 + +[provider] +providerName = "anthropic" + +envPath = [".env", ".env.local"] + +[experts."e2e-special-tools"] +version = "1.0.0" +description = "E2E test expert for special tool parallel execution" +instruction = """ +You are an E2E test expert that tests parallel execution of special tools with regular MCP tools. + +When given a test query, you MUST call ALL of these tools in a SINGLE response: +1. think - Think about the approach +2. readPdfFile - Read the PDF at e2e/fixtures/test.pdf +3. readImageFile - Read the image at e2e/fixtures/test.gif +4. web_search_exa - Search for related information + +CRITICAL: Make ALL 4 tool calls in ONE response to test that special tools (think, readPdfFile, readImageFile) execute in parallel with regular MCP tools. +After getting results, summarize briefly what you found and call attemptCompletion. +""" + +[experts."e2e-special-tools".skills."exa"] +type = "mcpStdioSkill" +description = "Web search" +command = "npx" +args = ["-y", "exa-mcp-server"] +requiredEnv = ["EXA_API_KEY"] + +[experts."e2e-special-tools".skills."@perstack/base"] +type = "mcpStdioSkill" +command = "npx" +packageName = "@perstack/base" +pick = ["attemptCompletion", "think", "readPdfFile", "readImageFile"] diff --git a/e2e/fixtures/test.gif b/e2e/fixtures/test.gif new file mode 100644 index 00000000..28e7c216 Binary files /dev/null and b/e2e/fixtures/test.gif differ diff --git a/e2e/fixtures/test.pdf b/e2e/fixtures/test.pdf new file mode 100644 index 00000000..fc2928d7 Binary files /dev/null and b/e2e/fixtures/test.pdf differ diff --git a/e2e/lib/assertions.ts b/e2e/lib/assertions.ts new file mode 100644 index 00000000..9a8aa7c9 --- /dev/null +++ b/e2e/lib/assertions.ts @@ -0,0 +1,132 @@ +import { + type CheckpointState, + extractCheckpointState, + extractToolCalls, + filterEventsByType, + getEventSequence, + type ParsedEvent, +} from "./event-parser.js" + +export type AssertionResult = { + passed: boolean + message: string + details?: unknown +} + +export function assertEventSequenceContains( + events: ParsedEvent[], + expectedSubsequence: string[], +): AssertionResult { + const actual = getEventSequence(events) + let matchIndex = 0 + for (const eventType of actual) { + if (eventType === expectedSubsequence[matchIndex]) { + matchIndex++ + if (matchIndex === expectedSubsequence.length) break + } + } + const passed = matchIndex === expectedSubsequence.length + return { + passed, + message: passed + ? `Event sequence contains: ${expectedSubsequence.join(" → ")}` + : `Event sequence missing expected subsequence`, + details: passed ? undefined : { expected: expectedSubsequence, actual }, + } +} + +export function assertToolCallCount( + events: ParsedEvent[], + eventType: "callTools", + expectedCount: number, +): AssertionResult { + const callToolsEvents = filterEventsByType(events, eventType) + if (callToolsEvents.length === 0) { + return { passed: false, message: `No ${eventType} events found` } + } + const matchingEvent = callToolsEvents.find((e) => extractToolCalls(e).length === expectedCount) + if (matchingEvent) { + return { passed: true, message: `Tool call count matches: ${expectedCount}` } + } + const allCounts = callToolsEvents.map((e) => extractToolCalls(e).length) + return { + passed: false, + message: `No ${eventType} event with ${expectedCount} tool calls found`, + details: { foundCounts: allCounts }, + } +} + +export function assertCheckpointState( + events: ParsedEvent[], + eventType: string, + expectedState: Partial, +): AssertionResult { + const targetEvent = events.find((e) => e.type === eventType) + if (!targetEvent) { + return { passed: false, message: `Event ${eventType} not found` } + } + const state = extractCheckpointState(targetEvent) + if (!state) { + return { passed: false, message: `No checkpoint in ${eventType} event` } + } + type Check = { key: string; passed: boolean; expected: unknown; actual: unknown } + const checks: Check[] = [] + if (expectedState.status !== undefined) { + checks.push({ + key: "status", + passed: state.status === expectedState.status, + expected: expectedState.status, + actual: state.status, + }) + } + if (expectedState.pendingToolCalls !== undefined) { + checks.push({ + key: "pendingToolCalls.length", + passed: state.pendingToolCalls.length === expectedState.pendingToolCalls.length, + expected: expectedState.pendingToolCalls.length, + actual: state.pendingToolCalls.length, + }) + } + if (expectedState.partialToolResults !== undefined) { + checks.push({ + key: "partialToolResults.length", + passed: state.partialToolResults.length === expectedState.partialToolResults.length, + expected: expectedState.partialToolResults.length, + actual: state.partialToolResults.length, + }) + } + const allPassed = checks.every((c) => c.passed) + return { + passed: allPassed, + message: allPassed + ? `Checkpoint state matches for ${eventType}` + : `Checkpoint state mismatch for ${eventType}`, + details: allPassed + ? undefined + : { failedChecks: checks.filter((c) => !c.passed), actualState: state }, + } +} + +export function assertPartialResultsContain( + events: ParsedEvent[], + eventType: string, + expectedToolNames: string[], +): AssertionResult { + const targetEvent = events.find((e) => e.type === eventType) + if (!targetEvent) { + return { passed: false, message: `Event ${eventType} not found` } + } + const state = extractCheckpointState(targetEvent) + if (!state) { + return { passed: false, message: `No checkpoint in ${eventType} event` } + } + const actualToolNames = state.partialToolResults.map((tr) => tr.toolName) + const allFound = expectedToolNames.every((name) => actualToolNames.includes(name)) + return { + passed: allFound, + message: allFound + ? `Partial results contain: ${expectedToolNames.join(", ")}` + : `Missing partial results`, + details: allFound ? undefined : { expected: expectedToolNames, actual: actualToolNames }, + } +} diff --git a/e2e/lib/event-parser.ts b/e2e/lib/event-parser.ts new file mode 100644 index 00000000..f84fa308 --- /dev/null +++ b/e2e/lib/event-parser.ts @@ -0,0 +1,85 @@ +import type { RunEvent } from "@perstack/core" + +export type ParsedEvent = RunEvent & { raw: string } + +export type ToolCallInfo = { + id: string + skillName: string + toolName: string +} + +export type CheckpointState = { + status: string + pendingToolCalls: ToolCallInfo[] + partialToolResults: ToolCallInfo[] +} + +const RELEVANT_EVENT_TYPES = [ + "startRun", + "callTools", + "callDelegate", + "callInteractiveTool", + "stopRunByDelegate", + "stopRunByInteractiveTool", + "resumeToolCalls", + "finishAllToolCalls", + "completeRun", + "resolveToolResults", +] as const + +export function parseEvents(output: string): ParsedEvent[] { + const events: ParsedEvent[] = [] + for (const line of output.split("\n")) { + try { + const data = JSON.parse(line) as RunEvent + if (data.type) { + events.push({ ...data, raw: line }) + } + } catch { + // skip + } + } + return events +} + +export function filterEventsByType( + events: ParsedEvent[], + type: T, +): Extract[] { + return events.filter((e) => e.type === type) as Extract[] +} + +export function getEventSequence(events: ParsedEvent[]): string[] { + return events.filter((e) => RELEVANT_EVENT_TYPES.includes(e.type as never)).map((e) => e.type) +} + +export function extractToolCalls(event: ParsedEvent): ToolCallInfo[] { + if (event.type === "callTools") { + return (event.toolCalls ?? []).map((tc) => ({ + id: tc.id, + skillName: tc.skillName, + toolName: tc.toolName, + })) + } + return [] +} + +export function extractCheckpointState(event: ParsedEvent): CheckpointState | null { + const checkpoint = (event as { checkpoint?: Record }).checkpoint + if (!checkpoint) return null + const pending = (checkpoint.pendingToolCalls ?? []) as ToolCallInfo[] + const partial = (checkpoint.partialToolResults ?? []) as ToolCallInfo[] + return { + status: checkpoint.status as string, + pendingToolCalls: pending.map((tc) => ({ + id: tc.id, + skillName: tc.skillName, + toolName: tc.toolName, + })), + partialToolResults: partial.map((tr) => ({ + id: tr.id, + skillName: tr.skillName, + toolName: tr.toolName, + })), + } +} diff --git a/e2e/lib/runner.ts b/e2e/lib/runner.ts new file mode 100644 index 00000000..2d9c70df --- /dev/null +++ b/e2e/lib/runner.ts @@ -0,0 +1,108 @@ +import { spawn } from "node:child_process" +import { type ParsedEvent, parseEvents } from "./event-parser.js" + +export type CommandResult = { + stdout: string + stderr: string + exitCode: number +} + +export type RunResult = CommandResult & { + events: ParsedEvent[] + runId: string | null +} + +export async function runCli( + args: string[], + options?: { timeout?: number; cwd?: string }, +): Promise { + const timeout = options?.timeout ?? 30000 + const cwd = options?.cwd ?? process.cwd() + return new Promise((resolve, reject) => { + let stdout = "" + let stderr = "" + const proc = spawn("npx", ["tsx", "./packages/perstack/bin/cli.ts", ...args], { + cwd, + env: { ...process.env }, + stdio: ["pipe", "pipe", "pipe"], + }) + const timer = setTimeout(() => { + proc.kill("SIGTERM") + reject(new Error(`Timeout after ${timeout}ms`)) + }, timeout) + proc.stdout.on("data", (data) => { + stdout += data.toString() + }) + proc.stderr.on("data", (data) => { + stderr += data.toString() + }) + proc.on("close", (code) => { + clearTimeout(timer) + resolve({ stdout, stderr, exitCode: code ?? 0 }) + }) + proc.on("error", (err) => { + clearTimeout(timer) + reject(err) + }) + }) +} + +export async function runExpert( + expertKey: string, + query: string, + options?: { + configPath?: string + timeout?: number + continueRunId?: string + isInteractiveResult?: boolean + }, +): Promise { + const timeout = options?.timeout ?? 120000 + const args = ["run"] + if (options?.configPath) { + args.push("--config", options.configPath) + } + if (options?.continueRunId) { + args.push("--continue-run", options.continueRunId) + } + if (options?.isInteractiveResult) { + args.push("-i") + } + args.push(expertKey, query) + return new Promise((resolve, reject) => { + let stdout = "" + let stderr = "" + const proc = spawn("npx", ["tsx", "./packages/perstack/bin/cli.ts", ...args], { + cwd: process.cwd(), + env: { ...process.env }, + stdio: ["pipe", "pipe", "pipe"], + }) + const timer = setTimeout(() => { + proc.kill("SIGTERM") + reject(new Error(`Timeout after ${timeout}ms`)) + }, timeout) + proc.stdout.on("data", (data) => { + stdout += data.toString() + }) + proc.stderr.on("data", (data) => { + stderr += data.toString() + }) + proc.on("close", (code) => { + clearTimeout(timer) + const events = parseEvents(stdout) + const startRunEvent = events.find((e) => e.type === "startRun") + const runId = startRunEvent ? ((startRunEvent as { runId?: string }).runId ?? null) : null + resolve({ + stdout, + stderr, + events, + exitCode: code ?? 0, + runId, + }) + }) + proc.on("error", (err) => { + clearTimeout(timer) + reject(err) + }) + }) +} diff --git a/e2e/mixed-tools.test.ts b/e2e/mixed-tools.test.ts new file mode 100644 index 00000000..fd737e2c --- /dev/null +++ b/e2e/mixed-tools.test.ts @@ -0,0 +1,71 @@ +import { beforeAll, describe, expect, it } from "vitest" +import { + assertCheckpointState, + assertEventSequenceContains, + assertPartialResultsContain, + assertToolCallCount, +} from "./lib/assertions.js" +import type { ToolCallInfo } from "./lib/event-parser.js" +import { type RunResult, runExpert } from "./lib/runner.js" + +describe("Mixed Tool Calls (MCP + Delegate + Interactive)", () => { + let result: RunResult + + beforeAll(async () => { + result = await runExpert( + "e2e-mixed-tools", + "Test mixed tool calls: search, delegate, and ask user", + { + configPath: "./e2e/experts/mixed-tools.toml", + timeout: 180000, + }, + ) + }, 200000) + + it("should generate 3 tool calls in priority order", () => { + expect(assertToolCallCount(result.events, "callTools", 3).passed).toBe(true) + expect( + assertEventSequenceContains(result.events, [ + "startRun", + "callTools", + "callDelegate", + "stopRunByDelegate", + ]).passed, + ).toBe(true) + }) + + it("should collect MCP result before delegate", () => { + const checkResult = assertCheckpointState(result.events, "stopRunByDelegate", { + status: "stoppedByDelegate", + partialToolResults: [{}] as ToolCallInfo[], + pendingToolCalls: [{}, {}] as ToolCallInfo[], + }) + expect(checkResult.passed).toBe(true) + expect( + assertPartialResultsContain(result.events, "stopRunByDelegate", ["web_search_exa"]).passed, + ).toBe(true) + }) + + it("should resume with delegate result and process interactive", () => { + expect( + assertEventSequenceContains(result.events, [ + "stopRunByDelegate", + "startRun", + "completeRun", + "startRun", + "resumeToolCalls", + "callInteractiveTool", + "stopRunByInteractiveTool", + ]).passed, + ).toBe(true) + }) + + it("should have all partial results after interactive stop", () => { + const checkResult = assertCheckpointState(result.events, "stopRunByInteractiveTool", { + status: "stoppedByInteractiveTool", + partialToolResults: [{}, {}] as ToolCallInfo[], + pendingToolCalls: [{}] as ToolCallInfo[], + }) + expect(checkResult.passed).toBe(true) + }) +}) diff --git a/e2e/parallel-mcp.test.ts b/e2e/parallel-mcp.test.ts new file mode 100644 index 00000000..32e56571 --- /dev/null +++ b/e2e/parallel-mcp.test.ts @@ -0,0 +1,41 @@ +import { beforeAll, describe, expect, it } from "vitest" +import { assertEventSequenceContains, assertToolCallCount } from "./lib/assertions.js" +import { filterEventsByType } from "./lib/event-parser.js" +import { type RunResult, runExpert } from "./lib/runner.js" + +describe("Parallel MCP Tool Calls", () => { + let result: RunResult + + beforeAll(async () => { + result = await runExpert( + "e2e-parallel-mcp", + "Test parallel MCP: search TypeScript and JavaScript", + { + configPath: "./e2e/experts/parallel-mcp.toml", + timeout: 180000, + }, + ) + }, 200000) + + it("should execute multiple MCP tools in parallel", () => { + expect(assertToolCallCount(result.events, "callTools", 2).passed).toBe(true) + expect( + assertEventSequenceContains(result.events, ["startRun", "callTools", "resolveToolResults"]) + .passed, + ).toBe(true) + }) + + it("should resolve all MCP results before next step", () => { + const resolveEvents = filterEventsByType(result.events, "resolveToolResults") + const hasMultipleResults = resolveEvents.some((e) => { + const toolResults = (e as { toolResults?: unknown[] }).toolResults ?? [] + return toolResults.length >= 2 + }) + expect(hasMultipleResults).toBe(true) + }) + + it("should complete run successfully", () => { + expect(assertEventSequenceContains(result.events, ["completeRun"]).passed).toBe(true) + expect(result.exitCode).toBe(0) + }) +}) diff --git a/e2e/publish.test.ts b/e2e/publish.test.ts new file mode 100644 index 00000000..8f2cf4fa --- /dev/null +++ b/e2e/publish.test.ts @@ -0,0 +1,32 @@ +import { describe, expect, it } from "vitest" +import { runCli } from "./lib/runner.js" + +describe("CLI publish", () => { + it("should output JSON payload for valid expert with --dry-run", async () => { + const result = await runCli(["publish", "tic-tac-toe", "--dry-run"]) + expect(result.exitCode).toBe(0) + expect(result.stdout).toBeTruthy() + }) + + it("should fail for nonexistent expert", async () => { + const result = await runCli(["publish", "nonexistent", "--dry-run"]) + expect(result.exitCode).toBe(1) + }) + + it("should fail with nonexistent config file", async () => { + const result = await runCli([ + "publish", + "tic-tac-toe", + "--dry-run", + "--config", + "nonexistent.toml", + ]) + expect(result.exitCode).toBe(1) + }) + + it("should fail when no config in directory", async () => { + const result = await runCli(["publish", "tic-tac-toe", "--dry-run"], { cwd: "/tmp" }) + expect(result.exitCode).toBe(1) + }) +}) + diff --git a/e2e/run.test.ts b/e2e/run.test.ts new file mode 100644 index 00000000..13cb09f8 --- /dev/null +++ b/e2e/run.test.ts @@ -0,0 +1,25 @@ +import { describe, expect, it } from "vitest" +import { runCli } from "./lib/runner.js" + +describe("CLI run", () => { + it("should fail without arguments", async () => { + const result = await runCli(["run"]) + expect(result.exitCode).toBe(1) + }) + + it("should fail with only expert key", async () => { + const result = await runCli(["run", "expertOnly"]) + expect(result.exitCode).toBe(1) + }) + + it("should fail for nonexistent expert", async () => { + const result = await runCli(["run", "nonexistent-expert", "test query"]) + expect(result.exitCode).toBe(1) + }) + + it("should fail with nonexistent config file", async () => { + const result = await runCli(["run", "expert", "query", "--config", "nonexistent.toml"]) + expect(result.exitCode).toBe(1) + }) +}) + diff --git a/e2e/special-tools.test.ts b/e2e/special-tools.test.ts new file mode 100644 index 00000000..e227cb68 --- /dev/null +++ b/e2e/special-tools.test.ts @@ -0,0 +1,68 @@ +import { beforeAll, describe, expect, it } from "vitest" +import { assertEventSequenceContains, assertToolCallCount } from "./lib/assertions.js" +import { filterEventsByType } from "./lib/event-parser.js" +import { type RunResult, runExpert } from "./lib/runner.js" + +describe("Special Tools Parallel Execution", () => { + let result: RunResult + + beforeAll(async () => { + result = await runExpert( + "e2e-special-tools", + "Test all special tools: think, read the PDF, read the GIF image, and search", + { + configPath: "./e2e/experts/special-tools.toml", + timeout: 180000, + }, + ) + }, 200000) + + it("should execute all 4 tools in parallel", () => { + expect(assertToolCallCount(result.events, "callTools", 4).passed).toBe(true) + expect( + assertEventSequenceContains(result.events, ["startRun", "callTools", "resolveToolResults"]) + .passed, + ).toBe(true) + }) + + it("should resolve all tool results together", () => { + const resolveEvents = filterEventsByType(result.events, "resolveToolResults") + const hasAllResults = resolveEvents.some((e) => { + const toolResults = (e as { toolResults?: { toolName: string }[] }).toolResults ?? [] + return toolResults.length >= 4 + }) + expect(hasAllResults).toBe(true) + }) + + it("should include think tool in resolved results", () => { + const resolveEvents = filterEventsByType(result.events, "resolveToolResults") + const hasThinkResult = resolveEvents.some((e) => { + const toolResults = (e as { toolResults?: { toolName: string }[] }).toolResults ?? [] + return toolResults.some((tr) => tr.toolName === "think") + }) + expect(hasThinkResult).toBe(true) + }) + + it("should include readPdfFile in resolved results", () => { + const resolveEvents = filterEventsByType(result.events, "resolveToolResults") + const hasPdfResult = resolveEvents.some((e) => { + const toolResults = (e as { toolResults?: { toolName: string }[] }).toolResults ?? [] + return toolResults.some((tr) => tr.toolName === "readPdfFile") + }) + expect(hasPdfResult).toBe(true) + }) + + it("should include readImageFile in resolved results", () => { + const resolveEvents = filterEventsByType(result.events, "resolveToolResults") + const hasImageResult = resolveEvents.some((e) => { + const toolResults = (e as { toolResults?: { toolName: string }[] }).toolResults ?? [] + return toolResults.some((tr) => tr.toolName === "readImageFile") + }) + expect(hasImageResult).toBe(true) + }) + + it("should complete run successfully", () => { + expect(assertEventSequenceContains(result.events, ["completeRun"]).passed).toBe(true) + expect(result.exitCode).toBe(0) + }) +}) diff --git a/e2e/status.test.ts b/e2e/status.test.ts new file mode 100644 index 00000000..3f67f758 --- /dev/null +++ b/e2e/status.test.ts @@ -0,0 +1,20 @@ +import { describe, expect, it } from "vitest" +import { runCli } from "./lib/runner.js" + +describe("CLI status", () => { + it("should fail without version", async () => { + const result = await runCli(["status", "no-version", "available"]) + expect(result.exitCode).toBe(1) + }) + + it("should fail without status value", async () => { + const result = await runCli(["status", "expert@1.0.0"]) + expect(result.exitCode).toBe(1) + }) + + it("should fail with invalid status value", async () => { + const result = await runCli(["status", "expert@1.0.0", "invalid-status"]) + expect(result.exitCode).toBe(1) + }) +}) + diff --git a/e2e/tag.test.ts b/e2e/tag.test.ts new file mode 100644 index 00000000..d5079eb7 --- /dev/null +++ b/e2e/tag.test.ts @@ -0,0 +1,15 @@ +import { describe, expect, it } from "vitest" +import { runCli } from "./lib/runner.js" + +describe("CLI tag", () => { + it("should fail without version", async () => { + const result = await runCli(["tag", "no-version", "tag1"]) + expect(result.exitCode).toBe(1) + }) + + it("should fail without tags", async () => { + const result = await runCli(["tag", "expert@1.0.0"]) + expect(result.exitCode).toBe(1) + }) +}) + diff --git a/e2e/tsconfig.json b/e2e/tsconfig.json new file mode 100644 index 00000000..4e6d45a5 --- /dev/null +++ b/e2e/tsconfig.json @@ -0,0 +1,13 @@ +{ + "extends": "@tsconfig/node22/tsconfig.json", + "compilerOptions": { + "resolveJsonModule": true, + "paths": { + "@perstack/core": ["../packages/core/src/index.ts"], + "@perstack/runtime": ["../packages/runtime/src/index.ts"] + } + }, + "include": ["**/*.ts"], + "exclude": ["node_modules"] +} + diff --git a/e2e/unpublish.test.ts b/e2e/unpublish.test.ts new file mode 100644 index 00000000..6df3e85e --- /dev/null +++ b/e2e/unpublish.test.ts @@ -0,0 +1,16 @@ +import { describe, expect, it } from "vitest" +import { runCli } from "./lib/runner.js" + +describe("CLI unpublish", () => { + it("should fail without version", async () => { + const result = await runCli(["unpublish", "no-version", "--force"]) + expect(result.exitCode).toBe(1) + }) + + it("should fail without --force when version provided", async () => { + const result = await runCli(["unpublish", "expert@1.0.0"]) + expect(result.exitCode).toBe(1) + expect(result.stderr).toContain("--force") + }) +}) + diff --git a/knip.json b/knip.json index a25dca4c..c9bbe578 100644 --- a/knip.json +++ b/knip.json @@ -3,7 +3,7 @@ "ignoreDependencies": ["@tsconfig/node22", "vitest", "ts-dedent"], "ignoreExportsUsedInFile": true, "ignoreBinaries": ["perstack"], - "ignore": ["dist/**/*", "**/*.test.ts", "**/*.test.tsx", "docs/content/**/*", "scripts/**/*", "examples/**/*"], + "ignore": ["dist/**/*", "**/*.test.ts", "**/*.test.tsx", "docs/content/**/*", "scripts/**/*", "examples/**/*", "e2e/**/*"], "workspaces": { "packages/perstack": { "entry": ["bin/cli.ts", "src/**/*.ts"] diff --git a/package.json b/package.json index d9c6c108..d3df9f32 100644 --- a/package.json +++ b/package.json @@ -12,6 +12,7 @@ "release": "pnpm run clean && pnpm run build && changeset publish", "test": "vitest run --project unit --coverage --coverage.reporter=lcov --coverage.reporter=text", "test:watch": "vitest watch --project unit", + "test:e2e": "vitest run --project e2e", "format-and-lint": "biome check .", "format-and-lint:fix": "biome check . --write", "typecheck": "turbo run typecheck --continue", diff --git a/packages/api-client/test/test-data.ts b/packages/api-client/test/test-data.ts index 14b21b4e..5b728f26 100644 --- a/packages/api-client/test/test-data.ts +++ b/packages/api-client/test/test-data.ts @@ -208,8 +208,8 @@ export const runtimeStep: z.input = { stepNumber: 1, inputMessages: [], newMessages: [], - toolCall: undefined, - toolResult: undefined, + toolCalls: undefined, + toolResults: undefined, usage: { inputTokens: 100, outputTokens: 100, diff --git a/packages/core/src/schemas/checkpoint.ts b/packages/core/src/schemas/checkpoint.ts index 68535499..9d5ae44c 100644 --- a/packages/core/src/schemas/checkpoint.ts +++ b/packages/core/src/schemas/checkpoint.ts @@ -1,6 +1,10 @@ import { z } from "zod" import type { Message } from "./message.js" import { messageSchema } from "./message.js" +import type { ToolCall } from "./tool-call.js" +import { toolCallSchema } from "./tool-call.js" +import type { ToolResult } from "./tool-result.js" +import { toolResultSchema } from "./tool-result.js" import type { Usage } from "./usage.js" import { usageSchema } from "./usage.js" @@ -84,6 +88,10 @@ export interface Checkpoint { contextWindow?: number /** Context window usage ratio (0-1) */ contextWindowUsage?: number + /** Tool calls waiting to be processed (for resume after delegate/interactive) */ + pendingToolCalls?: ToolCall[] + /** Partial tool results collected before stopping (for resume) */ + partialToolResults?: ToolResult[] } export const checkpointSchema = z.object({ @@ -124,5 +132,7 @@ export const checkpointSchema = z.object({ usage: usageSchema, contextWindow: z.number().optional(), contextWindowUsage: z.number().optional(), + pendingToolCalls: z.array(toolCallSchema).optional(), + partialToolResults: z.array(toolResultSchema).optional(), }) checkpointSchema satisfies z.ZodType diff --git a/packages/core/src/schemas/message-part.ts b/packages/core/src/schemas/message-part.ts index 2416e6ed..944cdc80 100644 --- a/packages/core/src/schemas/message-part.ts +++ b/packages/core/src/schemas/message-part.ts @@ -147,7 +147,7 @@ export interface ToolResultPart extends BasePart { /** Name of the tool that was called */ toolName: string /** Content of the tool result */ - contents: (TextPart | ImageInlinePart)[] + contents: (TextPart | ImageInlinePart | FileInlinePart)[] /** Whether the tool call resulted in an error */ isError?: boolean } @@ -156,7 +156,7 @@ export const toolResultPartSchema = basePartSchema.extend({ type: z.literal("toolResultPart"), toolCallId: z.string(), toolName: z.string(), - contents: z.array(z.union([textPartSchema, imageInlinePartSchema])), + contents: z.array(z.union([textPartSchema, imageInlinePartSchema, fileInlinePartSchema])), isError: z.boolean().optional(), }) toolResultPartSchema satisfies z.ZodType diff --git a/packages/core/src/schemas/runtime.ts b/packages/core/src/schemas/runtime.ts index dfb9fc18..1740a185 100644 --- a/packages/core/src/schemas/runtime.ts +++ b/packages/core/src/schemas/runtime.ts @@ -222,13 +222,13 @@ type ExpertEventPayloads = { retry: { reason: string newMessages: (UserMessage | ExpertMessage | ToolMessage)[] - toolCall?: ToolCall - toolResult?: ToolResult + toolCalls?: ToolCall[] + toolResults?: ToolResult[] usage: Usage } - callTool: { + callTools: { newMessage: ExpertMessage - toolCall: ToolCall + toolCalls: ToolCall[] usage: Usage } callInteractiveTool: { @@ -241,8 +241,8 @@ type ExpertEventPayloads = { toolCall: ToolCall usage: Usage } - resolveToolResult: { - toolResult: ToolResult + resolveToolResults: { + toolResults: ToolResult[] } resolveThought: { toolResult: ToolResult @@ -259,6 +259,13 @@ type ExpertEventPayloads = { finishToolCall: { newMessages: (UserMessage | ToolMessage)[] } + resumeToolCalls: { + pendingToolCalls: ToolCall[] + partialToolResults: ToolResult[] + } + finishAllToolCalls: { + newMessages: (UserMessage | ToolMessage)[] + } continueToNextStep: { checkpoint: Checkpoint step: Step @@ -331,15 +338,17 @@ export function createEvent(type: T) { export const startRun = createEvent("startRun") export const startGeneration = createEvent("startGeneration") export const retry = createEvent("retry") -export const callTool = createEvent("callTool") +export const callTools = createEvent("callTools") export const callInteractiveTool = createEvent("callInteractiveTool") export const callDelegate = createEvent("callDelegate") -export const resolveToolResult = createEvent("resolveToolResult") +export const resolveToolResults = createEvent("resolveToolResults") export const resolveThought = createEvent("resolveThought") export const resolvePdfFile = createEvent("resolvePdfFile") export const resolveImageFile = createEvent("resolveImageFile") export const attemptCompletion = createEvent("attemptCompletion") export const finishToolCall = createEvent("finishToolCall") +export const resumeToolCalls = createEvent("resumeToolCalls") +export const finishAllToolCalls = createEvent("finishAllToolCalls") export const completeRun = createEvent("completeRun") export const stopRunByInteractiveTool = createEvent("stopRunByInteractiveTool") export const stopRunByDelegate = createEvent("stopRunByDelegate") diff --git a/packages/core/src/schemas/step.ts b/packages/core/src/schemas/step.ts index 60aecab7..23b7a735 100644 --- a/packages/core/src/schemas/step.ts +++ b/packages/core/src/schemas/step.ts @@ -15,7 +15,7 @@ import { usageSchema } from "./usage.js" /** * A single execution step in an Expert run. - * Each step represents one LLM generation cycle, optionally followed by a tool call. + * Each step represents one LLM generation cycle, optionally followed by tool calls. */ export interface Step { /** Sequential step number (1-indexed) */ @@ -24,10 +24,14 @@ export interface Step { inputMessages?: (InstructionMessage | UserMessage | ToolMessage)[] /** Messages generated during this step */ newMessages: Message[] - /** Tool call made during this step, if any */ - toolCall?: ToolCall - /** Result of the tool call, if any */ - toolResult?: ToolResult + /** Tool calls made during this step, if any */ + toolCalls?: ToolCall[] + /** Results of the tool calls, if any */ + toolResults?: ToolResult[] + /** Tool calls waiting to be processed (sorted: MCP → Delegate → Interactive) */ + pendingToolCalls?: ToolCall[] + /** Partial tool results collected so far (used during mixed tool call processing) */ + partialToolResults?: ToolResult[] /** Token usage for this step */ usage: Usage /** Unix timestamp (ms) when step started */ @@ -42,8 +46,10 @@ export const stepSchema = z.object({ .array(z.union([instructionMessageSchema, userMessageSchema, toolMessageSchema])) .optional(), newMessages: z.array(messageSchema), - toolCall: toolCallSchema.optional(), - toolResult: toolResultSchema.optional(), + toolCalls: z.array(toolCallSchema).optional(), + toolResults: z.array(toolResultSchema).optional(), + pendingToolCalls: z.array(toolCallSchema).optional(), + partialToolResults: z.array(toolResultSchema).optional(), usage: usageSchema, startedAt: z.number(), finishedAt: z.number().optional(), diff --git a/packages/perstack/src/lib/tui.tsx b/packages/perstack/src/lib/tui.tsx index caa8dd7d..cd50ed99 100644 --- a/packages/perstack/src/lib/tui.tsx +++ b/packages/perstack/src/lib/tui.tsx @@ -23,35 +23,37 @@ export function defaultEventListener(e: RunEvent): void { debug(e.reason) break } - case "callTool": { - log(`${header(e)} Calling tool`) - if (e.toolCall.skillName === "@perstack/base") { - switch (e.toolCall.toolName) { + case "callTools": { + log(`${header(e)} Calling ${e.toolCalls.length} tool(s)`) + for (const toolCall of e.toolCalls) { + if (toolCall.skillName === "@perstack/base") { + switch (toolCall.toolName) { case "think": { - const thought = e.toolCall.args.thought + const thought = toolCall.args.thought log(`${header(e)} Thought Updated:`) debug(thought) break } case "readPdfFile": { - const path = e.toolCall.args.path + const path = toolCall.args.path log(`${header(e)} Reading PDF: ${path}`) break } case "readImageFile": { - const path = e.toolCall.args.path + const path = toolCall.args.path log(`${header(e)} Reading Image: ${path}`) break } default: { - log(`${header(e)} Tool: ${e.toolCall.skillName}/${e.toolCall.toolName}`) - debug(`${header(e)} Args: ${JSON.stringify(e.toolCall.args, null, 2)}`) + log(`${header(e)} Tool: ${toolCall.skillName}/${toolCall.toolName}`) + debug(`${header(e)} Args: ${JSON.stringify(toolCall.args, null, 2)}`) break } } } else { - log(`${header(e)} Tool: ${e.toolCall.skillName}/${e.toolCall.toolName}`) - debug(`${header(e)} Args: ${JSON.stringify(e.toolCall.args, null, 2)}`) + log(`${header(e)} Tool: ${toolCall.skillName}/${toolCall.toolName}`) + debug(`${header(e)} Args: ${JSON.stringify(toolCall.args, null, 2)}`) + } } break } @@ -67,12 +69,13 @@ export function defaultEventListener(e: RunEvent): void { debug(`${header(e)} Args: ${JSON.stringify(e.toolCall.args, null, 2)}`) break } - case "resolveToolResult": { - log(`${header(e)} Resolved Tool Result`) - if (e.toolResult.skillName === "@perstack/base") { - switch (e.toolResult.toolName) { + case "resolveToolResults": { + log(`${header(e)} Resolved ${e.toolResults.length} Tool Result(s)`) + for (const toolResult of e.toolResults) { + if (toolResult.skillName === "@perstack/base") { + switch (toolResult.toolName) { case "todo": { - const text = e.toolResult.result.find((r) => r.type === "textPart")?.text + const text = toolResult.result.find((r) => r.type === "textPart")?.text const { todos } = JSON.parse(text ?? "{}") as { todos: { id: number @@ -87,14 +90,15 @@ export function defaultEventListener(e: RunEvent): void { break } default: { - log(`${header(e)} Tool: ${e.toolResult.skillName}/${e.toolResult.toolName}`) - debug(`${header(e)} Result: ${JSON.stringify(e.toolResult.result, null, 2)}`) + log(`${header(e)} Tool: ${toolResult.skillName}/${toolResult.toolName}`) + debug(`${header(e)} Result: ${JSON.stringify(toolResult.result, null, 2)}`) break } } } else { - log(`${header(e)} Tool: ${e.toolResult.skillName}/${e.toolResult.toolName}`) - debug(`${header(e)} Result: ${JSON.stringify(e.toolResult.result, null, 2)}`) + log(`${header(e)} Tool: ${toolResult.skillName}/${toolResult.toolName}`) + debug(`${header(e)} Result: ${JSON.stringify(toolResult.result, null, 2)}`) + } } break } diff --git a/packages/runtime/README.md b/packages/runtime/README.md index 70e94a54..7c933253 100644 --- a/packages/runtime/README.md +++ b/packages/runtime/README.md @@ -40,7 +40,7 @@ The `eventListener` callback receives a `RunEvent` object, which provides granul ```typescript type RunEvent = { - type: EventType // e.g., "startRun", "callTool" + type: EventType // e.g., "startRun", "callTools" id: string // Unique event ID timestamp: number // Unix timestamp runId: string // ID of the current run @@ -53,9 +53,9 @@ You can narrow down the event type to access specific properties: ```typescript eventListener: (event) => { - if (event.type === "callTool") { - // event is now narrowed to the callTool event type - console.log(`Executing tool: ${event.toolCall.name}`) + if (event.type === "callTools") { + // event is now narrowed to the callTools event type + console.log(`Executing ${event.toolCalls.length} tools`) } } ``` @@ -185,19 +185,21 @@ stateDiagram-v2 [*] --> Init Init --> PreparingForStep: startRun PreparingForStep --> GeneratingToolCall: startGeneration + PreparingForStep --> CallingTools: resumeToolCalls + PreparingForStep --> FinishingStep: finishAllToolCalls - GeneratingToolCall --> CallingTool: callTool - GeneratingToolCall --> CallingInteractiveTool: callInteractiveTool - GeneratingToolCall --> CallingDelegate: callDelegate + GeneratingToolCall --> CallingTools: callTools GeneratingToolCall --> FinishingStep: retry - CallingTool --> ResolvingToolResult: resolveToolResult - CallingTool --> ResolvingThought: resolveThought - CallingTool --> ResolvingPdfFile: resolvePdfFile - CallingTool --> ResolvingImageFile: resolveImageFile - CallingTool --> GeneratingRunResult: attemptCompletion + CallingTools --> ResolvingToolResults: resolveToolResults + CallingTools --> ResolvingThought: resolveThought + CallingTools --> ResolvingPdfFile: resolvePdfFile + CallingTools --> ResolvingImageFile: resolveImageFile + CallingTools --> GeneratingRunResult: attemptCompletion + CallingTools --> CallingDelegate: callDelegate + CallingTools --> CallingInteractiveTool: callInteractiveTool - ResolvingToolResult --> FinishingStep: finishToolCall + ResolvingToolResults --> FinishingStep: finishToolCall ResolvingThought --> FinishingStep: finishToolCall ResolvingPdfFile --> FinishingStep: finishToolCall ResolvingImageFile --> FinishingStep: finishToolCall @@ -216,8 +218,9 @@ stateDiagram-v2 Events trigger state transitions. They are emitted by the runtime logic or external inputs. - **Lifecycle**: `startRun`, `startGeneration`, `continueToNextStep`, `completeRun` -- **Tool Execution**: `callTool`, `resolveToolResult`, `finishToolCall` +- **Tool Execution**: `callTools`, `resolveToolResults`, `finishToolCall`, `resumeToolCalls`, `finishAllToolCalls` - **Special Types**: `resolveThought`, `resolvePdfFile`, `resolveImageFile` +- **Mixed Tool Calls**: `callDelegate`, `callInteractiveTool` (from CallingTools state) - **Interruption**: `stopRunByInteractiveTool`, `stopRunByDelegate`, `stopRunByExceededMaxSteps` - **Error Handling**: `retry` diff --git a/packages/runtime/src/checkpoint-helpers.ts b/packages/runtime/src/checkpoint-helpers.ts index 189a3af2..923acb9c 100644 --- a/packages/runtime/src/checkpoint-helpers.ts +++ b/packages/runtime/src/checkpoint-helpers.ts @@ -76,6 +76,8 @@ export function buildDelegationReturnState( ...parentCheckpoint, stepNumber: resultCheckpoint.stepNumber, usage: resultCheckpoint.usage, + pendingToolCalls: parentCheckpoint.pendingToolCalls, + partialToolResults: parentCheckpoint.partialToolResults, }, } } @@ -118,6 +120,8 @@ export function buildDelegateToState( checkpointId: resultCheckpoint.id, }, usage: resultCheckpoint.usage, + pendingToolCalls: undefined, + partialToolResults: undefined, }, } } diff --git a/packages/runtime/src/messages/message.ts b/packages/runtime/src/messages/message.ts index 907cc749..a51c0294 100644 --- a/packages/runtime/src/messages/message.ts +++ b/packages/runtime/src/messages/message.ts @@ -66,7 +66,9 @@ export function createExpertMessage( export function createToolMessage( contents: Array< Omit & { - contents: Array | Omit> + contents: Array< + Omit | Omit | Omit + > } >, ): ToolMessage { @@ -244,11 +246,12 @@ function toolResultPartToCoreToolResultPart(part: ToolResultPart): ToolResultMod output: { type: "text" as const, value: contents[0].text }, } } - const contentValue = contents.map((content) => - content.type === "textPart" - ? { type: "text" as const, text: content.text } - : { type: "media" as const, data: content.encodedData, mediaType: content.mimeType }, - ) + const contentValue = contents.map((content) => { + if (content.type === "textPart") { + return { type: "text" as const, text: content.text } + } + return { type: "media" as const, data: content.encodedData, mediaType: content.mimeType } + }) return { type: "tool-result", toolCallId: part.toolCallId, diff --git a/packages/runtime/src/runtime-state-machine.ts b/packages/runtime/src/runtime-state-machine.ts index 985d7df4..0fbb3f95 100644 --- a/packages/runtime/src/runtime-state-machine.ts +++ b/packages/runtime/src/runtime-state-machine.ts @@ -61,6 +61,8 @@ export const runtimeStateMachine = setup({ ...context.checkpoint, status: "proceeding", messages: [...context.checkpoint.messages, ...event.inputMessages], + pendingToolCalls: event.initialCheckpoint.pendingToolCalls, + partialToolResults: event.initialCheckpoint.partialToolResults, }) satisfies Checkpoint, step: ({ context, event }) => ({ @@ -87,6 +89,41 @@ export const runtimeStateMachine = setup({ }) satisfies Step, }), }, + resumeToolCalls: { + target: "CallingTool", + actions: assign({ + step: ({ context, event }) => + ({ + stepNumber: context.checkpoint.stepNumber, + inputMessages: context.step.inputMessages ?? [], + newMessages: context.step.newMessages, + toolCalls: context.step.toolCalls, + toolResults: event.partialToolResults, + pendingToolCalls: event.pendingToolCalls, + usage: context.step.usage, + startedAt: context.step.startedAt, + }) satisfies Step, + }), + }, + finishAllToolCalls: { + target: "FinishingStep", + actions: assign({ + checkpoint: ({ context, event }) => + ({ + ...context.checkpoint, + messages: [...context.checkpoint.messages, ...event.newMessages], + pendingToolCalls: undefined, + partialToolResults: undefined, + }) satisfies Checkpoint, + step: ({ context, event }) => + ({ + ...context.step, + newMessages: [...context.step.newMessages, ...event.newMessages], + toolResults: context.checkpoint.partialToolResults, + pendingToolCalls: undefined, + }) satisfies Step, + }), + }, }, }, @@ -105,13 +142,13 @@ export const runtimeStateMachine = setup({ ({ ...context.step, newMessages: event.newMessages, - toolCall: event.toolCall, - toolResult: event.toolResult, + toolCalls: event.toolCalls, + toolResults: event.toolResults, usage: sumUsage(context.step.usage, event.usage), }) satisfies Step, }), }, - callTool: { + callTools: { target: "CallingTool", actions: assign({ checkpoint: ({ context, event }) => @@ -127,7 +164,7 @@ export const runtimeStateMachine = setup({ ({ ...context.step, newMessages: [event.newMessage], - toolCall: event.toolCall, + toolCalls: event.toolCalls, usage: sumUsage(context.step.usage, event.usage), }) satisfies Step, }), @@ -148,7 +185,7 @@ export const runtimeStateMachine = setup({ ({ ...context.step, newMessages: [event.newMessage], - toolCall: event.toolCall, + toolCalls: [event.toolCall], usage: sumUsage(context.step.usage, event.usage), }) satisfies Step, }), @@ -169,7 +206,7 @@ export const runtimeStateMachine = setup({ ({ ...context.step, newMessages: [event.newMessage], - toolCall: event.toolCall, + toolCalls: [event.toolCall], usage: sumUsage(context.step.usage, event.usage), }) satisfies Step, }), @@ -179,13 +216,14 @@ export const runtimeStateMachine = setup({ CallingTool: { on: { - resolveToolResult: { + resolveToolResults: { target: "ResolvingToolResult", actions: assign({ step: ({ context, event }) => ({ ...context.step, - toolResult: event.toolResult, + toolResults: event.toolResults, + pendingToolCalls: undefined, }) satisfies Step, }), }, @@ -195,7 +233,7 @@ export const runtimeStateMachine = setup({ step: ({ context, event }) => ({ ...context.step, - toolResult: event.toolResult, + toolResults: [event.toolResult], }) satisfies Step, }), }, @@ -205,7 +243,7 @@ export const runtimeStateMachine = setup({ step: ({ context, event }) => ({ ...context.step, - toolResult: event.toolResult, + toolResults: [event.toolResult], }) satisfies Step, }), }, @@ -215,7 +253,7 @@ export const runtimeStateMachine = setup({ step: ({ context, event }) => ({ ...context.step, - toolResult: event.toolResult, + toolResults: [event.toolResult], }) satisfies Step, }), }, @@ -225,7 +263,33 @@ export const runtimeStateMachine = setup({ step: ({ context, event }) => ({ ...context.step, - toolResult: event.toolResult, + toolResults: [event.toolResult], + }) satisfies Step, + }), + }, + callDelegate: { + target: "CallingDelegate", + actions: assign({ + step: ({ context }) => + ({ + ...context.step, + toolCalls: context.step.toolCalls, + toolResults: context.step.toolResults, + pendingToolCalls: context.step.pendingToolCalls, + partialToolResults: context.step.partialToolResults, + }) satisfies Step, + }), + }, + callInteractiveTool: { + target: "CallingInteractiveTool", + actions: assign({ + step: ({ context }) => + ({ + ...context.step, + toolCalls: context.step.toolCalls, + toolResults: context.step.toolResults, + pendingToolCalls: context.step.pendingToolCalls, + partialToolResults: context.step.partialToolResults, }) satisfies Step, }), }, @@ -327,8 +391,8 @@ export const runtimeStateMachine = setup({ ({ ...context.step, newMessages: event.newMessages, - toolCall: event.toolCall, - toolResult: event.toolResult, + toolCalls: event.toolCalls, + toolResults: event.toolResults, usage: sumUsage(context.step.usage, event.usage), }) satisfies Step, }), diff --git a/packages/runtime/src/states/calling-delegate.test.ts b/packages/runtime/src/states/calling-delegate.test.ts index 1a789c6b..6df15e15 100644 --- a/packages/runtime/src/states/calling-delegate.test.ts +++ b/packages/runtime/src/states/calling-delegate.test.ts @@ -8,12 +8,14 @@ describe("@perstack/runtime: StateMachineLogic['CallingDelegate']", () => { const setting = createRunSetting() const checkpoint = createCheckpoint() const step = createStep({ - toolCall: { + pendingToolCalls: [ + { id: "tc_123", skillName: "@perstack/math-expert", toolName: "@perstack/math-expert", args: { query: "Calculate 2 + 2" }, }, + ], }) const skillManagers = { "@perstack/math-expert": { @@ -62,6 +64,15 @@ describe("@perstack/runtime: StateMachineLogic['CallingDelegate']", () => { toolName: "@perstack/math-expert", query: "Calculate 2 + 2", }, + pendingToolCalls: [ + { + id: "tc_123", + skillName: "@perstack/math-expert", + toolName: "@perstack/math-expert", + args: { query: "Calculate 2 + 2" }, + }, + ], + partialToolResults: undefined, }, step: { ...step, @@ -74,7 +85,7 @@ describe("@perstack/runtime: StateMachineLogic['CallingDelegate']", () => { const setting = createRunSetting() const checkpoint = createCheckpoint() const step = createStep({ - toolCall: undefined, + pendingToolCalls: undefined, }) await expect( StateMachineLogics.CallingDelegate({ @@ -84,19 +95,21 @@ describe("@perstack/runtime: StateMachineLogic['CallingDelegate']", () => { eventListener: async () => {}, skillManagers: {}, }), - ).rejects.toThrow("No tool call found") + ).rejects.toThrow("No pending tool calls found") }) it("throws error when skill manager missing", async () => { const setting = createRunSetting() const checkpoint = createCheckpoint() const step = createStep({ - toolCall: { + pendingToolCalls: [ + { id: "tc_123", skillName: "@perstack/math-expert", toolName: "@perstack/math-expert", args: { query: "Calculate 2 + 2" }, }, + ], }) const skillManagers = { "@perstack/math-expert": { @@ -128,12 +141,14 @@ describe("@perstack/runtime: StateMachineLogic['CallingDelegate']", () => { const setting = createRunSetting() const checkpoint = createCheckpoint() const step = createStep({ - toolCall: { + pendingToolCalls: [ + { id: "tc_123", skillName: "@perstack/math-expert", toolName: "@perstack/math-expert", args: { query: undefined }, }, + ], }) const skillManagers = { "@perstack/math-expert": { diff --git a/packages/runtime/src/states/calling-delegate.ts b/packages/runtime/src/states/calling-delegate.ts index 071da516..57a16063 100644 --- a/packages/runtime/src/states/calling-delegate.ts +++ b/packages/runtime/src/states/calling-delegate.ts @@ -8,10 +8,14 @@ export async function callingDelegateLogic({ step, skillManagers, }: RunSnapshot["context"]): Promise { - if (!step.toolCall) { - throw new Error("No tool call found") + if (!step.pendingToolCalls || step.pendingToolCalls.length === 0) { + throw new Error("No pending tool calls found") } - const { id, toolName, args } = step.toolCall + const toolCall = step.pendingToolCalls[0] + if (!toolCall) { + throw new Error("No pending tool call found") + } + const { id, toolName, args } = toolCall const skillManager = await getSkillManagerByToolName(skillManagers, toolName) if (!skillManager.expert) { throw new Error(`Delegation error: skill manager "${toolName}" not found`) @@ -19,6 +23,8 @@ export async function callingDelegateLogic({ if (!args || !args.query || typeof args.query !== "string") { throw new Error("Delegation error: query is undefined") } + const currentToolCall = step.pendingToolCalls[0] + const remainingToolCalls = step.pendingToolCalls.slice(1) return stopRunByDelegate(setting, checkpoint, { checkpoint: { ...checkpoint, @@ -33,6 +39,8 @@ export async function callingDelegateLogic({ toolName, query: args.query, }, + pendingToolCalls: [currentToolCall, ...remainingToolCalls], + partialToolResults: step.partialToolResults, }, step: { ...step, diff --git a/packages/runtime/src/states/calling-interactive-tool.test.ts b/packages/runtime/src/states/calling-interactive-tool.test.ts index 729663ab..e5bbcce4 100644 --- a/packages/runtime/src/states/calling-interactive-tool.test.ts +++ b/packages/runtime/src/states/calling-interactive-tool.test.ts @@ -7,12 +7,14 @@ describe("@perstack/runtime: StateMachineLogic['CallingInteractiveTool']", () => const setting = createRunSetting() const checkpoint = createCheckpoint() const step = createStep({ - toolCall: { + pendingToolCalls: [ + { id: "tc_interactive_123", skillName: "interactive", toolName: "humanApproval", args: { message: "Please approve this action" }, }, + ], }) await expect( StateMachineLogics.CallingInteractiveTool({ @@ -32,6 +34,15 @@ describe("@perstack/runtime: StateMachineLogic['CallingInteractiveTool']", () => checkpoint: { ...checkpoint, status: "stoppedByInteractiveTool", + pendingToolCalls: [ + { + id: "tc_interactive_123", + skillName: "interactive", + toolName: "humanApproval", + args: { message: "Please approve this action" }, + }, + ], + partialToolResults: undefined, }, step: { ...step, @@ -39,4 +50,83 @@ describe("@perstack/runtime: StateMachineLogic['CallingInteractiveTool']", () => }, }) }) + + it("throws error when pendingToolCalls is empty", async () => { + const setting = createRunSetting() + const checkpoint = createCheckpoint() + const step = createStep({ pendingToolCalls: [] }) + await expect( + StateMachineLogics.CallingInteractiveTool({ + setting, + checkpoint, + step, + eventListener: async () => {}, + skillManagers: {}, + }), + ).rejects.toThrow("No pending tool calls found") + }) + + it("throws error when pendingToolCalls is undefined", async () => { + const setting = createRunSetting() + const checkpoint = createCheckpoint() + const step = createStep({ pendingToolCalls: undefined }) + await expect( + StateMachineLogics.CallingInteractiveTool({ + setting, + checkpoint, + step, + eventListener: async () => {}, + skillManagers: {}, + }), + ).rejects.toThrow("No pending tool calls found") + }) + + it("preserves remaining tool calls in pendingToolCalls", async () => { + const setting = createRunSetting() + const checkpoint = createCheckpoint() + const step = createStep({ + pendingToolCalls: [ + { id: "tc_1", skillName: "interactive", toolName: "tool1", args: {} }, + { id: "tc_2", skillName: "interactive", toolName: "tool2", args: {} }, + ], + }) + const result = await StateMachineLogics.CallingInteractiveTool({ + setting, + checkpoint, + step, + eventListener: async () => {}, + skillManagers: {}, + }) + expect(result.type).toBe("stopRunByInteractiveTool") + if (result.type === "stopRunByInteractiveTool") { + expect(result.checkpoint.pendingToolCalls).toHaveLength(2) + expect(result.checkpoint.pendingToolCalls?.[0]?.id).toBe("tc_1") + expect(result.checkpoint.pendingToolCalls?.[1]?.id).toBe("tc_2") + } + }) + + it("preserves partialToolResults in checkpoint", async () => { + const setting = createRunSetting() + const checkpoint = createCheckpoint() + const partialToolResults = [ + { id: "tc_0", skillName: "mcp", toolName: "prevTool", result: [] }, + ] + const step = createStep({ + pendingToolCalls: [ + { id: "tc_1", skillName: "interactive", toolName: "tool1", args: {} }, + ], + partialToolResults, + }) + const result = await StateMachineLogics.CallingInteractiveTool({ + setting, + checkpoint, + step, + eventListener: async () => {}, + skillManagers: {}, + }) + expect(result.type).toBe("stopRunByInteractiveTool") + if (result.type === "stopRunByInteractiveTool") { + expect(result.checkpoint.partialToolResults).toEqual(partialToolResults) + } + }) }) diff --git a/packages/runtime/src/states/calling-interactive-tool.ts b/packages/runtime/src/states/calling-interactive-tool.ts index 48a855fd..4695b868 100644 --- a/packages/runtime/src/states/calling-interactive-tool.ts +++ b/packages/runtime/src/states/calling-interactive-tool.ts @@ -6,10 +6,17 @@ export async function callingInteractiveToolLogic({ checkpoint, step, }: RunSnapshot["context"]): Promise { + if (!step.pendingToolCalls || step.pendingToolCalls.length === 0) { + throw new Error("No pending tool calls found") + } + const currentToolCall = step.pendingToolCalls[0] + const remainingToolCalls = step.pendingToolCalls.slice(1) return stopRunByInteractiveTool(setting, checkpoint, { checkpoint: { ...checkpoint, status: "stoppedByInteractiveTool", + pendingToolCalls: [currentToolCall, ...remainingToolCalls], + partialToolResults: step.partialToolResults, }, step: { ...step, diff --git a/packages/runtime/src/states/calling-tool.test.ts b/packages/runtime/src/states/calling-tool.test.ts index cc2291f2..538fe132 100644 --- a/packages/runtime/src/states/calling-tool.test.ts +++ b/packages/runtime/src/states/calling-tool.test.ts @@ -1,28 +1,43 @@ import { createId } from "@paralleldrive/cuid2" -import { describe, expect, it } from "vitest" +import { describe, expect, it, vi } from "vitest" import { createCheckpoint, createRunSetting, createStep } from "../../test/run-params.js" import type { BaseSkillManager } from "../skill-manager/index.js" import { callingToolLogic } from "./calling-tool.js" +type CallToolResult = Array<{ type: string; text?: string; id: string }> +type CallToolFn = (toolName: string, args: unknown) => Promise + function createMockMcpSkillManager( name: string, - toolName: string, - callToolResult: Array<{ type: string; text?: string; id: string }> = [ - { type: "textPart", text: "Tool executed successfully", id: createId() }, - ], + toolNames: string | string[], + callToolFnOrResult?: CallToolFn | CallToolResult, ): BaseSkillManager { + const tools = Array.isArray(toolNames) ? toolNames : [toolNames] + const defaultCallTool = async () => [ + { type: "textPart", text: "Tool executed successfully", id: createId() }, + ] + const callTool: CallToolFn = + callToolFnOrResult === undefined + ? defaultCallTool + : typeof callToolFnOrResult === "function" + ? callToolFnOrResult + : async () => callToolFnOrResult return { name, type: "mcp" as const, lazyInit: false, - _toolDefinitions: [{ name: toolName, skillName: name, inputSchema: {}, interactive: false }], + _toolDefinitions: tools.map((t) => ({ + name: t, + skillName: name, + inputSchema: {}, + interactive: false, + })), _initialized: true, init: async () => {}, isInitialized: () => true, - getToolDefinitions: async () => [ - { name: toolName, skillName: name, inputSchema: {}, interactive: false }, - ], - callTool: async () => callToolResult, + getToolDefinitions: async () => + tools.map((t) => ({ name: t, skillName: name, inputSchema: {}, interactive: false })), + callTool, close: async () => {}, } as unknown as BaseSkillManager } @@ -45,83 +60,245 @@ function createMockDelegateSkillManager(name: string): BaseSkillManager { } as unknown as BaseSkillManager } +function createMockInteractiveSkillManager(name: string, toolName: string): BaseSkillManager { + return { + name, + type: "interactive" as const, + lazyInit: false, + _toolDefinitions: [{ name: toolName, skillName: name, inputSchema: {}, interactive: true }], + _initialized: true, + init: async () => {}, + isInitialized: () => true, + getToolDefinitions: async () => [ + { name: toolName, skillName: name, inputSchema: {}, interactive: true }, + ], + callTool: async () => [], + close: async () => {}, + } as unknown as BaseSkillManager +} + describe("@perstack/runtime: callingToolLogic", () => { - it("executes tool and returns resolveToolResult event", async () => { - const setting = createRunSetting() - const checkpoint = createCheckpoint() - const step = createStep({ - toolCall: { - id: "tc_123", - skillName: "test-skill", - toolName: "testTool", - args: { param: "value" }, - }, + describe("parallel tool execution", () => { + it("executes multiple tools in parallel and returns all results", async () => { + const setting = createRunSetting() + const checkpoint = createCheckpoint() + const step = createStep({ + toolCalls: [ + { id: "tc_1", skillName: "skill-a", toolName: "tool1", args: { x: 1 } }, + { id: "tc_2", skillName: "skill-a", toolName: "tool2", args: { x: 2 } }, + { id: "tc_3", skillName: "skill-b", toolName: "tool3", args: { x: 3 } }, + ], + }) + const callToolA = vi.fn(async (toolName: string) => [ + { type: "textPart", text: `Result from ${toolName}`, id: createId() }, + ]) + const callToolB = vi.fn(async (toolName: string) => [ + { type: "textPart", text: `Result from ${toolName}`, id: createId() }, + ]) + const skillManagers = { + "skill-a": createMockMcpSkillManager("skill-a", ["tool1", "tool2"], callToolA), + "skill-b": createMockMcpSkillManager("skill-b", ["tool3"], callToolB), + } + const event = await callingToolLogic({ + setting, + checkpoint, + step, + eventListener: async () => {}, + skillManagers, + }) + expect(event.type).toBe("resolveToolResults") + if (event.type === "resolveToolResults") { + expect(event.toolResults).toHaveLength(3) + expect(event.toolResults[0].id).toBe("tc_1") + expect(event.toolResults[1].id).toBe("tc_2") + expect(event.toolResults[2].id).toBe("tc_3") + } + expect(callToolA).toHaveBeenCalledTimes(2) + expect(callToolB).toHaveBeenCalledTimes(1) }) - const skillManagers = { - "test-skill": createMockMcpSkillManager("test-skill", "testTool"), - } - const event = await callingToolLogic({ - setting, - checkpoint, - step, - eventListener: async () => {}, - skillManagers, + + it("preserves tool call order in results", async () => { + const setting = createRunSetting() + const checkpoint = createCheckpoint() + const step = createStep({ + toolCalls: [ + { id: "tc_first", skillName: "test-skill", toolName: "slowTool", args: {} }, + { id: "tc_second", skillName: "test-skill", toolName: "fastTool", args: {} }, + ], + }) + const callTool = vi.fn(async (toolName: string) => { + if (toolName === "slowTool") { + await new Promise((r) => setTimeout(r, 50)) + } + return [{ type: "textPart", text: toolName, id: createId() }] + }) + const skillManagers = { + "test-skill": createMockMcpSkillManager("test-skill", ["slowTool", "fastTool"], callTool), + } + const event = await callingToolLogic({ + setting, + checkpoint, + step, + eventListener: async () => {}, + skillManagers, + }) + expect(event.type).toBe("resolveToolResults") + if (event.type === "resolveToolResults") { + expect(event.toolResults[0].id).toBe("tc_first") + expect(event.toolResults[1].id).toBe("tc_second") + } + }) + + it("executes tools concurrently (not sequentially)", async () => { + const setting = createRunSetting() + const checkpoint = createCheckpoint() + const step = createStep({ + toolCalls: [ + { id: "tc_1", skillName: "test-skill", toolName: "tool1", args: {} }, + { id: "tc_2", skillName: "test-skill", toolName: "tool2", args: {} }, + { id: "tc_3", skillName: "test-skill", toolName: "tool3", args: {} }, + ], + }) + const DELAY_MS = 30 + const callTool = vi.fn(async () => { + await new Promise((r) => setTimeout(r, DELAY_MS)) + return [{ type: "textPart", text: "done", id: createId() }] + }) + const skillManagers = { + "test-skill": createMockMcpSkillManager( + "test-skill", + ["tool1", "tool2", "tool3"], + callTool, + ), + } + const start = Date.now() + await callingToolLogic({ + setting, + checkpoint, + step, + eventListener: async () => {}, + skillManagers, + }) + const elapsed = Date.now() - start + expect(elapsed).toBeLessThan(DELAY_MS * 2) }) - expect(event.type).toBe("resolveToolResult") - expect(event.expertKey).toBe(setting.expertKey) - expect(event.runId).toBe(setting.runId) }) - it("throws error when tool call is missing", async () => { - const setting = createRunSetting() - const checkpoint = createCheckpoint() - const step = createStep({ toolCall: undefined }) - await expect( - callingToolLogic({ + describe("single tool execution", () => { + it("executes single tool and returns resolveToolResults event", async () => { + const setting = createRunSetting() + const checkpoint = createCheckpoint() + const step = createStep({ + toolCalls: [ + { id: "tc_123", skillName: "test-skill", toolName: "testTool", args: { param: "value" } }, + ], + }) + const skillManagers = { + "test-skill": createMockMcpSkillManager("test-skill", "testTool"), + } + const event = await callingToolLogic({ setting, checkpoint, step, eventListener: async () => {}, - skillManagers: {}, - }), - ).rejects.toThrow("No tool call found") + skillManagers, + }) + expect(event.type).toBe("resolveToolResults") + expect(event.expertKey).toBe(setting.expertKey) + expect(event.runId).toBe(setting.runId) + }) }) - it("throws error when skill type is not mcp", async () => { - const setting = createRunSetting() - const checkpoint = createCheckpoint() - const step = createStep({ - toolCall: { - id: "tc_123", - skillName: "delegate-skill", - toolName: "delegate-skill", - args: { query: "test" }, - }, + describe("error handling", () => { + it("throws error when tool calls are missing", async () => { + const setting = createRunSetting() + const checkpoint = createCheckpoint() + const step = createStep({ toolCalls: undefined }) + await expect( + callingToolLogic({ + setting, + checkpoint, + step, + eventListener: async () => {}, + skillManagers: {}, + }), + ).rejects.toThrow("No tool calls found") }) - const skillManagers = { - "delegate-skill": createMockDelegateSkillManager("delegate-skill"), - } - await expect( - callingToolLogic({ + + it("returns callDelegate event for delegate skill", async () => { + const setting = createRunSetting() + const checkpoint = createCheckpoint() + const step = createStep({ + toolCalls: [ + { id: "tc_123", skillName: "delegate-skill", toolName: "delegate-skill", args: {} }, + ], + }) + const skillManagers = { + "delegate-skill": createMockDelegateSkillManager("delegate-skill"), + } + const event = await callingToolLogic({ setting, checkpoint, step, eventListener: async () => {}, skillManagers, - }), - ).rejects.toThrow("Incorrect SkillType, required MCP, got delegate") + }) + expect(event.type).toBe("callDelegate") + }) + + it("returns callInteractiveTool event for interactive skill", async () => { + const setting = createRunSetting() + const checkpoint = createCheckpoint() + const step = createStep({ + toolCalls: [ + { id: "tc_123", skillName: "interactive-skill", toolName: "humanApproval", args: {} }, + ], + }) + const skillManagers = { + "interactive-skill": createMockInteractiveSkillManager("interactive-skill", "humanApproval"), + } + const event = await callingToolLogic({ + setting, + checkpoint, + step, + eventListener: async () => {}, + skillManagers, + }) + expect(event.type).toBe("callInteractiveTool") + }) + + it("throws error when tool not found in skill managers", async () => { + const setting = createRunSetting() + const checkpoint = createCheckpoint() + const step = createStep({ + toolCalls: [ + { id: "tc_123", skillName: "unknown-skill", toolName: "unknownTool", args: {} }, + ], + }) + await expect( + callingToolLogic({ + setting, + checkpoint, + step, + eventListener: async () => {}, + skillManagers: {}, + }), + ).rejects.toThrow("Tool unknownTool not found") + }) }) - it("routes think tool to resolveThought handler", async () => { + it("routes think tool to resolveToolResults handler", async () => { const setting = createRunSetting() const checkpoint = createCheckpoint() const step = createStep({ - toolCall: { - id: "tc_123", - skillName: "@perstack/base", - toolName: "think", - args: { thought: "thinking..." }, - }, + toolCalls: [ + { + id: "tc_123", + skillName: "@perstack/base", + toolName: "think", + args: { thought: "thinking..." }, + }, + ], }) const skillManagers = { "@perstack/base": createMockMcpSkillManager("@perstack/base", "think"), @@ -133,19 +310,21 @@ describe("@perstack/runtime: callingToolLogic", () => { eventListener: async () => {}, skillManagers, }) - expect(event.type).toBe("resolveThought") + expect(event.type).toBe("resolveToolResults") }) it("routes attemptCompletion to attemptCompletion handler when no remaining todos", async () => { const setting = createRunSetting() const checkpoint = createCheckpoint() const step = createStep({ - toolCall: { - id: "tc_123", - skillName: "@perstack/base", - toolName: "attemptCompletion", - args: {}, - }, + toolCalls: [ + { + id: "tc_123", + skillName: "@perstack/base", + toolName: "attemptCompletion", + args: {}, + }, + ], }) const emptyResult = [{ type: "textPart", text: JSON.stringify({}), id: createId() }] const skillManagers = { @@ -165,16 +344,18 @@ describe("@perstack/runtime: callingToolLogic", () => { expect(event.type).toBe("attemptCompletion") }) - it("routes attemptCompletion to resolveToolResult when remaining todos exist", async () => { + it("routes attemptCompletion to resolveToolResults when remaining todos exist", async () => { const setting = createRunSetting() const checkpoint = createCheckpoint() const step = createStep({ - toolCall: { - id: "tc_123", - skillName: "@perstack/base", - toolName: "attemptCompletion", - args: {}, - }, + toolCalls: [ + { + id: "tc_123", + skillName: "@perstack/base", + toolName: "attemptCompletion", + args: {}, + }, + ], }) const remainingTodosResult = [ { @@ -197,19 +378,21 @@ describe("@perstack/runtime: callingToolLogic", () => { eventListener: async () => {}, skillManagers, }) - expect(event.type).toBe("resolveToolResult") + expect(event.type).toBe("resolveToolResults") }) - it("routes readPdfFile tool to resolvePdfFile handler", async () => { + it("routes readPdfFile tool to resolveToolResults handler", async () => { const setting = createRunSetting() const checkpoint = createCheckpoint() const step = createStep({ - toolCall: { - id: "tc_123", - skillName: "@perstack/base", - toolName: "readPdfFile", - args: { path: "/test.pdf" }, - }, + toolCalls: [ + { + id: "tc_123", + skillName: "@perstack/base", + toolName: "readPdfFile", + args: { path: "/test.pdf" }, + }, + ], }) const pdfResult = [{ type: "textPart", text: "PDF content", id: createId() }] const skillManagers = { @@ -222,19 +405,21 @@ describe("@perstack/runtime: callingToolLogic", () => { eventListener: async () => {}, skillManagers, }) - expect(event.type).toBe("resolvePdfFile") + expect(event.type).toBe("resolveToolResults") }) - it("routes readImageFile tool to resolveImageFile handler", async () => { + it("routes readImageFile tool to resolveToolResults handler", async () => { const setting = createRunSetting() const checkpoint = createCheckpoint() const step = createStep({ - toolCall: { - id: "tc_123", - skillName: "@perstack/base", - toolName: "readImageFile", - args: { path: "/test.png" }, - }, + toolCalls: [ + { + id: "tc_123", + skillName: "@perstack/base", + toolName: "readImageFile", + args: { path: "/test.png" }, + }, + ], }) const imageResult = [ { type: "imageInlinePart", encodedData: "base64data", mimeType: "image/png", id: createId() }, @@ -249,19 +434,21 @@ describe("@perstack/runtime: callingToolLogic", () => { eventListener: async () => {}, skillManagers, }) - expect(event.type).toBe("resolveImageFile") + expect(event.type).toBe("resolveToolResults") }) - it("routes non-special @perstack/base tools to resolveToolResult", async () => { + it("routes non-special @perstack/base tools to resolveToolResults", async () => { const setting = createRunSetting() const checkpoint = createCheckpoint() const step = createStep({ - toolCall: { - id: "tc_123", - skillName: "@perstack/base", - toolName: "readTextFile", - args: { path: "/test.txt" }, - }, + toolCalls: [ + { + id: "tc_123", + skillName: "@perstack/base", + toolName: "readTextFile", + args: { path: "/test.txt" }, + }, + ], }) const skillManagers = { "@perstack/base": createMockMcpSkillManager("@perstack/base", "readTextFile"), @@ -273,19 +460,21 @@ describe("@perstack/runtime: callingToolLogic", () => { eventListener: async () => {}, skillManagers, }) - expect(event.type).toBe("resolveToolResult") + expect(event.type).toBe("resolveToolResults") }) it("throws error when tool not found in skill managers", async () => { const setting = createRunSetting() const checkpoint = createCheckpoint() const step = createStep({ - toolCall: { - id: "tc_123", - skillName: "unknown-skill", - toolName: "unknownTool", - args: {}, - }, + toolCalls: [ + { + id: "tc_123", + skillName: "unknown-skill", + toolName: "unknownTool", + args: {}, + }, + ], }) await expect( callingToolLogic({ @@ -297,4 +486,132 @@ describe("@perstack/runtime: callingToolLogic", () => { }), ).rejects.toThrow("Tool unknownTool not found") }) + + it("executes multiple tools in parallel", async () => { + const setting = createRunSetting() + const checkpoint = createCheckpoint() + const step = createStep({ + toolCalls: [ + { + id: "tc_1", + skillName: "test-skill", + toolName: "testTool1", + args: { param: "value1" }, + }, + { + id: "tc_2", + skillName: "test-skill", + toolName: "testTool2", + args: { param: "value2" }, + }, + ], + }) + const skillManagers = { + "test-skill": { + name: "test-skill", + type: "mcp" as const, + lazyInit: false, + _toolDefinitions: [ + { name: "testTool1", skillName: "test-skill", inputSchema: {}, interactive: false }, + { name: "testTool2", skillName: "test-skill", inputSchema: {}, interactive: false }, + ], + _initialized: true, + init: async () => {}, + isInitialized: () => true, + getToolDefinitions: async () => [ + { name: "testTool1", skillName: "test-skill", inputSchema: {}, interactive: false }, + { name: "testTool2", skillName: "test-skill", inputSchema: {}, interactive: false }, + ], + callTool: async () => [{ type: "textPart", text: "Success", id: createId() }], + close: async () => {}, + } as unknown as BaseSkillManager, + } + const event = await callingToolLogic({ + setting, + checkpoint, + step, + eventListener: async () => {}, + skillManagers, + }) + expect(event.type).toBe("resolveToolResults") + if (event.type === "resolveToolResults") { + expect(event.toolResults).toHaveLength(2) + } + }) + + describe("mixed tool types", () => { + it("executes MCP tools first then calls delegate", async () => { + const setting = createRunSetting() + const checkpoint = createCheckpoint() + const step = createStep({ + toolCalls: [ + { id: "tc_mcp", skillName: "mcp-skill", toolName: "mcpTool", args: {} }, + { id: "tc_delegate", skillName: "delegate-skill", toolName: "delegate-skill", args: {} }, + ], + }) + const skillManagers = { + "mcp-skill": createMockMcpSkillManager("mcp-skill", "mcpTool"), + "delegate-skill": createMockDelegateSkillManager("delegate-skill"), + } + const event = await callingToolLogic({ + setting, + checkpoint, + step, + eventListener: async () => {}, + skillManagers, + }) + expect(event.type).toBe("callDelegate") + expect(step.partialToolResults).toHaveLength(1) + expect(step.partialToolResults?.[0]?.toolName).toBe("mcpTool") + }) + + it("executes MCP tools first then calls interactive", async () => { + const setting = createRunSetting() + const checkpoint = createCheckpoint() + const step = createStep({ + toolCalls: [ + { id: "tc_mcp", skillName: "mcp-skill", toolName: "mcpTool", args: {} }, + { id: "tc_interactive", skillName: "interactive-skill", toolName: "humanApproval", args: {} }, + ], + }) + const skillManagers = { + "mcp-skill": createMockMcpSkillManager("mcp-skill", "mcpTool"), + "interactive-skill": createMockInteractiveSkillManager("interactive-skill", "humanApproval"), + } + const event = await callingToolLogic({ + setting, + checkpoint, + step, + eventListener: async () => {}, + skillManagers, + }) + expect(event.type).toBe("callInteractiveTool") + expect(step.partialToolResults).toHaveLength(1) + expect(step.partialToolResults?.[0]?.toolName).toBe("mcpTool") + }) + + it("delegates before interactive when both exist", async () => { + const setting = createRunSetting() + const checkpoint = createCheckpoint() + const step = createStep({ + toolCalls: [ + { id: "tc_delegate", skillName: "delegate-skill", toolName: "delegate-skill", args: {} }, + { id: "tc_interactive", skillName: "interactive-skill", toolName: "humanApproval", args: {} }, + ], + }) + const skillManagers = { + "delegate-skill": createMockDelegateSkillManager("delegate-skill"), + "interactive-skill": createMockInteractiveSkillManager("interactive-skill", "humanApproval"), + } + const event = await callingToolLogic({ + setting, + checkpoint, + step, + eventListener: async () => {}, + skillManagers, + }) + expect(event.type).toBe("callDelegate") + expect(step.pendingToolCalls).toHaveLength(2) + }) + }) }) diff --git a/packages/runtime/src/states/calling-tool.ts b/packages/runtime/src/states/calling-tool.ts index ff4a0479..a6bd1f03 100644 --- a/packages/runtime/src/states/calling-tool.ts +++ b/packages/runtime/src/states/calling-tool.ts @@ -1,14 +1,18 @@ +import { readFile } from "node:fs/promises" import { attemptCompletion, + callDelegate, + callInteractiveTool, + type MessagePart, type RunEvent, - resolveImageFile, - resolvePdfFile, - resolveThought, - resolveToolResult, + resolveToolResults, + type ToolCall, type ToolResult, } from "@perstack/core" import type { RunSnapshot } from "../runtime-state-machine.js" +import type { BaseSkillManager } from "../skill-manager/index.js" import { getSkillManagerByToolName } from "../skill-manager/index.js" +import type { McpSkillManager } from "../skill-manager/mcp.js" function hasRemainingTodos(toolResult: ToolResult): boolean { const firstPart = toolResult.result[0] @@ -23,38 +27,169 @@ function hasRemainingTodos(toolResult: ToolResult): boolean { } } +type FileInfo = { path: string; mimeType: string; size: number } + +function isFileInfo(value: unknown): value is FileInfo { + return ( + typeof value === "object" && + value !== null && + "path" in value && + "mimeType" in value && + "size" in value && + typeof (value as FileInfo).path === "string" && + typeof (value as FileInfo).mimeType === "string" && + typeof (value as FileInfo).size === "number" + ) +} + +async function processFileToolResult( + toolResult: ToolResult, + toolName: "readPdfFile" | "readImageFile", +): Promise { + const processedContents: MessagePart[] = [] + for (const part of toolResult.result) { + if (part.type !== "textPart") { + processedContents.push(part) + continue + } + let fileInfo: FileInfo | undefined + try { + const parsed = JSON.parse(part.text) + if (isFileInfo(parsed)) { + fileInfo = parsed + } + } catch { + processedContents.push(part) + continue + } + if (!fileInfo) { + processedContents.push(part) + continue + } + const { path, mimeType } = fileInfo + try { + const buffer = await readFile(path) + if (toolName === "readImageFile") { + processedContents.push({ + type: "imageInlinePart", + id: part.id, + encodedData: buffer.toString("base64"), + mimeType, + }) + } else { + processedContents.push({ + type: "fileInlinePart", + id: part.id, + encodedData: buffer.toString("base64"), + mimeType, + }) + } + } catch (error) { + processedContents.push({ + type: "textPart", + id: part.id, + text: `Failed to read file "${path}": ${error instanceof Error ? error.message : String(error)}`, + }) + } + } + return { ...toolResult, result: processedContents } +} + +async function executeMcpToolCall( + toolCall: ToolCall, + skillManagers: Record, +): Promise { + const skillManager = await getSkillManagerByToolName(skillManagers, toolCall.toolName) + if (skillManager.type !== "mcp") { + throw new Error(`Incorrect SkillType, required MCP, got ${skillManager.type}`) + } + const result = await (skillManager as McpSkillManager).callTool(toolCall.toolName, toolCall.args) + const toolResult: ToolResult = { + id: toolCall.id, + skillName: toolCall.skillName, + toolName: toolCall.toolName, + result, + } + if (toolCall.toolName === "readPdfFile" || toolCall.toolName === "readImageFile") { + return processFileToolResult(toolResult, toolCall.toolName) + } + return toolResult +} + +async function getToolType( + toolCall: ToolCall, + skillManagers: Record, +): Promise<"mcp" | "delegate" | "interactive"> { + const skillManager = await getSkillManagerByToolName(skillManagers, toolCall.toolName) + return skillManager.type +} + export async function callingToolLogic({ setting, checkpoint, step, skillManagers, }: RunSnapshot["context"]): Promise { - if (!step.toolCall) { - throw new Error("No tool call found") - } - const { id, skillName, toolName, args } = step.toolCall - const skillManager = await getSkillManagerByToolName(skillManagers, toolName) - if (skillManager.type !== "mcp") { - throw new Error(`Incorrect SkillType, required MCP, got ${skillManager.type}`) + const pendingToolCalls = step.pendingToolCalls ?? step.toolCalls ?? [] + if (pendingToolCalls.length === 0) { + throw new Error("No tool calls found") } - const result = await skillManager.callTool(toolName, args) - const toolResult: ToolResult = { id, skillName, toolName, result } - if (skillName === "@perstack/base") { - if (toolName === "think") { - return resolveThought(setting, checkpoint, { toolResult }) - } - if (toolName === "attemptCompletion") { - if (hasRemainingTodos(toolResult)) { - return resolveToolResult(setting, checkpoint, { toolResult }) - } - return attemptCompletion(setting, checkpoint, { toolResult }) + const toolResults: ToolResult[] = step.toolResults ? [...step.toolResults] : [] + const attemptCompletionTool = pendingToolCalls.find( + (tc) => tc.skillName === "@perstack/base" && tc.toolName === "attemptCompletion", + ) + if (attemptCompletionTool) { + const toolResult = await executeMcpToolCall(attemptCompletionTool, skillManagers) + if (hasRemainingTodos(toolResult)) { + return resolveToolResults(setting, checkpoint, { toolResults: [toolResult] }) } - if (toolName === "readPdfFile") { - return resolvePdfFile(setting, checkpoint, { toolResult }) + return attemptCompletion(setting, checkpoint, { toolResult }) + } + const toolCallTypes = await Promise.all( + pendingToolCalls.map(async (tc) => ({ + toolCall: tc, + type: await getToolType(tc, skillManagers), + })), + ) + const mcpToolCalls = toolCallTypes.filter((t) => t.type === "mcp").map((t) => t.toolCall) + const delegateToolCalls = toolCallTypes + .filter((t) => t.type === "delegate") + .map((t) => t.toolCall) + const interactiveToolCalls = toolCallTypes + .filter((t) => t.type === "interactive") + .map((t) => t.toolCall) + if (mcpToolCalls.length > 0) { + const mcpResults = await Promise.all( + mcpToolCalls.map((tc) => executeMcpToolCall(tc, skillManagers)), + ) + toolResults.push(...mcpResults) + } + const remainingToolCalls = [...delegateToolCalls, ...interactiveToolCalls] + if (delegateToolCalls.length > 0) { + const delegateToolCall = delegateToolCalls[0] + if (!delegateToolCall) { + throw new Error("No delegate tool call found") } - if (toolName === "readImageFile") { - return resolveImageFile(setting, checkpoint, { toolResult }) + step.partialToolResults = toolResults + step.pendingToolCalls = remainingToolCalls + return callDelegate(setting, checkpoint, { + newMessage: checkpoint.messages[checkpoint.messages.length - 1] as never, + toolCall: delegateToolCall, + usage: step.usage, + }) + } + if (interactiveToolCalls.length > 0) { + const interactiveToolCall = interactiveToolCalls[0] + if (!interactiveToolCall) { + throw new Error("No interactive tool call found") } + step.partialToolResults = toolResults + step.pendingToolCalls = remainingToolCalls + return callInteractiveTool(setting, checkpoint, { + newMessage: checkpoint.messages[checkpoint.messages.length - 1] as never, + toolCall: interactiveToolCall, + usage: step.usage, + }) } - return resolveToolResult(setting, checkpoint, { toolResult }) + return resolveToolResults(setting, checkpoint, { toolResults }) } diff --git a/packages/runtime/src/states/generating-run-result.test.ts b/packages/runtime/src/states/generating-run-result.test.ts index 6fb077fd..3bbf76c9 100644 --- a/packages/runtime/src/states/generating-run-result.test.ts +++ b/packages/runtime/src/states/generating-run-result.test.ts @@ -33,18 +33,22 @@ describe("@perstack/runtime: StateMachineLogic['GeneratingRunResult']", () => { const setting = createRunSetting() const checkpoint = createCheckpoint() const step = createStep({ - toolCall: { + toolCalls: [ + { id: "tc_123", skillName: "@perstack/base", toolName: "attemptCompletion", args: {}, }, - toolResult: { + ], + toolResults: [ + { id: "tc_123", skillName: "@perstack/base", toolName: "attemptCompletion", result: [{ type: "textPart", text: JSON.stringify({}), id: createId() }], }, + ], }) mockGetModel.mockReturnValue(createMockLanguageModel("Task completed successfully")) const event = await StateMachineLogics.GeneratingRunResult({ @@ -65,18 +69,22 @@ describe("@perstack/runtime: StateMachineLogic['GeneratingRunResult']", () => { const setting = createRunSetting() const checkpoint = createCheckpoint() const step = createStep({ - toolCall: { + toolCalls: [ + { id: "tc_123", skillName: "@perstack/base", toolName: "attemptCompletion", args: {}, }, - toolResult: { + ], + toolResults: [ + { id: "tc_123", skillName: "@perstack/base", toolName: "attemptCompletion", result: [{ type: "textPart", text: JSON.stringify({}), id: createId() }], }, + ], }) const errorModel = new MockLanguageModelV2({ doGenerate: async () => { @@ -94,10 +102,10 @@ describe("@perstack/runtime: StateMachineLogic['GeneratingRunResult']", () => { expect(event.type).toBe("retry") }) - it("throws error when tool call or result missing", async () => { + it("throws error when tool calls or results missing", async () => { const setting = createRunSetting() const checkpoint = createCheckpoint() - const step = createStep({ toolCall: undefined, toolResult: undefined }) + const step = createStep({ toolCalls: undefined, toolResults: undefined }) await expect( StateMachineLogics.GeneratingRunResult({ setting, @@ -106,25 +114,29 @@ describe("@perstack/runtime: StateMachineLogic['GeneratingRunResult']", () => { eventListener: async () => {}, skillManagers: {}, }), - ).rejects.toThrow("No tool call or tool result found") + ).rejects.toThrow("No tool calls or tool results found") }) it("includes proper event metadata", async () => { const setting = createRunSetting() const checkpoint = createCheckpoint() const step = createStep({ - toolCall: { + toolCalls: [ + { id: "tc_123", skillName: "@perstack/base", toolName: "attemptCompletion", args: {}, }, - toolResult: { + ], + toolResults: [ + { id: "tc_123", skillName: "@perstack/base", toolName: "attemptCompletion", result: [{ type: "textPart", text: JSON.stringify({}), id: createId() }], }, + ], }) mockGetModel.mockReturnValue(createMockLanguageModel("Final result")) const event = await StateMachineLogics.GeneratingRunResult({ diff --git a/packages/runtime/src/states/generating-run-result.ts b/packages/runtime/src/states/generating-run-result.ts index 9a2cdf93..0912d98f 100644 --- a/packages/runtime/src/states/generating-run-result.ts +++ b/packages/runtime/src/states/generating-run-result.ts @@ -15,21 +15,24 @@ export async function generatingRunResultLogic({ checkpoint, step, }: RunSnapshot["context"]): Promise { - if (!step.toolCall || !step.toolResult) { - throw new Error("No tool call or tool result found") + if (!step.toolCalls || !step.toolResults || step.toolResults.length === 0) { + throw new Error("No tool calls or tool results found") } - const { id, toolName } = step.toolCall - const { result } = step.toolResult - const toolMessage = createToolMessage([ - { - type: "toolResultPart", - toolCallId: id, - toolName, - contents: result.filter( - (part) => part.type === "textPart" || part.type === "imageInlinePart", + const toolResultParts = step.toolResults.map((toolResult) => { + const toolCall = step.toolCalls?.find((tc) => tc.id === toolResult.id) + return { + type: "toolResultPart" as const, + toolCallId: toolResult.id, + toolName: toolCall?.toolName ?? toolResult.toolName, + contents: toolResult.result.filter( + (part) => + part.type === "textPart" || + part.type === "imageInlinePart" || + part.type === "fileInlinePart", ), - }, - ]) + } + }) + const toolMessage = createToolMessage(toolResultParts) const model = getModel(setting.model, setting.providerConfig) const { messages } = checkpoint let generationResult: GenerateTextResult diff --git a/packages/runtime/src/states/generating-tool-call.test.ts b/packages/runtime/src/states/generating-tool-call.test.ts index 3142513f..cbadeaa9 100644 --- a/packages/runtime/src/states/generating-tool-call.test.ts +++ b/packages/runtime/src/states/generating-tool-call.test.ts @@ -186,10 +186,10 @@ describe("@perstack/runtime: StateMachineLogic['GeneratingToolCall']", () => { eventListener: async () => {}, skillManagers: { "test-skill": skillManager }, }) - expect(event.type).toBe("callTool") + expect(event.type).toBe("callTools") }) - it("returns callInteractiveTool event for interactive skill", async () => { + it("returns callTools event for interactive skill (processed later in CallingTool)", async () => { const setting = createRunSetting() const checkpoint = createCheckpoint() const step = createStep() @@ -214,10 +214,10 @@ describe("@perstack/runtime: StateMachineLogic['GeneratingToolCall']", () => { eventListener: async () => {}, skillManagers: { "interactive-skill": skillManager }, }) - expect(event.type).toBe("callInteractiveTool") + expect(event.type).toBe("callTools") }) - it("returns callDelegate event for delegate skill", async () => { + it("returns callTools event for delegate skill (processed later in CallingTool)", async () => { const setting = createRunSetting() const checkpoint = createCheckpoint() const step = createStep() @@ -242,7 +242,47 @@ describe("@perstack/runtime: StateMachineLogic['GeneratingToolCall']", () => { eventListener: async () => {}, skillManagers: { "delegate-skill": skillManager }, }) - expect(event.type).toBe("callDelegate") + expect(event.type).toBe("callTools") + }) + + it("sorts tool calls by priority: MCP → Delegate → Interactive", async () => { + const setting = createRunSetting() + const checkpoint = createCheckpoint() + const step = createStep() + const mcpSkillManager = createMockSkillManager("mcp-skill", "mcp", "mcpTool") + const delegateSkillManager = createMockSkillManager("delegate-skill", "delegate", "delegateTool") + const interactiveSkillManager = createMockSkillManager( + "interactive-skill", + "interactive", + "interactiveTool", + ) + mockGetModel.mockReturnValue( + createMockLanguageModel({ + finishReason: "tool-calls", + toolCalls: [ + { type: "tool-call", toolCallId: "tc_int", toolName: "interactiveTool", input: "{}" }, + { type: "tool-call", toolCallId: "tc_del", toolName: "delegateTool", input: "{}" }, + { type: "tool-call", toolCallId: "tc_mcp", toolName: "mcpTool", input: "{}" }, + ], + }), + ) + const event = await StateMachineLogics.GeneratingToolCall({ + setting, + checkpoint, + step, + eventListener: async () => {}, + skillManagers: { + "mcp-skill": mcpSkillManager, + "delegate-skill": delegateSkillManager, + "interactive-skill": interactiveSkillManager, + }, + }) + expect(event.type).toBe("callTools") + if (event.type === "callTools") { + expect(event.toolCalls[0].toolName).toBe("mcpTool") + expect(event.toolCalls[1].toolName).toBe("delegateTool") + expect(event.toolCalls[2].toolName).toBe("interactiveTool") + } }) it("returns retry event when finish reason is length", async () => { @@ -307,6 +347,6 @@ describe("@perstack/runtime: StateMachineLogic['GeneratingToolCall']", () => { eventListener: async () => {}, skillManagers: { "test-skill": skillManager }, }) - expect(event.type).toBe("callTool") + expect(event.type).toBe("callTools") }) }) diff --git a/packages/runtime/src/states/generating-tool-call.ts b/packages/runtime/src/states/generating-tool-call.ts index 75543b78..934b9b21 100644 --- a/packages/runtime/src/states/generating-tool-call.ts +++ b/packages/runtime/src/states/generating-tool-call.ts @@ -1,11 +1,10 @@ import { createId } from "@paralleldrive/cuid2" import { - callDelegate, - callInteractiveTool, - callTool, + callTools, type RunEvent, retry, type TextPart, + type ToolCall, type ToolCallPart, } from "@perstack/core" import { type GenerateTextResult, generateText, type ToolSet } from "ai" @@ -17,9 +16,59 @@ import { } from "../messages/message.js" import { getModel } from "../model.js" import type { RunSnapshot } from "../runtime-state-machine.js" +import type { BaseSkillManager } from "../skill-manager/index.js" import { getSkillManagerByToolName, getToolSet } from "../skill-manager/index.js" import { createEmptyUsage, usageFromGenerateTextResult } from "../usage.js" +type ClassifiedToolCall = { + toolCallId: string + toolName: string + input: Record + skillManager: BaseSkillManager +} + +async function classifyToolCalls( + toolCalls: Array<{ toolCallId: string; toolName: string; input: unknown }>, + skillManagers: Record, +): Promise { + return Promise.all( + toolCalls.map(async (tc) => { + const skillManager = await getSkillManagerByToolName(skillManagers, tc.toolName) + return { + toolCallId: tc.toolCallId, + toolName: tc.toolName, + input: tc.input as Record, + skillManager, + } + }), + ) +} + +function sortToolCallsByPriority(toolCalls: ClassifiedToolCall[]): ClassifiedToolCall[] { + const priority = { mcp: 0, delegate: 1, interactive: 2 } + return [...toolCalls].sort( + (a, b) => (priority[a.skillManager.type] ?? 99) - (priority[b.skillManager.type] ?? 99), + ) +} + +function buildToolCallParts(toolCalls: ClassifiedToolCall[]): Array> { + return toolCalls.map((tc) => ({ + type: "toolCallPart" as const, + toolCallId: tc.toolCallId, + toolName: tc.toolName, + args: tc.input, + })) +} + +function buildToolCalls(toolCalls: ClassifiedToolCall[]): ToolCall[] { + return toolCalls.map((tc) => ({ + id: tc.toolCallId, + skillName: tc.skillManager.name, + toolName: tc.toolName, + args: tc.input, + })) +} + export async function generatingToolCallLogic({ setting, checkpoint, @@ -51,8 +100,7 @@ export async function generatingToolCallLogic({ } const usage = usageFromGenerateTextResult(result) const { text, toolCalls, finishReason } = result - const toolCall = toolCalls[0] - if (!toolCall) { + if (toolCalls.length === 0) { const reason = JSON.stringify({ error: "Error: No tool call generated", message: "You must generate a tool call. Try again.", @@ -63,42 +111,26 @@ export async function generatingToolCallLogic({ usage, }) } - const contents: Array | Omit> = [ - { - type: "toolCallPart", - toolCallId: toolCall.toolCallId, - toolName: toolCall.toolName, - args: toolCall.input, - }, - ] - if (text) { - contents.push({ - type: "textPart", - text, - }) - } - const skillManager = await getSkillManagerByToolName(skillManagers, toolCall.toolName) - const eventPayload = { - newMessage: createExpertMessage(contents), - toolCall: { - id: toolCall.toolCallId, - skillName: skillManager.name, - toolName: toolCall.toolName, - args: toolCall.input, - }, - usage, - } + const classified = await classifyToolCalls(toolCalls, skillManagers) + const sorted = sortToolCallsByPriority(classified) if (finishReason === "tool-calls" || finishReason === "stop") { - switch (skillManager.type) { - case "mcp": - return callTool(setting, checkpoint, eventPayload) - case "interactive": - return callInteractiveTool(setting, checkpoint, eventPayload) - case "delegate": - return callDelegate(setting, checkpoint, eventPayload) + const toolCallParts = buildToolCallParts(sorted) + const contents: Array | Omit> = [...toolCallParts] + if (text) { + contents.push({ type: "textPart", text }) } + const allToolCalls = buildToolCalls(sorted) + return callTools(setting, checkpoint, { + newMessage: createExpertMessage(contents), + toolCalls: allToolCalls, + usage, + }) } if (finishReason === "length") { + const firstToolCall = sorted[0] + if (!firstToolCall) { + throw new Error("No tool call found") + } const reason = JSON.stringify({ error: "Error: Tool call generation failed", message: "Generation length exceeded. Try again.", @@ -109,27 +141,36 @@ export async function generatingToolCallLogic({ createExpertMessage([ { type: "toolCallPart", - toolCallId: toolCall.toolCallId, - toolName: toolCall.toolName, - args: toolCall.input, + toolCallId: firstToolCall.toolCallId, + toolName: firstToolCall.toolName, + args: firstToolCall.input, }, ]), createToolMessage([ { type: "toolResultPart", - toolCallId: toolCall.toolCallId, - toolName: toolCall.toolName, + toolCallId: firstToolCall.toolCallId, + toolName: firstToolCall.toolName, contents: [{ type: "textPart", text: reason }], }, ]), ], - toolCall: eventPayload.toolCall, - toolResult: { - id: toolCall.toolCallId, - skillName: skillManager.name, - toolName: toolCall.toolName, - result: [{ type: "textPart", id: createId(), text: reason }], - }, + toolCalls: [ + { + id: firstToolCall.toolCallId, + skillName: firstToolCall.skillManager.name, + toolName: firstToolCall.toolName, + args: firstToolCall.input, + }, + ], + toolResults: [ + { + id: firstToolCall.toolCallId, + skillName: firstToolCall.skillManager.name, + toolName: firstToolCall.toolName, + result: [{ type: "textPart", id: createId(), text: reason }], + }, + ], usage, }) } diff --git a/packages/runtime/src/states/init.test.ts b/packages/runtime/src/states/init.test.ts index d76ba805..336af517 100644 --- a/packages/runtime/src/states/init.test.ts +++ b/packages/runtime/src/states/init.test.ts @@ -70,40 +70,23 @@ describe("@perstack/runtime: StateMachineLogic['Init']", () => { }) const checkpoint = createCheckpoint({ status: "stoppedByDelegate", + pendingToolCalls: [{ id: "123", skillName: "test-skill", toolName: "test", args: {} }], }) const step = createStep() - await expect( - StateMachineLogics.Init({ + const event = await StateMachineLogics.Init({ setting, checkpoint, step, eventListener: async () => {}, skillManagers: {}, - }), - ).resolves.toStrictEqual({ - type: "startRun", - id: expect.any(String), - expertKey: setting.expertKey, - timestamp: expect.any(Number), - runId: setting.runId, - stepNumber: checkpoint.stepNumber, - initialCheckpoint: checkpoint, - inputMessages: [ - { - type: "toolMessage", - id: expect.any(String), - contents: [ - { - type: "toolResultPart", - id: expect.any(String), - toolCallId: "123", - toolName: "test", - contents: [{ type: "textPart", id: expect.any(String), text: "test-delegate" }], - }, - ], - }, - ], }) + expect(event.type).toBe("startRun") + if (event.type === "startRun") { + expect(event.inputMessages).toEqual([]) + expect(event.initialCheckpoint.partialToolResults).toHaveLength(1) + expect(event.initialCheckpoint.partialToolResults?.[0].id).toBe("123") + expect(event.initialCheckpoint.pendingToolCalls).toBeUndefined() + } }) it("throws error when delegate call result is undefined", async () => { @@ -137,42 +120,23 @@ describe("@perstack/runtime: StateMachineLogic['Init']", () => { }) const checkpoint = createCheckpoint({ status: "stoppedByInteractiveTool", + pendingToolCalls: [{ id: "123", skillName: "test-skill", toolName: "test", args: {} }], }) const step = createStep() - await expect( - StateMachineLogics.Init({ + const event = await StateMachineLogics.Init({ setting, checkpoint, step, eventListener: async () => {}, skillManagers: {}, - }), - ).resolves.toStrictEqual({ - type: "startRun", - id: expect.any(String), - expertKey: setting.expertKey, - timestamp: expect.any(Number), - runId: setting.runId, - stepNumber: checkpoint.stepNumber, - initialCheckpoint: checkpoint, - inputMessages: [ - { - type: "toolMessage", - id: expect.any(String), - contents: [ - { - type: "toolResultPart", - id: expect.any(String), - toolCallId: "123", - toolName: "test", - contents: [ - { type: "textPart", id: expect.any(String), text: "test-interactive-tool" }, - ], - }, - ], - }, - ], }) + expect(event.type).toBe("startRun") + if (event.type === "startRun") { + expect(event.inputMessages).toEqual([]) + expect(event.initialCheckpoint.partialToolResults).toHaveLength(1) + expect(event.initialCheckpoint.partialToolResults?.[0].id).toBe("123") + expect(event.initialCheckpoint.pendingToolCalls).toBeUndefined() + } }) it("throws error when interactive tool call result is undefined", async () => { diff --git a/packages/runtime/src/states/init.ts b/packages/runtime/src/states/init.ts index 71b148c6..97d59ef9 100644 --- a/packages/runtime/src/states/init.ts +++ b/packages/runtime/src/states/init.ts @@ -1,6 +1,7 @@ -import { type RunEvent, startRun } from "@perstack/core" +import { createId } from "@paralleldrive/cuid2" +import { type RunEvent, startRun, type ToolResult } from "@perstack/core" import { createInstructionMessage } from "../messages/instruction-message.js" -import { createToolMessage, createUserMessage } from "../messages/message.js" +import { createUserMessage } from "../messages/message.js" import type { RunSnapshot } from "../runtime-state-machine.js" export async function initLogic({ @@ -27,18 +28,29 @@ export async function initLogic({ if (!setting.input.interactiveToolCallResult) { throw new Error("Interactive tool call result is undefined") } - return startRun(setting, checkpoint, { - initialCheckpoint: checkpoint, - inputMessages: [ - createToolMessage([ - { - type: "toolResultPart", - toolCallId: setting.input.interactiveToolCallResult.toolCallId, - toolName: setting.input.interactiveToolCallResult.toolName, - contents: [{ type: "textPart", text: setting.input.interactiveToolCallResult.text }], - }, - ]), - ], + const { toolCallId, toolName, text } = setting.input.interactiveToolCallResult + const pendingToolCalls = checkpoint.pendingToolCalls ?? [] + const completedToolCall = pendingToolCalls.find((tc) => tc.id === toolCallId) + const skillName = + completedToolCall?.skillName ?? + (checkpoint.status === "stoppedByDelegate" ? checkpoint.delegateTo?.expert.key : "") ?? + "" + const newToolResult: ToolResult = { + id: toolCallId, + skillName, + toolName, + result: [{ type: "textPart", id: createId(), text }], + } + const updatedPartialResults = [...(checkpoint.partialToolResults ?? []), newToolResult] + const updatedPendingToolCalls = pendingToolCalls.filter((tc) => tc.id !== toolCallId) + const updatedCheckpoint = { + ...checkpoint, + partialToolResults: updatedPartialResults, + pendingToolCalls: updatedPendingToolCalls.length > 0 ? updatedPendingToolCalls : undefined, + } + return startRun(setting, updatedCheckpoint, { + initialCheckpoint: updatedCheckpoint, + inputMessages: [], }) } default: diff --git a/packages/runtime/src/states/preparing-for-step.test.ts b/packages/runtime/src/states/preparing-for-step.test.ts index 24566e59..efb276b9 100644 --- a/packages/runtime/src/states/preparing-for-step.test.ts +++ b/packages/runtime/src/states/preparing-for-step.test.ts @@ -1,9 +1,10 @@ +import { createId } from "@paralleldrive/cuid2" import { describe, expect, it } from "vitest" import { createCheckpoint, createRunSetting, createStep } from "../../test/run-params.js" import { StateMachineLogics } from "../index.js" describe("@perstack/runtime: StateMachineLogic['PreparingForStep']", () => { - it("prepares execution steps correctly", async () => { + it("returns startGeneration when no pending tool calls or partial results", async () => { const setting = createRunSetting() const checkpoint = createCheckpoint() const step = createStep() @@ -25,4 +26,89 @@ describe("@perstack/runtime: StateMachineLogic['PreparingForStep']", () => { messages: checkpoint.messages, }) }) + + it("returns resumeToolCalls when pendingToolCalls exist", async () => { + const setting = createRunSetting() + const pendingToolCalls = [ + { id: "tc_1", skillName: "test-skill", toolName: "testTool", args: {} }, + ] + const partialToolResults = [ + { id: "tc_0", skillName: "test-skill", toolName: "prevTool", result: [] }, + ] + const checkpoint = createCheckpoint({ pendingToolCalls, partialToolResults }) + const step = createStep() + const result = await StateMachineLogics.PreparingForStep({ + setting, + checkpoint, + step, + eventListener: async () => {}, + skillManagers: {}, + }) + expect(result.type).toBe("resumeToolCalls") + if (result.type === "resumeToolCalls") { + expect(result.pendingToolCalls).toEqual(pendingToolCalls) + expect(result.partialToolResults).toEqual(partialToolResults) + } + }) + + it("returns finishAllToolCalls when only partialToolResults exist", async () => { + const setting = createRunSetting() + const partialToolResults = [ + { + id: "tc_1", + skillName: "test-skill", + toolName: "testTool", + result: [{ type: "textPart" as const, text: "result", id: createId() }], + }, + ] + const checkpoint = createCheckpoint({ partialToolResults }) + const step = createStep() + const result = await StateMachineLogics.PreparingForStep({ + setting, + checkpoint, + step, + eventListener: async () => {}, + skillManagers: {}, + }) + expect(result.type).toBe("finishAllToolCalls") + if (result.type === "finishAllToolCalls") { + expect(result.newMessages).toHaveLength(1) + expect(result.newMessages[0].type).toBe("toolMessage") + } + }) + + it("filters partialToolResults contents to allowed types", async () => { + const setting = createRunSetting() + const partialToolResults = [ + { + id: "tc_1", + skillName: "test-skill", + toolName: "testTool", + result: [ + { type: "textPart" as const, text: "text", id: createId() }, + { type: "imageInlinePart" as const, encodedData: "base64", mimeType: "image/png", id: createId() }, + { type: "fileInlinePart" as const, encodedData: "base64", mimeType: "application/pdf", id: createId() }, + ], + }, + ] + const checkpoint = createCheckpoint({ partialToolResults }) + const step = createStep() + const result = await StateMachineLogics.PreparingForStep({ + setting, + checkpoint, + step, + eventListener: async () => {}, + skillManagers: {}, + }) + expect(result.type).toBe("finishAllToolCalls") + if (result.type === "finishAllToolCalls") { + const toolMessage = result.newMessages[0] + if (toolMessage.type === "toolMessage") { + const toolResultPart = toolMessage.contents[0] + if (toolResultPart.type === "toolResultPart") { + expect(toolResultPart.contents).toHaveLength(3) + } + } + } + }) }) diff --git a/packages/runtime/src/states/preparing-for-step.ts b/packages/runtime/src/states/preparing-for-step.ts index e4a4d5f3..0f697fb7 100644 --- a/packages/runtime/src/states/preparing-for-step.ts +++ b/packages/runtime/src/states/preparing-for-step.ts @@ -1,11 +1,33 @@ -import { type RunEvent, startGeneration } from "@perstack/core" +import { finishAllToolCalls, type RunEvent, resumeToolCalls, startGeneration } from "@perstack/core" +import { createToolMessage } from "../messages/message.js" import type { RunSnapshot } from "../runtime-state-machine.js" export async function preparingForStepLogic({ setting, checkpoint, }: RunSnapshot["context"]): Promise { - // TODO: add logic to count tokens and check if it's exceeded the limit + if (checkpoint.pendingToolCalls && checkpoint.pendingToolCalls.length > 0) { + return resumeToolCalls(setting, checkpoint, { + pendingToolCalls: checkpoint.pendingToolCalls, + partialToolResults: checkpoint.partialToolResults ?? [], + }) + } + if (checkpoint.partialToolResults && checkpoint.partialToolResults.length > 0) { + const toolResultParts = checkpoint.partialToolResults.map((tr) => ({ + type: "toolResultPart" as const, + toolCallId: tr.id, + toolName: tr.toolName, + contents: tr.result.filter( + (part) => + part.type === "textPart" || + part.type === "imageInlinePart" || + part.type === "fileInlinePart", + ), + })) + return finishAllToolCalls(setting, checkpoint, { + newMessages: [createToolMessage(toolResultParts)], + }) + } return startGeneration(setting, checkpoint, { messages: checkpoint.messages, }) diff --git a/packages/runtime/src/states/resolving-image-file.test.ts b/packages/runtime/src/states/resolving-image-file.test.ts index a7ba693e..005477af 100644 --- a/packages/runtime/src/states/resolving-image-file.test.ts +++ b/packages/runtime/src/states/resolving-image-file.test.ts @@ -25,14 +25,17 @@ describe("@perstack/runtime: StateMachineLogic['ResolvingImageFile']", () => { const setting = createRunSetting() const checkpoint = createCheckpoint() const step = createStep({ - toolCall: { + toolCalls: [ + { id: "tc_123", skillName: "@perstack/base", toolName: "readImageFile", args: { path: "/test/image.png" }, }, - toolResult: { - id: "tr_123", + ], + toolResults: [ + { + id: "tc_123", skillName: "@perstack/base", toolName: "readImageFile", result: [ @@ -43,6 +46,7 @@ describe("@perstack/runtime: StateMachineLogic['ResolvingImageFile']", () => { }, ], }, + ], }) await expect( StateMachineLogics.ResolvingImageFile({ @@ -94,14 +98,17 @@ describe("@perstack/runtime: StateMachineLogic['ResolvingImageFile']", () => { const setting = createRunSetting() const checkpoint = createCheckpoint() const step = createStep({ - toolCall: { + toolCalls: [ + { id: "tc_123", skillName: "@perstack/base", toolName: "readImageFile", args: { path: "/nonexistent.png" }, }, - toolResult: { - id: "tr_123", + ], + toolResults: [ + { + id: "tc_123", skillName: "@perstack/base", toolName: "readImageFile", result: [ @@ -112,6 +119,7 @@ describe("@perstack/runtime: StateMachineLogic['ResolvingImageFile']", () => { }, ], }, + ], }) const result = await StateMachineLogics.ResolvingImageFile({ setting, @@ -129,4 +137,70 @@ describe("@perstack/runtime: StateMachineLogic['ResolvingImageFile']", () => { text: expect.stringContaining('Failed to read image file "/nonexistent.png"'), }) }) + + it("throws error when tool calls are missing", async () => { + const setting = createRunSetting() + const checkpoint = createCheckpoint() + const step = createStep({ toolCalls: undefined, toolResults: [] }) + await expect( + StateMachineLogics.ResolvingImageFile({ + setting, + checkpoint, + step, + eventListener: async () => {}, + skillManagers: {}, + }), + ).rejects.toThrow("No tool calls or tool results found") + }) + + it("throws error when tool results are empty", async () => { + const setting = createRunSetting() + const checkpoint = createCheckpoint() + const step = createStep({ + toolCalls: [{ id: "tc_123", skillName: "@perstack/base", toolName: "readImageFile", args: {} }], + toolResults: [], + }) + await expect( + StateMachineLogics.ResolvingImageFile({ + setting, + checkpoint, + step, + eventListener: async () => {}, + skillManagers: {}, + }), + ).rejects.toThrow("No tool calls or tool results found") + }) + + it("handles invalid JSON in text part gracefully", async () => { + const setting = createRunSetting() + const checkpoint = createCheckpoint() + const step = createStep({ + toolCalls: [ + { id: "tc_123", skillName: "@perstack/base", toolName: "readImageFile", args: {} }, + ], + toolResults: [ + { + id: "tc_123", + skillName: "@perstack/base", + toolName: "readImageFile", + result: [{ type: "textPart" as const, text: "not json", id: createId() }], + }, + ], + }) + const result = await StateMachineLogics.ResolvingImageFile({ + setting, + checkpoint, + step, + eventListener: async () => {}, + skillManagers: {}, + }) + expect(result.type).toBe("finishToolCall") + if (result.type !== "finishToolCall") throw new Error("Unexpected event type") + const toolResultPart = result.newMessages[0].contents[0] + if (toolResultPart.type !== "toolResultPart") throw new Error("Unexpected part type") + expect(toolResultPart.contents[0]).toMatchObject({ + type: "textPart", + text: "not json", + }) + }) }) diff --git a/packages/runtime/src/states/resolving-image-file.ts b/packages/runtime/src/states/resolving-image-file.ts index 3c06a6a9..1931ff78 100644 --- a/packages/runtime/src/states/resolving-image-file.ts +++ b/packages/runtime/src/states/resolving-image-file.ts @@ -9,12 +9,16 @@ export async function resolvingImageFileLogic({ checkpoint, step, }: RunSnapshot["context"]): Promise { - if (!step.toolCall || !step.toolResult) { - throw new Error("No tool call or tool result found") + if (!step.toolCalls || !step.toolResults || step.toolResults.length === 0) { + throw new Error("No tool calls or tool results found") } - const { id, toolName } = step.toolCall - const { result } = step.toolResult - const textParts = result.filter((part) => part.type === "textPart") + const toolResult = step.toolResults[0] + if (!toolResult) { + throw new Error("No tool result found") + } + const toolCall = step.toolCalls.find((tc) => tc.id === toolResult.id) + const { result } = toolResult + const textParts = result.filter((part): part is TextPart => part.type === "textPart") const files: (Omit | Omit)[] = [] for (const textPart of textParts) { let imageInfo: ReadImageFileResult | undefined @@ -27,7 +31,7 @@ export async function resolvingImageFileLogic({ }) continue } - const { path, mimeType, size } = imageInfo + const { path, mimeType } = imageInfo try { const buffer = await readFile(path) files.push({ @@ -47,8 +51,8 @@ export async function resolvingImageFileLogic({ createToolMessage([ { type: "toolResultPart", - toolCallId: id, - toolName, + toolCallId: toolResult.id, + toolName: toolCall?.toolName ?? toolResult.toolName, contents: files, }, ]), diff --git a/packages/runtime/src/states/resolving-pdf-file.test.ts b/packages/runtime/src/states/resolving-pdf-file.test.ts index 05bbdbff..71c4210b 100644 --- a/packages/runtime/src/states/resolving-pdf-file.test.ts +++ b/packages/runtime/src/states/resolving-pdf-file.test.ts @@ -1,5 +1,5 @@ -import { createId } from "@paralleldrive/cuid2" import { readFile } from "node:fs/promises" +import { createId } from "@paralleldrive/cuid2" import { beforeEach, describe, expect, it, vi } from "vitest" import { createCheckpoint, createRunSetting, createStep } from "../../test/run-params.js" import { StateMachineLogics } from "../index.js" @@ -25,24 +25,28 @@ describe("@perstack/runtime: StateMachineLogic['ResolvingPdfFile']", () => { const setting = createRunSetting() const checkpoint = createCheckpoint() const step = createStep({ - toolCall: { - id: "tc_123", - skillName: "@perstack/base", - toolName: "readPdfFile", - args: { path: "/test/file.pdf" }, - }, - toolResult: { - id: "tr_123", - skillName: "@perstack/base", - toolName: "readPdfFile", - result: [ - { - type: "textPart" as const, - text: JSON.stringify(pdfInfo), - id: createId(), - }, - ], - }, + toolCalls: [ + { + id: "tc_123", + skillName: "@perstack/base", + toolName: "readPdfFile", + args: { path: "/test/file.pdf" }, + }, + ], + toolResults: [ + { + id: "tc_123", + skillName: "@perstack/base", + toolName: "readPdfFile", + result: [ + { + type: "textPart" as const, + text: JSON.stringify(pdfInfo), + id: createId(), + }, + ], + }, + ], }) await expect( StateMachineLogics.ResolvingPdfFile({ @@ -71,26 +75,15 @@ describe("@perstack/runtime: StateMachineLogic['ResolvingPdfFile']", () => { toolName: "readPdfFile", contents: [ { - type: "textPart", + type: "fileInlinePart", id: expect.any(String), - text: "User uploads PDF file as follows.", + encodedData: Buffer.from("encoded_pdf_content").toString("base64"), + mimeType: "application/pdf", }, ], }, ], }, - { - type: "userMessage", - id: expect.any(String), - contents: [ - { - type: "fileInlinePart", - id: expect.any(String), - encodedData: Buffer.from("encoded_pdf_content").toString("base64"), - mimeType: "application/pdf", - }, - ], - }, ], }) }) @@ -105,24 +98,28 @@ describe("@perstack/runtime: StateMachineLogic['ResolvingPdfFile']", () => { const setting = createRunSetting() const checkpoint = createCheckpoint() const step = createStep({ - toolCall: { - id: "tc_123", - skillName: "@perstack/base", - toolName: "readPdfFile", - args: { path: "/nonexistent.pdf" }, - }, - toolResult: { - id: "tr_123", - skillName: "@perstack/base", - toolName: "readPdfFile", - result: [ - { - type: "textPart" as const, - text: JSON.stringify(pdfInfo), - id: createId(), - }, - ], - }, + toolCalls: [ + { + id: "tc_123", + skillName: "@perstack/base", + toolName: "readPdfFile", + args: { path: "/nonexistent.pdf" }, + }, + ], + toolResults: [ + { + id: "tc_123", + skillName: "@perstack/base", + toolName: "readPdfFile", + result: [ + { + type: "textPart" as const, + text: JSON.stringify(pdfInfo), + id: createId(), + }, + ], + }, + ], }) const result = await StateMachineLogics.ResolvingPdfFile({ setting, @@ -133,10 +130,81 @@ describe("@perstack/runtime: StateMachineLogic['ResolvingPdfFile']", () => { }) expect(result.type).toBe("finishToolCall") if (result.type !== "finishToolCall") throw new Error("Unexpected event type") - const userMessage = result.newMessages[1] - expect(userMessage.contents[0]).toMatchObject({ + const toolMessage = result.newMessages[0] + if (toolMessage.type !== "toolMessage") throw new Error("Expected toolMessage") + const toolResultPart = toolMessage.contents[0] + if (toolResultPart.type !== "toolResultPart") throw new Error("Expected toolResultPart") + expect(toolResultPart.contents[0]).toMatchObject({ type: "textPart", text: expect.stringContaining('Failed to read PDF file "/nonexistent.pdf"'), }) }) + + it("throws error when tool calls are missing", async () => { + const setting = createRunSetting() + const checkpoint = createCheckpoint() + const step = createStep({ toolCalls: undefined, toolResults: [] }) + await expect( + StateMachineLogics.ResolvingPdfFile({ + setting, + checkpoint, + step, + eventListener: async () => {}, + skillManagers: {}, + }), + ).rejects.toThrow("No tool calls or tool results found") + }) + + it("throws error when tool results are empty", async () => { + const setting = createRunSetting() + const checkpoint = createCheckpoint() + const step = createStep({ + toolCalls: [{ id: "tc_123", skillName: "@perstack/base", toolName: "readPdfFile", args: {} }], + toolResults: [], + }) + await expect( + StateMachineLogics.ResolvingPdfFile({ + setting, + checkpoint, + step, + eventListener: async () => {}, + skillManagers: {}, + }), + ).rejects.toThrow("No tool calls or tool results found") + }) + + it("handles invalid JSON in text part gracefully", async () => { + const setting = createRunSetting() + const checkpoint = createCheckpoint() + const step = createStep({ + toolCalls: [ + { id: "tc_123", skillName: "@perstack/base", toolName: "readPdfFile", args: {} }, + ], + toolResults: [ + { + id: "tc_123", + skillName: "@perstack/base", + toolName: "readPdfFile", + result: [{ type: "textPart" as const, text: "not valid json", id: createId() }], + }, + ], + }) + const result = await StateMachineLogics.ResolvingPdfFile({ + setting, + checkpoint, + step, + eventListener: async () => {}, + skillManagers: {}, + }) + expect(result.type).toBe("finishToolCall") + if (result.type !== "finishToolCall") throw new Error("Unexpected event type") + const toolMessage = result.newMessages[0] + if (toolMessage.type !== "toolMessage") throw new Error("Expected toolMessage") + const toolResultPart = toolMessage.contents[0] + if (toolResultPart.type !== "toolResultPart") throw new Error("Expected toolResultPart") + expect(toolResultPart.contents[0]).toMatchObject({ + type: "textPart", + text: "not valid json", + }) + }) }) diff --git a/packages/runtime/src/states/resolving-pdf-file.ts b/packages/runtime/src/states/resolving-pdf-file.ts index fafa3d8e..3ed612de 100644 --- a/packages/runtime/src/states/resolving-pdf-file.ts +++ b/packages/runtime/src/states/resolving-pdf-file.ts @@ -1,6 +1,6 @@ import { readFile } from "node:fs/promises" -import { type FileInlinePart, type RunEvent, type TextPart, finishToolCall } from "@perstack/core" -import { createToolMessage, createUserMessage } from "../messages/message.js" +import { type FileInlinePart, finishToolCall, type RunEvent, type TextPart } from "@perstack/core" +import { createToolMessage } from "../messages/message.js" import type { RunSnapshot } from "../runtime-state-machine.js" type ReadPdfFileResult = { path: string; mimeType: string; size: number } @@ -9,34 +9,38 @@ export async function resolvingPdfFileLogic({ checkpoint, step, }: RunSnapshot["context"]): Promise { - if (!step.toolCall || !step.toolResult) { - throw new Error("No tool call or tool result found") + if (!step.toolCalls || !step.toolResults || step.toolResults.length === 0) { + throw new Error("No tool calls or tool results found") } - const { id, toolName } = step.toolCall - const { result } = step.toolResult - const textParts = result.filter((part) => part.type === "textPart") - const files: (Omit | Omit)[] = [] + const toolResult = step.toolResults[0] + if (!toolResult) { + throw new Error("No tool result found") + } + const toolCall = step.toolCalls.find((tc) => tc.id === toolResult.id) + const { result } = toolResult + const textParts = result.filter((part): part is TextPart => part.type === "textPart") + const contents: (Omit | Omit)[] = [] for (const textPart of textParts) { let pdfInfo: ReadPdfFileResult | undefined try { pdfInfo = JSON.parse(textPart.text) as ReadPdfFileResult } catch { - files.push({ + contents.push({ type: "textPart", text: textPart.text, }) continue } - const { path, mimeType, size } = pdfInfo + const { path, mimeType } = pdfInfo try { const buffer = await readFile(path) - files.push({ + contents.push({ type: "fileInlinePart", encodedData: buffer.toString("base64"), mimeType, }) } catch (error) { - files.push({ + contents.push({ type: "textPart", text: `Failed to read PDF file "${path}": ${error instanceof Error ? error.message : String(error)}`, }) @@ -47,17 +51,11 @@ export async function resolvingPdfFileLogic({ createToolMessage([ { type: "toolResultPart", - toolCallId: id, - toolName, - contents: [ - { - type: "textPart", - text: "User uploads PDF file as follows.", - }, - ], + toolCallId: toolResult.id, + toolName: toolCall?.toolName ?? toolResult.toolName, + contents, }, ]), - createUserMessage(files), ], }) } diff --git a/packages/runtime/src/states/resolving-thought.test.ts b/packages/runtime/src/states/resolving-thought.test.ts index e4089f5c..558c7507 100644 --- a/packages/runtime/src/states/resolving-thought.test.ts +++ b/packages/runtime/src/states/resolving-thought.test.ts @@ -8,14 +8,17 @@ describe("@perstack/runtime: StateMachineLogic['ResolvingThought']", () => { const setting = createRunSetting() const checkpoint = createCheckpoint() const step = createStep({ - toolCall: { + toolCalls: [ + { id: "tc_123", skillName: "@perstack/base", toolName: "think", args: { thought: "Let me analyze this problem step by step" }, }, - toolResult: { - id: "tr_123", + ], + toolResults: [ + { + id: "tc_123", skillName: "@perstack/base", toolName: "think", result: [ @@ -26,6 +29,7 @@ describe("@perstack/runtime: StateMachineLogic['ResolvingThought']", () => { }, ], }, + ], }) await expect( StateMachineLogics.ResolvingThought({ diff --git a/packages/runtime/src/states/resolving-tool-result.test.ts b/packages/runtime/src/states/resolving-tool-result.test.ts index a6cacdc1..2b0eb144 100644 --- a/packages/runtime/src/states/resolving-tool-result.test.ts +++ b/packages/runtime/src/states/resolving-tool-result.test.ts @@ -9,14 +9,17 @@ describe("@perstack/runtime: StateMachineLogic['ResolvingToolResult']", () => { const setting = createRunSetting() const checkpoint = createCheckpoint() const step = createStep({ - toolCall: { + toolCalls: [ + { id: "tc_123", skillName: "@perstack/base", toolName: "readTextFile", args: { path: "/test/file.txt" }, }, - toolResult: { - id: "tr_123", + ], + toolResults: [ + { + id: "tc_123", skillName: "@perstack/base", toolName: "readTextFile", result: [ @@ -27,6 +30,7 @@ describe("@perstack/runtime: StateMachineLogic['ResolvingToolResult']", () => { }, ], }, + ], }) await expect( StateMachineLogics.ResolvingToolResult({ @@ -67,7 +71,7 @@ describe("@perstack/runtime: StateMachineLogic['ResolvingToolResult']", () => { }) }) - it("throws error when tool call or result missing", async () => { + it("throws error when tool calls or results missing", async () => { const setting = createRunSetting() const checkpoint = createCheckpoint() const step = createStep({ @@ -84,21 +88,24 @@ describe("@perstack/runtime: StateMachineLogic['ResolvingToolResult']", () => { eventListener: async () => {}, skillManagers: {}, }), - ).rejects.toThrow("No tool call or tool result found") + ).rejects.toThrow("No tool calls or tool results found") }) it("filters non-text and non-image parts from result", async () => { const setting = createRunSetting() const checkpoint = createCheckpoint() const step = createStep({ - toolCall: { + toolCalls: [ + { id: "tc_456", skillName: "@perstack/base", toolName: "readImageFile", args: { path: "/test/image.png" }, }, - toolResult: { - id: "tr_456", + ], + toolResults: [ + { + id: "tc_456", skillName: "@perstack/base", toolName: "readImageFile", result: [ @@ -117,6 +124,7 @@ describe("@perstack/runtime: StateMachineLogic['ResolvingToolResult']", () => { }, ], }, + ], }) const result = await StateMachineLogics.ResolvingToolResult({ setting, diff --git a/packages/runtime/src/states/resolving-tool-result.ts b/packages/runtime/src/states/resolving-tool-result.ts index 5ad4486a..4dabd109 100644 --- a/packages/runtime/src/states/resolving-tool-result.ts +++ b/packages/runtime/src/states/resolving-tool-result.ts @@ -1,4 +1,4 @@ -import { type RunEvent, finishToolCall } from "@perstack/core" +import { finishToolCall, type RunEvent } from "@perstack/core" import { createToolMessage } from "../messages/message.js" import type { RunSnapshot } from "../runtime-state-machine.js" @@ -7,23 +7,24 @@ export async function resolvingToolResultLogic({ checkpoint, step, }: RunSnapshot["context"]): Promise { - if (!step.toolCall || !step.toolResult) { - throw new Error("No tool call or tool result found") + if (!step.toolCalls || !step.toolResults || step.toolResults.length === 0) { + throw new Error("No tool calls or tool results found") } - const { id, toolName } = step.toolCall - const { result } = step.toolResult + const toolResultParts = step.toolResults.map((toolResult) => { + const toolCall = step.toolCalls?.find((tc) => tc.id === toolResult.id) + return { + type: "toolResultPart" as const, + toolCallId: toolResult.id, + toolName: toolCall?.toolName ?? toolResult.toolName, + contents: toolResult.result.filter( + (part) => + part.type === "textPart" || + part.type === "imageInlinePart" || + part.type === "fileInlinePart", + ), + } + }) return finishToolCall(setting, checkpoint, { - newMessages: [ - createToolMessage([ - { - type: "toolResultPart", - toolCallId: id, - toolName, - contents: result.filter( - (part) => part.type === "textPart" || part.type === "imageInlinePart", - ), - }, - ]), - ], + newMessages: [createToolMessage(toolResultParts)], }) } diff --git a/packages/tui/src/hooks/state/use-step-store.ts b/packages/tui/src/hooks/state/use-step-store.ts index 2481d1b2..f05db413 100644 --- a/packages/tui/src/hooks/state/use-step-store.ts +++ b/packages/tui/src/hooks/state/use-step-store.ts @@ -9,18 +9,22 @@ type StepBuilder = { completion?: string } const TOOL_RESULT_EVENT_TYPES = new Set([ - "resolveToolResult", + "resolveToolResults", "resolveThought", "resolvePdfFile", "resolveImageFile", "attemptCompletion", ]) +const isToolCallsEvent = (event: PerstackEvent): event is RunEvent & { toolCalls: ToolCall[] } => + "type" in event && event.type === "callTools" && "toolCalls" in event const isToolCallEvent = (event: PerstackEvent): event is RunEvent & { toolCall: ToolCall } => "type" in event && - (event.type === "callTool" || - event.type === "callInteractiveTool" || - event.type === "callDelegate") && + (event.type === "callInteractiveTool" || event.type === "callDelegate") && "toolCall" in event +const isToolResultsEvent = ( + event: PerstackEvent, +): event is RunEvent & { toolResults: ToolResult[] } => + "type" in event && event.type === "resolveToolResults" && "toolResults" in event const isToolResultEvent = (event: PerstackEvent): event is RunEvent & { toolResult: ToolResult } => "type" in event && TOOL_RESULT_EVENT_TYPES.has(event.type) && "toolResult" in event const checkIsSuccess = (result: Array<{ type: string; text?: string }>): boolean => { @@ -48,6 +52,14 @@ const processEvent = (stepMap: Map, event: PerstackEvent): builder.query = extractQuery(event) } else if (event.type === "completeRun") { builder.completion = event.text + } else if (isToolCallsEvent(event)) { + for (const toolCall of event.toolCalls) { + builder.tools.set(toolCall.id, { + id: toolCall.id, + toolName: toolCall.toolName, + args: toolCall.args as Record, + }) + } } else if (isToolCallEvent(event)) { const { toolCall } = event builder.tools.set(toolCall.id, { @@ -55,6 +67,14 @@ const processEvent = (stepMap: Map, event: PerstackEvent): toolName: toolCall.toolName, args: toolCall.args as Record, }) + } else if (isToolResultsEvent(event)) { + for (const toolResult of event.toolResults) { + const existing = builder.tools.get(toolResult.id) + if (existing && Array.isArray(toolResult.result)) { + existing.result = toolResult.result + existing.isSuccess = checkIsSuccess(toolResult.result) + } + } } else if (isToolResultEvent(event)) { const { toolResult } = event const existing = builder.tools.get(toolResult.id) diff --git a/vitest.config.ts b/vitest.config.ts index b6fa48dc..4d4c2f3b 100644 --- a/vitest.config.ts +++ b/vitest.config.ts @@ -9,7 +9,17 @@ export default defineConfig({ globals: true, environment: "node", include: ["**/*.test.ts"], - exclude: ["**/node_modules/**", "**/dist/**"], + exclude: ["**/node_modules/**", "**/dist/**", "e2e/**"], + }, + }, + { + test: { + name: "e2e", + globals: true, + environment: "node", + include: ["e2e/**/*.test.ts"], + testTimeout: 300000, + hookTimeout: 300000, }, }, ],