perstack-ai · FL4TLiN3 · Dec 8, 2025 · Dec 8, 2025 · Dec 8, 2025 · Dec 8, 2025
diff --git a/.changeset/parallel-tool-calls.md b/.changeset/parallel-tool-calls.md
@@ -0,0 +1,32 @@
+---
+"@perstack/core": patch
+"@perstack/runtime": patch
+"@perstack/api-client": patch
+"@perstack/base": patch
+"@perstack/tui": patch
+"perstack": patch
+---
+
+Add parallel tool call support and mixed tool call handling
+
+Features:
+
+- Process all tool calls from a single LLM response instead of only the first one
+- MCP tools execute in parallel using `Promise.all`
+- Support mixed tool calls (MCP + Delegate + Interactive in same response)
+- Process tools in priority order: MCP → Delegate → Interactive
+- Preserve partial results across checkpoint boundaries
+
+Schema Changes:
+
+- `Step.toolCall` → `Step.toolCalls` (array)
+- `Step.toolResult` → `Step.toolResults` (array)
+- Add `Step.pendingToolCalls` for tracking unprocessed tool calls
+- Add `Checkpoint.pendingToolCalls` and `Checkpoint.partialToolResults` for resume
+
+Event Changes:
+
+- `callTool` → `callTools`
+- `resolveToolResult` → `resolveToolResults`
+- Add `resumeToolCalls` and `finishAllToolCalls` events
+
diff --git a/AGENTS.md b/AGENTS.md
@@ -416,7 +416,7 @@ Key points:
 ## Testing
 
 - **Unit tests:** Vitest (`*.test.ts` files), run with `pnpm test`
-- **E2E tests:** Manual testing by following `E2E.md` — agent should read and execute the procedures
+- **E2E tests:** Vitest (`e2e/*.test.ts` files), run with `pnpm test:e2e`
 - **Coverage:** V8 provider, lcov output
 
 ### Unit Test Scope
@@ -523,11 +523,11 @@ pnpm build              # Build all packages
 
 ### E2E Testing (MANDATORY)
 
-After build passes, run E2E tests by following `E2E.md`:
+After build passes, run E2E tests:
 
 ```bash
-pnpm build  # Must build first
-# Then run E2E tests as documented in E2E.md
+pnpm build     # Must build first
+pnpm test:e2e  # Run E2E tests
 ```
 
 **E2E tests must pass before pushing.** This catches runtime issues that unit tests miss.
@@ -599,5 +599,5 @@ pick = ["attemptCompletion", "think"]
 - [ ] `pnpm check-deps` passes
 - [ ] `pnpm reset && pnpm test` passes
 - [ ] `pnpm build` passes
-- [ ] E2E tests pass (follow `E2E.md`)
+- [ ] `pnpm test:e2e` passes
 - [ ] Versioning rules in `CONTRIBUTING.md` are followed
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
@@ -78,7 +78,8 @@ pnpm build
 git checkout -b feature/your-feature
 # ... edit code ...
 pnpm changeset
-pnpm typecheck && pnpm test
+pnpm typecheck && pnpm test && pnpm build
+pnpm test:e2e  # Run E2E tests
 git commit -m "feat: your changes"
 ```
 
@@ -195,6 +196,7 @@ pnpm changeset
 pnpm typecheck  # Must pass
 pnpm test       # Must pass
 pnpm build      # Must succeed
+pnpm test:e2e   # Must pass
 ```
 
 ### 4. Commit and Push
@@ -428,8 +430,13 @@ Perstack uses a two-stage release workflow powered by [changesets/action](https:
    - Updated `CHANGELOG.md` with PR links and author attribution
 
 **Stage 2: Publish**
-1. Review and merge "Version Packages" PR
-2. Release workflow automatically:
+1. Review "Version Packages" PR
+2. **Run E2E tests locally before merging:**
+   ```bash
+   pnpm build && pnpm test:e2e
+   ```
+3. Merge "Version Packages" PR
+4. Release workflow automatically:
    - Publishes packages to npm
    - Creates git tags
    - Creates GitHub Releases
@@ -571,6 +578,7 @@ Before requesting review, ensure:
 - [ ] Changeset created with appropriate version bump
 - [ ] All tests pass (`pnpm test`)
 - [ ] Types check across all packages (`pnpm typecheck`)
+- [ ] E2E tests pass (`pnpm test:e2e`)
 - [ ] Documentation updated (README, JSDoc, CHANGELOG via changeset)
 - [ ] Migration guide included (for breaking changes)
 - [ ] No unintended version sync issues

diff --git a/E2E.md b/E2E.md
diff --git a/docs/content/making-experts/testing.mdx b/docs/content/making-experts/testing.mdx
@@ -70,8 +70,8 @@ import { run } from "@perstack/runtime"
 const result = await run(params, {
   // Mock eventListener for assertions
   eventListener: (event) => {
-    if (event.type === "callTool") {
-      expect(event.toolCall.name).toBe("expectedTool")
+    if (event.type === "callTools") {
+      expect(event.toolCalls[0].toolName).toBe("expectedTool")
     }
   }
 })

diff --git a/e2e/README.md b/e2e/README.md
@@ -0,0 +1,120 @@
+# E2E Tests
+
+End-to-end tests for Perstack CLI and runtime.
+
+## Prerequisites
+
+```bash
+pnpm build
+```
+
+## Running Tests
+
+```bash
+# Run all E2E tests (parallel execution)
+pnpm test:e2e
+
+# Run specific test file
+pnpm test:e2e -- run.test.ts
+
+# Run tests matching pattern
+pnpm test:e2e -- --testNamePattern "publish"
+```
+
+## Test Structure
+
+```
+e2e/
+├── lib/                      # Test utilities
+│   ├── runner.ts             # CLI and Expert execution
+│   ├── event-parser.ts       # Runtime event parsing
+│   └── assertions.ts         # Custom assertions
+├── experts/                  # Expert definitions for tests
+│   ├── mixed-tools.toml      # MCP + Delegate + Interactive
+│   ├── parallel-mcp.toml     # Parallel MCP calls
+│   ├── delegate-chain.toml   # Delegation chain
+│   └── continue-resume.toml  # Continue/resume functionality
+├── run.test.ts               # CLI run command
+├── publish.test.ts           # CLI publish command
+├── unpublish.test.ts         # CLI unpublish command
+├── tag.test.ts               # CLI tag command
+├── status.test.ts            # CLI status command
+├── mixed-tools.test.ts       # Mixed tool calls (MCP + Delegate + Interactive)
+├── parallel-mcp.test.ts      # Parallel MCP tool execution
+├── delegate-chain.test.ts    # Expert delegation chain
+└── continue-resume.test.ts   # --continue-run and --resume-from
+```
+
+## Test Categories
+
+### CLI Commands
+
+Tests for CLI argument validation and error handling.
+
+| File | Tests | Coverage |
+|------|-------|----------|
+| run.test.ts | 4 | Missing args, nonexistent expert, invalid config |
+| publish.test.ts | 4 | Dry-run success, nonexistent expert, config errors |
+| unpublish.test.ts | 2 | Missing version, missing --force |
+| tag.test.ts | 2 | Missing version, missing tags |
+| status.test.ts | 3 | Missing version/status, invalid status |
+
+### Runtime Features
+
+Tests for parallel tool calls, delegation, and state management.
+
+| File | Tests | Coverage |
+|------|-------|----------|
+| mixed-tools.test.ts | 4 | MCP + Delegate + Interactive in single response |
+| parallel-mcp.test.ts | 3 | Parallel MCP tool execution |
+| delegate-chain.test.ts | 3 | Multi-level delegation |
+| continue-resume.test.ts | 4 | --continue-run, --resume-from |
+
+## Writing Tests
+
+### CLI Command Tests
+
+```typescript
+import { describe, expect, it } from "vitest"
+import { runCli } from "./lib/runner.js"
+
+describe("CLI command", () => {
+  it("should fail with invalid args", async () => {
+    const result = await runCli(["command", "invalid-arg"])
+    expect(result.exitCode).toBe(1)
+  })
+})
+```
+
+### Runtime Tests
+
+```typescript
+import { beforeAll, describe, expect, it } from "vitest"
+import { assertEventSequenceContains } from "./lib/assertions.js"
+import { type RunResult, runExpert } from "./lib/runner.js"
+
+describe("Runtime feature", () => {
+  let result: RunResult
+
+  beforeAll(async () => {
+    result = await runExpert("expert-key", "query", {
+      configPath: "./e2e/experts/your-expert.toml",
+      timeout: 180000,
+    })
+  }, 200000)
+
+  it("should emit expected events", () => {
+    expect(
+      assertEventSequenceContains(result.events, ["startRun", "completeRun"]).passed,
+    ).toBe(true)
+  })
+})
+```
+
+## Notes
+
+- Test files run in parallel via Vitest
+- Runtime tests require API keys (set in `.env.local`)
+- TUI-based commands (`start`) are excluded from E2E tests
+- Tests that call the registry API (actual publish and unpublish) require registry access and are not included
+