diff --git a/.changeset/parallel-tool-calls.md b/.changeset/parallel-tool-calls.md
new file mode 100644
index 00000000..0f147a71
--- /dev/null
+++ b/.changeset/parallel-tool-calls.md
@@ -0,0 +1,32 @@
+---
+"@perstack/core": patch
+"@perstack/runtime": patch
+"@perstack/api-client": patch
+"@perstack/base": patch
+"@perstack/tui": patch
+"perstack": patch
+---
+
+Add parallel tool call support and mixed tool call handling
+
+Features:
+
+- Process all tool calls from a single LLM response instead of only the first one
+- MCP tools execute in parallel using `Promise.all`
+- Support mixed tool calls (MCP + Delegate + Interactive in same response)
+- Process tools in priority order: MCP → Delegate → Interactive
+- Preserve partial results across checkpoint boundaries
+
+Schema Changes:
+
+- `Step.toolCall` → `Step.toolCalls` (array)
+- `Step.toolResult` → `Step.toolResults` (array)
+- Add `Step.pendingToolCalls` for tracking unprocessed tool calls
+- Add `Checkpoint.pendingToolCalls` and `Checkpoint.partialToolResults` for resume
+
+Event Changes:
+
+- `callTool` → `callTools`
+- `resolveToolResult` → `resolveToolResults`
+- Add `resumeToolCalls` and `finishAllToolCalls` events
+
diff --git a/AGENTS.md b/AGENTS.md
index abe454c2..78f2c88d 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -416,7 +416,7 @@ Key points:
 ## Testing
 
 - **Unit tests:** Vitest (`*.test.ts` files), run with `pnpm test`
-- **E2E tests:** Manual testing by following `E2E.md` — agent should read and execute the procedures
+- **E2E tests:** Vitest (`e2e/*.test.ts` files), run with `pnpm test:e2e`
 - **Coverage:** V8 provider, lcov output
 
 ### Unit Test Scope
@@ -523,11 +523,11 @@ pnpm build              # Build all packages
 
 ### E2E Testing (MANDATORY)
 
-After build passes, run E2E tests by following `E2E.md`:
+After build passes, run E2E tests:
 
 ```bash
-pnpm build  # Must build first
-# Then run E2E tests as documented in E2E.md
+pnpm build     # Must build first
+pnpm test:e2e  # Run E2E tests
 ```
 
 **E2E tests must pass before pushing.** This catches runtime issues that unit tests miss.
@@ -599,5 +599,5 @@ pick = ["attemptCompletion", "think"]
 - [ ] `pnpm check-deps` passes
 - [ ] `pnpm reset && pnpm test` passes
 - [ ] `pnpm build` passes
-- [ ] E2E tests pass (follow `E2E.md`)
+- [ ] `pnpm test:e2e` passes
 - [ ] Versioning rules in `CONTRIBUTING.md` are followed
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 2c5ab59f..a39eb518 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -78,7 +78,8 @@ pnpm build
 git checkout -b feature/your-feature
 # ... edit code ...
 pnpm changeset
-pnpm typecheck && pnpm test
+pnpm typecheck && pnpm test && pnpm build
+pnpm test:e2e  # Run E2E tests
 git commit -m "feat: your changes"
 ```
 
@@ -195,6 +196,7 @@ pnpm changeset
 pnpm typecheck  # Must pass
 pnpm test       # Must pass
 pnpm build      # Must succeed
+pnpm test:e2e   # Run E2E tests
 ```
 
 ### 4. Commit and Push
@@ -428,8 +430,13 @@ Perstack uses a two-stage release workflow powered by [changesets/action](https:
    - Updated `CHANGELOG.md` with PR links and author attribution
 
 **Stage 2: Publish**
-1. Review and merge "Version Packages" PR
-2. Release workflow automatically:
+1. Review "Version Packages" PR
+2. **Run E2E tests locally before merging:**
+   ```bash
+   pnpm build && pnpm test:e2e
+   ```
+3. Merge "Version Packages" PR
+4. Release workflow automatically:
    - Publishes packages to npm
    - Creates git tags
    - Creates GitHub Releases
@@ -571,6 +578,7 @@ Before requesting review, ensure:
 - [ ] Changeset created with appropriate version bump
 - [ ] All tests pass (`pnpm test`)
 - [ ] Types check across all packages (`pnpm typecheck`)
+- [ ] E2E tests pass (`pnpm test:e2e`)
 - [ ] Documentation updated (README, JSDoc, CHANGELOG via changeset)
 - [ ] Migration guide included (for breaking changes)
 - [ ] No unintended version sync issues
diff --git a/E2E.md b/E2E.md
deleted file mode 100644
index 5cc4f0bb..00000000
--- a/E2E.md
+++ /dev/null
@@ -1,113 +0,0 @@
-# E2E Testing Guide
-
-Manual E2E testing procedures for perstack CLI.
-
-## Prerequisites
-
-```bash
-pnpm build
-```
-
-## Test Commands
-
-Use `npx tsx` or `bun` to run the CLI:
-
-```bash
-CLI="npx tsx packages/perstack/dist/bin/cli.js"
-```
-
-### 1. Help and Version
-
-```bash
-$CLI --help
-$CLI --version
-$CLI run --help
-$CLI publish --help
-$CLI unpublish --help
-$CLI tag --help
-$CLI status --help
-```
-
-**Expected**: All commands display help/version without errors.
-
-### 2. Publish Dry Run
-
-```bash
-# Valid expert
-$CLI publish tic-tac-toe --dry-run
-
-# Invalid expert
-$CLI publish nonexistent --dry-run
-```
-
-**Expected**:
-- Valid: Outputs JSON payload
-- Invalid: Error message with available experts, exit code 1
-
-### 3. Argument Validation
-
-```bash
-# Missing required args
-$CLI run
-$CLI run expertOnly
-
-# Invalid format (missing version)
-$CLI unpublish no-version --force
-$CLI tag no-version tag1
-$CLI status no-version available
-
-# Invalid status value
-$CLI status expert@1.0.0 invalid-status
-
-# Missing tags
-$CLI tag expert@1.0.0
-```
-
-**Expected**: All return appropriate error messages with exit code 1.
-
-### 4. Config File Handling
-
-```bash
-# Nonexistent config
-$CLI publish tic-tac-toe --dry-run --config nonexistent.toml
-
-# No config in directory
-cd /tmp && $CLI publish tic-tac-toe --dry-run
-```
-
-**Expected**: Error message indicating config file not found, exit code 1.
-
-### 5. Run Command Error Handling
-
-```bash
-# Nonexistent expert
-$CLI run nonexistent-expert "test query"
-```
-
-**Expected**: Error message with exit code 1.
-
-## Quick Test Script
-
-```bash
-#!/bin/bash
-set -e
-CLI="npx tsx packages/perstack/dist/bin/cli.js"
-
-echo "=== Help Commands ==="
-$CLI --help > /dev/null && echo "OK: --help"
-$CLI --version > /dev/null && echo "OK: --version"
-
-echo "=== Publish Dry Run ==="
-$CLI publish tic-tac-toe --dry-run > /dev/null && echo "OK: publish dry-run"
-$CLI publish nonexistent --dry-run 2>&1 && exit 1 || echo "OK: publish invalid expert"
-
-echo "=== Argument Validation ==="
-$CLI run 2>&1 && exit 1 || echo "OK: run missing args"
-$CLI unpublish no-version --force 2>&1 && exit 1 || echo "OK: unpublish invalid format"
-$CLI status expert@1.0.0 invalid-status 2>&1 && exit 1 || echo "OK: status invalid value"
-
-echo "=== Config Handling ==="
-$CLI publish tic-tac-toe --dry-run --config nonexistent.toml 2>&1 && exit 1 || echo "OK: nonexistent config"
-
-echo "All tests passed!"
-```
diff --git a/docs/content/making-experts/testing.mdx b/docs/content/making-experts/testing.mdx
index 636e612b..6497df15 100644
--- a/docs/content/making-experts/testing.mdx
+++ b/docs/content/making-experts/testing.mdx
@@ -70,8 +70,8 @@ import { run } from "@perstack/runtime"
 const result = await run(params, {
   // Mock eventListener for assertions
   eventListener: (event) => {
-    if (event.type === "callTool") {
-      expect(event.toolCall.name).toBe("expectedTool")
+    if (event.type === "callTools") {
+      expect(event.toolCalls[0].toolName).toBe("expectedTool")
     }
   }
 })
diff --git a/e2e/README.md b/e2e/README.md
new file mode 100644
index 00000000..1d430bd5
--- /dev/null
+++ b/e2e/README.md
@@ -0,0 +1,120 @@
+# E2E Tests
+
+End-to-end tests for Perstack CLI and runtime.
+
+## Prerequisites
+
+```bash
+pnpm build
+```
+
+## Running Tests
+
+```bash
+# Run all E2E tests (parallel execution)
+pnpm test:e2e
+
+# Run specific test file
+pnpm test:e2e -- run.test.ts
+
+# Run tests matching pattern
+pnpm test:e2e -- --testNamePattern "publish"
+```
+
+## Test Structure
+
+```
+e2e/
+├── lib/                      # Test utilities
+│   ├── runner.ts             # CLI and Expert execution
+│   ├── event-parser.ts       # Runtime event parsing
+│   └── assertions.ts         # Custom assertions
+├── experts/                  # Expert definitions for tests
+│   ├── mixed-tools.toml      # MCP + Delegate + Interactive
+│   ├── parallel-mcp.toml     # Parallel MCP calls
+│   ├── delegate-chain.toml   # Delegation chain
+│   └── continue-resume.toml  # Continue/resume functionality
+├── run.test.ts               # CLI run command
+├── publish.test.ts           # CLI publish command
+├── unpublish.test.ts         # CLI unpublish command
+├── tag.test.ts               # CLI tag command
+├── status.test.ts            # CLI status command
+├── mixed-tools.test.ts       # Mixed tool calls (MCP + Delegate + Interactive)
+├── parallel-mcp.test.ts      # Parallel MCP tool execution
+├── delegate-chain.test.ts    # Expert delegation chain
+└── continue-resume.test.ts   # --continue-run and --resume-from
+```
+
+## Test Categories
+
+### CLI Commands
+
+Tests for CLI argument validation and error handling.
+
+| File | Tests | Coverage |
+|------|-------|----------|
+| run.test.ts | 4 | Missing args, nonexistent expert, invalid config |
+| publish.test.ts | 4 | dry-run success, nonexistent expert, config errors |
+| unpublish.test.ts | 2 | Missing version, missing --force |
+| tag.test.ts | 2 | Missing version, missing tags |
+| status.test.ts | 3 | Missing version/status, invalid status |
+
+### Runtime Features
+
+Tests for parallel tool calls, delegation, and state management.
+
+| File | Tests | Coverage |
+|------|-------|----------|
+| mixed-tools.test.ts | 4 | MCP + Delegate + Interactive in single response |
+| parallel-mcp.test.ts | 3 | Parallel MCP tool execution |
+| delegate-chain.test.ts | 3 | Multi-level delegation |
+| continue-resume.test.ts | 4 | --continue-run, --resume-from |
+
+## Writing Tests
+
+### CLI Command Tests
+
+```typescript
+import { describe, expect, it } from "vitest"
+import { runCli } from "./lib/runner.js"
+
+describe("CLI command", () => {
+  it("should fail with invalid args", async () => {
+    const result = await runCli(["command", "invalid-arg"])
+    expect(result.exitCode).toBe(1)
+  })
+})
+```
+
+### Runtime Tests
+
+```typescript
+import { beforeAll, describe, expect, it } from "vitest"
+import { assertEventSequenceContains } from "./lib/assertions.js"
+import { type RunResult, runExpert } from "./lib/runner.js"
+
+describe("Runtime feature", () => {
+  let result: RunResult
+
+  beforeAll(async () => {
+    result = await runExpert("expert-key", "query", {
+      configPath: "./e2e/experts/your-expert.toml",
+      timeout: 180000,
+    })
+  }, 200000)
+
+  it("should emit expected events", () => {
+    expect(
+      assertEventSequenceContains(result.events, ["startRun", "completeRun"]).passed,
+    ).toBe(true)
+  })
+})
+```
+
+## Notes
+
+- Tests run in parallel via vitest
+- Runtime tests require API keys (set in `.env.local`)
+- TUI-based commands (`start`) are excluded from E2E tests
+- API-calling tests (actual publish, unpublish) require registry access and are not included
+
diff --git a/e2e/continue-resume.test.ts b/e2e/continue-resume.test.ts
new file mode 100644
index 00000000..a99e85bb
--- /dev/null
+++ b/e2e/continue-resume.test.ts
@@ -0,0 +1,70 @@
+import { describe, expect, it } from "vitest"
+import { assertEventSequenceContains } from "./lib/assertions.js"
+import { filterEventsByType, getEventSequence } from "./lib/event-parser.js"
+import { type RunResult, runExpert } from "./lib/runner.js"
+
+const CONFIG_PATH = "./e2e/experts/continue-resume.toml"
+const QUERY = "Test continue/resume functionality"
+const TIMEOUT = 180000
+// Vitest per-test timeouts; tests that run the expert twice get double the budget.
+const SINGLE_RUN_TEST_TIMEOUT = 200000
+const TWO_RUN_TEST_TIMEOUT = 400000
+
+/** Starts a fresh run of `expertKey` with the shared query. */
+function startRun(expertKey: string): Promise<RunResult> {
+  return runExpert(expertKey, QUERY, { configPath: CONFIG_PATH, timeout: TIMEOUT })
+}
+
+/** Starts an "e2e-continue" run, then continues that run with an interactive result. */
+async function startThenContinue(): Promise<RunResult> {
+  const { runId } = await startRun("e2e-continue")
+  // Fail here with a clear message instead of passing a null run ID on to the CLI.
+  if (runId === null) throw new Error("Initial run did not produce a run ID")
+  return runExpert("e2e-continue", "User confirmed the test", {
+    configPath: CONFIG_PATH,
+    continueRunId: runId,
+    isInteractiveResult: true,
+    timeout: TIMEOUT,
+  })
+}
+
+describe("Continue and Resume From Checkpoint", () => {
+  it("should stop at interactive tool and get run ID", async () => {
+    const result = await startRun("e2e-continue")
+    expect(
+      assertEventSequenceContains(result.events, [
+        "startRun",
+        "callInteractiveTool",
+        "stopRunByInteractiveTool",
+      ]).passed,
+    ).toBe(true)
+    expect(result.runId).not.toBeNull()
+  }, SINGLE_RUN_TEST_TIMEOUT)
+
+  it("should continue run with --continue-run", async () => {
+    const continueResult = await startThenContinue()
+    expect(assertEventSequenceContains(continueResult.events, ["startRun"]).passed).toBe(true)
+    // The continued run must start from the checkpoint left by the interactive stop.
+    expect(
+      continueResult.events.some(
+        (e) =>
+          e.type === "startRun" &&
+          (e as { initialCheckpoint?: { status?: string } }).initialCheckpoint?.status ===
+            "stoppedByInteractiveTool",
+      ),
+    ).toBe(true)
+  }, TWO_RUN_TEST_TIMEOUT)
+
+  it("should complete after continue", async () => {
+    const continueResult = await startThenContinue()
+    expect(getEventSequence(continueResult.events)).toContain("completeRun")
+  }, TWO_RUN_TEST_TIMEOUT)
+
+  it("should capture checkpoint for resume", async () => {
+    const result = await startRun("e2e-resume")
+    const stopEvent = filterEventsByType(result.events, "stopRunByInteractiveTool")[0]
+    expect(stopEvent).toBeDefined()
+    expect((stopEvent as { checkpoint?: { id?: string } }).checkpoint?.id).toBeDefined()
+    expect(result.runId).not.toBeNull()
+  }, SINGLE_RUN_TEST_TIMEOUT)
+})
diff --git a/e2e/delegate-chain.test.ts b/e2e/delegate-chain.test.ts
new file mode 100644
index 00000000..7aa05d03
--- /dev/null
+++ b/e2e/delegate-chain.test.ts
@@ -0,0 +1,37 @@
+import { beforeAll, describe, expect, it } from "vitest"
+import { assertEventSequenceContains } from "./lib/assertions.js"
+import { getEventSequence } from "./lib/event-parser.js"
+import { type RunResult, runExpert } from "./lib/runner.js"
+
+const CONFIG_PATH = "./e2e/experts/delegate-chain.toml"
+const QUERY = "Test delegate chain: process this request through multiple levels"
+
+// One shared run of the top-level expert feeds every assertion in this suite.
+describe("Delegate Chain", () => {
+  let result: RunResult
+
+  /** Number of times `eventType` appears in the shared run's event sequence. */
+  const countOf = (eventType: string): number =>
+    getEventSequence(result.events).filter((type) => type === eventType).length
+
+  beforeAll(async () => {
+    result = await runExpert("e2e-delegate-chain", QUERY, {
+      configPath: CONFIG_PATH,
+      timeout: 180000,
+    })
+  }, 200000)
+
+  it("should delegate through chain", () => {
+    const expected = ["startRun", "callDelegate", "stopRunByDelegate"]
+    expect(assertEventSequenceContains(result.events, expected).passed).toBe(true)
+  })
+
+  it("should have multiple delegation levels", () => {
+    expect(countOf("callDelegate")).toBeGreaterThanOrEqual(2)
+    expect(countOf("stopRunByDelegate")).toBeGreaterThanOrEqual(2)
+  })
+
+  it("should return through chain and complete", () => {
+    expect(countOf("completeRun")).toBeGreaterThanOrEqual(3)
+  })
+})
diff --git a/e2e/experts/continue-resume.toml b/e2e/experts/continue-resume.toml
new file mode 100644
index 00000000..d08151ca
--- /dev/null
+++ b/e2e/experts/continue-resume.toml
@@ -0,0 +1,64 @@
+model = "claude-sonnet-4-5"
+temperature = 0.3
+# Root-level key: kept above [provider] because TOML assigns keys after a table header to that table.
+envPath = [".env", ".env.local"]
+
+[provider]
+providerName = "anthropic"
+
+[experts."e2e-continue"]
+version = "1.0.0"
+description = "E2E test expert for continue functionality"
+instruction = """
+You are an E2E test expert that tests run continuation.
+
+When given a query:
+1. First, ask the user for confirmation using askUser
+2. After receiving user input, summarize and call attemptCompletion
+
+This tests the --continue and --continue-run functionality.
+"""
+
+[experts."e2e-continue".skills."user-input"]
+type = "interactiveSkill"
+description = "User interaction"
+
+[experts."e2e-continue".skills."user-input".tools.askUser]
+name = "askUser"
+description = "Ask the user a question"
+inputJsonSchema = '{"type":"object","properties":{"question":{"type":"string"}},"required":["question"]}'
+
+[experts."e2e-continue".skills."@perstack/base"]
+type = "mcpStdioSkill"
+command = "npx"
+packageName = "@perstack/base"
+pick = ["attemptCompletion", "think"]
+
+[experts."e2e-resume"]
+version = "1.0.0"
+description = "E2E test expert for resume-from functionality"
+instruction = """
+You are an E2E test expert that tests checkpoint resumption.
+
+When given a query:
+1. Call think tool to process the query
+2. Ask the user for confirmation using askUser
+3. After receiving user input, summarize and call attemptCompletion
+
+This tests the --resume-from functionality with specific checkpoint.
+"""
+
+[experts."e2e-resume".skills."user-input"]
+type = "interactiveSkill"
+description = "User interaction"
+
+[experts."e2e-resume".skills."user-input".tools.askUser]
+name = "askUser"
+description = "Ask the user a question"
+inputJsonSchema = '{"type":"object","properties":{"question":{"type":"string"}},"required":["question"]}'
+
+[experts."e2e-resume".skills."@perstack/base"]
+type = "mcpStdioSkill"
+command = "npx"
+packageName = "@perstack/base"
+pick = ["attemptCompletion", "think"]
diff --git a/e2e/experts/delegate-chain.toml b/e2e/experts/delegate-chain.toml
new file mode 100644
index 00000000..2d6084ee
--- /dev/null
+++ b/e2e/experts/delegate-chain.toml
@@ -0,0 +1,55 @@
+model = "claude-sonnet-4-5"
+temperature = 0.3
+# Root-level key: kept above [provider] because TOML assigns keys after a table header to that table.
+envPath = [".env", ".env.local"]
+
+[provider]
+providerName = "anthropic"
+
+[experts."e2e-delegate-chain"]
+version = "1.0.0"
+description = "E2E test expert for delegate chain"
+instruction = """
+You are an E2E test expert that tests delegate chain execution.
+
+When given a test query, delegate to "e2e-delegate-level1" to process the request.
+Wait for the delegation result and summarize it, then call attemptCompletion.
+"""
+delegates = ["e2e-delegate-level1"]
+
+[experts."e2e-delegate-chain".skills."@perstack/base"]
+type = "mcpStdioSkill"
+command = "npx"
+packageName = "@perstack/base"
+pick = ["attemptCompletion", "think"]
+
+[experts."e2e-delegate-level1"]
+version = "1.0.0"
+description = "First level delegate expert"
+instruction = """
+You are a level 1 delegate expert.
+When given a query, delegate to "e2e-delegate-level2" for further processing.
+Return the combined result.
+"""
+delegates = ["e2e-delegate-level2"]
+
+[experts."e2e-delegate-level1".skills."@perstack/base"]
+type = "mcpStdioSkill"
+command = "npx"
+packageName = "@perstack/base"
+pick = ["attemptCompletion"]
+
+[experts."e2e-delegate-level2"]
+version = "1.0.0"
+description = "Second level delegate expert"
+instruction = """
+You are a level 2 delegate expert.
+When given a query, respond with "Level 2 processing complete: [query summary]".
+Call attemptCompletion with your response.
+"""
+
+[experts."e2e-delegate-level2".skills."@perstack/base"]
+type = "mcpStdioSkill"
+command = "npx"
+packageName = "@perstack/base"
+pick = ["attemptCompletion"]
diff --git a/e2e/experts/mixed-tools.toml b/e2e/experts/mixed-tools.toml
new file mode 100644
index 00000000..2a6122e2
--- /dev/null
+++ b/e2e/experts/mixed-tools.toml
@@ -0,0 +1,59 @@
+model = "claude-sonnet-4-5"
+temperature = 0.3
+# Root-level key: kept above [provider] because TOML assigns keys after a table header to that table.
+envPath = [".env", ".env.local"]
+
+[provider]
+providerName = "anthropic"
+
+[experts."e2e-mixed-tools"]
+version = "1.0.0"
+description = "E2E test expert for mixed tool calls (MCP + Delegate + Interactive)"
+instruction = """
+You are an E2E test expert that tests parallel tool execution with mixed tool types.
+
+When given a test query, you MUST call ALL THREE tools in a SINGLE response:
+1. web_search_exa - Search for information (MCP tool)
+2. e2e-helper - Delegate to helper expert (Delegate tool)  
+3. askUser - Ask user for input (Interactive tool)
+
+CRITICAL: Make ALL THREE tool calls in ONE response.
+The runtime will process them in order: MCP first, then Delegate, then Interactive.
+"""
+delegates = ["e2e-helper"]
+
+[experts."e2e-mixed-tools".skills."exa"]
+type = "mcpStdioSkill"
+description = "Web search"
+command = "npx"
+args = ["-y", "exa-mcp-server"]
+requiredEnv = ["EXA_API_KEY"]
+
+[experts."e2e-mixed-tools".skills."user-input"]
+type = "interactiveSkill"
+description = "User interaction"
+
+[experts."e2e-mixed-tools".skills."user-input".tools.askUser]
+name = "askUser"
+description = "Ask the user a question"
+inputJsonSchema = '{"type":"object","properties":{"question":{"type":"string"}},"required":["question"]}'
+
+[experts."e2e-mixed-tools".skills."@perstack/base"]
+type = "mcpStdioSkill"
+command = "npx"
+packageName = "@perstack/base"
+pick = ["attemptCompletion", "think"]
+
+[experts."e2e-helper"]
+version = "1.0.0"
+description = "E2E test helper expert"
+instruction = """
+You are a helper expert for E2E testing.
+When given a query, respond briefly with "Helper analysis complete."
+"""
+
+[experts."e2e-helper".skills."@perstack/base"]
+type = "mcpStdioSkill"
+command = "npx"
+packageName = "@perstack/base"
+pick = ["attemptCompletion"]
diff --git a/e2e/experts/parallel-mcp.toml b/e2e/experts/parallel-mcp.toml
new file mode 100644
index 00000000..1a07a825
--- /dev/null
+++ b/e2e/experts/parallel-mcp.toml
@@ -0,0 +1,34 @@
+model = "claude-sonnet-4-5"
+temperature = 0.3
+# Root-level key: kept above [provider] because TOML assigns keys after a table header to that table.
+envPath = [".env", ".env.local"]
+
+[provider]
+providerName = "anthropic"
+
+[experts."e2e-parallel-mcp"]
+version = "1.0.0"
+description = "E2E test expert for parallel MCP tool calls"
+instruction = """
+You are an E2E test expert that tests parallel MCP tool execution.
+
+When given a test query, you MUST call MULTIPLE MCP tools in a SINGLE response:
+1. web_search_exa - Search for the topic
+2. web_search_exa - Search for related information (different query)
+
+CRITICAL: Make BOTH tool calls in ONE response to test parallel MCP execution.
+After getting results, summarize briefly and call attemptCompletion.
+"""
+
+[experts."e2e-parallel-mcp".skills."exa"]
+type = "mcpStdioSkill"
+description = "Web search"
+command = "npx"
+args = ["-y", "exa-mcp-server"]
+requiredEnv = ["EXA_API_KEY"]
+
+[experts."e2e-parallel-mcp".skills."@perstack/base"]
+type = "mcpStdioSkill"
+command = "npx"
+packageName = "@perstack/base"
+pick = ["attemptCompletion", "think"]
diff --git a/e2e/experts/special-tools.toml b/e2e/experts/special-tools.toml
new file mode 100644
index 00000000..25914dcb
--- /dev/null
+++ b/e2e/experts/special-tools.toml
@@ -0,0 +1,36 @@
+model = "claude-sonnet-4-5"
+temperature = 0.3
+# Root-level key: kept above [provider] because TOML assigns keys after a table header to that table.
+envPath = [".env", ".env.local"]
+
+[provider]
+providerName = "anthropic"
+
+[experts."e2e-special-tools"]
+version = "1.0.0"
+description = "E2E test expert for special tool parallel execution"
+instruction = """
+You are an E2E test expert that tests parallel execution of special tools with regular MCP tools.
+
+When given a test query, you MUST call ALL of these tools in a SINGLE response:
+1. think - Think about the approach
+2. readPdfFile - Read the PDF at e2e/fixtures/test.pdf
+3. readImageFile - Read the image at e2e/fixtures/test.gif
+4. web_search_exa - Search for related information
+
+CRITICAL: Make ALL 4 tool calls in ONE response to test that special tools (think, readPdfFile, readImageFile) execute in parallel with regular MCP tools.
+After getting results, summarize briefly what you found and call attemptCompletion.
+"""
+
+[experts."e2e-special-tools".skills."exa"]
+type = "mcpStdioSkill"
+description = "Web search"
+command = "npx"
+args = ["-y", "exa-mcp-server"]
+requiredEnv = ["EXA_API_KEY"]
+
+[experts."e2e-special-tools".skills."@perstack/base"]
+type = "mcpStdioSkill"
+command = "npx"
+packageName = "@perstack/base"
+pick = ["attemptCompletion", "think", "readPdfFile", "readImageFile"]
diff --git a/e2e/fixtures/test.gif b/e2e/fixtures/test.gif
new file mode 100644
index 00000000..28e7c216
Binary files /dev/null and b/e2e/fixtures/test.gif differ
diff --git a/e2e/fixtures/test.pdf b/e2e/fixtures/test.pdf
new file mode 100644
index 00000000..fc2928d7
Binary files /dev/null and b/e2e/fixtures/test.pdf differ
diff --git a/e2e/lib/assertions.ts b/e2e/lib/assertions.ts
new file mode 100644
index 00000000..9a8aa7c9
--- /dev/null
+++ b/e2e/lib/assertions.ts
@@ -0,0 +1,157 @@
+import {
+  type CheckpointState,
+  extractCheckpointState,
+  extractToolCalls,
+  filterEventsByType,
+  getEventSequence,
+  type ParsedEvent,
+} from "./event-parser.js"
+
+/** Outcome of an assertion helper; `details` is filled in only for some failures. */
+export type AssertionResult = {
+  passed: boolean
+  message: string
+  details?: unknown
+}
+
+/**
+ * Checks that `expectedSubsequence` appears in order within the event sequence of `events`.
+ *
+ * Other event types may occur in between; the sequence itself comes from getEventSequence.
+ */
+export function assertEventSequenceContains(
+  events: ParsedEvent[],
+  expectedSubsequence: string[],
+): AssertionResult {
+  const actual = getEventSequence(events)
+  let matched = 0
+  for (const eventType of actual) {
+    if (eventType !== expectedSubsequence[matched]) continue
+    matched += 1
+    if (matched === expectedSubsequence.length) break
+  }
+  if (matched !== expectedSubsequence.length) {
+    return {
+      passed: false,
+      message: `Event sequence missing expected subsequence`,
+      details: { expected: expectedSubsequence, actual },
+    }
+  }
+  return {
+    passed: true,
+    message: `Event sequence contains: ${expectedSubsequence.join(" → ")}`,
+    details: undefined,
+  }
+}
+
+/**
+ * Checks that some `eventType` event carries exactly `expectedCount` tool calls.
+ *
+ * When none does, `details.foundCounts` lists the tool-call count of each such event.
+ */
+export function assertToolCallCount(
+  events: ParsedEvent[],
+  eventType: "callTools",
+  expectedCount: number,
+): AssertionResult {
+  const candidates = filterEventsByType(events, eventType)
+  if (candidates.length === 0) {
+    return { passed: false, message: `No ${eventType} events found` }
+  }
+  const foundCounts = candidates.map((event) => extractToolCalls(event).length)
+  if (foundCounts.some((count) => count === expectedCount)) {
+    return { passed: true, message: `Tool call count matches: ${expectedCount}` }
+  }
+  return {
+    passed: false,
+    message: `No ${eventType} event with ${expectedCount} tool calls found`,
+    details: { foundCounts },
+  }
+}
+
+/**
+ * Compares the checkpoint of the first `eventType` event with `expectedState`.
+ *
+ * Only the fields present in `expectedState` are checked: `status` by equality, and
+ * `pendingToolCalls` / `partialToolResults` by length only.
+ */
+export function assertCheckpointState(
+  events: ParsedEvent[],
+  eventType: string,
+  expectedState: Partial<CheckpointState>,
+): AssertionResult {
+  const targetEvent = events.find((event) => event.type === eventType)
+  if (!targetEvent) {
+    return { passed: false, message: `Event ${eventType} not found` }
+  }
+  const state = extractCheckpointState(targetEvent)
+  if (!state) {
+    return { passed: false, message: `No checkpoint in ${eventType} event` }
+  }
+  type Check = { key: string; passed: boolean; expected: unknown; actual: unknown }
+  const compare = (key: string, expected: unknown, actual: unknown): Check => ({
+    key,
+    passed: actual === expected,
+    expected,
+    actual,
+  })
+  const { status, pendingToolCalls, partialToolResults } = expectedState
+  const checks: Check[] = []
+  if (status !== undefined) {
+    checks.push(compare("status", status, state.status))
+  }
+  if (pendingToolCalls !== undefined) {
+    const actualLength = state.pendingToolCalls.length
+    checks.push(compare("pendingToolCalls.length", pendingToolCalls.length, actualLength))
+  }
+  if (partialToolResults !== undefined) {
+    const actualLength = state.partialToolResults.length
+    checks.push(compare("partialToolResults.length", partialToolResults.length, actualLength))
+  }
+  const failedChecks = checks.filter((check) => !check.passed)
+  if (failedChecks.length > 0) {
+    return {
+      passed: false,
+      message: `Checkpoint state mismatch for ${eventType}`,
+      details: { failedChecks, actualState: state },
+    }
+  }
+  return {
+    passed: true,
+    message: `Checkpoint state matches for ${eventType}`,
+    details: undefined,
+  }
+}
+
+/**
+ * Checks that the checkpoint of the first `eventType` event holds a partial tool result for
+ * every name in `expectedToolNames`.
+ */
+export function assertPartialResultsContain(
+  events: ParsedEvent[],
+  eventType: string,
+  expectedToolNames: string[],
+): AssertionResult {
+  const targetEvent = events.find((event) => event.type === eventType)
+  if (!targetEvent) {
+    return { passed: false, message: `Event ${eventType} not found` }
+  }
+  const state = extractCheckpointState(targetEvent)
+  if (!state) {
+    return { passed: false, message: `No checkpoint in ${eventType} event` }
+  }
+  const actualToolNames = state.partialToolResults.map((result) => result.toolName)
+  const missing = expectedToolNames.filter((name) => !actualToolNames.includes(name))
+  if (missing.length > 0) {
+    return {
+      passed: false,
+      message: `Missing partial results`,
+      details: { expected: expectedToolNames, actual: actualToolNames },
+    }
+  }
+  return {
+    passed: true,
+    message: `Partial results contain: ${expectedToolNames.join(", ")}`,
+    details: undefined,
+  }
+}
diff --git a/e2e/lib/event-parser.ts b/e2e/lib/event-parser.ts
new file mode 100644
index 00000000..f84fa308
--- /dev/null
+++ b/e2e/lib/event-parser.ts
@@ -0,0 +1,102 @@
+import type { RunEvent } from "@perstack/core"
+
+/** A runtime event decoded from one line of CLI output, together with that raw line. */
+export type ParsedEvent = RunEvent & { raw: string }
+
+/** Identifying fields copied out of a tool call or a partial tool result. */
+export type ToolCallInfo = {
+  id: string
+  skillName: string
+  toolName: string
+}
+
+/** Checkpoint fields inspected by the E2E assertions. */
+export type CheckpointState = {
+  status: string
+  pendingToolCalls: ToolCallInfo[]
+  partialToolResults: ToolCallInfo[]
+}
+
+/** Event types retained by getEventSequence; all other event types are filtered out. */
+const RELEVANT_EVENT_TYPES: ReadonlySet<string> = new Set([
+  "startRun",
+  "callTools",
+  "callDelegate",
+  "callInteractiveTool",
+  "stopRunByDelegate",
+  "stopRunByInteractiveTool",
+  "resumeToolCalls",
+  "finishAllToolCalls",
+  "completeRun",
+  "resolveToolResults",
+])
+
+/**
+ * Collects the events found in `output`, reading it one line at a time.
+ *
+ * A line contributes an event only when it parses as JSON and the parsed value has a truthy
+ * `type`; every other line (blank lines, plain log text, ...) is ignored.
+ */
+export function parseEvents(output: string): ParsedEvent[] {
+  const events: ParsedEvent[] = []
+  for (const line of output.split("\n")) {
+    let data: unknown
+    try {
+      data = JSON.parse(line)
+    } catch {
+      continue // not a JSON line
+    }
+    if (typeof data !== "object" || data === null) continue
+    if (!(data as { type?: unknown }).type) continue
+    events.push({ ...(data as RunEvent), raw: line })
+  }
+  return events
+}
+
+/** Keeps only the events of the given `type`, narrowed to that event variant. */
+export function filterEventsByType<T extends RunEvent["type"]>(
+  events: ParsedEvent[],
+  type: T,
+): Extract<ParsedEvent, { type: T }>[] {
+  const matches = events.filter((event) => event.type === type)
+  return matches as Extract<ParsedEvent, { type: T }>[]
+}
+
+/** Lists, in order, the `type` of every event whose type is in RELEVANT_EVENT_TYPES. */
+export function getEventSequence(events: ParsedEvent[]): string[] {
+  const sequence: string[] = []
+  for (const event of events) {
+    if (RELEVANT_EVENT_TYPES.has(event.type)) sequence.push(event.type)
+  }
+  return sequence
+}
+
+/** Summarizes the tool calls of a `callTools` event; other events yield an empty list. */
+export function extractToolCalls(event: ParsedEvent): ToolCallInfo[] {
+  if (event.type !== "callTools") return []
+  const toolCalls = event.toolCalls ?? []
+  return toolCalls.map(({ id, skillName, toolName }) => ({ id, skillName, toolName }))
+}
+
+/**
+ * Reads `status`, `pendingToolCalls` and `partialToolResults` from the event's `checkpoint`.
+ *
+ * Returns null when the event has no checkpoint. Missing lists are treated as empty. The
+ * checkpoint is read through casts and is not validated.
+ */
+export function extractCheckpointState(event: ParsedEvent): CheckpointState | null {
+  const checkpoint = (event as { checkpoint?: Record<string, unknown> }).checkpoint
+  if (!checkpoint) return null
+  const summarize = ({ id, skillName, toolName }: ToolCallInfo): ToolCallInfo => ({
+    id,
+    skillName,
+    toolName,
+  })
+  const pending = (checkpoint.pendingToolCalls ?? []) as ToolCallInfo[]
+  const partial = (checkpoint.partialToolResults ?? []) as ToolCallInfo[]
+  return {
+    status: checkpoint.status as string,
+    pendingToolCalls: pending.map(summarize),
+    partialToolResults: partial.map(summarize),
+  }
+}
diff --git a/e2e/lib/runner.ts b/e2e/lib/runner.ts
new file mode 100644
index 00000000..2d9c70df
--- /dev/null
+++ b/e2e/lib/runner.ts
@@ -0,0 +1,112 @@
+import { spawn } from "node:child_process"
+import { resolve as resolvePath } from "node:path"
+import { type ParsedEvent, parseEvents } from "./event-parser.js"
+
+/** Captured output and exit status of one CLI invocation. */
+export type CommandResult = {
+  stdout: string
+  stderr: string
+  exitCode: number
+}
+
+/** A CommandResult plus the events parsed from stdout and the id of the run, if any. */
+export type RunResult = CommandResult & {
+  events: ParsedEvent[]
+  runId: string | null
+}
+
+/**
+ * Absolute path of the CLI entry point, resolved once against the directory the
+ * tests were started from so that it stays valid when a caller overrides `cwd`.
+ */
+const CLI_ENTRY = resolvePath("packages/perstack/bin/cli.ts")
+
+/**
+ * Spawns the CLI through `npx tsx` and collects its output.
+ *
+ * @param args - Arguments passed to the CLI.
+ * @param options - `timeout` in ms (default 30000) and working directory `cwd`
+ *   (default `process.cwd()`).
+ * @returns stdout, stderr and the exit code once the process has closed.
+ * @throws Rejects if the process cannot be spawned, exceeds the timeout, or is
+ *   terminated by a signal (no exit code available).
+ */
+export async function runCli(
+  args: string[],
+  options?: { timeout?: number; cwd?: string },
+): Promise<CommandResult> {
+  const timeout = options?.timeout ?? 30000
+  const cwd = options?.cwd ?? process.cwd()
+  return new Promise((resolve, reject) => {
+    let stdout = ""
+    let stderr = ""
+    const proc = spawn("npx", ["tsx", CLI_ENTRY, ...args], {
+      cwd,
+      env: { ...process.env },
+      stdio: ["pipe", "pipe", "pipe"],
+    })
+    const timer = setTimeout(() => {
+      proc.kill("SIGTERM")
+      reject(new Error(`Timeout after ${timeout}ms`))
+    }, timeout)
+    proc.stdout.on("data", (data) => {
+      stdout += data.toString()
+    })
+    proc.stderr.on("data", (data) => {
+      stderr += data.toString()
+    })
+    proc.on("close", (code, signal) => {
+      clearTimeout(timer)
+      if (code === null) {
+        // Killed by a signal: reporting 0 here would make a crashed process look successful.
+        // After a timeout this is a no-op because the promise is already rejected.
+        reject(new Error(`Process terminated by signal ${signal}`))
+        return
+      }
+      resolve({ stdout, stderr, exitCode: code })
+    })
+    proc.on("error", (err) => {
+      clearTimeout(timer)
+      reject(err)
+    })
+  })
+}
+
+/**
+ * Runs an expert through the CLI `run` command and parses the emitted events.
+ *
+ * @param expertKey - Key of the expert to run.
+ * @param query - Query passed to the expert.
+ * @param options - Optional `--config` path, timeout in ms (default 120000),
+ *   `--continue-run` id, and whether to pass `-i`.
+ * @returns The command result plus the parsed events and the `runId` of the
+ *   first `startRun` event (`null` when there is none).
+ */
+export async function runExpert(
+  expertKey: string,
+  query: string,
+  options?: {
+    configPath?: string
+    timeout?: number
+    continueRunId?: string
+    isInteractiveResult?: boolean
+  },
+): Promise<RunResult> {
+  const args = ["run"]
+  if (options?.configPath) {
+    args.push("--config", options.configPath)
+  }
+  if (options?.continueRunId) {
+    args.push("--continue-run", options.continueRunId)
+  }
+  if (options?.isInteractiveResult) {
+    args.push("-i")
+  }
+  args.push(expertKey, query)
+  // Same spawn and timeout handling as any other CLI call; only the default timeout differs.
+  const result = await runCli(args, { timeout: options?.timeout ?? 120000 })
+  const events = parseEvents(result.stdout)
+  const startRunEvent = events.find((e) => e.type === "startRun")
+  const runId = (startRunEvent as { runId?: string } | undefined)?.runId ?? null
+  return { ...result, events, runId }
+}
diff --git a/e2e/mixed-tools.test.ts b/e2e/mixed-tools.test.ts
new file mode 100644
index 00000000..fd737e2c
--- /dev/null
+++ b/e2e/mixed-tools.test.ts
@@ -0,0 +1,71 @@
+import { beforeAll, describe, expect, it } from "vitest"
+import {
+  assertCheckpointState,
+  assertEventSequenceContains,
+  assertPartialResultsContain,
+  assertToolCallCount,
+} from "./lib/assertions.js"
+import type { ToolCallInfo } from "./lib/event-parser.js"
+import { type RunResult, runExpert } from "./lib/runner.js"
+
+describe("Mixed Tool Calls (MCP + Delegate + Interactive)", () => { // all tests below inspect the events of one shared run
+  let result: RunResult
+
+  beforeAll(async () => {
+    result = await runExpert(
+      "e2e-mixed-tools",
+      "Test mixed tool calls: search, delegate, and ask user",
+      {
+        configPath: "./e2e/experts/mixed-tools.toml",
+        timeout: 180000,
+      },
+    )
+  }, 200000) // hook timeout, set above the 180000 ms run timeout
+
+  it("should generate 3 tool calls in priority order", () => {
+    expect(assertToolCallCount(result.events, "callTools", 3).passed).toBe(true)
+    expect(
+      assertEventSequenceContains(result.events, [
+        "startRun",
+        "callTools",
+        "callDelegate",
+        "stopRunByDelegate",
+      ]).passed,
+    ).toBe(true)
+  })
+
+  it("should collect MCP result before delegate", () => {
+    const checkResult = assertCheckpointState(result.events, "stopRunByDelegate", {
+      status: "stoppedByDelegate",
+      partialToolResults: [{}] as ToolCallInfo[], // NOTE(review): placeholder entries; presumably only the count is compared - confirm in assertions.ts
+      pendingToolCalls: [{}, {}] as ToolCallInfo[],
+    })
+    expect(checkResult.passed).toBe(true)
+    expect(
+      assertPartialResultsContain(result.events, "stopRunByDelegate", ["web_search_exa"]).passed,
+    ).toBe(true)
+  })
+
+  it("should resume with delegate result and process interactive", () => {
+    expect(
+      assertEventSequenceContains(result.events, [
+        "stopRunByDelegate",
+        "startRun",
+        "completeRun",
+        "startRun",
+        "resumeToolCalls",
+        "callInteractiveTool",
+        "stopRunByInteractiveTool",
+      ]).passed,
+    ).toBe(true)
+  })
+
+  it("should have all partial results after interactive stop", () => {
+    const checkResult = assertCheckpointState(result.events, "stopRunByInteractiveTool", {
+      status: "stoppedByInteractiveTool",
+      partialToolResults: [{}, {}] as ToolCallInfo[], // NOTE(review): placeholder entries again; see the assumption about count-only comparison
+      pendingToolCalls: [{}] as ToolCallInfo[],
+    })
+    expect(checkResult.passed).toBe(true)
+  })
+})
diff --git a/e2e/parallel-mcp.test.ts b/e2e/parallel-mcp.test.ts
new file mode 100644
index 00000000..32e56571
--- /dev/null
+++ b/e2e/parallel-mcp.test.ts
@@ -0,0 +1,41 @@
+import { beforeAll, describe, expect, it } from "vitest"
+import { assertEventSequenceContains, assertToolCallCount } from "./lib/assertions.js"
+import { filterEventsByType } from "./lib/event-parser.js"
+import { type RunResult, runExpert } from "./lib/runner.js"
+
+describe("Parallel MCP Tool Calls", () => { // all tests below inspect the events of one shared run
+  let result: RunResult
+
+  beforeAll(async () => {
+    result = await runExpert(
+      "e2e-parallel-mcp",
+      "Test parallel MCP: search TypeScript and JavaScript",
+      {
+        configPath: "./e2e/experts/parallel-mcp.toml",
+        timeout: 180000,
+      },
+    )
+  }, 200000) // hook timeout, set above the 180000 ms run timeout
+
+  it("should execute multiple MCP tools in parallel", () => {
+    expect(assertToolCallCount(result.events, "callTools", 2).passed).toBe(true)
+    expect(
+      assertEventSequenceContains(result.events, ["startRun", "callTools", "resolveToolResults"])
+        .passed,
+    ).toBe(true)
+  })
+
+  it("should resolve all MCP results before next step", () => {
+    const resolveEvents = filterEventsByType(result.events, "resolveToolResults")
+    const hasMultipleResults = resolveEvents.some((e) => {
+      const toolResults = (e as { toolResults?: unknown[] }).toolResults ?? []
+      return toolResults.length >= 2 // at least two results must arrive within a single resolveToolResults event
+    })
+    expect(hasMultipleResults).toBe(true)
+  })
+
+  it("should complete run successfully", () => {
+    expect(assertEventSequenceContains(result.events, ["completeRun"]).passed).toBe(true)
+    expect(result.exitCode).toBe(0)
+  })
+})
diff --git a/e2e/publish.test.ts b/e2e/publish.test.ts
new file mode 100644
index 00000000..8f2cf4fa
--- /dev/null
+++ b/e2e/publish.test.ts
@@ -0,0 +1,32 @@
+import { describe, expect, it } from "vitest"
+import { runCli } from "./lib/runner.js"
+
+describe("CLI publish", () => { // one dry-run success case followed by three cases expecting exit code 1
+  it("should output JSON payload for valid expert with --dry-run", async () => {
+    const result = await runCli(["publish", "tic-tac-toe", "--dry-run"])
+    expect(result.exitCode).toBe(0)
+    expect(result.stdout).toBeTruthy() // only checks that something was printed; the payload is not parsed
+  })
+
+  it("should fail for nonexistent expert", async () => {
+    const result = await runCli(["publish", "nonexistent", "--dry-run"])
+    expect(result.exitCode).toBe(1)
+  })
+
+  it("should fail with nonexistent config file", async () => {
+    const result = await runCli([
+      "publish",
+      "tic-tac-toe",
+      "--dry-run",
+      "--config",
+      "nonexistent.toml",
+    ])
+    expect(result.exitCode).toBe(1)
+  })
+
+  it("should fail when no config in directory", async () => {
+    const result = await runCli(["publish", "tic-tac-toe", "--dry-run"], { cwd: "/tmp" }) // assumes /tmp holds no config file
+    expect(result.exitCode).toBe(1)
+  })
+})
+
diff --git a/e2e/run.test.ts b/e2e/run.test.ts
new file mode 100644
index 00000000..13cb09f8
--- /dev/null
+++ b/e2e/run.test.ts
@@ -0,0 +1,25 @@
+import { describe, expect, it } from "vitest"
+import { runCli } from "./lib/runner.js"
+
+describe("CLI run", () => { // failure paths only: every case expects exit code 1
+  it("should fail without arguments", async () => {
+    const result = await runCli(["run"])
+    expect(result.exitCode).toBe(1)
+  })
+
+  it("should fail with only expert key", async () => {
+    const result = await runCli(["run", "expertOnly"]) // expert key given, query omitted
+    expect(result.exitCode).toBe(1)
+  })
+
+  it("should fail for nonexistent expert", async () => {
+    const result = await runCli(["run", "nonexistent-expert", "test query"])
+    expect(result.exitCode).toBe(1)
+  })
+
+  it("should fail with nonexistent config file", async () => {
+    const result = await runCli(["run", "expert", "query", "--config", "nonexistent.toml"])
+    expect(result.exitCode).toBe(1)
+  })
+})
+
diff --git a/e2e/special-tools.test.ts b/e2e/special-tools.test.ts
new file mode 100644
index 00000000..e227cb68
--- /dev/null
+++ b/e2e/special-tools.test.ts
@@ -0,0 +1,68 @@
+import { beforeAll, describe, expect, it } from "vitest"
+import { assertEventSequenceContains, assertToolCallCount } from "./lib/assertions.js"
+import { filterEventsByType } from "./lib/event-parser.js"
+import { type RunResult, runExpert } from "./lib/runner.js"
+
+describe("Special Tools Parallel Execution", () => { // all tests below inspect the events of one shared run
+  let result: RunResult
+
+  beforeAll(async () => {
+    result = await runExpert(
+      "e2e-special-tools",
+      "Test all special tools: think, read the PDF, read the GIF image, and search",
+      {
+        configPath: "./e2e/experts/special-tools.toml",
+        timeout: 180000,
+      },
+    )
+  }, 200000) // hook timeout, set above the 180000 ms run timeout
+
+  it("should execute all 4 tools in parallel", () => {
+    expect(assertToolCallCount(result.events, "callTools", 4).passed).toBe(true)
+    expect(
+      assertEventSequenceContains(result.events, ["startRun", "callTools", "resolveToolResults"])
+        .passed,
+    ).toBe(true)
+  })
+
+  it("should resolve all tool results together", () => {
+    const resolveEvents = filterEventsByType(result.events, "resolveToolResults")
+    const hasAllResults = resolveEvents.some((e) => {
+      const toolResults = (e as { toolResults?: { toolName: string }[] }).toolResults ?? []
+      return toolResults.length >= 4 // NOTE(review): the search tool is covered only by this count, never checked by name
+    })
+    expect(hasAllResults).toBe(true)
+  })
+
+  it("should include think tool in resolved results", () => {
+    const resolveEvents = filterEventsByType(result.events, "resolveToolResults")
+    const hasThinkResult = resolveEvents.some((e) => {
+      const toolResults = (e as { toolResults?: { toolName: string }[] }).toolResults ?? []
+      return toolResults.some((tr) => tr.toolName === "think")
+    })
+    expect(hasThinkResult).toBe(true)
+  })
+
+  it("should include readPdfFile in resolved results", () => {
+    const resolveEvents = filterEventsByType(result.events, "resolveToolResults")
+    const hasPdfResult = resolveEvents.some((e) => {
+      const toolResults = (e as { toolResults?: { toolName: string }[] }).toolResults ?? []
+      return toolResults.some((tr) => tr.toolName === "readPdfFile")
+    })
+    expect(hasPdfResult).toBe(true)
+  })
+
+  it("should include readImageFile in resolved results", () => {
+    const resolveEvents = filterEventsByType(result.events, "resolveToolResults")
+    const hasImageResult = resolveEvents.some((e) => {
+      const toolResults = (e as { toolResults?: { toolName: string }[] }).toolResults ?? []
+      return toolResults.some((tr) => tr.toolName === "readImageFile")
+    })
+    expect(hasImageResult).toBe(true)
+  })
+
+  it("should complete run successfully", () => {
+    expect(assertEventSequenceContains(result.events, ["completeRun"]).passed).toBe(true)
+    expect(result.exitCode).toBe(0)
+  })
+})
diff --git a/e2e/status.test.ts b/e2e/status.test.ts
new file mode 100644
index 00000000..3f67f758
--- /dev/null
+++ b/e2e/status.test.ts
@@ -0,0 +1,20 @@
+import { describe, expect, it } from "vitest"
+import { runCli } from "./lib/runner.js"
+
+describe("CLI status", () => { // failure paths only: every case expects exit code 1
+  it("should fail without version", async () => {
+    const result = await runCli(["status", "no-version", "available"]) // expert key carries no @version suffix
+    expect(result.exitCode).toBe(1)
+  })
+
+  it("should fail without status value", async () => {
+    const result = await runCli(["status", "expert@1.0.0"])
+    expect(result.exitCode).toBe(1)
+  })
+
+  it("should fail with invalid status value", async () => {
+    const result = await runCli(["status", "expert@1.0.0", "invalid-status"])
+    expect(result.exitCode).toBe(1)
+  })
+})
+
diff --git a/e2e/tag.test.ts b/e2e/tag.test.ts
new file mode 100644
index 00000000..d5079eb7
--- /dev/null
+++ b/e2e/tag.test.ts
@@ -0,0 +1,15 @@
+import { describe, expect, it } from "vitest"
+import { runCli } from "./lib/runner.js"
+
+describe("CLI tag", () => { // failure paths only: every case expects exit code 1
+  it("should fail without version", async () => {
+    const result = await runCli(["tag", "no-version", "tag1"]) // expert key carries no @version suffix
+    expect(result.exitCode).toBe(1)
+  })
+
+  it("should fail without tags", async () => {
+    const result = await runCli(["tag", "expert@1.0.0"])
+    expect(result.exitCode).toBe(1)
+  })
+})
+
diff --git a/e2e/tsconfig.json b/e2e/tsconfig.json
new file mode 100644
index 00000000..4e6d45a5
--- /dev/null
+++ b/e2e/tsconfig.json
@@ -0,0 +1,13 @@
+{
+  "extends": "@tsconfig/node22/tsconfig.json",
+  "compilerOptions": {
+    "resolveJsonModule": true,
+    "paths": {
+      "@perstack/core": ["../packages/core/src/index.ts"],
+      "@perstack/runtime": ["../packages/runtime/src/index.ts"]
+    }
+  },
+  "include": ["**/*.ts"],
+  "exclude": ["node_modules"]
+}
+
diff --git a/e2e/unpublish.test.ts b/e2e/unpublish.test.ts
new file mode 100644
index 00000000..6df3e85e
--- /dev/null
+++ b/e2e/unpublish.test.ts
@@ -0,0 +1,16 @@
+import { describe, expect, it } from "vitest"
+import { runCli } from "./lib/runner.js"
+
+describe("CLI unpublish", () => { // failure paths only: every case expects exit code 1
+  it("should fail without version", async () => {
+    const result = await runCli(["unpublish", "no-version", "--force"]) // expert key carries no @version suffix
+    expect(result.exitCode).toBe(1)
+  })
+
+  it("should fail without --force when version provided", async () => {
+    const result = await runCli(["unpublish", "expert@1.0.0"])
+    expect(result.exitCode).toBe(1)
+    expect(result.stderr).toContain("--force") // the error output is expected to name the missing flag
+  })
+})
+
diff --git a/knip.json b/knip.json
index a25dca4c..c9bbe578 100644
--- a/knip.json
+++ b/knip.json
@@ -3,7 +3,7 @@
   "ignoreDependencies": ["@tsconfig/node22", "vitest", "ts-dedent"],
   "ignoreExportsUsedInFile": true,
   "ignoreBinaries": ["perstack"],
-  "ignore": ["dist/**/*", "**/*.test.ts", "**/*.test.tsx", "docs/content/**/*", "scripts/**/*", "examples/**/*"],
+  "ignore": ["dist/**/*", "**/*.test.ts", "**/*.test.tsx", "docs/content/**/*", "scripts/**/*", "examples/**/*", "e2e/**/*"],
   "workspaces": {
     "packages/perstack": {
       "entry": ["bin/cli.ts", "src/**/*.ts"]
diff --git a/package.json b/package.json
index d9c6c108..d3df9f32 100644
--- a/package.json
+++ b/package.json
@@ -12,6 +12,7 @@
     "release": "pnpm run clean && pnpm run build && changeset publish",
     "test": "vitest run --project unit --coverage --coverage.reporter=lcov --coverage.reporter=text",
     "test:watch": "vitest watch --project unit",
+    "test:e2e": "vitest run --project e2e",
     "format-and-lint": "biome check .",
     "format-and-lint:fix": "biome check . --write",
     "typecheck": "turbo run typecheck --continue",
diff --git a/packages/api-client/test/test-data.ts b/packages/api-client/test/test-data.ts
index 14b21b4e..5b728f26 100644
--- a/packages/api-client/test/test-data.ts
+++ b/packages/api-client/test/test-data.ts
@@ -208,8 +208,8 @@ export const runtimeStep: z.input<typeof stepSchema> = {
   stepNumber: 1,
   inputMessages: [],
   newMessages: [],
-  toolCall: undefined,
-  toolResult: undefined,
+  toolCalls: undefined,
+  toolResults: undefined,
   usage: {
     inputTokens: 100,
     outputTokens: 100,
diff --git a/packages/core/src/schemas/checkpoint.ts b/packages/core/src/schemas/checkpoint.ts
index 68535499..9d5ae44c 100644
--- a/packages/core/src/schemas/checkpoint.ts
+++ b/packages/core/src/schemas/checkpoint.ts
@@ -1,6 +1,10 @@
 import { z } from "zod"
 import type { Message } from "./message.js"
 import { messageSchema } from "./message.js"
+import type { ToolCall } from "./tool-call.js"
+import { toolCallSchema } from "./tool-call.js"
+import type { ToolResult } from "./tool-result.js"
+import { toolResultSchema } from "./tool-result.js"
 import type { Usage } from "./usage.js"
 import { usageSchema } from "./usage.js"
 
@@ -84,6 +88,10 @@ export interface Checkpoint {
   contextWindow?: number
   /** Context window usage ratio (0-1) */
   contextWindowUsage?: number
+  /** Tool calls waiting to be processed (for resume after delegate/interactive) */
+  pendingToolCalls?: ToolCall[]
+  /** Partial tool results collected before stopping (for resume) */
+  partialToolResults?: ToolResult[]
 }
 
 export const checkpointSchema = z.object({
@@ -124,5 +132,7 @@ export const checkpointSchema = z.object({
   usage: usageSchema,
   contextWindow: z.number().optional(),
   contextWindowUsage: z.number().optional(),
+  pendingToolCalls: z.array(toolCallSchema).optional(),
+  partialToolResults: z.array(toolResultSchema).optional(),
 })
 checkpointSchema satisfies z.ZodType<Checkpoint>
diff --git a/packages/core/src/schemas/message-part.ts b/packages/core/src/schemas/message-part.ts
index 2416e6ed..944cdc80 100644
--- a/packages/core/src/schemas/message-part.ts
+++ b/packages/core/src/schemas/message-part.ts
@@ -147,7 +147,7 @@ export interface ToolResultPart extends BasePart {
   /** Name of the tool that was called */
   toolName: string
   /** Content of the tool result */
-  contents: (TextPart | ImageInlinePart)[]
+  contents: (TextPart | ImageInlinePart | FileInlinePart)[]
   /** Whether the tool call resulted in an error */
   isError?: boolean
 }
@@ -156,7 +156,7 @@ export const toolResultPartSchema = basePartSchema.extend({
   type: z.literal("toolResultPart"),
   toolCallId: z.string(),
   toolName: z.string(),
-  contents: z.array(z.union([textPartSchema, imageInlinePartSchema])),
+  contents: z.array(z.union([textPartSchema, imageInlinePartSchema, fileInlinePartSchema])),
   isError: z.boolean().optional(),
 })
 toolResultPartSchema satisfies z.ZodType<ToolResultPart>
diff --git a/packages/core/src/schemas/runtime.ts b/packages/core/src/schemas/runtime.ts
index dfb9fc18..1740a185 100644
--- a/packages/core/src/schemas/runtime.ts
+++ b/packages/core/src/schemas/runtime.ts
@@ -222,13 +222,13 @@ type ExpertEventPayloads = {
   retry: {
     reason: string
     newMessages: (UserMessage | ExpertMessage | ToolMessage)[]
-    toolCall?: ToolCall
-    toolResult?: ToolResult
+    toolCalls?: ToolCall[]
+    toolResults?: ToolResult[]
     usage: Usage
   }
-  callTool: {
+  callTools: {
     newMessage: ExpertMessage
-    toolCall: ToolCall
+    toolCalls: ToolCall[]
     usage: Usage
   }
   callInteractiveTool: {
@@ -241,8 +241,8 @@ type ExpertEventPayloads = {
     toolCall: ToolCall
     usage: Usage
   }
-  resolveToolResult: {
-    toolResult: ToolResult
+  resolveToolResults: {
+    toolResults: ToolResult[]
   }
   resolveThought: {
     toolResult: ToolResult
@@ -259,6 +259,13 @@ type ExpertEventPayloads = {
   finishToolCall: {
     newMessages: (UserMessage | ToolMessage)[]
   }
+  resumeToolCalls: {
+    pendingToolCalls: ToolCall[]
+    partialToolResults: ToolResult[]
+  }
+  finishAllToolCalls: {
+    newMessages: (UserMessage | ToolMessage)[]
+  }
   continueToNextStep: {
     checkpoint: Checkpoint
     step: Step
@@ -331,15 +338,17 @@ export function createEvent<T extends EventType>(type: T) {
 export const startRun = createEvent("startRun")
 export const startGeneration = createEvent("startGeneration")
 export const retry = createEvent("retry")
-export const callTool = createEvent("callTool")
+export const callTools = createEvent("callTools")
 export const callInteractiveTool = createEvent("callInteractiveTool")
 export const callDelegate = createEvent("callDelegate")
-export const resolveToolResult = createEvent("resolveToolResult")
+export const resolveToolResults = createEvent("resolveToolResults")
 export const resolveThought = createEvent("resolveThought")
 export const resolvePdfFile = createEvent("resolvePdfFile")
 export const resolveImageFile = createEvent("resolveImageFile")
 export const attemptCompletion = createEvent("attemptCompletion")
 export const finishToolCall = createEvent("finishToolCall")
+export const resumeToolCalls = createEvent("resumeToolCalls")
+export const finishAllToolCalls = createEvent("finishAllToolCalls")
 export const completeRun = createEvent("completeRun")
 export const stopRunByInteractiveTool = createEvent("stopRunByInteractiveTool")
 export const stopRunByDelegate = createEvent("stopRunByDelegate")
diff --git a/packages/core/src/schemas/step.ts b/packages/core/src/schemas/step.ts
index 60aecab7..23b7a735 100644
--- a/packages/core/src/schemas/step.ts
+++ b/packages/core/src/schemas/step.ts
@@ -15,7 +15,7 @@ import { usageSchema } from "./usage.js"
 
 /**
  * A single execution step in an Expert run.
- * Each step represents one LLM generation cycle, optionally followed by a tool call.
+ * Each step represents one LLM generation cycle, optionally followed by tool calls.
  */
 export interface Step {
   /** Sequential step number (1-indexed) */
@@ -24,10 +24,14 @@ export interface Step {
   inputMessages?: (InstructionMessage | UserMessage | ToolMessage)[]
   /** Messages generated during this step */
   newMessages: Message[]
-  /** Tool call made during this step, if any */
-  toolCall?: ToolCall
-  /** Result of the tool call, if any */
-  toolResult?: ToolResult
+  /** Tool calls made during this step, if any */
+  toolCalls?: ToolCall[]
+  /** Results of the tool calls, if any */
+  toolResults?: ToolResult[]
+  /** Tool calls waiting to be processed (sorted: MCP → Delegate → Interactive) */
+  pendingToolCalls?: ToolCall[]
+  /** Partial tool results collected so far (used during mixed tool call processing) */
+  partialToolResults?: ToolResult[]
   /** Token usage for this step */
   usage: Usage
   /** Unix timestamp (ms) when step started */
@@ -42,8 +46,10 @@ export const stepSchema = z.object({
     .array(z.union([instructionMessageSchema, userMessageSchema, toolMessageSchema]))
     .optional(),
   newMessages: z.array(messageSchema),
-  toolCall: toolCallSchema.optional(),
-  toolResult: toolResultSchema.optional(),
+  toolCalls: z.array(toolCallSchema).optional(),
+  toolResults: z.array(toolResultSchema).optional(),
+  pendingToolCalls: z.array(toolCallSchema).optional(),
+  partialToolResults: z.array(toolResultSchema).optional(),
   usage: usageSchema,
   startedAt: z.number(),
   finishedAt: z.number().optional(),
diff --git a/packages/perstack/src/lib/tui.tsx b/packages/perstack/src/lib/tui.tsx
index caa8dd7d..cd50ed99 100644
--- a/packages/perstack/src/lib/tui.tsx
+++ b/packages/perstack/src/lib/tui.tsx
@@ -23,35 +23,37 @@ export function defaultEventListener(e: RunEvent): void {
       debug(e.reason)
       break
     }
-    case "callTool": {
-      log(`${header(e)} Calling tool`)
-      if (e.toolCall.skillName === "@perstack/base") {
-        switch (e.toolCall.toolName) {
+    case "callTools": {
+      log(`${header(e)} Calling ${e.toolCalls.length} tool(s)`)
+      for (const toolCall of e.toolCalls) {
+        if (toolCall.skillName === "@perstack/base") {
+          switch (toolCall.toolName) {
           case "think": {
-            const thought = e.toolCall.args.thought
+              const thought = toolCall.args.thought
             log(`${header(e)} Thought Updated:`)
             debug(thought)
             break
           }
           case "readPdfFile": {
-            const path = e.toolCall.args.path
+              const path = toolCall.args.path
             log(`${header(e)} Reading PDF: ${path}`)
             break
           }
           case "readImageFile": {
-            const path = e.toolCall.args.path
+              const path = toolCall.args.path
             log(`${header(e)} Reading Image: ${path}`)
             break
           }
           default: {
-            log(`${header(e)} Tool: ${e.toolCall.skillName}/${e.toolCall.toolName}`)
-            debug(`${header(e)} Args: ${JSON.stringify(e.toolCall.args, null, 2)}`)
+              log(`${header(e)} Tool: ${toolCall.skillName}/${toolCall.toolName}`)
+              debug(`${header(e)} Args: ${JSON.stringify(toolCall.args, null, 2)}`)
             break
           }
         }
       } else {
-        log(`${header(e)} Tool: ${e.toolCall.skillName}/${e.toolCall.toolName}`)
-        debug(`${header(e)} Args: ${JSON.stringify(e.toolCall.args, null, 2)}`)
+          log(`${header(e)} Tool: ${toolCall.skillName}/${toolCall.toolName}`)
+          debug(`${header(e)} Args: ${JSON.stringify(toolCall.args, null, 2)}`)
+        }
       }
       break
     }
@@ -67,12 +69,13 @@ export function defaultEventListener(e: RunEvent): void {
       debug(`${header(e)} Args: ${JSON.stringify(e.toolCall.args, null, 2)}`)
       break
     }
-    case "resolveToolResult": {
-      log(`${header(e)} Resolved Tool Result`)
-      if (e.toolResult.skillName === "@perstack/base") {
-        switch (e.toolResult.toolName) {
+    case "resolveToolResults": {
+      log(`${header(e)} Resolved ${e.toolResults.length} Tool Result(s)`)
+      for (const toolResult of e.toolResults) {
+        if (toolResult.skillName === "@perstack/base") {
+          switch (toolResult.toolName) {
           case "todo": {
-            const text = e.toolResult.result.find((r) => r.type === "textPart")?.text
+              const text = toolResult.result.find((r) => r.type === "textPart")?.text
             const { todos } = JSON.parse(text ?? "{}") as {
               todos: {
                 id: number
@@ -87,14 +90,15 @@ export function defaultEventListener(e: RunEvent): void {
             break
           }
           default: {
-            log(`${header(e)} Tool: ${e.toolResult.skillName}/${e.toolResult.toolName}`)
-            debug(`${header(e)} Result: ${JSON.stringify(e.toolResult.result, null, 2)}`)
+              log(`${header(e)} Tool: ${toolResult.skillName}/${toolResult.toolName}`)
+              debug(`${header(e)} Result: ${JSON.stringify(toolResult.result, null, 2)}`)
             break
           }
         }
       } else {
-        log(`${header(e)} Tool: ${e.toolResult.skillName}/${e.toolResult.toolName}`)
-        debug(`${header(e)} Result: ${JSON.stringify(e.toolResult.result, null, 2)}`)
+          log(`${header(e)} Tool: ${toolResult.skillName}/${toolResult.toolName}`)
+          debug(`${header(e)} Result: ${JSON.stringify(toolResult.result, null, 2)}`)
+        }
       }
       break
     }
diff --git a/packages/runtime/README.md b/packages/runtime/README.md
index 70e94a54..7c933253 100644
--- a/packages/runtime/README.md
+++ b/packages/runtime/README.md
@@ -40,7 +40,7 @@ The `eventListener` callback receives a `RunEvent` object, which provides granul
 
 ```typescript
 type RunEvent = {
-  type: EventType       // e.g., "startRun", "callTool"
+  type: EventType       // e.g., "startRun", "callTools"
   id: string            // Unique event ID
   timestamp: number     // Unix timestamp
   runId: string         // ID of the current run
@@ -53,9 +53,9 @@ You can narrow down the event type to access specific properties:
 
 ```typescript
 eventListener: (event) => {
-  if (event.type === "callTool") {
-    // event is now narrowed to the callTool event type
-    console.log(`Executing tool: ${event.toolCall.name}`)
+  if (event.type === "callTools") {
+    // event is now narrowed to the callTools event type
+    console.log(`Executing ${event.toolCalls.length} tools`)
   }
 }
 ```
@@ -185,19 +185,21 @@ stateDiagram-v2
     [*] --> Init
     Init --> PreparingForStep: startRun
     PreparingForStep --> GeneratingToolCall: startGeneration
+    PreparingForStep --> CallingTools: resumeToolCalls
+    PreparingForStep --> FinishingStep: finishAllToolCalls
     
-    GeneratingToolCall --> CallingTool: callTool
-    GeneratingToolCall --> CallingInteractiveTool: callInteractiveTool
-    GeneratingToolCall --> CallingDelegate: callDelegate
+    GeneratingToolCall --> CallingTools: callTools
     GeneratingToolCall --> FinishingStep: retry
 
-    CallingTool --> ResolvingToolResult: resolveToolResult
-    CallingTool --> ResolvingThought: resolveThought
-    CallingTool --> ResolvingPdfFile: resolvePdfFile
-    CallingTool --> ResolvingImageFile: resolveImageFile
-    CallingTool --> GeneratingRunResult: attemptCompletion
+    CallingTools --> ResolvingToolResults: resolveToolResults
+    CallingTools --> ResolvingThought: resolveThought
+    CallingTools --> ResolvingPdfFile: resolvePdfFile
+    CallingTools --> ResolvingImageFile: resolveImageFile
+    CallingTools --> GeneratingRunResult: attemptCompletion
+    CallingTools --> CallingDelegate: callDelegate
+    CallingTools --> CallingInteractiveTool: callInteractiveTool
 
-    ResolvingToolResult --> FinishingStep: finishToolCall
+    ResolvingToolResults --> FinishingStep: finishToolCall
     ResolvingThought --> FinishingStep: finishToolCall
     ResolvingPdfFile --> FinishingStep: finishToolCall
     ResolvingImageFile --> FinishingStep: finishToolCall
@@ -216,8 +218,9 @@ stateDiagram-v2
 Events trigger state transitions. They are emitted by the runtime logic or external inputs.
 
 - **Lifecycle**: `startRun`, `startGeneration`, `continueToNextStep`, `completeRun`
-- **Tool Execution**: `callTool`, `resolveToolResult`, `finishToolCall`
+- **Tool Execution**: `callTools`, `resolveToolResults`, `finishToolCall`, `resumeToolCalls`, `finishAllToolCalls`
 - **Special Types**: `resolveThought`, `resolvePdfFile`, `resolveImageFile`
+- **Mixed Tool Calls**: `callDelegate`, `callInteractiveTool` (from CallingTools state)
 - **Interruption**: `stopRunByInteractiveTool`, `stopRunByDelegate`, `stopRunByExceededMaxSteps`
 - **Error Handling**: `retry`
 
diff --git a/packages/runtime/src/checkpoint-helpers.ts b/packages/runtime/src/checkpoint-helpers.ts
index 189a3af2..923acb9c 100644
--- a/packages/runtime/src/checkpoint-helpers.ts
+++ b/packages/runtime/src/checkpoint-helpers.ts
@@ -76,6 +76,8 @@ export function buildDelegationReturnState(
       ...parentCheckpoint,
       stepNumber: resultCheckpoint.stepNumber,
       usage: resultCheckpoint.usage,
+      pendingToolCalls: parentCheckpoint.pendingToolCalls,
+      partialToolResults: parentCheckpoint.partialToolResults,
     },
   }
 }
@@ -118,6 +120,8 @@ export function buildDelegateToState(
         checkpointId: resultCheckpoint.id,
       },
       usage: resultCheckpoint.usage,
+      pendingToolCalls: undefined,
+      partialToolResults: undefined,
     },
   }
 }
diff --git a/packages/runtime/src/messages/message.ts b/packages/runtime/src/messages/message.ts
index 907cc749..a51c0294 100644
--- a/packages/runtime/src/messages/message.ts
+++ b/packages/runtime/src/messages/message.ts
@@ -66,7 +66,9 @@ export function createExpertMessage(
 export function createToolMessage(
   contents: Array<
     Omit<ToolResultPart, "id" | "contents"> & {
-      contents: Array<Omit<TextPart, "id"> | Omit<ImageInlinePart, "id">>
+      contents: Array<
+        Omit<TextPart, "id"> | Omit<ImageInlinePart, "id"> | Omit<FileInlinePart, "id">
+      >
     }
   >,
 ): ToolMessage {
@@ -244,11 +246,12 @@ function toolResultPartToCoreToolResultPart(part: ToolResultPart): ToolResultMod
       output: { type: "text" as const, value: contents[0].text },
     }
   }
-  const contentValue = contents.map((content) =>
-    content.type === "textPart"
-      ? { type: "text" as const, text: content.text }
-      : { type: "media" as const, data: content.encodedData, mediaType: content.mimeType },
-  )
+  const contentValue = contents.map((content) => {
+    if (content.type === "textPart") {
+      return { type: "text" as const, text: content.text }
+    }
+    return { type: "media" as const, data: content.encodedData, mediaType: content.mimeType }
+  })
   return {
     type: "tool-result",
     toolCallId: part.toolCallId,
diff --git a/packages/runtime/src/runtime-state-machine.ts b/packages/runtime/src/runtime-state-machine.ts
index 985d7df4..0fbb3f95 100644
--- a/packages/runtime/src/runtime-state-machine.ts
+++ b/packages/runtime/src/runtime-state-machine.ts
@@ -61,6 +61,8 @@ export const runtimeStateMachine = setup({
                 ...context.checkpoint,
                 status: "proceeding",
                 messages: [...context.checkpoint.messages, ...event.inputMessages],
+                pendingToolCalls: event.initialCheckpoint.pendingToolCalls,
+                partialToolResults: event.initialCheckpoint.partialToolResults,
               }) satisfies Checkpoint,
             step: ({ context, event }) =>
               ({
@@ -87,6 +89,41 @@ export const runtimeStateMachine = setup({
               }) satisfies Step,
           }),
         },
+        resumeToolCalls: {
+          target: "CallingTool",
+          actions: assign({
+            step: ({ context, event }) =>
+              ({
+                stepNumber: context.checkpoint.stepNumber,
+                inputMessages: context.step.inputMessages ?? [],
+                newMessages: context.step.newMessages,
+                toolCalls: context.step.toolCalls,
+                toolResults: event.partialToolResults,
+                pendingToolCalls: event.pendingToolCalls,
+                usage: context.step.usage,
+                startedAt: context.step.startedAt,
+              }) satisfies Step,
+          }),
+        },
+        finishAllToolCalls: {
+          target: "FinishingStep",
+          actions: assign({
+            checkpoint: ({ context, event }) =>
+              ({
+                ...context.checkpoint,
+                messages: [...context.checkpoint.messages, ...event.newMessages],
+                pendingToolCalls: undefined,
+                partialToolResults: undefined,
+              }) satisfies Checkpoint,
+            step: ({ context, event }) =>
+              ({
+                ...context.step,
+                newMessages: [...context.step.newMessages, ...event.newMessages],
+                toolResults: context.checkpoint.partialToolResults,
+                pendingToolCalls: undefined,
+              }) satisfies Step,
+          }),
+        },
       },
     },
 
@@ -105,13 +142,13 @@ export const runtimeStateMachine = setup({
               ({
                 ...context.step,
                 newMessages: event.newMessages,
-                toolCall: event.toolCall,
-                toolResult: event.toolResult,
+                toolCalls: event.toolCalls,
+                toolResults: event.toolResults,
                 usage: sumUsage(context.step.usage, event.usage),
               }) satisfies Step,
           }),
         },
-        callTool: {
+        callTools: {
           target: "CallingTool",
           actions: assign({
             checkpoint: ({ context, event }) =>
@@ -127,7 +164,7 @@ export const runtimeStateMachine = setup({
               ({
                 ...context.step,
                 newMessages: [event.newMessage],
-                toolCall: event.toolCall,
+                toolCalls: event.toolCalls,
                 usage: sumUsage(context.step.usage, event.usage),
               }) satisfies Step,
           }),
@@ -148,7 +185,7 @@ export const runtimeStateMachine = setup({
               ({
                 ...context.step,
                 newMessages: [event.newMessage],
-                toolCall: event.toolCall,
+                toolCalls: [event.toolCall],
                 usage: sumUsage(context.step.usage, event.usage),
               }) satisfies Step,
           }),
@@ -169,7 +206,7 @@ export const runtimeStateMachine = setup({
               ({
                 ...context.step,
                 newMessages: [event.newMessage],
-                toolCall: event.toolCall,
+                toolCalls: [event.toolCall],
                 usage: sumUsage(context.step.usage, event.usage),
               }) satisfies Step,
           }),
@@ -179,13 +216,14 @@ export const runtimeStateMachine = setup({
 
     CallingTool: {
       on: {
-        resolveToolResult: {
+        resolveToolResults: {
           target: "ResolvingToolResult",
           actions: assign({
             step: ({ context, event }) =>
               ({
                 ...context.step,
-                toolResult: event.toolResult,
+                toolResults: event.toolResults,
+                pendingToolCalls: undefined,
               }) satisfies Step,
           }),
         },
@@ -195,7 +233,7 @@ export const runtimeStateMachine = setup({
             step: ({ context, event }) =>
               ({
                 ...context.step,
-                toolResult: event.toolResult,
+                toolResults: [event.toolResult],
               }) satisfies Step,
           }),
         },
@@ -205,7 +243,7 @@ export const runtimeStateMachine = setup({
             step: ({ context, event }) =>
               ({
                 ...context.step,
-                toolResult: event.toolResult,
+                toolResults: [event.toolResult],
               }) satisfies Step,
           }),
         },
@@ -215,7 +253,7 @@ export const runtimeStateMachine = setup({
             step: ({ context, event }) =>
               ({
                 ...context.step,
-                toolResult: event.toolResult,
+                toolResults: [event.toolResult],
               }) satisfies Step,
           }),
         },
@@ -225,7 +263,33 @@ export const runtimeStateMachine = setup({
             step: ({ context, event }) =>
               ({
                 ...context.step,
-                toolResult: event.toolResult,
+                toolResults: [event.toolResult],
+              }) satisfies Step,
+          }),
+        },
+        callDelegate: {
+          target: "CallingDelegate",
+          actions: assign({
+            step: ({ context }) =>
+              ({
+                ...context.step,
+                toolCalls: context.step.toolCalls,
+                toolResults: context.step.toolResults,
+                pendingToolCalls: context.step.pendingToolCalls,
+                partialToolResults: context.step.partialToolResults,
+              }) satisfies Step,
+          }),
+        },
+        callInteractiveTool: {
+          target: "CallingInteractiveTool",
+          actions: assign({
+            step: ({ context }) =>
+              ({
+                ...context.step,
+                toolCalls: context.step.toolCalls,
+                toolResults: context.step.toolResults,
+                pendingToolCalls: context.step.pendingToolCalls,
+                partialToolResults: context.step.partialToolResults,
               }) satisfies Step,
           }),
         },
@@ -327,8 +391,8 @@ export const runtimeStateMachine = setup({
               ({
                 ...context.step,
                 newMessages: event.newMessages,
-                toolCall: event.toolCall,
-                toolResult: event.toolResult,
+                toolCalls: event.toolCalls,
+                toolResults: event.toolResults,
                 usage: sumUsage(context.step.usage, event.usage),
               }) satisfies Step,
           }),
diff --git a/packages/runtime/src/states/calling-delegate.test.ts b/packages/runtime/src/states/calling-delegate.test.ts
index 1a789c6b..6df15e15 100644
--- a/packages/runtime/src/states/calling-delegate.test.ts
+++ b/packages/runtime/src/states/calling-delegate.test.ts
@@ -8,12 +8,14 @@ describe("@perstack/runtime: StateMachineLogic['CallingDelegate']", () => {
     const setting = createRunSetting()
     const checkpoint = createCheckpoint()
     const step = createStep({
-      toolCall: {
+      pendingToolCalls: [
+        {
         id: "tc_123",
         skillName: "@perstack/math-expert",
         toolName: "@perstack/math-expert",
         args: { query: "Calculate 2 + 2" },
       },
+      ],
     })
     const skillManagers = {
       "@perstack/math-expert": {
@@ -62,6 +64,15 @@ describe("@perstack/runtime: StateMachineLogic['CallingDelegate']", () => {
           toolName: "@perstack/math-expert",
           query: "Calculate 2 + 2",
         },
+        pendingToolCalls: [
+          {
+            id: "tc_123",
+            skillName: "@perstack/math-expert",
+            toolName: "@perstack/math-expert",
+            args: { query: "Calculate 2 + 2" },
+          },
+        ],
+        partialToolResults: undefined,
       },
       step: {
         ...step,
@@ -74,7 +85,7 @@ describe("@perstack/runtime: StateMachineLogic['CallingDelegate']", () => {
     const setting = createRunSetting()
     const checkpoint = createCheckpoint()
     const step = createStep({
-      toolCall: undefined,
+      pendingToolCalls: undefined,
     })
     await expect(
       StateMachineLogics.CallingDelegate({
@@ -84,19 +95,21 @@ describe("@perstack/runtime: StateMachineLogic['CallingDelegate']", () => {
         eventListener: async () => {},
         skillManagers: {},
       }),
-    ).rejects.toThrow("No tool call found")
+    ).rejects.toThrow("No pending tool calls found")
   })
 
   it("throws error when skill manager missing", async () => {
     const setting = createRunSetting()
     const checkpoint = createCheckpoint()
     const step = createStep({
-      toolCall: {
+      pendingToolCalls: [
+        {
         id: "tc_123",
         skillName: "@perstack/math-expert",
         toolName: "@perstack/math-expert",
         args: { query: "Calculate 2 + 2" },
       },
+      ],
     })
     const skillManagers = {
       "@perstack/math-expert": {
@@ -128,12 +141,14 @@ describe("@perstack/runtime: StateMachineLogic['CallingDelegate']", () => {
     const setting = createRunSetting()
     const checkpoint = createCheckpoint()
     const step = createStep({
-      toolCall: {
+      pendingToolCalls: [
+        {
         id: "tc_123",
         skillName: "@perstack/math-expert",
         toolName: "@perstack/math-expert",
         args: { query: undefined },
       },
+      ],
     })
     const skillManagers = {
       "@perstack/math-expert": {
diff --git a/packages/runtime/src/states/calling-delegate.ts b/packages/runtime/src/states/calling-delegate.ts
index 071da516..57a16063 100644
--- a/packages/runtime/src/states/calling-delegate.ts
+++ b/packages/runtime/src/states/calling-delegate.ts
@@ -8,10 +8,14 @@ export async function callingDelegateLogic({
   step,
   skillManagers,
 }: RunSnapshot["context"]): Promise<RunEvent> {
-  if (!step.toolCall) {
-    throw new Error("No tool call found")
+  if (!step.pendingToolCalls || step.pendingToolCalls.length === 0) {
+    throw new Error("No pending tool calls found")
   }
-  const { id, toolName, args } = step.toolCall
+  const toolCall = step.pendingToolCalls[0]
+  if (!toolCall) {
+    throw new Error("No pending tool call found")
+  }
+  const { id, toolName, args } = toolCall
   const skillManager = await getSkillManagerByToolName(skillManagers, toolName)
   if (!skillManager.expert) {
     throw new Error(`Delegation error: skill manager "${toolName}" not found`)
@@ -19,6 +23,8 @@ export async function callingDelegateLogic({
   if (!args || !args.query || typeof args.query !== "string") {
     throw new Error("Delegation error: query is undefined")
   }
+  const currentToolCall = step.pendingToolCalls[0]
+  const remainingToolCalls = step.pendingToolCalls.slice(1)
   return stopRunByDelegate(setting, checkpoint, {
     checkpoint: {
       ...checkpoint,
@@ -33,6 +39,8 @@ export async function callingDelegateLogic({
         toolName,
         query: args.query,
       },
+      pendingToolCalls: [currentToolCall, ...remainingToolCalls],
+      partialToolResults: step.partialToolResults,
     },
     step: {
       ...step,
diff --git a/packages/runtime/src/states/calling-interactive-tool.test.ts b/packages/runtime/src/states/calling-interactive-tool.test.ts
index 729663ab..e5bbcce4 100644
--- a/packages/runtime/src/states/calling-interactive-tool.test.ts
+++ b/packages/runtime/src/states/calling-interactive-tool.test.ts
@@ -7,12 +7,14 @@ describe("@perstack/runtime: StateMachineLogic['CallingInteractiveTool']", () =>
     const setting = createRunSetting()
     const checkpoint = createCheckpoint()
     const step = createStep({
-      toolCall: {
+      pendingToolCalls: [
+        {
         id: "tc_interactive_123",
         skillName: "interactive",
         toolName: "humanApproval",
         args: { message: "Please approve this action" },
       },
+      ],
     })
     await expect(
       StateMachineLogics.CallingInteractiveTool({
@@ -32,6 +34,15 @@ describe("@perstack/runtime: StateMachineLogic['CallingInteractiveTool']", () =>
       checkpoint: {
         ...checkpoint,
         status: "stoppedByInteractiveTool",
+        pendingToolCalls: [
+          {
+            id: "tc_interactive_123",
+            skillName: "interactive",
+            toolName: "humanApproval",
+            args: { message: "Please approve this action" },
+          },
+        ],
+        partialToolResults: undefined,
       },
       step: {
         ...step,
@@ -39,4 +50,83 @@ describe("@perstack/runtime: StateMachineLogic['CallingInteractiveTool']", () =>
       },
     })
   })
+
+  it("throws error when pendingToolCalls is empty", async () => {
+    const setting = createRunSetting()
+    const checkpoint = createCheckpoint()
+    const step = createStep({ pendingToolCalls: [] })
+    await expect(
+      StateMachineLogics.CallingInteractiveTool({
+        setting,
+        checkpoint,
+        step,
+        eventListener: async () => {},
+        skillManagers: {},
+      }),
+    ).rejects.toThrow("No pending tool calls found")
+  })
+
+  it("throws error when pendingToolCalls is undefined", async () => {
+    const setting = createRunSetting()
+    const checkpoint = createCheckpoint()
+    const step = createStep({ pendingToolCalls: undefined })
+    await expect(
+      StateMachineLogics.CallingInteractiveTool({
+        setting,
+        checkpoint,
+        step,
+        eventListener: async () => {},
+        skillManagers: {},
+      }),
+    ).rejects.toThrow("No pending tool calls found")
+  })
+
+  it("preserves remaining tool calls in pendingToolCalls", async () => {
+    const setting = createRunSetting()
+    const checkpoint = createCheckpoint()
+    const step = createStep({
+      pendingToolCalls: [
+        { id: "tc_1", skillName: "interactive", toolName: "tool1", args: {} },
+        { id: "tc_2", skillName: "interactive", toolName: "tool2", args: {} },
+      ],
+    })
+    const result = await StateMachineLogics.CallingInteractiveTool({
+      setting,
+      checkpoint,
+      step,
+      eventListener: async () => {},
+      skillManagers: {},
+    })
+    expect(result.type).toBe("stopRunByInteractiveTool")
+    if (result.type === "stopRunByInteractiveTool") {
+      expect(result.checkpoint.pendingToolCalls).toHaveLength(2)
+      expect(result.checkpoint.pendingToolCalls?.[0]?.id).toBe("tc_1")
+      expect(result.checkpoint.pendingToolCalls?.[1]?.id).toBe("tc_2")
+    }
+  })
+
+  it("preserves partialToolResults in checkpoint", async () => {
+    const setting = createRunSetting()
+    const checkpoint = createCheckpoint()
+    const partialToolResults = [
+      { id: "tc_0", skillName: "mcp", toolName: "prevTool", result: [] },
+    ]
+    const step = createStep({
+      pendingToolCalls: [
+        { id: "tc_1", skillName: "interactive", toolName: "tool1", args: {} },
+      ],
+      partialToolResults,
+    })
+    const result = await StateMachineLogics.CallingInteractiveTool({
+      setting,
+      checkpoint,
+      step,
+      eventListener: async () => {},
+      skillManagers: {},
+    })
+    expect(result.type).toBe("stopRunByInteractiveTool")
+    if (result.type === "stopRunByInteractiveTool") {
+      expect(result.checkpoint.partialToolResults).toEqual(partialToolResults)
+    }
+  })
 })
diff --git a/packages/runtime/src/states/calling-interactive-tool.ts b/packages/runtime/src/states/calling-interactive-tool.ts
index 48a855fd..4695b868 100644
--- a/packages/runtime/src/states/calling-interactive-tool.ts
+++ b/packages/runtime/src/states/calling-interactive-tool.ts
@@ -6,10 +6,17 @@ export async function callingInteractiveToolLogic({
   checkpoint,
   step,
 }: RunSnapshot["context"]): Promise<RunEvent> {
+  if (!step.pendingToolCalls || step.pendingToolCalls.length === 0) {
+    throw new Error("No pending tool calls found")
+  }
+  const currentToolCall = step.pendingToolCalls[0]
+  const remainingToolCalls = step.pendingToolCalls.slice(1)
   return stopRunByInteractiveTool(setting, checkpoint, {
     checkpoint: {
       ...checkpoint,
       status: "stoppedByInteractiveTool",
+      pendingToolCalls: [currentToolCall, ...remainingToolCalls],
+      partialToolResults: step.partialToolResults,
     },
     step: {
       ...step,
diff --git a/packages/runtime/src/states/calling-tool.test.ts b/packages/runtime/src/states/calling-tool.test.ts
index cc2291f2..538fe132 100644
--- a/packages/runtime/src/states/calling-tool.test.ts
+++ b/packages/runtime/src/states/calling-tool.test.ts
@@ -1,28 +1,43 @@
 import { createId } from "@paralleldrive/cuid2"
-import { describe, expect, it } from "vitest"
+import { describe, expect, it, vi } from "vitest"
 import { createCheckpoint, createRunSetting, createStep } from "../../test/run-params.js"
 import type { BaseSkillManager } from "../skill-manager/index.js"
 import { callingToolLogic } from "./calling-tool.js"
 
+type CallToolResult = Array<{ type: string; text?: string; id: string }>
+type CallToolFn = (toolName: string, args: unknown) => Promise<CallToolResult>
+
 function createMockMcpSkillManager(
   name: string,
-  toolName: string,
-  callToolResult: Array<{ type: string; text?: string; id: string }> = [
-    { type: "textPart", text: "Tool executed successfully", id: createId() },
-  ],
+  toolNames: string | string[],
+  callToolFnOrResult?: CallToolFn | CallToolResult,
 ): BaseSkillManager {
+  const tools = Array.isArray(toolNames) ? toolNames : [toolNames]
+  const defaultCallTool = async () => [
+    { type: "textPart", text: "Tool executed successfully", id: createId() },
+  ]
+  const callTool: CallToolFn =
+    callToolFnOrResult === undefined
+      ? defaultCallTool
+      : typeof callToolFnOrResult === "function"
+        ? callToolFnOrResult
+        : async () => callToolFnOrResult
   return {
     name,
     type: "mcp" as const,
     lazyInit: false,
-    _toolDefinitions: [{ name: toolName, skillName: name, inputSchema: {}, interactive: false }],
+    _toolDefinitions: tools.map((t) => ({
+      name: t,
+      skillName: name,
+      inputSchema: {},
+      interactive: false,
+    })),
     _initialized: true,
     init: async () => {},
     isInitialized: () => true,
-    getToolDefinitions: async () => [
-      { name: toolName, skillName: name, inputSchema: {}, interactive: false },
-    ],
-    callTool: async () => callToolResult,
+    getToolDefinitions: async () =>
+      tools.map((t) => ({ name: t, skillName: name, inputSchema: {}, interactive: false })),
+    callTool,
     close: async () => {},
   } as unknown as BaseSkillManager
 }
@@ -45,83 +60,245 @@ function createMockDelegateSkillManager(name: string): BaseSkillManager {
   } as unknown as BaseSkillManager
 }
 
+function createMockInteractiveSkillManager(name: string, toolName: string): BaseSkillManager {
+  return {
+    name,
+    type: "interactive" as const,
+    lazyInit: false,
+    _toolDefinitions: [{ name: toolName, skillName: name, inputSchema: {}, interactive: true }],
+    _initialized: true,
+    init: async () => {},
+    isInitialized: () => true,
+    getToolDefinitions: async () => [
+      { name: toolName, skillName: name, inputSchema: {}, interactive: true },
+    ],
+    callTool: async () => [],
+    close: async () => {},
+  } as unknown as BaseSkillManager
+}
+
 describe("@perstack/runtime: callingToolLogic", () => {
-  it("executes tool and returns resolveToolResult event", async () => {
-    const setting = createRunSetting()
-    const checkpoint = createCheckpoint()
-    const step = createStep({
-      toolCall: {
-        id: "tc_123",
-        skillName: "test-skill",
-        toolName: "testTool",
-        args: { param: "value" },
-      },
+  describe("parallel tool execution", () => {
+    it("executes multiple tools in parallel and returns all results", async () => {
+      const setting = createRunSetting()
+      const checkpoint = createCheckpoint()
+      const step = createStep({
+        toolCalls: [
+          { id: "tc_1", skillName: "skill-a", toolName: "tool1", args: { x: 1 } },
+          { id: "tc_2", skillName: "skill-a", toolName: "tool2", args: { x: 2 } },
+          { id: "tc_3", skillName: "skill-b", toolName: "tool3", args: { x: 3 } },
+        ],
+      })
+      const callToolA = vi.fn(async (toolName: string) => [
+        { type: "textPart", text: `Result from ${toolName}`, id: createId() },
+      ])
+      const callToolB = vi.fn(async (toolName: string) => [
+        { type: "textPart", text: `Result from ${toolName}`, id: createId() },
+      ])
+      const skillManagers = {
+        "skill-a": createMockMcpSkillManager("skill-a", ["tool1", "tool2"], callToolA),
+        "skill-b": createMockMcpSkillManager("skill-b", ["tool3"], callToolB),
+      }
+      const event = await callingToolLogic({
+        setting,
+        checkpoint,
+        step,
+        eventListener: async () => {},
+        skillManagers,
+      })
+      expect(event.type).toBe("resolveToolResults")
+      if (event.type === "resolveToolResults") {
+        expect(event.toolResults).toHaveLength(3)
+        expect(event.toolResults[0].id).toBe("tc_1")
+        expect(event.toolResults[1].id).toBe("tc_2")
+        expect(event.toolResults[2].id).toBe("tc_3")
+      }
+      expect(callToolA).toHaveBeenCalledTimes(2)
+      expect(callToolB).toHaveBeenCalledTimes(1)
     })
-    const skillManagers = {
-      "test-skill": createMockMcpSkillManager("test-skill", "testTool"),
-    }
-    const event = await callingToolLogic({
-      setting,
-      checkpoint,
-      step,
-      eventListener: async () => {},
-      skillManagers,
+
+    it("preserves tool call order in results", async () => {
+      const setting = createRunSetting()
+      const checkpoint = createCheckpoint()
+      const step = createStep({
+        toolCalls: [
+          { id: "tc_first", skillName: "test-skill", toolName: "slowTool", args: {} },
+          { id: "tc_second", skillName: "test-skill", toolName: "fastTool", args: {} },
+        ],
+      })
+      const callTool = vi.fn(async (toolName: string) => {
+        if (toolName === "slowTool") {
+          await new Promise((r) => setTimeout(r, 50))
+        }
+        return [{ type: "textPart", text: toolName, id: createId() }]
+      })
+      const skillManagers = {
+        "test-skill": createMockMcpSkillManager("test-skill", ["slowTool", "fastTool"], callTool),
+      }
+      const event = await callingToolLogic({
+        setting,
+        checkpoint,
+        step,
+        eventListener: async () => {},
+        skillManagers,
+      })
+      expect(event.type).toBe("resolveToolResults")
+      if (event.type === "resolveToolResults") {
+        expect(event.toolResults[0].id).toBe("tc_first")
+        expect(event.toolResults[1].id).toBe("tc_second")
+      }
+    })
+
+    it("executes tools concurrently (not sequentially)", async () => {
+      const setting = createRunSetting()
+      const checkpoint = createCheckpoint()
+      const step = createStep({
+        toolCalls: [
+          { id: "tc_1", skillName: "test-skill", toolName: "tool1", args: {} },
+          { id: "tc_2", skillName: "test-skill", toolName: "tool2", args: {} },
+          { id: "tc_3", skillName: "test-skill", toolName: "tool3", args: {} },
+        ],
+      })
+      const DELAY_MS = 30
+      const callTool = vi.fn(async () => {
+        await new Promise((r) => setTimeout(r, DELAY_MS))
+        return [{ type: "textPart", text: "done", id: createId() }]
+      })
+      const skillManagers = {
+        "test-skill": createMockMcpSkillManager(
+          "test-skill",
+          ["tool1", "tool2", "tool3"],
+          callTool,
+        ),
+      }
+      const start = Date.now()
+      await callingToolLogic({
+        setting,
+        checkpoint,
+        step,
+        eventListener: async () => {},
+        skillManagers,
+      })
+      const elapsed = Date.now() - start
+      expect(elapsed).toBeLessThan(DELAY_MS * 2)
     })
-    expect(event.type).toBe("resolveToolResult")
-    expect(event.expertKey).toBe(setting.expertKey)
-    expect(event.runId).toBe(setting.runId)
   })
 
-  it("throws error when tool call is missing", async () => {
-    const setting = createRunSetting()
-    const checkpoint = createCheckpoint()
-    const step = createStep({ toolCall: undefined })
-    await expect(
-      callingToolLogic({
+  describe("single tool execution", () => {
+    it("executes single tool and returns resolveToolResults event", async () => {
+      const setting = createRunSetting()
+      const checkpoint = createCheckpoint()
+      const step = createStep({
+        toolCalls: [
+          { id: "tc_123", skillName: "test-skill", toolName: "testTool", args: { param: "value" } },
+        ],
+      })
+      const skillManagers = {
+        "test-skill": createMockMcpSkillManager("test-skill", "testTool"),
+      }
+      const event = await callingToolLogic({
         setting,
         checkpoint,
         step,
         eventListener: async () => {},
-        skillManagers: {},
-      }),
-    ).rejects.toThrow("No tool call found")
+        skillManagers,
+      })
+      expect(event.type).toBe("resolveToolResults")
+      expect(event.expertKey).toBe(setting.expertKey)
+      expect(event.runId).toBe(setting.runId)
+    })
   })
 
-  it("throws error when skill type is not mcp", async () => {
-    const setting = createRunSetting()
-    const checkpoint = createCheckpoint()
-    const step = createStep({
-      toolCall: {
-        id: "tc_123",
-        skillName: "delegate-skill",
-        toolName: "delegate-skill",
-        args: { query: "test" },
-      },
+  describe("error handling", () => {
+    it("throws error when tool calls are missing", async () => {
+      const setting = createRunSetting()
+      const checkpoint = createCheckpoint()
+      const step = createStep({ toolCalls: undefined })
+      await expect(
+        callingToolLogic({
+          setting,
+          checkpoint,
+          step,
+          eventListener: async () => {},
+          skillManagers: {},
+        }),
+      ).rejects.toThrow("No tool calls found")
     })
-    const skillManagers = {
-      "delegate-skill": createMockDelegateSkillManager("delegate-skill"),
-    }
-    await expect(
-      callingToolLogic({
+
+    it("returns callDelegate event for delegate skill", async () => {
+      const setting = createRunSetting()
+      const checkpoint = createCheckpoint()
+      const step = createStep({
+        toolCalls: [
+          { id: "tc_123", skillName: "delegate-skill", toolName: "delegate-skill", args: {} },
+        ],
+      })
+      const skillManagers = {
+        "delegate-skill": createMockDelegateSkillManager("delegate-skill"),
+      }
+      const event = await callingToolLogic({
         setting,
         checkpoint,
         step,
         eventListener: async () => {},
         skillManagers,
-      }),
-    ).rejects.toThrow("Incorrect SkillType, required MCP, got delegate")
+      })
+      expect(event.type).toBe("callDelegate")
+    })
+
+    it("returns callInteractiveTool event for interactive skill", async () => {
+      const setting = createRunSetting()
+      const checkpoint = createCheckpoint()
+      const step = createStep({
+        toolCalls: [
+          { id: "tc_123", skillName: "interactive-skill", toolName: "humanApproval", args: {} },
+        ],
+      })
+      const skillManagers = {
+        "interactive-skill": createMockInteractiveSkillManager("interactive-skill", "humanApproval"),
+      }
+      const event = await callingToolLogic({
+        setting,
+        checkpoint,
+        step,
+        eventListener: async () => {},
+        skillManagers,
+      })
+      expect(event.type).toBe("callInteractiveTool")
+    })
+
+    it("throws error when tool not found in skill managers", async () => {
+      const setting = createRunSetting()
+      const checkpoint = createCheckpoint()
+      const step = createStep({
+        toolCalls: [
+          { id: "tc_123", skillName: "unknown-skill", toolName: "unknownTool", args: {} },
+        ],
+      })
+      await expect(
+        callingToolLogic({
+          setting,
+          checkpoint,
+          step,
+          eventListener: async () => {},
+          skillManagers: {},
+        }),
+      ).rejects.toThrow("Tool unknownTool not found")
+    })
   })
 
-  it("routes think tool to resolveThought handler", async () => {
+  it("routes think tool to resolveToolResults handler", async () => {
     const setting = createRunSetting()
     const checkpoint = createCheckpoint()
     const step = createStep({
-      toolCall: {
-        id: "tc_123",
-        skillName: "@perstack/base",
-        toolName: "think",
-        args: { thought: "thinking..." },
-      },
+      toolCalls: [
+        {
+          id: "tc_123",
+          skillName: "@perstack/base",
+          toolName: "think",
+          args: { thought: "thinking..." },
+        },
+      ],
     })
     const skillManagers = {
       "@perstack/base": createMockMcpSkillManager("@perstack/base", "think"),
@@ -133,19 +310,21 @@ describe("@perstack/runtime: callingToolLogic", () => {
       eventListener: async () => {},
       skillManagers,
     })
-    expect(event.type).toBe("resolveThought")
+    expect(event.type).toBe("resolveToolResults")
   })
 
   it("routes attemptCompletion to attemptCompletion handler when no remaining todos", async () => {
     const setting = createRunSetting()
     const checkpoint = createCheckpoint()
     const step = createStep({
-      toolCall: {
-        id: "tc_123",
-        skillName: "@perstack/base",
-        toolName: "attemptCompletion",
-        args: {},
-      },
+      toolCalls: [
+        {
+          id: "tc_123",
+          skillName: "@perstack/base",
+          toolName: "attemptCompletion",
+          args: {},
+        },
+      ],
     })
     const emptyResult = [{ type: "textPart", text: JSON.stringify({}), id: createId() }]
     const skillManagers = {
@@ -165,16 +344,18 @@ describe("@perstack/runtime: callingToolLogic", () => {
     expect(event.type).toBe("attemptCompletion")
   })
 
-  it("routes attemptCompletion to resolveToolResult when remaining todos exist", async () => {
+  it("routes attemptCompletion to resolveToolResults when remaining todos exist", async () => {
     const setting = createRunSetting()
     const checkpoint = createCheckpoint()
     const step = createStep({
-      toolCall: {
-        id: "tc_123",
-        skillName: "@perstack/base",
-        toolName: "attemptCompletion",
-        args: {},
-      },
+      toolCalls: [
+        {
+          id: "tc_123",
+          skillName: "@perstack/base",
+          toolName: "attemptCompletion",
+          args: {},
+        },
+      ],
     })
     const remainingTodosResult = [
       {
@@ -197,19 +378,21 @@ describe("@perstack/runtime: callingToolLogic", () => {
       eventListener: async () => {},
       skillManagers,
     })
-    expect(event.type).toBe("resolveToolResult")
+    expect(event.type).toBe("resolveToolResults")
   })
 
-  it("routes readPdfFile tool to resolvePdfFile handler", async () => {
+  it("routes readPdfFile tool to resolveToolResults handler", async () => {
     const setting = createRunSetting()
     const checkpoint = createCheckpoint()
     const step = createStep({
-      toolCall: {
-        id: "tc_123",
-        skillName: "@perstack/base",
-        toolName: "readPdfFile",
-        args: { path: "/test.pdf" },
-      },
+      toolCalls: [
+        {
+          id: "tc_123",
+          skillName: "@perstack/base",
+          toolName: "readPdfFile",
+          args: { path: "/test.pdf" },
+        },
+      ],
     })
     const pdfResult = [{ type: "textPart", text: "PDF content", id: createId() }]
     const skillManagers = {
@@ -222,19 +405,21 @@ describe("@perstack/runtime: callingToolLogic", () => {
       eventListener: async () => {},
       skillManagers,
     })
-    expect(event.type).toBe("resolvePdfFile")
+    expect(event.type).toBe("resolveToolResults")
   })
 
-  it("routes readImageFile tool to resolveImageFile handler", async () => {
+  it("routes readImageFile tool to resolveToolResults handler", async () => {
     const setting = createRunSetting()
     const checkpoint = createCheckpoint()
     const step = createStep({
-      toolCall: {
-        id: "tc_123",
-        skillName: "@perstack/base",
-        toolName: "readImageFile",
-        args: { path: "/test.png" },
-      },
+      toolCalls: [
+        {
+          id: "tc_123",
+          skillName: "@perstack/base",
+          toolName: "readImageFile",
+          args: { path: "/test.png" },
+        },
+      ],
     })
     const imageResult = [
       { type: "imageInlinePart", encodedData: "base64data", mimeType: "image/png", id: createId() },
@@ -249,19 +434,21 @@ describe("@perstack/runtime: callingToolLogic", () => {
       eventListener: async () => {},
       skillManagers,
     })
-    expect(event.type).toBe("resolveImageFile")
+    expect(event.type).toBe("resolveToolResults")
   })
 
-  it("routes non-special @perstack/base tools to resolveToolResult", async () => {
+  it("routes non-special @perstack/base tools to resolveToolResults", async () => {
     const setting = createRunSetting()
     const checkpoint = createCheckpoint()
     const step = createStep({
-      toolCall: {
-        id: "tc_123",
-        skillName: "@perstack/base",
-        toolName: "readTextFile",
-        args: { path: "/test.txt" },
-      },
+      toolCalls: [
+        {
+          id: "tc_123",
+          skillName: "@perstack/base",
+          toolName: "readTextFile",
+          args: { path: "/test.txt" },
+        },
+      ],
     })
     const skillManagers = {
       "@perstack/base": createMockMcpSkillManager("@perstack/base", "readTextFile"),
@@ -273,19 +460,21 @@ describe("@perstack/runtime: callingToolLogic", () => {
       eventListener: async () => {},
       skillManagers,
     })
-    expect(event.type).toBe("resolveToolResult")
+    expect(event.type).toBe("resolveToolResults")
   })
 
   it("throws error when tool not found in skill managers", async () => {
     const setting = createRunSetting()
     const checkpoint = createCheckpoint()
     const step = createStep({
-      toolCall: {
-        id: "tc_123",
-        skillName: "unknown-skill",
-        toolName: "unknownTool",
-        args: {},
-      },
+      toolCalls: [
+        {
+          id: "tc_123",
+          skillName: "unknown-skill",
+          toolName: "unknownTool",
+          args: {},
+        },
+      ],
     })
     await expect(
       callingToolLogic({
@@ -297,4 +486,132 @@ describe("@perstack/runtime: callingToolLogic", () => {
       }),
     ).rejects.toThrow("Tool unknownTool not found")
   })
+
+  it("executes multiple tools in parallel", async () => { // two calls on one MCP skill should come back together in one event
+    const setting = createRunSetting()
+    const checkpoint = createCheckpoint()
+    const step = createStep({
+      toolCalls: [
+        {
+          id: "tc_1",
+          skillName: "test-skill",
+          toolName: "testTool1",
+          args: { param: "value1" },
+        },
+        {
+          id: "tc_2",
+          skillName: "test-skill",
+          toolName: "testTool2",
+          args: { param: "value2" },
+        },
+      ],
+    })
+    const skillManagers = {
+      "test-skill": { // hand-built stub exposing both tools from a single manager
+        name: "test-skill",
+        type: "mcp" as const,
+        lazyInit: false,
+        _toolDefinitions: [
+          { name: "testTool1", skillName: "test-skill", inputSchema: {}, interactive: false },
+          { name: "testTool2", skillName: "test-skill", inputSchema: {}, interactive: false },
+        ],
+        _initialized: true,
+        init: async () => {},
+        isInitialized: () => true,
+        getToolDefinitions: async () => [
+          { name: "testTool1", skillName: "test-skill", inputSchema: {}, interactive: false },
+          { name: "testTool2", skillName: "test-skill", inputSchema: {}, interactive: false },
+        ],
+        callTool: async () => [{ type: "textPart", text: "Success", id: createId() }], // same canned result for every tool
+        close: async () => {},
+      } as unknown as BaseSkillManager, // double cast: the literal is not type-checked against BaseSkillManager
+    }
+    const event = await callingToolLogic({
+      setting,
+      checkpoint,
+      step,
+      eventListener: async () => {},
+      skillManagers,
+    })
+    expect(event.type).toBe("resolveToolResults")
+    if (event.type === "resolveToolResults") { // narrows the event union so toolResults can be read
+      expect(event.toolResults).toHaveLength(2) // checks the result count only, not that the calls overlapped in time
+    }
+  })
+
+  describe("mixed tool types", () => { // one step holding calls of different skill manager types
+    it("executes MCP tools first then calls delegate", async () => {
+      const setting = createRunSetting()
+      const checkpoint = createCheckpoint()
+      const step = createStep({
+        toolCalls: [
+          { id: "tc_mcp", skillName: "mcp-skill", toolName: "mcpTool", args: {} },
+          { id: "tc_delegate", skillName: "delegate-skill", toolName: "delegate-skill", args: {} },
+        ],
+      })
+      const skillManagers = {
+        "mcp-skill": createMockMcpSkillManager("mcp-skill", "mcpTool"),
+        "delegate-skill": createMockDelegateSkillManager("delegate-skill"),
+      }
+      const event = await callingToolLogic({
+        setting,
+        checkpoint,
+        step,
+        eventListener: async () => {},
+        skillManagers,
+      })
+      expect(event.type).toBe("callDelegate")
+      expect(step.partialToolResults).toHaveLength(1) // callingToolLogic writes the MCP result onto the step object passed in
+      expect(step.partialToolResults?.[0]?.toolName).toBe("mcpTool")
+    })
+
+    it("executes MCP tools first then calls interactive", async () => {
+      const setting = createRunSetting()
+      const checkpoint = createCheckpoint()
+      const step = createStep({
+        toolCalls: [
+          { id: "tc_mcp", skillName: "mcp-skill", toolName: "mcpTool", args: {} },
+          { id: "tc_interactive", skillName: "interactive-skill", toolName: "humanApproval", args: {} },
+        ],
+      })
+      const skillManagers = {
+        "mcp-skill": createMockMcpSkillManager("mcp-skill", "mcpTool"),
+        "interactive-skill": createMockInteractiveSkillManager("interactive-skill", "humanApproval"),
+      }
+      const event = await callingToolLogic({
+        setting,
+        checkpoint,
+        step,
+        eventListener: async () => {},
+        skillManagers,
+      })
+      expect(event.type).toBe("callInteractiveTool")
+      expect(step.partialToolResults).toHaveLength(1) // the MCP result is recorded before the interactive event is returned
+      expect(step.partialToolResults?.[0]?.toolName).toBe("mcpTool")
+    })
+
+    it("delegates before interactive when both exist", async () => {
+      const setting = createRunSetting()
+      const checkpoint = createCheckpoint()
+      const step = createStep({
+        toolCalls: [
+          { id: "tc_delegate", skillName: "delegate-skill", toolName: "delegate-skill", args: {} },
+          { id: "tc_interactive", skillName: "interactive-skill", toolName: "humanApproval", args: {} },
+        ],
+      })
+      const skillManagers = {
+        "delegate-skill": createMockDelegateSkillManager("delegate-skill"),
+        "interactive-skill": createMockInteractiveSkillManager("interactive-skill", "humanApproval"),
+      }
+      const event = await callingToolLogic({
+        setting,
+        checkpoint,
+        step,
+        eventListener: async () => {},
+        skillManagers,
+      })
+      expect(event.type).toBe("callDelegate")
+      expect(step.pendingToolCalls).toHaveLength(2) // the dispatched delegate call stays in pendingToolCalls alongside the interactive one
+    })
+  })
 })
diff --git a/packages/runtime/src/states/calling-tool.ts b/packages/runtime/src/states/calling-tool.ts
index ff4a0479..a6bd1f03 100644
--- a/packages/runtime/src/states/calling-tool.ts
+++ b/packages/runtime/src/states/calling-tool.ts
@@ -1,14 +1,18 @@
+import { readFile } from "node:fs/promises"
 import {
   attemptCompletion,
+  callDelegate,
+  callInteractiveTool,
+  type MessagePart,
   type RunEvent,
-  resolveImageFile,
-  resolvePdfFile,
-  resolveThought,
-  resolveToolResult,
+  resolveToolResults,
+  type ToolCall,
   type ToolResult,
 } from "@perstack/core"
 import type { RunSnapshot } from "../runtime-state-machine.js"
+import type { BaseSkillManager } from "../skill-manager/index.js"
 import { getSkillManagerByToolName } from "../skill-manager/index.js"
+import type { McpSkillManager } from "../skill-manager/mcp.js"
 
 function hasRemainingTodos(toolResult: ToolResult): boolean {
   const firstPart = toolResult.result[0]
@@ -23,38 +27,169 @@ function hasRemainingTodos(toolResult: ToolResult): boolean {
   }
 }
 
+type FileInfo = { path: string; mimeType: string; size: number } // JSON shape looked for in text parts by processFileToolResult; only path and mimeType are read there
+
+function isFileInfo(value: unknown): value is FileInfo { // runtime type guard for FileInfo
+  return (
+    typeof value === "object" &&
+    value !== null && // typeof null is "object", so null must be excluded explicitly
+    "path" in value &&
+    "mimeType" in value &&
+    "size" in value &&
+    typeof (value as FileInfo).path === "string" && // cast is only for property access; each field type is still checked at runtime
+    typeof (value as FileInfo).mimeType === "string" &&
+    typeof (value as FileInfo).size === "number"
+  )
+}
+
+async function processFileToolResult( // swaps FileInfo-shaped text parts of a tool result for the base64-encoded file contents
+  toolResult: ToolResult,
+  toolName: "readPdfFile" | "readImageFile",
+): Promise<ToolResult> {
+  const processedContents: MessagePart[] = []
+  for (const part of toolResult.result) {
+    if (part.type !== "textPart") { // non-text parts pass through unchanged
+      processedContents.push(part)
+      continue
+    }
+    let fileInfo: FileInfo | undefined
+    try {
+      const parsed = JSON.parse(part.text)
+      if (isFileInfo(parsed)) {
+        fileInfo = parsed
+      }
+    } catch { // text is not JSON: keep the part as it is
+      processedContents.push(part)
+      continue
+    }
+    if (!fileInfo) { // valid JSON but not a FileInfo: keep the part as it is
+      processedContents.push(part)
+      continue
+    }
+    const { path, mimeType } = fileInfo
+    try {
+      const buffer = await readFile(path) // no encoding given, so the whole file is read as a Buffer
+      if (toolName === "readImageFile") {
+        processedContents.push({
+          type: "imageInlinePart",
+          id: part.id, // the replacement part reuses the id of the text part it replaces
+          encodedData: buffer.toString("base64"),
+          mimeType,
+        })
+      } else { // toolName is "readPdfFile" on this branch
+        processedContents.push({
+          type: "fileInlinePart",
+          id: part.id,
+          encodedData: buffer.toString("base64"),
+          mimeType,
+        })
+      }
+    } catch (error) { // a failed read becomes a text part describing the failure instead of throwing
+      processedContents.push({
+        type: "textPart",
+        id: part.id,
+        text: `Failed to read file "${path}": ${error instanceof Error ? error.message : String(error)}`,
+      })
+    }
+  }
+  return { ...toolResult, result: processedContents } // returns a copy; the input toolResult is not mutated
+}
+
+async function executeMcpToolCall( // runs one tool call on its MCP skill manager and wraps the output as a ToolResult
+  toolCall: ToolCall,
+  skillManagers: Record<string, BaseSkillManager>,
+): Promise<ToolResult> {
+  const skillManager = await getSkillManagerByToolName(skillManagers, toolCall.toolName) // lookup uses toolName only; toolCall.skillName is not consulted
+  if (skillManager.type !== "mcp") { // delegate and interactive managers are rejected here
+    throw new Error(`Incorrect SkillType, required MCP, got ${skillManager.type}`)
+  }
+  const result = await (skillManager as McpSkillManager).callTool(toolCall.toolName, toolCall.args) // cast follows the type check above
+  const toolResult: ToolResult = {
+    id: toolCall.id, // result keeps the id of the call it answers
+    skillName: toolCall.skillName,
+    toolName: toolCall.toolName,
+    result,
+  }
+  if (toolCall.toolName === "readPdfFile" || toolCall.toolName === "readImageFile") { // these two tools get their result parts post-processed
+    return processFileToolResult(toolResult, toolCall.toolName)
+  }
+  return toolResult
+}
+
+async function getToolType( // reports the type of the skill manager found for toolCall.toolName
+  toolCall: ToolCall,
+  skillManagers: Record<string, BaseSkillManager>,
+): Promise<"mcp" | "delegate" | "interactive"> {
+  const skillManager = await getSkillManagerByToolName(skillManagers, toolCall.toolName) // a rejection from the lookup propagates to the caller
+  return skillManager.type
+}
+
 export async function callingToolLogic({
   setting,
   checkpoint,
   step,
   skillManagers,
 }: RunSnapshot["context"]): Promise<RunEvent> {
-  if (!step.toolCall) {
-    throw new Error("No tool call found")
-  }
-  const { id, skillName, toolName, args } = step.toolCall
-  const skillManager = await getSkillManagerByToolName(skillManagers, toolName)
-  if (skillManager.type !== "mcp") {
-    throw new Error(`Incorrect SkillType, required MCP, got ${skillManager.type}`)
+  const pendingToolCalls = step.pendingToolCalls ?? step.toolCalls ?? [] // pendingToolCalls wins when set; otherwise every tool call of the step
+  if (pendingToolCalls.length === 0) {
+    throw new Error("No tool calls found")
   }
-  const result = await skillManager.callTool(toolName, args)
-  const toolResult: ToolResult = { id, skillName, toolName, result }
-  if (skillName === "@perstack/base") {
-    if (toolName === "think") {
-      return resolveThought(setting, checkpoint, { toolResult })
-    }
-    if (toolName === "attemptCompletion") {
-      if (hasRemainingTodos(toolResult)) {
-        return resolveToolResult(setting, checkpoint, { toolResult })
-      }
-      return attemptCompletion(setting, checkpoint, { toolResult })
+  const toolResults: ToolResult[] = step.toolResults ? [...step.toolResults] : [] // copy of results already on the step, so the step's array is not mutated
+  const attemptCompletionTool = pendingToolCalls.find(
+    (tc) => tc.skillName === "@perstack/base" && tc.toolName === "attemptCompletion",
+  )
+  if (attemptCompletionTool) { // NOTE(review): both returns below skip every other pending call and omit the toolResults gathered above — confirm intended
+    const toolResult = await executeMcpToolCall(attemptCompletionTool, skillManagers)
+    if (hasRemainingTodos(toolResult)) {
+      return resolveToolResults(setting, checkpoint, { toolResults: [toolResult] })
     }
-    if (toolName === "readPdfFile") {
-      return resolvePdfFile(setting, checkpoint, { toolResult })
+    return attemptCompletion(setting, checkpoint, { toolResult })
+  }
+  const toolCallTypes = await Promise.all( // classify every pending call by the type of its skill manager
+    pendingToolCalls.map(async (tc) => ({
+      toolCall: tc,
+      type: await getToolType(tc, skillManagers),
+    })),
+  )
+  const mcpToolCalls = toolCallTypes.filter((t) => t.type === "mcp").map((t) => t.toolCall)
+  const delegateToolCalls = toolCallTypes
+    .filter((t) => t.type === "delegate")
+    .map((t) => t.toolCall)
+  const interactiveToolCalls = toolCallTypes
+    .filter((t) => t.type === "interactive")
+    .map((t) => t.toolCall)
+  if (mcpToolCalls.length > 0) {
+    const mcpResults = await Promise.all( // MCP calls run concurrently; a single rejection rejects the whole batch
+      mcpToolCalls.map((tc) => executeMcpToolCall(tc, skillManagers)),
+    )
+    toolResults.push(...mcpResults)
+  }
+  const remainingToolCalls = [...delegateToolCalls, ...interactiveToolCalls] // delegates first, then interactive; includes the call dispatched below
+  if (delegateToolCalls.length > 0) {
+    const delegateToolCall = delegateToolCalls[0] // only the first delegate is dispatched in this pass
+    if (!delegateToolCall) {
+      throw new Error("No delegate tool call found")
     }
-    if (toolName === "readImageFile") {
-      return resolveImageFile(setting, checkpoint, { toolResult })
+    step.partialToolResults = toolResults // mutates the step passed in: stores results gathered so far
+    step.pendingToolCalls = remainingToolCalls
+    return callDelegate(setting, checkpoint, {
+      newMessage: checkpoint.messages[checkpoint.messages.length - 1] as never, // NOTE(review): "as never" disables type checking of this message — confirm
+      toolCall: delegateToolCall,
+      usage: step.usage,
+    })
+  }
+  if (interactiveToolCalls.length > 0) { // reached only when no delegate call is pending
+    const interactiveToolCall = interactiveToolCalls[0] // only the first interactive call is dispatched in this pass
+    if (!interactiveToolCall) {
+      throw new Error("No interactive tool call found")
     }
+    step.partialToolResults = toolResults
+    step.pendingToolCalls = remainingToolCalls
+    return callInteractiveTool(setting, checkpoint, {
+      newMessage: checkpoint.messages[checkpoint.messages.length - 1] as never,
+      toolCall: interactiveToolCall,
+      usage: step.usage,
+    })
   }
-  return resolveToolResult(setting, checkpoint, { toolResult })
+  return resolveToolResults(setting, checkpoint, { toolResults }) // only MCP calls were pending: return all results at once
 }
diff --git a/packages/runtime/src/states/generating-run-result.test.ts b/packages/runtime/src/states/generating-run-result.test.ts
index 6fb077fd..3bbf76c9 100644
--- a/packages/runtime/src/states/generating-run-result.test.ts
+++ b/packages/runtime/src/states/generating-run-result.test.ts
@@ -33,18 +33,22 @@ describe("@perstack/runtime: StateMachineLogic['GeneratingRunResult']", () => {
     const setting = createRunSetting()
     const checkpoint = createCheckpoint()
     const step = createStep({
-      toolCall: {
+      toolCalls: [
+        {
         id: "tc_123",
         skillName: "@perstack/base",
         toolName: "attemptCompletion",
         args: {},
       },
-      toolResult: {
+      ],
+      toolResults: [
+        {
         id: "tc_123",
         skillName: "@perstack/base",
         toolName: "attemptCompletion",
         result: [{ type: "textPart", text: JSON.stringify({}), id: createId() }],
       },
+      ],
     })
     mockGetModel.mockReturnValue(createMockLanguageModel("Task completed successfully"))
     const event = await StateMachineLogics.GeneratingRunResult({
@@ -65,18 +69,22 @@ describe("@perstack/runtime: StateMachineLogic['GeneratingRunResult']", () => {
     const setting = createRunSetting()
     const checkpoint = createCheckpoint()
     const step = createStep({
-      toolCall: {
+      toolCalls: [
+        {
         id: "tc_123",
         skillName: "@perstack/base",
         toolName: "attemptCompletion",
         args: {},
       },
-      toolResult: {
+      ],
+      toolResults: [
+        {
         id: "tc_123",
         skillName: "@perstack/base",
         toolName: "attemptCompletion",
         result: [{ type: "textPart", text: JSON.stringify({}), id: createId() }],
       },
+      ],
     })
     const errorModel = new MockLanguageModelV2({
       doGenerate: async () => {
@@ -94,10 +102,10 @@ describe("@perstack/runtime: StateMachineLogic['GeneratingRunResult']", () => {
     expect(event.type).toBe("retry")
   })
 
-  it("throws error when tool call or result missing", async () => {
+  it("throws error when tool calls or results missing", async () => {
     const setting = createRunSetting()
     const checkpoint = createCheckpoint()
-    const step = createStep({ toolCall: undefined, toolResult: undefined })
+    const step = createStep({ toolCalls: undefined, toolResults: undefined })
     await expect(
       StateMachineLogics.GeneratingRunResult({
         setting,
@@ -106,25 +114,29 @@ describe("@perstack/runtime: StateMachineLogic['GeneratingRunResult']", () => {
         eventListener: async () => {},
         skillManagers: {},
       }),
-    ).rejects.toThrow("No tool call or tool result found")
+    ).rejects.toThrow("No tool calls or tool results found")
   })
 
   it("includes proper event metadata", async () => {
     const setting = createRunSetting()
     const checkpoint = createCheckpoint()
     const step = createStep({
-      toolCall: {
+      toolCalls: [
+        {
         id: "tc_123",
         skillName: "@perstack/base",
         toolName: "attemptCompletion",
         args: {},
       },
-      toolResult: {
+      ],
+      toolResults: [
+        {
         id: "tc_123",
         skillName: "@perstack/base",
         toolName: "attemptCompletion",
         result: [{ type: "textPart", text: JSON.stringify({}), id: createId() }],
       },
+      ],
     })
     mockGetModel.mockReturnValue(createMockLanguageModel("Final result"))
     const event = await StateMachineLogics.GeneratingRunResult({
diff --git a/packages/runtime/src/states/generating-run-result.ts b/packages/runtime/src/states/generating-run-result.ts
index 9a2cdf93..0912d98f 100644
--- a/packages/runtime/src/states/generating-run-result.ts
+++ b/packages/runtime/src/states/generating-run-result.ts
@@ -15,21 +15,24 @@ export async function generatingRunResultLogic({
   checkpoint,
   step,
 }: RunSnapshot["context"]): Promise<RunEvent> {
-  if (!step.toolCall || !step.toolResult) {
-    throw new Error("No tool call or tool result found")
+  if (!step.toolCalls || !step.toolResults || step.toolResults.length === 0) {
+    throw new Error("No tool calls or tool results found")
   }
-  const { id, toolName } = step.toolCall
-  const { result } = step.toolResult
-  const toolMessage = createToolMessage([
-    {
-      type: "toolResultPart",
-      toolCallId: id,
-      toolName,
-      contents: result.filter(
-        (part) => part.type === "textPart" || part.type === "imageInlinePart",
+  const toolResultParts = step.toolResults.map((toolResult) => {
+    const toolCall = step.toolCalls?.find((tc) => tc.id === toolResult.id)
+    return {
+      type: "toolResultPart" as const,
+      toolCallId: toolResult.id,
+      toolName: toolCall?.toolName ?? toolResult.toolName,
+      contents: toolResult.result.filter(
+        (part) =>
+          part.type === "textPart" ||
+          part.type === "imageInlinePart" ||
+          part.type === "fileInlinePart",
       ),
-    },
-  ])
+    }
+  })
+  const toolMessage = createToolMessage(toolResultParts)
   const model = getModel(setting.model, setting.providerConfig)
   const { messages } = checkpoint
   let generationResult: GenerateTextResult<ToolSet, never>
diff --git a/packages/runtime/src/states/generating-tool-call.test.ts b/packages/runtime/src/states/generating-tool-call.test.ts
index 3142513f..cbadeaa9 100644
--- a/packages/runtime/src/states/generating-tool-call.test.ts
+++ b/packages/runtime/src/states/generating-tool-call.test.ts
@@ -186,10 +186,10 @@ describe("@perstack/runtime: StateMachineLogic['GeneratingToolCall']", () => {
       eventListener: async () => {},
       skillManagers: { "test-skill": skillManager },
     })
-    expect(event.type).toBe("callTool")
+    expect(event.type).toBe("callTools")
   })
 
-  it("returns callInteractiveTool event for interactive skill", async () => {
+  it("returns callTools event for interactive skill (processed later in CallingTool)", async () => {
     const setting = createRunSetting()
     const checkpoint = createCheckpoint()
     const step = createStep()
@@ -214,10 +214,10 @@ describe("@perstack/runtime: StateMachineLogic['GeneratingToolCall']", () => {
       eventListener: async () => {},
       skillManagers: { "interactive-skill": skillManager },
     })
-    expect(event.type).toBe("callInteractiveTool")
+    expect(event.type).toBe("callTools")
   })
 
-  it("returns callDelegate event for delegate skill", async () => {
+  it("returns callTools event for delegate skill (processed later in CallingTool)", async () => {
     const setting = createRunSetting()
     const checkpoint = createCheckpoint()
     const step = createStep()
@@ -242,7 +242,47 @@ describe("@perstack/runtime: StateMachineLogic['GeneratingToolCall']", () => {
       eventListener: async () => {},
       skillManagers: { "delegate-skill": skillManager },
     })
-    expect(event.type).toBe("callDelegate")
+    expect(event.type).toBe("callTools")
+  })
+
+  it("sorts tool calls by priority: MCP → Delegate → Interactive", async () => { // the emitted event must list calls by manager type, not in model output order
+    const setting = createRunSetting()
+    const checkpoint = createCheckpoint()
+    const step = createStep()
+    const mcpSkillManager = createMockSkillManager("mcp-skill", "mcp", "mcpTool")
+    const delegateSkillManager = createMockSkillManager("delegate-skill", "delegate", "delegateTool")
+    const interactiveSkillManager = createMockSkillManager(
+      "interactive-skill",
+      "interactive",
+      "interactiveTool",
+    )
+    mockGetModel.mockReturnValue(
+      createMockLanguageModel({
+        finishReason: "tool-calls",
+        toolCalls: [ // deliberately listed in reverse priority order
+          { type: "tool-call", toolCallId: "tc_int", toolName: "interactiveTool", input: "{}" },
+          { type: "tool-call", toolCallId: "tc_del", toolName: "delegateTool", input: "{}" },
+          { type: "tool-call", toolCallId: "tc_mcp", toolName: "mcpTool", input: "{}" },
+        ],
+      }),
+    )
+    const event = await StateMachineLogics.GeneratingToolCall({
+      setting,
+      checkpoint,
+      step,
+      eventListener: async () => {},
+      skillManagers: {
+        "mcp-skill": mcpSkillManager,
+        "delegate-skill": delegateSkillManager,
+        "interactive-skill": interactiveSkillManager,
+      },
+    })
+    expect(event.type).toBe("callTools")
+    if (event.type === "callTools") { // narrows the event union so toolCalls can be read
+      expect(event.toolCalls[0].toolName).toBe("mcpTool")
+      expect(event.toolCalls[1].toolName).toBe("delegateTool")
+      expect(event.toolCalls[2].toolName).toBe("interactiveTool")
+    }
   })
 
   it("returns retry event when finish reason is length", async () => {
@@ -307,6 +347,6 @@ describe("@perstack/runtime: StateMachineLogic['GeneratingToolCall']", () => {
       eventListener: async () => {},
       skillManagers: { "test-skill": skillManager },
     })
-    expect(event.type).toBe("callTool")
+    expect(event.type).toBe("callTools")
   })
 })
diff --git a/packages/runtime/src/states/generating-tool-call.ts b/packages/runtime/src/states/generating-tool-call.ts
index 75543b78..934b9b21 100644
--- a/packages/runtime/src/states/generating-tool-call.ts
+++ b/packages/runtime/src/states/generating-tool-call.ts
@@ -1,11 +1,10 @@
 import { createId } from "@paralleldrive/cuid2"
 import {
-  callDelegate,
-  callInteractiveTool,
-  callTool,
+  callTools,
   type RunEvent,
   retry,
   type TextPart,
+  type ToolCall,
   type ToolCallPart,
 } from "@perstack/core"
 import { type GenerateTextResult, generateText, type ToolSet } from "ai"
@@ -17,9 +16,59 @@ import {
 } from "../messages/message.js"
 import { getModel } from "../model.js"
 import type { RunSnapshot } from "../runtime-state-machine.js"
+import type { BaseSkillManager } from "../skill-manager/index.js"
 import { getSkillManagerByToolName, getToolSet } from "../skill-manager/index.js"
 import { createEmptyUsage, usageFromGenerateTextResult } from "../usage.js"
 
+type ClassifiedToolCall = { // a generated tool call paired with the skill manager found for its tool name
+  toolCallId: string
+  toolName: string
+  input: Record<string, unknown>
+  skillManager: BaseSkillManager // its type drives the sort order; its name becomes ToolCall.skillName
+}
+
+async function classifyToolCalls( // looks up the skill manager for every tool call concurrently
+  toolCalls: Array<{ toolCallId: string; toolName: string; input: unknown }>,
+  skillManagers: Record<string, BaseSkillManager>,
+): Promise<ClassifiedToolCall[]> {
+  return Promise.all( // results keep the input order; one failed lookup rejects the whole call
+    toolCalls.map(async (tc) => {
+      const skillManager = await getSkillManagerByToolName(skillManagers, tc.toolName)
+      return {
+        toolCallId: tc.toolCallId,
+        toolName: tc.toolName,
+        input: tc.input as Record<string, unknown>, // unchecked cast: input arrives typed as unknown
+        skillManager,
+      }
+    }),
+  )
+}
+
+function sortToolCallsByPriority(toolCalls: ClassifiedToolCall[]): ClassifiedToolCall[] { // returns a sorted copy; the input array is not mutated
+  const priority = { mcp: 0, delegate: 1, interactive: 2 } // lower number sorts earlier
+  return [...toolCalls].sort(
+    (a, b) => (priority[a.skillManager.type] ?? 99) - (priority[b.skillManager.type] ?? 99), // a type missing from the table sorts last
+  )
+}
+
+function buildToolCallParts(toolCalls: ClassifiedToolCall[]): Array<Omit<ToolCallPart, "id">> { // maps classified calls to id-less toolCallPart objects, in order
+  return toolCalls.map((tc) => ({
+    type: "toolCallPart" as const, // "as const" keeps the literal type instead of widening to string
+    toolCallId: tc.toolCallId,
+    toolName: tc.toolName,
+    args: tc.input,
+  }))
+}
+
+function buildToolCalls(toolCalls: ClassifiedToolCall[]): ToolCall[] { // maps classified calls to ToolCall records, in order
+  return toolCalls.map((tc) => ({
+    id: tc.toolCallId,
+    skillName: tc.skillManager.name, // skill name comes from the resolved manager, not from the model output
+    toolName: tc.toolName,
+    args: tc.input,
+  }))
+}
+
 export async function generatingToolCallLogic({
   setting,
   checkpoint,
@@ -51,8 +100,7 @@ export async function generatingToolCallLogic({
   }
   const usage = usageFromGenerateTextResult(result)
   const { text, toolCalls, finishReason } = result
-  const toolCall = toolCalls[0]
-  if (!toolCall) {
+  if (toolCalls.length === 0) {
     const reason = JSON.stringify({
       error: "Error: No tool call generated",
       message: "You must generate a tool call. Try again.",
@@ -63,42 +111,26 @@ export async function generatingToolCallLogic({
       usage,
     })
   }
-  const contents: Array<Omit<TextPart, "id"> | Omit<ToolCallPart, "id">> = [
-    {
-      type: "toolCallPart",
-      toolCallId: toolCall.toolCallId,
-      toolName: toolCall.toolName,
-      args: toolCall.input,
-    },
-  ]
-  if (text) {
-    contents.push({
-      type: "textPart",
-      text,
-    })
-  }
-  const skillManager = await getSkillManagerByToolName(skillManagers, toolCall.toolName)
-  const eventPayload = {
-    newMessage: createExpertMessage(contents),
-    toolCall: {
-      id: toolCall.toolCallId,
-      skillName: skillManager.name,
-      toolName: toolCall.toolName,
-      args: toolCall.input,
-    },
-    usage,
-  }
+  const classified = await classifyToolCalls(toolCalls, skillManagers)
+  const sorted = sortToolCallsByPriority(classified)
   if (finishReason === "tool-calls" || finishReason === "stop") {
-    switch (skillManager.type) {
-      case "mcp":
-        return callTool(setting, checkpoint, eventPayload)
-      case "interactive":
-        return callInteractiveTool(setting, checkpoint, eventPayload)
-      case "delegate":
-        return callDelegate(setting, checkpoint, eventPayload)
+    const toolCallParts = buildToolCallParts(sorted)
+    const contents: Array<Omit<TextPart, "id"> | Omit<ToolCallPart, "id">> = [...toolCallParts]
+    if (text) {
+      contents.push({ type: "textPart", text })
     }
+    const allToolCalls = buildToolCalls(sorted)
+    return callTools(setting, checkpoint, {
+      newMessage: createExpertMessage(contents),
+      toolCalls: allToolCalls,
+      usage,
+    })
   }
   if (finishReason === "length") {
+    const firstToolCall = sorted[0]
+    if (!firstToolCall) {
+      throw new Error("No tool call found")
+    }
     const reason = JSON.stringify({
       error: "Error: Tool call generation failed",
       message: "Generation length exceeded. Try again.",
@@ -109,27 +141,36 @@ export async function generatingToolCallLogic({
         createExpertMessage([
           {
             type: "toolCallPart",
-            toolCallId: toolCall.toolCallId,
-            toolName: toolCall.toolName,
-            args: toolCall.input,
+            toolCallId: firstToolCall.toolCallId,
+            toolName: firstToolCall.toolName,
+            args: firstToolCall.input,
           },
         ]),
         createToolMessage([
           {
             type: "toolResultPart",
-            toolCallId: toolCall.toolCallId,
-            toolName: toolCall.toolName,
+            toolCallId: firstToolCall.toolCallId,
+            toolName: firstToolCall.toolName,
             contents: [{ type: "textPart", text: reason }],
           },
         ]),
       ],
-      toolCall: eventPayload.toolCall,
-      toolResult: {
-        id: toolCall.toolCallId,
-        skillName: skillManager.name,
-        toolName: toolCall.toolName,
-        result: [{ type: "textPart", id: createId(), text: reason }],
-      },
+      toolCalls: [
+        {
+          id: firstToolCall.toolCallId,
+          skillName: firstToolCall.skillManager.name,
+          toolName: firstToolCall.toolName,
+          args: firstToolCall.input,
+        },
+      ],
+      toolResults: [
+        {
+          id: firstToolCall.toolCallId,
+          skillName: firstToolCall.skillManager.name,
+          toolName: firstToolCall.toolName,
+          result: [{ type: "textPart", id: createId(), text: reason }],
+        },
+      ],
       usage,
     })
   }
diff --git a/packages/runtime/src/states/init.test.ts b/packages/runtime/src/states/init.test.ts
index d76ba805..336af517 100644
--- a/packages/runtime/src/states/init.test.ts
+++ b/packages/runtime/src/states/init.test.ts
@@ -70,40 +70,23 @@ describe("@perstack/runtime: StateMachineLogic['Init']", () => {
     })
     const checkpoint = createCheckpoint({
       status: "stoppedByDelegate",
+      pendingToolCalls: [{ id: "123", skillName: "test-skill", toolName: "test", args: {} }],
     })
     const step = createStep()
-    await expect(
-      StateMachineLogics.Init({
+    const event = await StateMachineLogics.Init({
         setting,
         checkpoint,
         step,
         eventListener: async () => {},
         skillManagers: {},
-      }),
-    ).resolves.toStrictEqual({
-      type: "startRun",
-      id: expect.any(String),
-      expertKey: setting.expertKey,
-      timestamp: expect.any(Number),
-      runId: setting.runId,
-      stepNumber: checkpoint.stepNumber,
-      initialCheckpoint: checkpoint,
-      inputMessages: [
-        {
-          type: "toolMessage",
-          id: expect.any(String),
-          contents: [
-            {
-              type: "toolResultPart",
-              id: expect.any(String),
-              toolCallId: "123",
-              toolName: "test",
-              contents: [{ type: "textPart", id: expect.any(String), text: "test-delegate" }],
-            },
-          ],
-        },
-      ],
     })
+    expect(event.type).toBe("startRun")
+    if (event.type === "startRun") {
+      expect(event.inputMessages).toEqual([])
+      expect(event.initialCheckpoint.partialToolResults).toHaveLength(1)
+      expect(event.initialCheckpoint.partialToolResults?.[0].id).toBe("123")
+      expect(event.initialCheckpoint.pendingToolCalls).toBeUndefined()
+    }
   })
 
   it("throws error when delegate call result is undefined", async () => {
@@ -137,42 +120,23 @@ describe("@perstack/runtime: StateMachineLogic['Init']", () => {
     })
     const checkpoint = createCheckpoint({
       status: "stoppedByInteractiveTool",
+      pendingToolCalls: [{ id: "123", skillName: "test-skill", toolName: "test", args: {} }],
     })
     const step = createStep()
-    await expect(
-      StateMachineLogics.Init({
+    const event = await StateMachineLogics.Init({
         setting,
         checkpoint,
         step,
         eventListener: async () => {},
         skillManagers: {},
-      }),
-    ).resolves.toStrictEqual({
-      type: "startRun",
-      id: expect.any(String),
-      expertKey: setting.expertKey,
-      timestamp: expect.any(Number),
-      runId: setting.runId,
-      stepNumber: checkpoint.stepNumber,
-      initialCheckpoint: checkpoint,
-      inputMessages: [
-        {
-          type: "toolMessage",
-          id: expect.any(String),
-          contents: [
-            {
-              type: "toolResultPart",
-              id: expect.any(String),
-              toolCallId: "123",
-              toolName: "test",
-              contents: [
-                { type: "textPart", id: expect.any(String), text: "test-interactive-tool" },
-              ],
-            },
-          ],
-        },
-      ],
     })
+    expect(event.type).toBe("startRun")
+    if (event.type === "startRun") {
+      expect(event.inputMessages).toEqual([])
+      expect(event.initialCheckpoint.partialToolResults).toHaveLength(1)
+      expect(event.initialCheckpoint.partialToolResults?.[0].id).toBe("123")
+      expect(event.initialCheckpoint.pendingToolCalls).toBeUndefined()
+    }
   })
 
   it("throws error when interactive tool call result is undefined", async () => {
diff --git a/packages/runtime/src/states/init.ts b/packages/runtime/src/states/init.ts
index 71b148c6..97d59ef9 100644
--- a/packages/runtime/src/states/init.ts
+++ b/packages/runtime/src/states/init.ts
@@ -1,6 +1,7 @@
-import { type RunEvent, startRun } from "@perstack/core"
+import { createId } from "@paralleldrive/cuid2"
+import { type RunEvent, startRun, type ToolResult } from "@perstack/core"
 import { createInstructionMessage } from "../messages/instruction-message.js"
-import { createToolMessage, createUserMessage } from "../messages/message.js"
+import { createUserMessage } from "../messages/message.js"
 import type { RunSnapshot } from "../runtime-state-machine.js"
 
 export async function initLogic({
@@ -27,18 +28,29 @@ export async function initLogic({
       if (!setting.input.interactiveToolCallResult) {
         throw new Error("Interactive tool call result is undefined")
       }
-      return startRun(setting, checkpoint, {
-        initialCheckpoint: checkpoint,
-        inputMessages: [
-          createToolMessage([
-            {
-              type: "toolResultPart",
-              toolCallId: setting.input.interactiveToolCallResult.toolCallId,
-              toolName: setting.input.interactiveToolCallResult.toolName,
-              contents: [{ type: "textPart", text: setting.input.interactiveToolCallResult.text }],
-            },
-          ]),
-        ],
+      const { toolCallId, toolName, text } = setting.input.interactiveToolCallResult
+      const pendingToolCalls = checkpoint.pendingToolCalls ?? []
+      const completedToolCall = pendingToolCalls.find((tc) => tc.id === toolCallId)
+      const skillName =
+        completedToolCall?.skillName ??
+        (checkpoint.status === "stoppedByDelegate" ? checkpoint.delegateTo?.expert.key : "") ??
+        ""
+      const newToolResult: ToolResult = {
+        id: toolCallId,
+        skillName,
+        toolName,
+        result: [{ type: "textPart", id: createId(), text }],
+      }
+      const updatedPartialResults = [...(checkpoint.partialToolResults ?? []), newToolResult]
+      const updatedPendingToolCalls = pendingToolCalls.filter((tc) => tc.id !== toolCallId)
+      const updatedCheckpoint = {
+        ...checkpoint,
+        partialToolResults: updatedPartialResults,
+        pendingToolCalls: updatedPendingToolCalls.length > 0 ? updatedPendingToolCalls : undefined,
+      }
+      return startRun(setting, updatedCheckpoint, {
+        initialCheckpoint: updatedCheckpoint,
+        inputMessages: [],
       })
     }
     default:
diff --git a/packages/runtime/src/states/preparing-for-step.test.ts b/packages/runtime/src/states/preparing-for-step.test.ts
index 24566e59..efb276b9 100644
--- a/packages/runtime/src/states/preparing-for-step.test.ts
+++ b/packages/runtime/src/states/preparing-for-step.test.ts
@@ -1,9 +1,10 @@
+import { createId } from "@paralleldrive/cuid2"
 import { describe, expect, it } from "vitest"
 import { createCheckpoint, createRunSetting, createStep } from "../../test/run-params.js"
 import { StateMachineLogics } from "../index.js"
 
 describe("@perstack/runtime: StateMachineLogic['PreparingForStep']", () => {
-  it("prepares execution steps correctly", async () => {
+  it("returns startGeneration when no pending tool calls or partial results", async () => {
     const setting = createRunSetting()
     const checkpoint = createCheckpoint()
     const step = createStep()
@@ -25,4 +26,89 @@ describe("@perstack/runtime: StateMachineLogic['PreparingForStep']", () => {
       messages: checkpoint.messages,
     })
   })
+
+  it("returns resumeToolCalls when pendingToolCalls exist", async () => {
+    // Checkpoint holds one unprocessed call plus one previously collected result.
+    const pendingToolCalls = [
+      { id: "tc_1", skillName: "test-skill", toolName: "testTool", args: {} },
+    ]
+    const partialToolResults = [
+      { id: "tc_0", skillName: "test-skill", toolName: "prevTool", result: [] },
+    ]
+    const event = await StateMachineLogics.PreparingForStep({
+      setting: createRunSetting(),
+      checkpoint: createCheckpoint({ pendingToolCalls, partialToolResults }),
+      step: createStep(),
+      eventListener: async () => {},
+      skillManagers: {},
+    })
+    expect(event.type).toBe("resumeToolCalls")
+    if (event.type !== "resumeToolCalls") {
+      return
+    }
+    // Both lists are expected to be carried over onto the resume event as-is.
+    expect(event.pendingToolCalls).toEqual(pendingToolCalls)
+    expect(event.partialToolResults).toEqual(partialToolResults)
+  })
+
+  it("returns finishAllToolCalls when only partialToolResults exist", async () => {
+    // Only a collected result is present (no pending calls) in this checkpoint.
+    const setting = createRunSetting()
+    const completedResult = {
+      id: "tc_1",
+      skillName: "test-skill",
+      toolName: "testTool",
+      result: [{ type: "textPart" as const, text: "result", id: createId() }],
+    }
+    const event = await StateMachineLogics.PreparingForStep({
+      setting,
+      checkpoint: createCheckpoint({ partialToolResults: [completedResult] }),
+      step: createStep(),
+      eventListener: async () => {},
+      skillManagers: {},
+    })
+    expect(event.type).toBe("finishAllToolCalls")
+    if (event.type !== "finishAllToolCalls") {
+      return
+    }
+    // The event should carry exactly one message, and it must be a tool message.
+    const { newMessages } = event
+    expect(newMessages).toHaveLength(1)
+    expect(newMessages[0].type).toBe("toolMessage")
+  })
+
+  it("filters partialToolResults contents to allowed types", async () => {
+    // Text, inline-image and inline-file parts are all allowed, so all three must survive.
+    const setting = createRunSetting()
+    const partialToolResults = [
+      {
+        id: "tc_1",
+        skillName: "test-skill",
+        toolName: "testTool",
+        result: [
+          { type: "textPart" as const, text: "text", id: createId() },
+          { type: "imageInlinePart" as const, encodedData: "base64", mimeType: "image/png", id: createId() },
+          { type: "fileInlinePart" as const, encodedData: "base64", mimeType: "application/pdf", id: createId() },
+        ],
+      },
+    ]
+    const checkpoint = createCheckpoint({ partialToolResults })
+    const step = createStep()
+    const result = await StateMachineLogics.PreparingForStep({
+      setting,
+      checkpoint,
+      step,
+      eventListener: async () => {},
+      skillManagers: {},
+    })
+    expect(result.type).toBe("finishAllToolCalls")
+    // Narrow with throwing guards: an unexpected shape must fail the test, not skip the check.
+    if (result.type !== "finishAllToolCalls") throw new Error("Unexpected event type")
+    const toolMessage = result.newMessages[0]
+    if (toolMessage.type !== "toolMessage") throw new Error("Expected toolMessage")
+    const toolResultPart = toolMessage.contents[0]
+    if (toolResultPart.type !== "toolResultPart") throw new Error("Expected toolResultPart")
+    const partTypes = toolResultPart.contents.map((part) => part.type)
+    expect(partTypes).toEqual(["textPart", "imageInlinePart", "fileInlinePart"])
+  })
 })
diff --git a/packages/runtime/src/states/preparing-for-step.ts b/packages/runtime/src/states/preparing-for-step.ts
index e4a4d5f3..0f697fb7 100644
--- a/packages/runtime/src/states/preparing-for-step.ts
+++ b/packages/runtime/src/states/preparing-for-step.ts
@@ -1,11 +1,33 @@
-import { type RunEvent, startGeneration } from "@perstack/core"
+import { finishAllToolCalls, type RunEvent, resumeToolCalls, startGeneration } from "@perstack/core"
+import { createToolMessage } from "../messages/message.js"
 import type { RunSnapshot } from "../runtime-state-machine.js"
 
 export async function preparingForStepLogic({
   setting,
   checkpoint,
 }: RunSnapshot["context"]): Promise<RunEvent> {
-  // TODO: add logic to count tokens and check if it's exceeded the limit
+  if (checkpoint.pendingToolCalls && checkpoint.pendingToolCalls.length > 0) {
+    return resumeToolCalls(setting, checkpoint, {
+      pendingToolCalls: checkpoint.pendingToolCalls,
+      partialToolResults: checkpoint.partialToolResults ?? [],
+    })
+  }
+  if (checkpoint.partialToolResults && checkpoint.partialToolResults.length > 0) {
+    const toolResultParts = checkpoint.partialToolResults.map((tr) => ({
+      type: "toolResultPart" as const,
+      toolCallId: tr.id,
+      toolName: tr.toolName,
+      contents: tr.result.filter(
+        (part) =>
+          part.type === "textPart" ||
+          part.type === "imageInlinePart" ||
+          part.type === "fileInlinePart",
+      ),
+    }))
+    return finishAllToolCalls(setting, checkpoint, {
+      newMessages: [createToolMessage(toolResultParts)],
+    })
+  }
   return startGeneration(setting, checkpoint, {
     messages: checkpoint.messages,
   })
diff --git a/packages/runtime/src/states/resolving-image-file.test.ts b/packages/runtime/src/states/resolving-image-file.test.ts
index a7ba693e..005477af 100644
--- a/packages/runtime/src/states/resolving-image-file.test.ts
+++ b/packages/runtime/src/states/resolving-image-file.test.ts
@@ -25,14 +25,17 @@ describe("@perstack/runtime: StateMachineLogic['ResolvingImageFile']", () => {
     const setting = createRunSetting()
     const checkpoint = createCheckpoint()
     const step = createStep({
-      toolCall: {
+      toolCalls: [
+        {
         id: "tc_123",
         skillName: "@perstack/base",
         toolName: "readImageFile",
         args: { path: "/test/image.png" },
       },
-      toolResult: {
-        id: "tr_123",
+      ],
+      toolResults: [
+        {
+          id: "tc_123",
         skillName: "@perstack/base",
         toolName: "readImageFile",
         result: [
@@ -43,6 +46,7 @@ describe("@perstack/runtime: StateMachineLogic['ResolvingImageFile']", () => {
           },
         ],
       },
+      ],
     })
     await expect(
       StateMachineLogics.ResolvingImageFile({
@@ -94,14 +98,17 @@ describe("@perstack/runtime: StateMachineLogic['ResolvingImageFile']", () => {
     const setting = createRunSetting()
     const checkpoint = createCheckpoint()
     const step = createStep({
-      toolCall: {
+      toolCalls: [
+        {
         id: "tc_123",
         skillName: "@perstack/base",
         toolName: "readImageFile",
         args: { path: "/nonexistent.png" },
       },
-      toolResult: {
-        id: "tr_123",
+      ],
+      toolResults: [
+        {
+          id: "tc_123",
         skillName: "@perstack/base",
         toolName: "readImageFile",
         result: [
@@ -112,6 +119,7 @@ describe("@perstack/runtime: StateMachineLogic['ResolvingImageFile']", () => {
           },
         ],
       },
+      ],
     })
     const result = await StateMachineLogics.ResolvingImageFile({
       setting,
@@ -129,4 +137,70 @@ describe("@perstack/runtime: StateMachineLogic['ResolvingImageFile']", () => {
       text: expect.stringContaining('Failed to read image file "/nonexistent.png"'),
     })
   })
+
+  it("throws error when tool calls are missing", async () => {
+    // toolCalls is left undefined, which the logic is expected to reject.
+    const setting = createRunSetting()
+    const checkpoint = createCheckpoint()
+    const step = createStep({ toolCalls: undefined, toolResults: [] })
+    const pendingEvent = StateMachineLogics.ResolvingImageFile({
+      setting,
+      checkpoint,
+      step,
+      eventListener: async () => {},
+      skillManagers: {},
+    })
+    await expect(pendingEvent).rejects.toThrow("No tool calls or tool results found")
+  })
+
+  it("throws error when tool results are empty", async () => {
+    // A tool call exists but no result was recorded for it.
+    const setting = createRunSetting()
+    const checkpoint = createCheckpoint()
+    const toolCalls = [
+      { id: "tc_123", skillName: "@perstack/base", toolName: "readImageFile", args: {} },
+    ]
+    const step = createStep({ toolCalls, toolResults: [] })
+    const pendingEvent = StateMachineLogics.ResolvingImageFile({
+      setting,
+      checkpoint,
+      step,
+      eventListener: async () => {},
+      skillManagers: {},
+    })
+    await expect(pendingEvent).rejects.toThrow("No tool calls or tool results found")
+  })
+
+  it("handles invalid JSON in text part gracefully", async () => {
+    // The tool result text is not JSON; it is expected back unchanged as a text part.
+    const setting = createRunSetting()
+    const checkpoint = createCheckpoint()
+    const toolCall = {
+      id: "tc_123",
+      skillName: "@perstack/base",
+      toolName: "readImageFile",
+      args: {},
+    }
+    const toolResult = {
+      id: "tc_123",
+      skillName: "@perstack/base",
+      toolName: "readImageFile",
+      result: [{ type: "textPart" as const, text: "not json", id: createId() }],
+    }
+    const step = createStep({ toolCalls: [toolCall], toolResults: [toolResult] })
+    const event = await StateMachineLogics.ResolvingImageFile({
+      setting,
+      checkpoint,
+      step,
+      eventListener: async () => {},
+      skillManagers: {},
+    })
+    expect(event.type).toBe("finishToolCall")
+    if (event.type !== "finishToolCall") throw new Error("Unexpected event type")
+    const [firstMessage] = event.newMessages
+    const [firstPart] = firstMessage.contents
+    if (firstPart.type !== "toolResultPart") throw new Error("Unexpected part type")
+    const [passedThrough] = firstPart.contents
+    expect(passedThrough).toMatchObject({ type: "textPart", text: "not json" })
+  })
 })
diff --git a/packages/runtime/src/states/resolving-image-file.ts b/packages/runtime/src/states/resolving-image-file.ts
index 3c06a6a9..1931ff78 100644
--- a/packages/runtime/src/states/resolving-image-file.ts
+++ b/packages/runtime/src/states/resolving-image-file.ts
@@ -9,12 +9,16 @@ export async function resolvingImageFileLogic({
   checkpoint,
   step,
 }: RunSnapshot["context"]): Promise<RunEvent> {
-  if (!step.toolCall || !step.toolResult) {
-    throw new Error("No tool call or tool result found")
+  if (!step.toolCalls || !step.toolResults || step.toolResults.length === 0) {
+    throw new Error("No tool calls or tool results found")
   }
-  const { id, toolName } = step.toolCall
-  const { result } = step.toolResult
-  const textParts = result.filter((part) => part.type === "textPart")
+  const toolResult = step.toolResults[0]
+  if (!toolResult) {
+    throw new Error("No tool result found")
+  }
+  const toolCall = step.toolCalls.find((tc) => tc.id === toolResult.id)
+  const { result } = toolResult
+  const textParts = result.filter((part): part is TextPart => part.type === "textPart")
   const files: (Omit<ImageInlinePart, "id"> | Omit<TextPart, "id">)[] = []
   for (const textPart of textParts) {
     let imageInfo: ReadImageFileResult | undefined
@@ -27,7 +31,7 @@ export async function resolvingImageFileLogic({
       })
       continue
     }
-    const { path, mimeType, size } = imageInfo
+    const { path, mimeType } = imageInfo
     try {
       const buffer = await readFile(path)
       files.push({
@@ -47,8 +51,8 @@ export async function resolvingImageFileLogic({
       createToolMessage([
         {
           type: "toolResultPart",
-          toolCallId: id,
-          toolName,
+          toolCallId: toolResult.id,
+          toolName: toolCall?.toolName ?? toolResult.toolName,
           contents: files,
         },
       ]),
diff --git a/packages/runtime/src/states/resolving-pdf-file.test.ts b/packages/runtime/src/states/resolving-pdf-file.test.ts
index 05bbdbff..71c4210b 100644
--- a/packages/runtime/src/states/resolving-pdf-file.test.ts
+++ b/packages/runtime/src/states/resolving-pdf-file.test.ts
@@ -1,5 +1,5 @@
-import { createId } from "@paralleldrive/cuid2"
 import { readFile } from "node:fs/promises"
+import { createId } from "@paralleldrive/cuid2"
 import { beforeEach, describe, expect, it, vi } from "vitest"
 import { createCheckpoint, createRunSetting, createStep } from "../../test/run-params.js"
 import { StateMachineLogics } from "../index.js"
@@ -25,24 +25,28 @@ describe("@perstack/runtime: StateMachineLogic['ResolvingPdfFile']", () => {
     const setting = createRunSetting()
     const checkpoint = createCheckpoint()
     const step = createStep({
-      toolCall: {
-        id: "tc_123",
-        skillName: "@perstack/base",
-        toolName: "readPdfFile",
-        args: { path: "/test/file.pdf" },
-      },
-      toolResult: {
-        id: "tr_123",
-        skillName: "@perstack/base",
-        toolName: "readPdfFile",
-        result: [
-          {
-            type: "textPart" as const,
-            text: JSON.stringify(pdfInfo),
-            id: createId(),
-          },
-        ],
-      },
+      toolCalls: [
+        {
+          id: "tc_123",
+          skillName: "@perstack/base",
+          toolName: "readPdfFile",
+          args: { path: "/test/file.pdf" },
+        },
+      ],
+      toolResults: [
+        {
+          id: "tc_123",
+          skillName: "@perstack/base",
+          toolName: "readPdfFile",
+          result: [
+            {
+              type: "textPart" as const,
+              text: JSON.stringify(pdfInfo),
+              id: createId(),
+            },
+          ],
+        },
+      ],
     })
     await expect(
       StateMachineLogics.ResolvingPdfFile({
@@ -71,26 +75,15 @@ describe("@perstack/runtime: StateMachineLogic['ResolvingPdfFile']", () => {
               toolName: "readPdfFile",
               contents: [
                 {
-                  type: "textPart",
+                  type: "fileInlinePart",
                   id: expect.any(String),
-                  text: "User uploads PDF file as follows.",
+                  encodedData: Buffer.from("encoded_pdf_content").toString("base64"),
+                  mimeType: "application/pdf",
                 },
               ],
             },
           ],
         },
-        {
-          type: "userMessage",
-          id: expect.any(String),
-          contents: [
-            {
-              type: "fileInlinePart",
-              id: expect.any(String),
-              encodedData: Buffer.from("encoded_pdf_content").toString("base64"),
-              mimeType: "application/pdf",
-            },
-          ],
-        },
       ],
     })
   })
@@ -105,24 +98,28 @@ describe("@perstack/runtime: StateMachineLogic['ResolvingPdfFile']", () => {
     const setting = createRunSetting()
     const checkpoint = createCheckpoint()
     const step = createStep({
-      toolCall: {
-        id: "tc_123",
-        skillName: "@perstack/base",
-        toolName: "readPdfFile",
-        args: { path: "/nonexistent.pdf" },
-      },
-      toolResult: {
-        id: "tr_123",
-        skillName: "@perstack/base",
-        toolName: "readPdfFile",
-        result: [
-          {
-            type: "textPart" as const,
-            text: JSON.stringify(pdfInfo),
-            id: createId(),
-          },
-        ],
-      },
+      toolCalls: [
+        {
+          id: "tc_123",
+          skillName: "@perstack/base",
+          toolName: "readPdfFile",
+          args: { path: "/nonexistent.pdf" },
+        },
+      ],
+      toolResults: [
+        {
+          id: "tc_123",
+          skillName: "@perstack/base",
+          toolName: "readPdfFile",
+          result: [
+            {
+              type: "textPart" as const,
+              text: JSON.stringify(pdfInfo),
+              id: createId(),
+            },
+          ],
+        },
+      ],
     })
     const result = await StateMachineLogics.ResolvingPdfFile({
       setting,
@@ -133,10 +130,81 @@ describe("@perstack/runtime: StateMachineLogic['ResolvingPdfFile']", () => {
     })
     expect(result.type).toBe("finishToolCall")
     if (result.type !== "finishToolCall") throw new Error("Unexpected event type")
-    const userMessage = result.newMessages[1]
-    expect(userMessage.contents[0]).toMatchObject({
+    const toolMessage = result.newMessages[0]
+    if (toolMessage.type !== "toolMessage") throw new Error("Expected toolMessage")
+    const toolResultPart = toolMessage.contents[0]
+    if (toolResultPart.type !== "toolResultPart") throw new Error("Expected toolResultPart")
+    expect(toolResultPart.contents[0]).toMatchObject({
       type: "textPart",
       text: expect.stringContaining('Failed to read PDF file "/nonexistent.pdf"'),
     })
   })
+
+  it("throws error when tool calls are missing", async () => {
+    // toolCalls is left undefined, which the logic is expected to reject.
+    const setting = createRunSetting()
+    const checkpoint = createCheckpoint()
+    const step = createStep({ toolCalls: undefined, toolResults: [] })
+    const pendingEvent = StateMachineLogics.ResolvingPdfFile({
+      setting,
+      checkpoint,
+      step,
+      eventListener: async () => {},
+      skillManagers: {},
+    })
+    await expect(pendingEvent).rejects.toThrow("No tool calls or tool results found")
+  })
+
+  it("throws error when tool results are empty", async () => {
+    // A tool call exists but no result was recorded for it.
+    const setting = createRunSetting()
+    const checkpoint = createCheckpoint()
+    const toolCalls = [
+      { id: "tc_123", skillName: "@perstack/base", toolName: "readPdfFile", args: {} },
+    ]
+    const step = createStep({ toolCalls, toolResults: [] })
+    const pendingEvent = StateMachineLogics.ResolvingPdfFile({
+      setting,
+      checkpoint,
+      step,
+      eventListener: async () => {},
+      skillManagers: {},
+    })
+    await expect(pendingEvent).rejects.toThrow("No tool calls or tool results found")
+  })
+
+  it("handles invalid JSON in text part gracefully", async () => {
+    // The tool result text is not JSON; it is expected back unchanged as a text part.
+    const setting = createRunSetting()
+    const checkpoint = createCheckpoint()
+    const toolCall = {
+      id: "tc_123",
+      skillName: "@perstack/base",
+      toolName: "readPdfFile",
+      args: {},
+    }
+    const toolResult = {
+      id: "tc_123",
+      skillName: "@perstack/base",
+      toolName: "readPdfFile",
+      result: [{ type: "textPart" as const, text: "not valid json", id: createId() }],
+    }
+    const step = createStep({ toolCalls: [toolCall], toolResults: [toolResult] })
+    const event = await StateMachineLogics.ResolvingPdfFile({
+      setting,
+      checkpoint,
+      step,
+      eventListener: async () => {},
+      skillManagers: {},
+    })
+    expect(event.type).toBe("finishToolCall")
+    if (event.type !== "finishToolCall") throw new Error("Unexpected event type")
+    // Walk down to the first content part, failing loudly on any unexpected shape.
+    const [firstMessage] = event.newMessages
+    if (firstMessage.type !== "toolMessage") throw new Error("Expected toolMessage")
+    const [firstPart] = firstMessage.contents
+    if (firstPart.type !== "toolResultPart") throw new Error("Expected toolResultPart")
+    const [passedThrough] = firstPart.contents
+    expect(passedThrough).toMatchObject({ type: "textPart", text: "not valid json" })
+  })
 })
diff --git a/packages/runtime/src/states/resolving-pdf-file.ts b/packages/runtime/src/states/resolving-pdf-file.ts
index fafa3d8e..3ed612de 100644
--- a/packages/runtime/src/states/resolving-pdf-file.ts
+++ b/packages/runtime/src/states/resolving-pdf-file.ts
@@ -1,6 +1,6 @@
 import { readFile } from "node:fs/promises"
-import { type FileInlinePart, type RunEvent, type TextPart, finishToolCall } from "@perstack/core"
-import { createToolMessage, createUserMessage } from "../messages/message.js"
+import { type FileInlinePart, finishToolCall, type RunEvent, type TextPart } from "@perstack/core"
+import { createToolMessage } from "../messages/message.js"
 import type { RunSnapshot } from "../runtime-state-machine.js"
 
 type ReadPdfFileResult = { path: string; mimeType: string; size: number }
@@ -9,34 +9,38 @@ export async function resolvingPdfFileLogic({
   checkpoint,
   step,
 }: RunSnapshot["context"]): Promise<RunEvent> {
-  if (!step.toolCall || !step.toolResult) {
-    throw new Error("No tool call or tool result found")
+  if (!step.toolCalls || !step.toolResults || step.toolResults.length === 0) {
+    throw new Error("No tool calls or tool results found")
   }
-  const { id, toolName } = step.toolCall
-  const { result } = step.toolResult
-  const textParts = result.filter((part) => part.type === "textPart")
-  const files: (Omit<FileInlinePart, "id"> | Omit<TextPart, "id">)[] = []
+  const toolResult = step.toolResults[0]
+  if (!toolResult) {
+    throw new Error("No tool result found")
+  }
+  const toolCall = step.toolCalls.find((tc) => tc.id === toolResult.id)
+  const { result } = toolResult
+  const textParts = result.filter((part): part is TextPart => part.type === "textPart")
+  const contents: (Omit<FileInlinePart, "id"> | Omit<TextPart, "id">)[] = []
   for (const textPart of textParts) {
     let pdfInfo: ReadPdfFileResult | undefined
     try {
       pdfInfo = JSON.parse(textPart.text) as ReadPdfFileResult
     } catch {
-      files.push({
+      contents.push({
         type: "textPart",
         text: textPart.text,
       })
       continue
     }
-    const { path, mimeType, size } = pdfInfo
+    const { path, mimeType } = pdfInfo
     try {
       const buffer = await readFile(path)
-      files.push({
+      contents.push({
         type: "fileInlinePart",
         encodedData: buffer.toString("base64"),
         mimeType,
       })
     } catch (error) {
-      files.push({
+      contents.push({
         type: "textPart",
         text: `Failed to read PDF file "${path}": ${error instanceof Error ? error.message : String(error)}`,
       })
@@ -47,17 +51,11 @@ export async function resolvingPdfFileLogic({
       createToolMessage([
         {
           type: "toolResultPart",
-          toolCallId: id,
-          toolName,
-          contents: [
-            {
-              type: "textPart",
-              text: "User uploads PDF file as follows.",
-            },
-          ],
+          toolCallId: toolResult.id,
+          toolName: toolCall?.toolName ?? toolResult.toolName,
+          contents,
         },
       ]),
-      createUserMessage(files),
     ],
   })
 }
diff --git a/packages/runtime/src/states/resolving-thought.test.ts b/packages/runtime/src/states/resolving-thought.test.ts
index e4089f5c..558c7507 100644
--- a/packages/runtime/src/states/resolving-thought.test.ts
+++ b/packages/runtime/src/states/resolving-thought.test.ts
@@ -8,14 +8,17 @@ describe("@perstack/runtime: StateMachineLogic['ResolvingThought']", () => {
     const setting = createRunSetting()
     const checkpoint = createCheckpoint()
     const step = createStep({
-      toolCall: {
+      toolCalls: [
+        {
         id: "tc_123",
         skillName: "@perstack/base",
         toolName: "think",
         args: { thought: "Let me analyze this problem step by step" },
       },
-      toolResult: {
-        id: "tr_123",
+      ],
+      toolResults: [
+        {
+          id: "tc_123",
         skillName: "@perstack/base",
         toolName: "think",
         result: [
@@ -26,6 +29,7 @@ describe("@perstack/runtime: StateMachineLogic['ResolvingThought']", () => {
           },
         ],
       },
+      ],
     })
     await expect(
       StateMachineLogics.ResolvingThought({
diff --git a/packages/runtime/src/states/resolving-tool-result.test.ts b/packages/runtime/src/states/resolving-tool-result.test.ts
index a6cacdc1..2b0eb144 100644
--- a/packages/runtime/src/states/resolving-tool-result.test.ts
+++ b/packages/runtime/src/states/resolving-tool-result.test.ts
@@ -9,14 +9,17 @@ describe("@perstack/runtime: StateMachineLogic['ResolvingToolResult']", () => {
     const setting = createRunSetting()
     const checkpoint = createCheckpoint()
     const step = createStep({
-      toolCall: {
+      toolCalls: [
+        {
         id: "tc_123",
         skillName: "@perstack/base",
         toolName: "readTextFile",
         args: { path: "/test/file.txt" },
       },
-      toolResult: {
-        id: "tr_123",
+      ],
+      toolResults: [
+        {
+          id: "tc_123",
         skillName: "@perstack/base",
         toolName: "readTextFile",
         result: [
@@ -27,6 +30,7 @@ describe("@perstack/runtime: StateMachineLogic['ResolvingToolResult']", () => {
           },
         ],
       },
+      ],
     })
     await expect(
       StateMachineLogics.ResolvingToolResult({
@@ -67,7 +71,7 @@ describe("@perstack/runtime: StateMachineLogic['ResolvingToolResult']", () => {
     })
   })
 
-  it("throws error when tool call or result missing", async () => {
+  it("throws error when tool calls or results missing", async () => {
     const setting = createRunSetting()
     const checkpoint = createCheckpoint()
     const step = createStep({
@@ -84,21 +88,24 @@ describe("@perstack/runtime: StateMachineLogic['ResolvingToolResult']", () => {
         eventListener: async () => {},
         skillManagers: {},
       }),
-    ).rejects.toThrow("No tool call or tool result found")
+    ).rejects.toThrow("No tool calls or tool results found")
   })
 
   it("filters non-text and non-image parts from result", async () => {
     const setting = createRunSetting()
     const checkpoint = createCheckpoint()
     const step = createStep({
-      toolCall: {
+      toolCalls: [
+        {
         id: "tc_456",
         skillName: "@perstack/base",
         toolName: "readImageFile",
         args: { path: "/test/image.png" },
       },
-      toolResult: {
-        id: "tr_456",
+      ],
+      toolResults: [
+        {
+          id: "tc_456",
         skillName: "@perstack/base",
         toolName: "readImageFile",
         result: [
@@ -117,6 +124,7 @@ describe("@perstack/runtime: StateMachineLogic['ResolvingToolResult']", () => {
           },
         ],
       },
+      ],
     })
     const result = await StateMachineLogics.ResolvingToolResult({
       setting,
diff --git a/packages/runtime/src/states/resolving-tool-result.ts b/packages/runtime/src/states/resolving-tool-result.ts
index 5ad4486a..4dabd109 100644
--- a/packages/runtime/src/states/resolving-tool-result.ts
+++ b/packages/runtime/src/states/resolving-tool-result.ts
@@ -1,4 +1,4 @@
-import { type RunEvent, finishToolCall } from "@perstack/core"
+import { finishToolCall, type RunEvent } from "@perstack/core"
 import { createToolMessage } from "../messages/message.js"
 import type { RunSnapshot } from "../runtime-state-machine.js"
 
@@ -7,23 +7,24 @@ export async function resolvingToolResultLogic({
   checkpoint,
   step,
 }: RunSnapshot["context"]): Promise<RunEvent> {
-  if (!step.toolCall || !step.toolResult) {
-    throw new Error("No tool call or tool result found")
+  if (!step.toolCalls || !step.toolResults || step.toolResults.length === 0) {
+    throw new Error("No tool calls or tool results found")
   }
-  const { id, toolName } = step.toolCall
-  const { result } = step.toolResult
+  const toolResultParts = step.toolResults.map((toolResult) => {
+    const toolCall = step.toolCalls?.find((tc) => tc.id === toolResult.id)
+    return {
+      type: "toolResultPart" as const,
+      toolCallId: toolResult.id,
+      toolName: toolCall?.toolName ?? toolResult.toolName,
+      contents: toolResult.result.filter(
+        (part) =>
+          part.type === "textPart" ||
+          part.type === "imageInlinePart" ||
+          part.type === "fileInlinePart",
+      ),
+    }
+  })
   return finishToolCall(setting, checkpoint, {
-    newMessages: [
-      createToolMessage([
-        {
-          type: "toolResultPart",
-          toolCallId: id,
-          toolName,
-          contents: result.filter(
-            (part) => part.type === "textPart" || part.type === "imageInlinePart",
-          ),
-        },
-      ]),
-    ],
+    newMessages: [createToolMessage(toolResultParts)],
   })
 }
diff --git a/packages/tui/src/hooks/state/use-step-store.ts b/packages/tui/src/hooks/state/use-step-store.ts
index 2481d1b2..f05db413 100644
--- a/packages/tui/src/hooks/state/use-step-store.ts
+++ b/packages/tui/src/hooks/state/use-step-store.ts
@@ -9,18 +9,22 @@ type StepBuilder = {
   completion?: string
 }
 const TOOL_RESULT_EVENT_TYPES = new Set([
-  "resolveToolResult",
+  "resolveToolResults",
   "resolveThought",
   "resolvePdfFile",
   "resolveImageFile",
   "attemptCompletion",
 ])
+const isToolCallsEvent = (event: PerstackEvent): event is RunEvent & { toolCalls: ToolCall[] } =>
+  "type" in event && event.type === "callTools" && "toolCalls" in event
 const isToolCallEvent = (event: PerstackEvent): event is RunEvent & { toolCall: ToolCall } =>
   "type" in event &&
-  (event.type === "callTool" ||
-    event.type === "callInteractiveTool" ||
-    event.type === "callDelegate") &&
+  (event.type === "callInteractiveTool" || event.type === "callDelegate") &&
   "toolCall" in event
+const isToolResultsEvent = (
+  event: PerstackEvent,
+): event is RunEvent & { toolResults: ToolResult[] } =>
+  "type" in event && event.type === "resolveToolResults" && "toolResults" in event
 const isToolResultEvent = (event: PerstackEvent): event is RunEvent & { toolResult: ToolResult } =>
   "type" in event && TOOL_RESULT_EVENT_TYPES.has(event.type) && "toolResult" in event
 const checkIsSuccess = (result: Array<{ type: string; text?: string }>): boolean => {
@@ -48,6 +52,14 @@ const processEvent = (stepMap: Map<number, StepBuilder>, event: PerstackEvent):
     builder.query = extractQuery(event)
   } else if (event.type === "completeRun") {
     builder.completion = event.text
+  } else if (isToolCallsEvent(event)) {
+    for (const toolCall of event.toolCalls) {
+      builder.tools.set(toolCall.id, {
+        id: toolCall.id,
+        toolName: toolCall.toolName,
+        args: toolCall.args as Record<string, unknown>,
+      })
+    }
   } else if (isToolCallEvent(event)) {
     const { toolCall } = event
     builder.tools.set(toolCall.id, {
@@ -55,6 +67,14 @@ const processEvent = (stepMap: Map<number, StepBuilder>, event: PerstackEvent):
       toolName: toolCall.toolName,
       args: toolCall.args as Record<string, unknown>,
     })
+  } else if (isToolResultsEvent(event)) {
+    for (const toolResult of event.toolResults) {
+      const existing = builder.tools.get(toolResult.id)
+      if (existing && Array.isArray(toolResult.result)) {
+        existing.result = toolResult.result
+        existing.isSuccess = checkIsSuccess(toolResult.result)
+      }
+    }
   } else if (isToolResultEvent(event)) {
     const { toolResult } = event
     const existing = builder.tools.get(toolResult.id)
diff --git a/vitest.config.ts b/vitest.config.ts
index b6fa48dc..4d4c2f3b 100644
--- a/vitest.config.ts
+++ b/vitest.config.ts
@@ -9,7 +9,17 @@ export default defineConfig({
           globals: true,
           environment: "node",
           include: ["**/*.test.ts"],
-          exclude: ["**/node_modules/**", "**/dist/**"],
+          exclude: ["**/node_modules/**", "**/dist/**", "e2e/**"],
+        },
+      },
+      {
+        test: {
+          name: "e2e",
+          globals: true,
+          environment: "node",
+          include: ["e2e/**/*.test.ts"],
+          testTimeout: 300000,
+          hookTimeout: 300000,
         },
       },
     ],