From 97fc5c216d95e4476e97e9522cceea6d848d6472 Mon Sep 17 00:00:00 2001 From: electronicBlacksmith <35152297+electronicBlacksmith@users.noreply.github.com> Date: Sun, 5 Apr 2026 18:36:23 -0500 Subject: [PATCH 1/2] fix(loop): restore Slack feedback for phantom_loop runs (#7) Closes #5. AsyncLocalStorage context injection, reaction ladder, progress bar, state.md summary on completion. Stop button persists across tick edits. Tick/finalize race eliminated. LoopNotifier extracted from runner.ts. Verified end-to-end in Slack + non-Slack trigger. 945 tests passing. --- src/agent/__tests__/slack-context.test.ts | 78 ++++ src/agent/slack-context.ts | 22 ++ src/db/__tests__/migrate.test.ts | 4 +- src/db/schema.ts | 6 + src/index.ts | 57 +-- src/loop/__tests__/notifications.test.ts | 435 ++++++++++++++++++++++ src/loop/__tests__/runner.test.ts | 24 ++ src/loop/__tests__/tool.test.ts | 68 ++++ src/loop/notifications.ts | 203 ++++++++++ src/loop/runner.ts | 80 +--- src/loop/store.ts | 6 +- src/loop/tool.ts | 11 +- src/loop/types.ts | 5 + 13 files changed, 904 insertions(+), 95 deletions(-) create mode 100644 src/agent/__tests__/slack-context.test.ts create mode 100644 src/agent/slack-context.ts create mode 100644 src/loop/__tests__/notifications.test.ts create mode 100644 src/loop/notifications.ts diff --git a/src/agent/__tests__/slack-context.test.ts b/src/agent/__tests__/slack-context.test.ts new file mode 100644 index 0000000..550223a --- /dev/null +++ b/src/agent/__tests__/slack-context.test.ts @@ -0,0 +1,78 @@ +import { describe, expect, test } from "bun:test"; +import { type SlackContext, slackContextStore } from "../slack-context.ts"; + +const SAMPLE: SlackContext = { + slackChannelId: "C123", + slackThreadTs: "1700000000.000100", + slackMessageTs: "1700000000.000200", +}; + +describe("slackContextStore", () => { + test("getStore() is undefined outside a run()", () => { + expect(slackContextStore.getStore()).toBeUndefined(); + }); + + test("synchronous read inside run() sees the context", () => { + const seen = slackContextStore.run(SAMPLE, () => slackContextStore.getStore()); + expect(seen).toEqual(SAMPLE); + }); + + test("context propagates across a plain await boundary", async () => { + const seen = await slackContextStore.run(SAMPLE, async () => { + await Promise.resolve(); + return slackContextStore.getStore(); + }); + expect(seen).toEqual(SAMPLE); + }); + + test("context propagates across a setImmediate hop", async () => { + const seen = await slackContextStore.run(SAMPLE, async () => { + await new Promise((resolve) => setImmediate(resolve)); + return slackContextStore.getStore(); + }); + expect(seen).toEqual(SAMPLE); + }); + + test("context propagates through an async generator for-await loop", async () => { + async function* producer(): AsyncGenerator { + for (let i = 0; i < 3; i++) { + await Promise.resolve(); + yield i; + } + } + + const observations: (SlackContext | undefined)[] = []; + await slackContextStore.run(SAMPLE, async () => { + for await (const _ of producer()) { + observations.push(slackContextStore.getStore()); + } + }); + + expect(observations.length).toBe(3); + for (const seen of observations) { + expect(seen).toEqual(SAMPLE); + } + }); + + test("concurrent run() calls keep contexts isolated", async () => { + const other: SlackContext = { + slackChannelId: "C999", + slackThreadTs: "2700000000.000100", + slackMessageTs: "2700000000.000200", + }; + + const [a, b] = await Promise.all([ + slackContextStore.run(SAMPLE, async () => { + await Promise.resolve(); + return slackContextStore.getStore(); + }), + slackContextStore.run(other, async () => { + await Promise.resolve(); + return slackContextStore.getStore(); + }), + ]); + + expect(a).toEqual(SAMPLE); + expect(b).toEqual(other); + }); +}); diff --git a/src/agent/slack-context.ts b/src/agent/slack-context.ts new file mode 100644 index 0000000..a967315 --- /dev/null +++ b/src/agent/slack-context.ts @@ -0,0 +1,22 @@ +import { AsyncLocalStorage } from "node:async_hooks"; + +/** + * Request-scoped Slack context for the current agent turn. + * + * Populated by the channel router when a Slack-origin message enters the + * runtime, and read by in-process MCP tool handlers that need to target the + * operator's originating message/thread without relying on the agent to + * forward the IDs through tool arguments. This is the minimum-surface + * plumbing that lets tools (e.g. phantom_loop) auto-fill channel/thread when + * the agent omits them. + * + * Non-Slack turns (telegram, email, webhook, cli, scheduled triggers) leave + * the store unset; consumers must treat `getStore()` as possibly undefined. + */ +export type SlackContext = { + slackChannelId: string; + slackThreadTs: string; + slackMessageTs: string; +}; + +export const slackContextStore = new AsyncLocalStorage(); diff --git a/src/db/__tests__/migrate.test.ts b/src/db/__tests__/migrate.test.ts index 349c152..cc06d02 100644 --- a/src/db/__tests__/migrate.test.ts +++ b/src/db/__tests__/migrate.test.ts @@ -36,7 +36,7 @@ describe("runMigrations", () => { runMigrations(db); const migrationCount = db.query("SELECT COUNT(*) as count FROM _migrations").get() as { count: number }; - expect(migrationCount.count).toBe(11); + expect(migrationCount.count).toBe(12); }); test("tracks applied migration indices", () => { @@ -48,6 +48,6 @@ describe("runMigrations", () => { .all() .map((r) => (r as { index_num: number }).index_num); - expect(indices).toEqual([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]); + expect(indices).toEqual([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]); }); }); diff --git a/src/db/schema.ts b/src/db/schema.ts index beaf254..4fb3302 100644 --- a/src/db/schema.ts +++ b/src/db/schema.ts @@ -120,4 +120,10 @@ export const MIGRATIONS: string[] = [ )`, "CREATE INDEX IF NOT EXISTS idx_loops_status ON loops(status)", + + // Track the operator's originating Slack message so the loop runner can + // drive a reaction ladder on it (hourglass → cycle → terminal emoji). + // Appended, never inserted mid-array: existing deployments have already + // applied migrations 0–10, so the new column must land at index 11. + "ALTER TABLE loops ADD COLUMN trigger_message_ts TEXT", ]; diff --git a/src/index.ts b/src/index.ts index 013c7fb..bd9986e 100644 --- a/src/index.ts +++ b/src/index.ts @@ -3,6 +3,7 @@ import { join, resolve } from "node:path"; import { createInProcessToolServer } from "./agent/in-process-tools.ts"; import { AgentRuntime } from "./agent/runtime.ts"; import type { RuntimeEvent } from "./agent/runtime.ts"; +import { slackContextStore } from "./agent/slack-context.ts"; import { CliChannel } from "./channels/cli.ts"; import { EmailChannel } from "./channels/email.ts"; import { emitFeedback, setFeedbackHandler } from "./channels/feedback.ts"; @@ -426,31 +427,37 @@ async function main(): Promise { telegramChannel.startTyping(telegramChatId); } - const response = await runtime.handleMessage( - msg.channelId, - msg.conversationId, - promptText, - (event: RuntimeEvent) => { - switch (event.type) { - case "init": - console.log(`\n[phantom] Session: ${event.sessionId}`); - break; - case "thinking": - statusReactions?.setThinking(); - break; - case "tool_use": - statusReactions?.setTool(event.tool); - if (progressStream) { - const summary = formatToolActivity(event.tool, event.input); - progressStream.addToolActivity(event.tool, summary); - } - break; - case "error": - statusReactions?.setError(); - break; - } - }, - ); + const onEvent = (event: RuntimeEvent): void => { + switch (event.type) { + case "init": + console.log(`\n[phantom] Session: ${event.sessionId}`); + break; + case "thinking": + statusReactions?.setThinking(); + break; + case "tool_use": + statusReactions?.setTool(event.tool); + if (progressStream) { + const summary = formatToolActivity(event.tool, event.input); + progressStream.addToolActivity(event.tool, summary); + } + break; + case "error": + statusReactions?.setError(); + break; + } + }; + + const runHandle = (): ReturnType => + runtime.handleMessage(msg.channelId, msg.conversationId, promptText, onEvent); + + // Slack-origin turns run inside an AsyncLocalStorage scope so in-process + // MCP tools (phantom_loop, etc.) can auto-target the operator's thread + // and original message without relying on the agent to forward the IDs. + const response = + isSlack && slackChannelId && slackThreadTs && slackMessageTs + ? await slackContextStore.run({ slackChannelId, slackThreadTs, slackMessageTs }, runHandle) + : await runHandle(); // Track assistant messages if (response.text) { diff --git a/src/loop/__tests__/notifications.test.ts b/src/loop/__tests__/notifications.test.ts new file mode 100644 index 0000000..1443125 --- /dev/null +++ b/src/loop/__tests__/notifications.test.ts @@ -0,0 +1,435 @@ +import { Database } from "bun:sqlite"; +import { afterEach, beforeEach, describe, expect, mock, test } from "bun:test"; +import { mkdirSync, mkdtempSync, rmSync, writeFileSync } from "node:fs"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; +import type { SlackChannel } from "../../channels/slack.ts"; +import { runMigrations } from "../../db/migrate.ts"; +import { LoopNotifier, buildProgressBar, terminalEmoji } from "../notifications.ts"; +import { LoopStore } from "../store.ts"; +import type { Loop, LoopStatus } from "../types.ts"; + +// Minimal SlackChannel shape the notifier actually calls. Every method is +// a mock so we can assert call args and ordering. +type MockSlack = { + postToChannel: ReturnType; + updateMessage: ReturnType; + addReaction: ReturnType; + removeReaction: ReturnType; +}; + +function makeSlack(overrides: Partial = {}): MockSlack { + return { + postToChannel: mock(async () => "1700000000.100100"), + updateMessage: mock(async () => undefined), + addReaction: mock(async () => undefined), + removeReaction: mock(async () => undefined), + ...overrides, + }; +} + +function asSlack(m: MockSlack): SlackChannel { + return m as unknown as SlackChannel; +} + +function makeLoop(overrides: Partial = {}): Loop { + return { + id: "abcdef0123456789", + goal: "test goal", + workspaceDir: "/tmp/ws", + stateFile: "/tmp/ws/state.md", + successCommand: null, + maxIterations: 10, + maxCostUsd: 5, + status: "running", + iterationCount: 0, + totalCostUsd: 0, + channelId: "C100", + conversationId: "1700000000.000100", + statusMessageTs: null, + triggerMessageTs: "1700000000.000200", + interruptRequested: false, + lastError: null, + startedAt: "2026-04-05T00:00:00Z", + lastTickAt: null, + finishedAt: null, + ...overrides, + }; +} + +describe("buildProgressBar", () => { + test("renders empty bar at 0/N", () => { + expect(buildProgressBar(0, 10)).toBe("[░░░░░░░░░░]"); + }); + test("renders full bar at N/N", () => { + expect(buildProgressBar(10, 10)).toBe("[██████████]"); + }); + test("renders half bar at N/2", () => { + expect(buildProgressBar(5, 10)).toBe("[█████░░░░░]"); + }); + test("rounds to nearest cell", () => { + // 3/7 ≈ 43% → 4 cells of 10 + expect(buildProgressBar(3, 7)).toBe("[████░░░░░░]"); + }); + test("clamps overflow", () => { + expect(buildProgressBar(99, 10)).toBe("[██████████]"); + }); + test("handles zero total safely", () => { + expect(buildProgressBar(0, 0)).toBe("[░░░░░░░░░░]"); + }); +}); + +describe("terminalEmoji", () => { + test("maps every known status", () => { + expect(terminalEmoji("done")).toBe(":white_check_mark:"); + expect(terminalEmoji("stopped")).toBe(":octagonal_sign:"); + expect(terminalEmoji("budget_exceeded")).toBe(":warning:"); + expect(terminalEmoji("failed")).toBe(":x:"); + expect(terminalEmoji("running")).toBe(":repeat:"); + }); +}); + +describe("LoopNotifier", () => { + let db: Database; + let store: LoopStore; + + beforeEach(() => { + db = new Database(":memory:"); + db.run("PRAGMA journal_mode = WAL"); + runMigrations(db); + store = new LoopStore(db); + }); + + afterEach(() => { + db.close(); + }); + + describe("postStartNotice", () => { + test("no-ops when slackChannel is null", async () => { + const notifier = new LoopNotifier(null, store); + await notifier.postStartNotice(makeLoop()); + // Nothing to assert beyond "did not throw"; the null guard is the + // whole point. + expect(true).toBe(true); + }); + + test("no-ops when loop.channelId is null", async () => { + const slack = makeSlack(); + const notifier = new LoopNotifier(asSlack(slack), store); + await notifier.postStartNotice(makeLoop({ channelId: null })); + expect(slack.postToChannel).not.toHaveBeenCalled(); + expect(slack.addReaction).not.toHaveBeenCalled(); + }); + + test("posts, persists ts, attaches stop button, stamps start reaction", async () => { + // Insert a real row so setStatusMessageTs can UPDATE it. + const loop = store.insert({ + id: "abcdef0123456789", + goal: "g", + workspaceDir: "/tmp/ws", + stateFile: "/tmp/ws/state.md", + successCommand: null, + maxIterations: 10, + maxCostUsd: 5, + channelId: "C100", + conversationId: "1700000000.000100", + triggerMessageTs: "1700000000.000200", + }); + + const slack = makeSlack(); + const notifier = new LoopNotifier(asSlack(slack), store); + await notifier.postStartNotice(loop); + + expect(slack.postToChannel).toHaveBeenCalledTimes(1); + const [channel, text, threadTs] = slack.postToChannel.mock.calls[0]; + expect(channel).toBe("C100"); + expect(text).toContain("Starting loop"); + expect(threadTs).toBe("1700000000.000100"); + + // Stop button attached via updateMessage with blocks + expect(slack.updateMessage).toHaveBeenCalledTimes(1); + const updateArgs = slack.updateMessage.mock.calls[0]; + expect(updateArgs[0]).toBe("C100"); + expect(updateArgs[3]).toBeDefined(); // blocks array + const blocks = updateArgs[3] as Array>; + const actionsBlock = blocks.find((b) => b.type === "actions"); + expect(actionsBlock).toBeDefined(); + + // Reaction stamped on the operator's trigger message + expect(slack.addReaction).toHaveBeenCalledWith("C100", "1700000000.000200", "hourglass_flowing_sand"); + + // Persisted status_message_ts round-trips back through findById + const reloaded = store.findById(loop.id); + expect(reloaded?.statusMessageTs).toBe("1700000000.100100"); + }); + + test("skips reaction when triggerMessageTs is null", async () => { + const loop = store.insert({ + id: "abcdef0123456789", + goal: "g", + workspaceDir: "/tmp/ws", + stateFile: "/tmp/ws/state.md", + successCommand: null, + maxIterations: 10, + maxCostUsd: 5, + channelId: "C100", + conversationId: null, + triggerMessageTs: null, + }); + const slack = makeSlack(); + const notifier = new LoopNotifier(asSlack(slack), store); + await notifier.postStartNotice(loop); + expect(slack.postToChannel).toHaveBeenCalled(); + expect(slack.addReaction).not.toHaveBeenCalled(); + }); + }); + + describe("postTickUpdate", () => { + function insertWithStatusTs(overrides: { triggerMessageTs?: string | null; iteration?: number } = {}) { + const row = store.insert({ + id: "abcdef0123456789", + goal: "g", + workspaceDir: "/tmp/ws", + stateFile: "/tmp/ws/state.md", + successCommand: null, + maxIterations: 10, + maxCostUsd: 5, + channelId: "C100", + conversationId: "1700000000.000100", + triggerMessageTs: overrides.triggerMessageTs ?? "1700000000.000200", + }); + store.setStatusMessageTs(row.id, "1700000000.100100"); + if (overrides.iteration) store.recordTick(row.id, overrides.iteration, 0); + const reloaded = store.findById(row.id); + if (!reloaded) throw new Error("failed to reload"); + return reloaded; + } + + test("edits the status message with a progress bar and cost", async () => { + const loop = insertWithStatusTs(); + const slack = makeSlack(); + const notifier = new LoopNotifier(asSlack(slack), store); + await notifier.postTickUpdate(loop.id, 3, "in-progress"); + + expect(slack.updateMessage).toHaveBeenCalledTimes(1); + const [ch, ts, text] = slack.updateMessage.mock.calls[0]; + expect(ch).toBe("C100"); + expect(ts).toBe("1700000000.100100"); + expect(text).toContain("3/10"); + expect(text).toContain("abcdef01"); + expect(text).toMatch(/\[█+░+\]/); + expect(text).toContain("in-progress"); + }); + + test("re-sends blocks on every tick edit so the Stop button persists", async () => { + // Regression test: Slack's chat.update replaces the entire message + // and drops blocks the caller does not include. Without passing + // blocks on tick updates, the Stop button would disappear after + // the first tick edit. Verify the button survives. + insertWithStatusTs(); + const slack = makeSlack(); + const notifier = new LoopNotifier(asSlack(slack), store); + await notifier.postTickUpdate("abcdef0123456789", 2, "in-progress"); + + expect(slack.updateMessage).toHaveBeenCalledTimes(1); + const updateArgs = slack.updateMessage.mock.calls[0]; + const blocks = updateArgs[3] as Array> | undefined; + expect(blocks).toBeDefined(); + const actionsBlock = blocks?.find((b) => b.type === "actions"); + expect(actionsBlock).toBeDefined(); + const elements = (actionsBlock as { elements: Array> }).elements; + expect(elements[0].action_id).toBe("phantom:loop_stop:abcdef0123456789"); + }); + + test("swaps hourglass → cycle on the first tick", async () => { + insertWithStatusTs(); + const slack = makeSlack(); + const notifier = new LoopNotifier(asSlack(slack), store); + await notifier.postTickUpdate("abcdef0123456789", 1, "in-progress"); + + expect(slack.removeReaction).toHaveBeenCalledWith("C100", "1700000000.000200", "hourglass_flowing_sand"); + expect(slack.addReaction).toHaveBeenCalledWith("C100", "1700000000.000200", "arrows_counterclockwise"); + }); + + test("does not swap reactions on tick 2+", async () => { + insertWithStatusTs(); + const slack = makeSlack(); + const notifier = new LoopNotifier(asSlack(slack), store); + await notifier.postTickUpdate("abcdef0123456789", 2, "in-progress"); + + expect(slack.removeReaction).not.toHaveBeenCalled(); + expect(slack.addReaction).not.toHaveBeenCalled(); + }); + + test("no-ops when statusMessageTs is not yet set", async () => { + store.insert({ + id: "abcdef0123456789", + goal: "g", + workspaceDir: "/tmp/ws", + stateFile: "/tmp/ws/state.md", + successCommand: null, + maxIterations: 10, + maxCostUsd: 5, + channelId: "C100", + conversationId: null, + triggerMessageTs: "1700000000.000200", + }); + const slack = makeSlack(); + const notifier = new LoopNotifier(asSlack(slack), store); + await notifier.postTickUpdate("abcdef0123456789", 1, "in-progress"); + expect(slack.updateMessage).not.toHaveBeenCalled(); + }); + }); + + describe("postFinalNotice", () => { + const cases: Array<{ status: LoopStatus; reaction: string }> = [ + { status: "done", reaction: "white_check_mark" }, + { status: "stopped", reaction: "octagonal_sign" }, + { status: "budget_exceeded", reaction: "warning" }, + { status: "failed", reaction: "x" }, + ]; + + for (const { status, reaction } of cases) { + test(`stamps terminal reaction for status=${status}`, async () => { + const slack = makeSlack(); + const notifier = new LoopNotifier(asSlack(slack), store); + await notifier.postFinalNotice(makeLoop({ statusMessageTs: "1700000000.100100", status }), status); + const addCalls = slack.addReaction.mock.calls.map((c: unknown[]) => c[2]); + expect(addCalls).toContain(reaction); + // Both in-flight reactions best-effort removed + const removeCalls = slack.removeReaction.mock.calls.map((c: unknown[]) => c[2]); + expect(removeCalls).toContain("hourglass_flowing_sand"); + expect(removeCalls).toContain("arrows_counterclockwise"); + }); + } + + test("edits existing status message when set", async () => { + const slack = makeSlack(); + const notifier = new LoopNotifier(asSlack(slack), store); + await notifier.postFinalNotice(makeLoop({ statusMessageTs: "1700000000.100100" }), "done"); + expect(slack.updateMessage).toHaveBeenCalledTimes(1); + expect(slack.postToChannel).not.toHaveBeenCalled(); + }); + + test("posts new message when statusMessageTs is null", async () => { + const slack = makeSlack(); + const notifier = new LoopNotifier(asSlack(slack), store); + await notifier.postFinalNotice(makeLoop({ statusMessageTs: null }), "done"); + expect(slack.postToChannel).toHaveBeenCalledTimes(1); + expect(slack.updateMessage).not.toHaveBeenCalled(); + }); + + test("no-ops when triggerMessageTs is null", async () => { + const slack = makeSlack(); + const notifier = new LoopNotifier(asSlack(slack), store); + await notifier.postFinalNotice( + makeLoop({ statusMessageTs: "1700000000.100100", triggerMessageTs: null }), + "done", + ); + expect(slack.addReaction).not.toHaveBeenCalled(); + expect(slack.removeReaction).not.toHaveBeenCalled(); + }); + + describe("state summary thread reply", () => { + let workDir: string; + + beforeEach(() => { + workDir = mkdtempSync(join(tmpdir(), "loop-notifier-summary-")); + }); + + afterEach(() => { + rmSync(workDir, { recursive: true, force: true }); + }); + + function writeStateFile(body: string): string { + const stateFile = join(workDir, "state.md"); + mkdirSync(workDir, { recursive: true }); + writeFileSync(stateFile, `---\nloop_id: abc\nstatus: done\niteration: 3\n---\n\n${body}\n`, "utf-8"); + return stateFile; + } + + test("posts the state.md body as a threaded reply on completion", async () => { + const stateFile = writeStateFile("# Progress\n- Tick 1: Hello!\n- Tick 2: Hello!\n- Tick 3: Hello!"); + const slack = makeSlack(); + const notifier = new LoopNotifier(asSlack(slack), store); + await notifier.postFinalNotice(makeLoop({ stateFile, statusMessageTs: "1700000000.100100" }), "done"); + + // The status message edit is one call; the summary is a second + // postToChannel call in the same thread. + expect(slack.postToChannel).toHaveBeenCalledTimes(1); + const [channel, text, threadTs] = slack.postToChannel.mock.calls[0]; + expect(channel).toBe("C100"); + expect(text).toContain("Tick 1: Hello!"); + expect(text).toContain("Tick 3: Hello!"); + expect(text).toContain("final state"); + // Frontmatter must be stripped + expect(text).not.toContain("loop_id: abc"); + expect(text).not.toContain("iteration: 3"); + // Posted in the same thread as the original turn + expect(threadTs).toBe("1700000000.000100"); + }); + + test("falls back to status_message_ts when conversationId is null", async () => { + const stateFile = writeStateFile("# Progress\n- done"); + const slack = makeSlack(); + const notifier = new LoopNotifier(asSlack(slack), store); + await notifier.postFinalNotice( + makeLoop({ + stateFile, + statusMessageTs: "1700000000.100100", + conversationId: null, + }), + "done", + ); + + expect(slack.postToChannel).toHaveBeenCalledTimes(1); + const threadTs = slack.postToChannel.mock.calls[0][2]; + expect(threadTs).toBe("1700000000.100100"); + }); + + test("silently skips summary when state file does not exist", async () => { + const slack = makeSlack(); + const notifier = new LoopNotifier(asSlack(slack), store); + await notifier.postFinalNotice( + makeLoop({ stateFile: "/nonexistent/path/state.md", statusMessageTs: "1700000000.100100" }), + "done", + ); + // The terminal reaction path still runs, but no summary post. + expect(slack.postToChannel).not.toHaveBeenCalled(); + }); + + test("silently skips summary when body is empty", async () => { + const stateFile = writeStateFile(""); + const slack = makeSlack(); + const notifier = new LoopNotifier(asSlack(slack), store); + await notifier.postFinalNotice(makeLoop({ stateFile, statusMessageTs: "1700000000.100100" }), "done"); + expect(slack.postToChannel).not.toHaveBeenCalled(); + }); + + test("truncates very long summaries", async () => { + // 5000 chars of body, well over the 3500 cap + const body = "x".repeat(5000); + const stateFile = writeStateFile(body); + const slack = makeSlack(); + const notifier = new LoopNotifier(asSlack(slack), store); + await notifier.postFinalNotice(makeLoop({ stateFile, statusMessageTs: "1700000000.100100" }), "done"); + const text = slack.postToChannel.mock.calls[0][1] as string; + expect(text).toContain("…(truncated)"); + // Total posted text must be bounded by 3500 chars of body + small + // amount of surrounding formatting, so under ~3700. + expect(text.length).toBeLessThan(3800); + }); + + test("summary also fires for stopped/failed/budget_exceeded outcomes", async () => { + const stateFile = writeStateFile("# Progress\n- partial work"); + for (const status of ["stopped", "failed", "budget_exceeded"] as const) { + const slack = makeSlack(); + const notifier = new LoopNotifier(asSlack(slack), store); + await notifier.postFinalNotice(makeLoop({ stateFile, statusMessageTs: "1700000000.100100", status }), status); + expect(slack.postToChannel).toHaveBeenCalledTimes(1); + expect(slack.postToChannel.mock.calls[0][1]).toContain("partial work"); + } + }); + }); + }); +}); diff --git a/src/loop/__tests__/runner.test.ts b/src/loop/__tests__/runner.test.ts index d24a3f8..ee1ffb4 100644 --- a/src/loop/__tests__/runner.test.ts +++ b/src/loop/__tests__/runner.test.ts @@ -97,6 +97,30 @@ describe("LoopRunner", () => { expect(loop.maxCostUsd).toBe(50); }); + test("triggerMessageTs round-trips through start → store → findById", () => { + const runtime = createMockRuntime(); + const runner = new LoopRunner({ db, runtime: runtime, dataDir, autoSchedule: false }); + const loop = runner.start({ + goal: "with trigger", + channelId: "C100", + conversationId: "1700000000.000100", + triggerMessageTs: "1700000000.000200", + }); + expect(loop.triggerMessageTs).toBe("1700000000.000200"); + + const reloaded = runner.getLoop(loop.id); + expect(reloaded?.triggerMessageTs).toBe("1700000000.000200"); + expect(reloaded?.channelId).toBe("C100"); + expect(reloaded?.conversationId).toBe("1700000000.000100"); + }); + + test("triggerMessageTs is null when omitted at start", () => { + const runtime = createMockRuntime(); + const runner = new LoopRunner({ db, runtime: runtime, dataDir, autoSchedule: false }); + const loop = runner.start({ goal: "no trigger" }); + expect(loop.triggerMessageTs).toBeNull(); + }); + test("tick invokes runtime with loop channel and rotating conversation ids", async () => { const runtime = createMockRuntime(); const runner = new LoopRunner({ db, runtime: runtime, dataDir, autoSchedule: false }); diff --git a/src/loop/__tests__/tool.test.ts b/src/loop/__tests__/tool.test.ts index 5315b98..b29958b 100644 --- a/src/loop/__tests__/tool.test.ts +++ b/src/loop/__tests__/tool.test.ts @@ -3,6 +3,7 @@ import { afterEach, beforeEach, describe, expect, mock, test } from "bun:test"; import { mkdtempSync, rmSync } from "node:fs"; import { tmpdir } from "node:os"; import { join } from "node:path"; +import { slackContextStore } from "../../agent/slack-context.ts"; import { runMigrations } from "../../db/migrate.ts"; import { LoopRunner } from "../runner.ts"; import { LOOP_TOOL_NAME, createLoopToolServer } from "../tool.ts"; @@ -108,4 +109,71 @@ describe("phantom_loop MCP tool", () => { const body = parseResult(result); expect(body.count).toBeGreaterThanOrEqual(2); }); + + describe("slackContextStore fallback", () => { + test("start fills channel/thread/trigger from context when args omitted", async () => { + const result = await slackContextStore.run( + { + slackChannelId: "C42", + slackThreadTs: "1700000000.000100", + slackMessageTs: "1700000000.000200", + }, + () => handler({ action: "start", goal: "from context" }), + ); + const { loop } = parseResult(result); + // triggerMessageTs is intentionally not exposed in serializeLoop, + // so read back through the runner directly. + const stored = runner.getLoop(loop.id); + expect(stored?.channelId).toBe("C42"); + expect(stored?.conversationId).toBe("1700000000.000100"); + expect(stored?.triggerMessageTs).toBe("1700000000.000200"); + }); + + test("explicit args override context", async () => { + const result = await slackContextStore.run( + { + slackChannelId: "C_CTX", + slackThreadTs: "1700000000.000100", + slackMessageTs: "1700000000.000200", + }, + () => + handler({ + action: "start", + goal: "explicit wins", + channel_id: "C_EXPLICIT", + conversation_id: "1800000000.000100", + trigger_message_ts: "1800000000.000200", + }), + ); + const { loop } = parseResult(result); + const stored = runner.getLoop(loop.id); + expect(stored?.channelId).toBe("C_EXPLICIT"); + expect(stored?.conversationId).toBe("1800000000.000100"); + expect(stored?.triggerMessageTs).toBe("1800000000.000200"); + }); + + test("missing context leaves fields null without crashing", async () => { + // No slackContextStore.run wrapper here. + const result = await handler({ action: "start", goal: "no context" }); + const { loop } = parseResult(result); + const stored = runner.getLoop(loop.id); + expect(stored?.channelId).toBeNull(); + expect(stored?.conversationId).toBeNull(); + expect(stored?.triggerMessageTs).toBeNull(); + }); + + test("serializeLoop does not expose triggerMessageTs to the agent", async () => { + const result = await slackContextStore.run( + { + slackChannelId: "C42", + slackThreadTs: "1700000000.000100", + slackMessageTs: "1700000000.000200", + }, + () => handler({ action: "start", goal: "hidden field" }), + ); + const body = parseResult(result); + expect(body.loop.trigger_message_ts).toBeUndefined(); + expect(body.loop.triggerMessageTs).toBeUndefined(); + }); + }); }); diff --git a/src/loop/notifications.ts b/src/loop/notifications.ts new file mode 100644 index 0000000..ced2119 --- /dev/null +++ b/src/loop/notifications.ts @@ -0,0 +1,203 @@ +import type { SlackBlock } from "../channels/feedback.ts"; +import type { SlackChannel } from "../channels/slack.ts"; +import { readStateFile } from "./state-file.ts"; +import type { LoopStore } from "./store.ts"; +import type { Loop, LoopStatus } from "./types.ts"; + +const PROGRESS_BAR_CELLS = 10; + +// Single source of truth for status → emoji. Bare names (no colons) because +// the Slack reactions.add/remove APIs take bare names; the status-message +// text wraps them with colons via `terminalEmoji()`. Keeping both formats +// derived from one map eliminates the silent drift risk when a new terminal +// status is added. +const TERMINAL_REACTION: Partial> = { + done: "white_check_mark", + stopped: "octagonal_sign", + budget_exceeded: "warning", + failed: "x", +}; + +const REACTION_START = "hourglass_flowing_sand"; +const REACTION_IN_FLIGHT = "arrows_counterclockwise"; + +const IN_FLIGHT_REACTIONS = [REACTION_START, REACTION_IN_FLIGHT] as const; + +function terminalReaction(status: LoopStatus): string | null { + return TERMINAL_REACTION[status] ?? null; +} + +export function buildProgressBar(done: number, total: number): string { + if (total <= 0) return `[${"░".repeat(PROGRESS_BAR_CELLS)}]`; + const clamped = Math.max(0, Math.min(done, total)); + const filled = Math.round((clamped / total) * PROGRESS_BAR_CELLS); + const empty = PROGRESS_BAR_CELLS - filled; + return `[${"█".repeat(filled)}${"░".repeat(empty)}]`; +} + +export function terminalEmoji(status: LoopStatus): string { + const reaction = TERMINAL_REACTION[status]; + if (reaction) return `:${reaction}:`; + // Non-terminal statuses still need a glyph for the running-state text. + return status === "running" ? ":repeat:" : ":grey_question:"; +} + +function truncate(text: string, max: number): string { + return text.length <= max ? text : `${text.slice(0, max - 1)}…`; +} + +/** + * Status message blocks: one section for the current text plus a Stop button. + * These must be re-sent on every updateMessage call, because Slack's chat.update + * replaces the message wholesale and drops any blocks the caller does not + * include. Passing this on tick updates is how the Stop button survives across + * progress edits. The final notice deliberately omits blocks so the button + * disappears on completion. + */ +function buildStatusBlocks(text: string, loopId: string): SlackBlock[] { + return [ + { type: "section", text: { type: "mrkdwn", text } }, + { + type: "actions", + block_id: `phantom_loop_actions_${loopId}`, + elements: [ + { + type: "button", + text: { type: "plain_text", text: "Stop loop", emoji: true }, + action_id: `phantom:loop_stop:${loopId}`, + style: "danger", + value: loopId, + }, + ], + }, + ]; +} + +const FRONTMATTER_RE = /^---\s*\n[\s\S]*?\n---\s*\n?/; +const MAX_SUMMARY_CHARS = 3500; + +/** + * Extract the human-readable body of the state file for the end-of-loop + * summary. Drops the YAML frontmatter (runner plumbing) and truncates at a + * safe limit so a runaway state file does not blow out a Slack message. + * Returns null if the file is unreadable or effectively empty, which signals + * the caller to skip the summary cleanly. + */ +function extractStateSummary(stateFilePath: string): string | null { + try { + const contents = readStateFile(stateFilePath); + const body = contents.replace(FRONTMATTER_RE, "").trim(); + if (!body) return null; + if (body.length <= MAX_SUMMARY_CHARS) return body; + return `${body.slice(0, MAX_SUMMARY_CHARS)}\n\n…(truncated)`; + } catch { + return null; + } +} + +/** + * Slack feedback for the loop lifecycle: start notice, per-tick progress + * edit, final notice, and a reaction ladder on the operator's original + * message (hourglass → cycle → terminal emoji). + * + * Extracted from LoopRunner because runner.ts was already at the 300-line + * CONTRIBUTING.md cap and the progress-bar + reaction-ladder additions push + * it over. All Slack-API failures are swallowed upstream in SlackChannel; + * if a call-site here still throws, we catch and warn so loop execution is + * never derailed by chat plumbing. + * + * Why not reuse createStatusReactionController: that controller debounces + * per-tool-call runtime events via a promise-chain serializer. The loop + * ladder has exactly three sequential lifecycle states (start, first tick, + * terminal), no debouncing is required, and wiring it into the controller + * would entangle two unrelated lifecycles. Plain best-effort + * addReaction/removeReaction is the right choice here. + */ +export class LoopNotifier { + constructor( + private slackChannel: SlackChannel | null, + private store: LoopStore, + ) {} + + async postStartNotice(loop: Loop): Promise { + if (!this.slackChannel || !loop.channelId) return; + const text = `:repeat: Starting loop \`${loop.id.slice(0, 8)}\` (max ${loop.maxIterations} iter, $${loop.maxCostUsd.toFixed(2)} budget)\n> ${truncate(loop.goal, 200)}`; + // When conversationId (a Slack thread ts) is set, thread the updates into it; + // otherwise post a top-level message in the channel. + const ts = await this.slackChannel.postToChannel(loop.channelId, text, loop.conversationId ?? undefined); + if (!ts) return; + this.store.setStatusMessageTs(loop.id, ts); + + // Attach the stop button so the operator can interrupt without using MCP. + // Routed via setLoopStopHandler in slack-actions.ts. + await this.slackChannel.updateMessage(loop.channelId, ts, text, buildStatusBlocks(text, loop.id)); + + if (loop.triggerMessageTs) { + await this.slackChannel.addReaction(loop.channelId, loop.triggerMessageTs, REACTION_START); + } + } + + async postTickUpdate(id: string, iteration: number, status: string): Promise { + const loop = this.store.findById(id); + if (!loop || !this.slackChannel || !loop.channelId || !loop.statusMessageTs) return; + + const bar = buildProgressBar(iteration, loop.maxIterations); + const shortId = loop.id.slice(0, 8); + const text = `:repeat: Loop \`${shortId}\` · ${bar} ${iteration}/${loop.maxIterations} · $${loop.totalCostUsd.toFixed(2)}/$${loop.maxCostUsd.toFixed(2)} · ${status}`; + // Re-send the blocks on every edit, otherwise Slack strips the Stop + // button (chat.update replaces the entire message, including blocks). + await this.slackChannel.updateMessage(loop.channelId, loop.statusMessageTs, text, buildStatusBlocks(text, loop.id)); + + // On the first tick, swap hourglass → cycling arrows. Restart-safe by + // construction: iteration is sourced from the call site, so on resume + // the swap only fires if the loop is actually transitioning through + // iteration 1, no in-memory flag to repopulate. + if (iteration === 1 && loop.triggerMessageTs) { + await this.slackChannel.removeReaction(loop.channelId, loop.triggerMessageTs, REACTION_START); + await this.slackChannel.addReaction(loop.channelId, loop.triggerMessageTs, REACTION_IN_FLIGHT); + } + } + + async postFinalNotice(loop: Loop, status: LoopStatus): Promise { + if (!this.slackChannel || !loop.channelId) return; + const emoji = terminalEmoji(status); + const bar = buildProgressBar(loop.iterationCount, loop.maxIterations); + const shortId = loop.id.slice(0, 8); + const text = `${emoji} Loop \`${shortId}\` · ${bar} ${loop.iterationCount}/${loop.maxIterations} · $${loop.totalCostUsd.toFixed(4)} · ${status}`; + // Intentionally no blocks on the terminal edit: this strips the Stop + // button since the loop is no longer interruptible. + if (loop.statusMessageTs) { + await this.slackChannel.updateMessage(loop.channelId, loop.statusMessageTs, text); + } else { + await this.slackChannel.postToChannel(loop.channelId, text); + } + + // Post the state.md body as a threaded reply so the operator can see + // what the agent actually did across the run. The state file is the + // agent's working memory, curated every tick, so it already contains + // a progress log the operator wants to read. This costs no extra + // agent calls; we simply surface content the agent already wrote. + const summary = extractStateSummary(loop.stateFile); + if (summary) { + const summaryThreadTs = loop.conversationId ?? loop.statusMessageTs ?? undefined; + await this.slackChannel.postToChannel( + loop.channelId, + `:notebook: *Loop \`${loop.id.slice(0, 8)}\` final state:*\n\`\`\`\n${summary}\n\`\`\``, + summaryThreadTs, + ); + } + + if (loop.triggerMessageTs) { + // Best-effort: clear whichever in-flight reaction is currently on + // the message (removeReaction is idempotent on missing), then stamp + // the terminal one. + for (const reaction of IN_FLIGHT_REACTIONS) { + await this.slackChannel.removeReaction(loop.channelId, loop.triggerMessageTs, reaction); + } + const terminal = terminalReaction(status); + if (terminal) { + await this.slackChannel.addReaction(loop.channelId, loop.triggerMessageTs, terminal); + } + } + } +} diff --git a/src/loop/runner.ts b/src/loop/runner.ts index 94b876b..d2ced99 100644 --- a/src/loop/runner.ts +++ b/src/loop/runner.ts @@ -4,6 +4,7 @@ import { join, relative, resolve } from "node:path"; import type { AgentRuntime } from "../agent/runtime.ts"; import type { SlackChannel } from "../channels/slack.ts"; import { buildSafeEnv } from "../mcp/dynamic-handlers.ts"; +import { LoopNotifier } from "./notifications.ts"; import { buildTickPrompt } from "./prompt.ts"; import { initStateFile, parseFrontmatter, readStateFile } from "./state-file.ts"; import { LoopStore } from "./store.ts"; @@ -61,6 +62,7 @@ export class LoopRunner { private dataDir: string; private autoSchedule: boolean; private inFlight = new Set(); + private notifier: LoopNotifier; constructor(deps: RunnerDeps) { this.store = new LoopStore(deps.db); @@ -68,10 +70,12 @@ export class LoopRunner { this.slackChannel = deps.slackChannel; this.dataDir = deps.dataDir ?? resolve(process.cwd(), "data"); this.autoSchedule = deps.autoSchedule ?? true; + this.notifier = new LoopNotifier(this.slackChannel ?? null, this.store); } setSlackChannel(channel: SlackChannel): void { this.slackChannel = channel; + this.notifier = new LoopNotifier(channel, this.store); } /** @@ -111,9 +115,10 @@ export class LoopRunner { maxCostUsd, channelId: input.channelId ?? null, conversationId: input.conversationId ?? null, + triggerMessageTs: input.triggerMessageTs ?? null, }); - this.postStartNotice(loop).catch((err: unknown) => { + this.notifier.postStartNotice(loop).catch((err: unknown) => { const msg = err instanceof Error ? err.message : String(err); console.warn(`[loop] Failed to post start notice for ${id}: ${msg}`); }); @@ -200,10 +205,17 @@ export class LoopRunner { } } - this.postTickUpdate(id, nextIteration, frontmatter?.status ?? "in-progress").catch((err: unknown) => { + // Await the tick update so its Slack write finishes before the next + // tick can start (and potentially finalize). Without this, a stop on + // tick N+1 can race: postFinalNotice strips the Stop button, then the + // fire-and-forget postTickUpdate from tick N resolves and re-sends the + // blocks, making the button reappear on a finalized message. + try { + await this.notifier.postTickUpdate(id, nextIteration, frontmatter?.status ?? "in-progress"); + } catch (err: unknown) { const msg = err instanceof Error ? err.message : String(err); console.warn(`[loop] Failed to post tick update for ${id}: ${msg}`); - }); + } this.scheduleTick(id); } finally { @@ -231,73 +243,13 @@ export class LoopRunner { private finalize(id: string, status: LoopStatus, error: string | null): void { const loop = this.store.finalize(id, status, error); if (!loop) return; - this.postFinalNotice(loop, status).catch((err: unknown) => { + this.notifier.postFinalNotice(loop, status).catch((err: unknown) => { const msg = err instanceof Error ? err.message : String(err); console.warn(`[loop] Failed to post final notice for ${id}: ${msg}`); }); } - - private async postStartNotice(loop: Loop): Promise { - if (!this.slackChannel || !loop.channelId) return; - const text = `:repeat: Starting loop \`${loop.id.slice(0, 8)}\` (max ${loop.maxIterations} iter, $${loop.maxCostUsd.toFixed(2)} budget)\n> ${truncate(loop.goal, 200)}`; - // When conversationId (a Slack thread ts) is set, thread the updates into it; - // otherwise post a top-level message in the channel. - const ts = await this.slackChannel.postToChannel(loop.channelId, text, loop.conversationId ?? undefined); - if (!ts) return; - this.store.setStatusMessageTs(loop.id, ts); - - // Attach a stop button so the operator can interrupt without using MCP. - // Routed via setLoopStopHandler in slack-actions.ts. - const blocks = [ - { type: "section", text: { type: "mrkdwn", text } }, - { - type: "actions", - block_id: `phantom_loop_actions_${loop.id}`, - elements: [ - { - type: "button", - text: { type: "plain_text", text: "Stop loop", emoji: true }, - action_id: `phantom:loop_stop:${loop.id}`, - style: "danger", - value: loop.id, - }, - ], - }, - ]; - await this.slackChannel.updateMessage(loop.channelId, ts, text, blocks); - } - - private async postTickUpdate(id: string, iteration: number, status: string): Promise { - const loop = this.store.findById(id); - if (!loop || !this.slackChannel || !loop.channelId || !loop.statusMessageTs) return; - const text = `:repeat: Loop \`${loop.id.slice(0, 8)}\` iteration ${iteration}/${loop.maxIterations} - ${status} ($${loop.totalCostUsd.toFixed(4)} of $${loop.maxCostUsd.toFixed(2)})`; - await this.slackChannel.updateMessage(loop.channelId, loop.statusMessageTs, text); - } - - private async postFinalNotice(loop: Loop, status: LoopStatus): Promise { - if (!this.slackChannel || !loop.channelId) return; - const emoji = FINAL_EMOJI[status] ?? ":grey_question:"; - const text = `${emoji} Loop \`${loop.id.slice(0, 8)}\` finished (${status}) after ${loop.iterationCount} iterations, $${loop.totalCostUsd.toFixed(4)} spent`; - if (loop.statusMessageTs) { - await this.slackChannel.updateMessage(loop.channelId, loop.statusMessageTs, text); - } else { - await this.slackChannel.postToChannel(loop.channelId, text); - } - } } -const FINAL_EMOJI: Record = { - running: ":repeat:", - done: ":white_check_mark:", - stopped: ":octagonal_sign:", - budget_exceeded: ":warning:", - failed: ":x:", -}; - function clamp(value: number, min: number, max: number): number { return Math.min(Math.max(value, min), max); } - -function truncate(text: string, max: number): string { - return text.length <= max ? text : `${text.slice(0, max - 1)}…`; -} diff --git a/src/loop/store.ts b/src/loop/store.ts index 930265e..9f53000 100644 --- a/src/loop/store.ts +++ b/src/loop/store.ts @@ -11,6 +11,7 @@ export type LoopInsertInput = { maxCostUsd: number; channelId: string | null; conversationId: string | null; + triggerMessageTs: string | null; }; /** @@ -22,8 +23,8 @@ export class LoopStore { insert(input: LoopInsertInput): Loop { this.db.run( - `INSERT INTO loops (id, goal, workspace_dir, state_file, success_command, max_iterations, max_cost_usd, status, channel_id, conversation_id) - VALUES (?, ?, ?, ?, ?, ?, ?, 'running', ?, ?)`, + `INSERT INTO loops (id, goal, workspace_dir, state_file, success_command, max_iterations, max_cost_usd, status, channel_id, conversation_id, trigger_message_ts) + VALUES (?, ?, ?, ?, ?, ?, ?, 'running', ?, ?, ?)`, [ input.id, input.goal, @@ -34,6 +35,7 @@ export class LoopStore { input.maxCostUsd, input.channelId, input.conversationId, + input.triggerMessageTs, ], ); const created = this.findById(input.id); diff --git a/src/loop/tool.ts b/src/loop/tool.ts index 0feb167..9c86437 100644 --- a/src/loop/tool.ts +++ b/src/loop/tool.ts @@ -1,6 +1,7 @@ import { createSdkMcpServer, tool } from "@anthropic-ai/claude-agent-sdk"; import type { McpSdkServerConfigWithInstance } from "@anthropic-ai/claude-agent-sdk"; import { z } from "zod"; +import { slackContextStore } from "../agent/slack-context.ts"; import type { LoopRunner } from "./runner.ts"; import { parseFrontmatter, readStateFile } from "./state-file.ts"; import type { Loop } from "./types.ts"; @@ -74,6 +75,7 @@ regression". Each iteration is fresh - all context must live in the state file.` success_command: z.string().optional(), channel_id: z.string().optional(), conversation_id: z.string().optional(), + trigger_message_ts: z.string().optional(), loop_id: z.string().optional().describe("Loop ID (required for status and stop)"), include_finished: z.boolean().optional().describe("For list: include terminated loops"), }, @@ -82,14 +84,19 @@ regression". Each iteration is fresh - all context must live in the state file.` switch (input.action) { case "start": { if (!input.goal) return err("goal is required for start"); + // Explicit tool arguments always win. When the agent omits + // channel/thread plumbing, fall back to the Slack context + // captured by the router for the current turn. + const ctx = slackContextStore.getStore(); const loop = runner.start({ goal: input.goal, workspace: input.workspace, maxIterations: input.max_iterations, maxCostUsd: input.max_cost_usd, successCommand: input.success_command, - channelId: input.channel_id, - conversationId: input.conversation_id, + channelId: input.channel_id ?? ctx?.slackChannelId, + conversationId: input.conversation_id ?? ctx?.slackThreadTs, + triggerMessageTs: input.trigger_message_ts ?? ctx?.slackMessageTs, }); return ok({ started: true, loop: serializeLoop(loop) }); } diff --git a/src/loop/types.ts b/src/loop/types.ts index cdb5abc..d30249e 100644 --- a/src/loop/types.ts +++ b/src/loop/types.ts @@ -16,6 +16,7 @@ export type Loop = { channelId: string | null; conversationId: string | null; statusMessageTs: string | null; + triggerMessageTs: string | null; interruptRequested: boolean; lastError: string | null; startedAt: string; @@ -37,6 +38,7 @@ export type LoopRow = { channel_id: string | null; conversation_id: string | null; status_message_ts: string | null; + trigger_message_ts: string | null; interrupt_requested: number; last_error: string | null; started_at: string; @@ -58,6 +60,7 @@ export type LoopStartInput = { successCommand?: string; channelId?: string; conversationId?: string; + triggerMessageTs?: string; }; // Hard ceilings the agent cannot raise. Caller-provided values are clamped. @@ -74,6 +77,7 @@ export const LoopStartInputSchema = z.object({ success_command: z.string().optional(), channel_id: z.string().optional(), conversation_id: z.string().optional(), + trigger_message_ts: z.string().optional(), }); export const LoopIdSchema = z.object({ loop_id: z.string().min(1) }); @@ -95,6 +99,7 @@ export function rowToLoop(row: LoopRow): Loop { channelId: row.channel_id, conversationId: row.conversation_id, statusMessageTs: row.status_message_ts, + triggerMessageTs: row.trigger_message_ts, interruptRequested: row.interrupt_requested === 1, lastError: row.last_error, startedAt: row.started_at, From 29c32a80daf87518fa65513319bf69e84b2a391e Mon Sep 17 00:00:00 2001 From: electronicBlacksmith Date: Mon, 6 Apr 2026 23:01:02 +0000 Subject: [PATCH 2/2] docs: add phantom_loop documentation for upstream PR Covers MCP tool parameters, state file contract, tick lifecycle, Slack integration, mid-loop critique, post-loop evolution pipeline, memory context injection, and tips for writing effective goals. Closes #12 --- CLAUDE.md | 1 + docs/loop.md | 173 +++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 174 insertions(+) create mode 100644 docs/loop.md diff --git a/CLAUDE.md b/CLAUDE.md index f5d3cd7..91091c3 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -222,3 +222,4 @@ Production deployments are managed internally. Do NOT modify production deployme - [Self-Evolution](docs/self-evolution.md) - The 6-step reflection pipeline - [Security](docs/security.md) - Auth, secrets, permissions, and hardening - [Roles](docs/roles.md) - Customizing the agent's specialization +- [Loop](docs/loop.md) - Autonomous iteration primitive (phantom_loop) diff --git a/docs/loop.md b/docs/loop.md new file mode 100644 index 0000000..37cee17 --- /dev/null +++ b/docs/loop.md @@ -0,0 +1,173 @@ +# Loop + +Phantom loop is an autonomous iteration primitive. The agent runs repeatedly against a goal, each tick in a fresh SDK session, with a markdown state file as the only contract between ticks. Budgets, mid-loop critique, Slack feedback, and post-loop learning are all built in. + +## Overview + +Regular sessions are conversational: the operator sends a message, the agent responds, back and forth. Loops are different. The operator defines a goal and a budget, then walks away. The runner drives ticks automatically until the goal is met or a budget is hit. + +Use loops for long-horizon tasks where the agent should grind autonomously: +- "Keep refactoring until tests pass" +- "Iterate on this design doc until the reviewer approves" +- "Bisect this regression across the last 50 commits" + +## MCP Tool + +The `phantom_loop` tool exposes four actions: `start`, `status`, `stop`, `list`. + +### Start Parameters + +| Parameter | Default | Ceiling | Description | +|-----------|---------|---------|-------------| +| `goal` (required) | - | 10,000 chars | What the loop should achieve | +| `workspace` | `data/loops//` | - | Working directory for the agent | +| `max_iterations` | 20 | 200 | Maximum ticks before budget termination | +| `max_cost_usd` | 5 | 50 | Maximum total cost before budget termination | +| `checkpoint_interval` | off | 200 | Run a Sonnet critique every N ticks (0 = disabled) | +| `success_command` | off | - | Shell command run after each tick; exit 0 = done | +| `channel_id` | auto | - | Slack channel for status updates | +| `conversation_id` | auto | - | Slack thread for threading updates | +| `trigger_message_ts` | auto | - | Slack message timestamp for reaction ladder | + +When started from Slack, `channel_id`, `conversation_id`, and `trigger_message_ts` are auto-filled from the originating message context. Explicit tool arguments always take precedence. + +### Other Actions + +- **status**: Returns the loop row, parsed state file frontmatter, and the first 40 lines of the state file. +- **stop**: Sets an interrupt flag. The loop stops gracefully before the next tick. +- **list**: Returns active loops. Pass `include_finished: true` for recent history. + +## State File + +The state file (`state.md` in the workspace) is the loop's memory across ticks. It has YAML frontmatter that the runner inspects for control flow, and a markdown body that belongs entirely to the agent. + +### Frontmatter + +```yaml +--- +loop_id: +status: in-progress # in-progress | done | blocked +iteration: 3 +--- +``` + +The runner acts on `done` (finalize immediately) and `blocked` (continue, but the agent should explain in Notes). Everything else is treated as `in-progress`. + +### Body Sections + +```markdown +# Goal +Keep refactoring src/auth until all 47 tests pass. + +# Progress +- Tick 1: Fixed the missing import in auth/middleware.ts +- Tick 2: Updated the session type to include refreshToken +- Tick 3: Fixed the mock in auth.test.ts, 44/47 tests passing + +# Next Action +The remaining 3 failures are all in auth/oauth.test.ts. Read the test file, +identify the common cause, and fix it. + +# Notes +(empty) +``` + +The agent reads Progress and Next Action at the start of each tick to understand what happened before and what to do now. The runner does not parse the body, only the frontmatter. + +## Tick Lifecycle + +Each tick follows a fixed sequence: + +1. **Lock** - acquire in-flight guard (prevents concurrent ticks on the same loop) +2. **Pre-checks** - verify loop is still "running"; check interrupt flag; enforce budget limits +3. **Read state** - load the current state file from disk +4. **Build prompt** - assemble the tick prompt with: goal, state file contents, budget info, optional memory context, optional critique feedback +5. **Fresh session** - call `runtime.handleMessage()` with a rotating conversation ID (`{loopId}:{iteration}`) +6. **Agent works** - executes tools, makes progress, writes updated state file +7. **Record cost** - increment iteration count and accumulate cost from the SDK response +8. **Parse frontmatter** - re-read the state file; if the agent declared `done`, finalize immediately (steps 9-11 are skipped) +9. **Success command** - if configured, run the shell command (5-minute timeout, sanitized env with only PATH, HOME, LANG, TERM, TOOL_INPUT where TOOL_INPUT is a JSON string containing loop_id and workspace) +10. **Critique checkpoint** - if `checkpoint_interval` is set and the current tick is a multiple, run a Sonnet critique (see below) +11. **Slack update** - post tick progress to the status message +12. **Schedule next** - queue the next tick via `setImmediate` + +## Slack Integration + +When a loop is started from Slack (or with explicit `channel_id`), the `LoopNotifier` provides real-time feedback: + +**Start notice** - posted to the channel/thread with the goal excerpt and budget: +``` +:repeat: Starting loop `abcdef01` (max 20 iter, $5.00 budget) +> Keep refactoring src/auth until all 47 tests pass +``` +Includes a Stop button routed through Slack interactive actions. + +**Tick updates** - the same message is edited on each tick with a progress bar: +``` +:repeat: Loop `abcdef01` · [████░░░░░░] 4/10 · $1.20/$5.00 · in-progress +``` +The Stop button survives across edits (blocks are re-sent on every `chat.update`). + +**Reaction ladder** on the operator's original message: +- Start: hourglass +- First tick: swap to cycling arrows +- Terminal: checkmark (done), stop sign (stopped), warning (budget exceeded), X (failed) + +**Final notice** - progress bar with terminal emoji, and the state file body posted as a threaded code block so the operator can see the full progress log. + +## Mid-Loop Critique + +When `checkpoint_interval` is set, Sonnet 4.6 reviews the loop's progress every N ticks. This catches drift, stuck patterns, and wasted budget before the loop exhausts its resources. + +The critique runs after terminal checks (so the final tick is never wasted on a critique call) and is guarded by judge availability and cost cap. + +The reviewer sees: +- The original goal +- Rolling tick summaries (up to 10) +- The current state file (truncated to 3,000 chars) +- The agent's last response (truncated to 1,000 chars) + +The assessment is injected into the next tick's prompt as a "REVIEWER FEEDBACK" section. + +## Post-Loop Pipeline + +After a loop finalizes, a fire-and-forget pipeline runs evolution and memory consolidation. Neither can affect the loop's final status, and errors are logged but never propagated. + +**Evolution**: A bounded transcript (rolling summaries, first/last prompt-response pairs) is synthesized into a `SessionData` object and fed to the evolution engine's `afterSession()` pipeline. If the engine applies changes, the runtime's evolved config is updated. + +**Memory consolidation**: If vector memory is ready, the session data is consolidated into episodic memory. When LLM judges are available and within cost cap, Sonnet extracts facts while checking for contradictions with existing knowledge. Otherwise, a heuristic fallback runs. + +Loop status maps to evolution outcome: `done` becomes success, `stopped` becomes abandoned, everything else becomes failure. + +## Memory Context + +Memory context is cached once at loop start and injected into every tick prompt as a "RECALLED MEMORIES" section. Caching avoids re-querying the vector database on every tick (the goal is constant, so recall results don't change). The cache is cleared on finalize and rebuilt on resume. + +## Writing Effective Goals + +**Be specific and incremental:** +- Good: "Refactor src/auth/ to use the new session types from types.ts. Run `bun test src/auth` after each change. Stop when all tests pass." +- Bad: "Fix the auth system." + +**One concrete action per tick:** +- The agent works best when Next Action describes a single, verifiable step +- Goals that encourage small steps ("fix one test at a time") produce more reliable loops than goals that demand large leaps + +**Use success_command for objective verification:** +- `bun test src/auth` - loop runs until all auth tests pass +- `curl -sf http://localhost:3000/health` - loop runs until the service is healthy +- `grep -q 'TODO' src/module.ts && exit 1 || exit 0` - loop runs until no TODOs remain + +## Key Files + +| File | Purpose | +|------|---------| +| `src/loop/runner.ts` | LoopRunner: tick lifecycle, memory caching, critique scheduling, finalization | +| `src/loop/prompt.ts` | Per-tick prompt builder with memory and critique injection | +| `src/loop/types.ts` | Types, Zod schemas, constants, ceilings | +| `src/loop/store.ts` | SQLite persistence layer | +| `src/loop/state-file.ts` | State file init, read, YAML frontmatter parsing | +| `src/loop/tool.ts` | `phantom_loop` MCP tool (start/status/stop/list) | +| `src/loop/critique.ts` | Mid-loop Sonnet 4.6 critique judge | +| `src/loop/post-loop.ts` | Post-loop evolution and memory consolidation pipeline | +| `src/loop/notifications.ts` | Slack progress bar, reaction ladder, stop button |