diff --git a/src/acceptance/setup.ts b/src/acceptance/setup.ts index dd1bda5..d00a028 100644 --- a/src/acceptance/setup.ts +++ b/src/acceptance/setup.ts @@ -2,5 +2,6 @@ import '@domain/rules/cycle-rules.steps.js'; import '@domain/services/cycle-manager.steps.js'; import '@features/cycle-management/bridge-run-syncer.steps.js'; import '@features/cycle-management/cooldown-belt-computer.steps.js'; +import '@features/cycle-management/cooldown-follow-up-runner.steps.js'; import '@features/cycle-management/cycle-activation-name-resolver.steps.js'; import '@infra/execution/session-bridge.steps.js'; diff --git a/src/features/cycle-management/cooldown-follow-up-runner.feature b/src/features/cycle-management/cooldown-follow-up-runner.feature new file mode 100644 index 0000000..dfe8c62 --- /dev/null +++ b/src/features/cycle-management/cooldown-follow-up-runner.feature @@ -0,0 +1,116 @@ +Feature: Follow-up analysis pipeline during cooldown + After a cooldown phase completes its core processing, a series of follow-up + analyses run to improve the knowledge system: predictions are matched to + outcomes, calibration biases are detected, learnings are promoted through + the knowledge hierarchy, expired learnings are flagged, and friction + points are resolved. 
+ + Background: + Given the follow-up pipeline environment is ready + + # ── Prediction matching ────────────────────────────────────── + + Scenario: predictions are matched to outcomes for each bet run + Given prediction matching is enabled + And the cycle has bets with runs "run-1" and "run-2" + When the follow-up pipeline runs + Then predictions are matched for run "run-1" + And predictions are matched for run "run-2" + + Scenario: prediction matching is skipped when not enabled + Given prediction matching is not enabled + And the cycle has bets with runs "run-1" and "run-2" + When the follow-up pipeline runs + Then no prediction matching occurs + + # ── Calibration detection ──────────────────────────────────── + + Scenario: calibration biases are detected for each bet run + Given calibration detection is enabled + And the cycle has bets with runs "run-1" and "run-2" + When the follow-up pipeline runs + Then calibration is checked for run "run-1" + And calibration is checked for run "run-2" + + Scenario: calibration detection is skipped when not enabled + Given calibration detection is not enabled + And the cycle has bets with runs "run-1" and "run-2" + When the follow-up pipeline runs + Then no calibration detection occurs + + # ── Hierarchical promotion ─────────────────────────────────── + + Scenario: learnings are promoted through the knowledge hierarchy + Given hierarchical promotion is enabled + And the knowledge store contains step-tier learnings + When the follow-up pipeline runs + Then step learnings are promoted to flavor tier + And flavor learnings are promoted to stage tier + And stage learnings are promoted to category tier + + Scenario: hierarchical promotion failure does not abort the pipeline + Given hierarchical promotion is enabled + And hierarchical promotion will fail with an internal error + When the follow-up pipeline runs + Then a warning is logged about hierarchical promotion failure + And cooldown continues normally + + # ── Expiry 
check ───────────────────────────────────────────── + + Scenario: expired learnings are flagged during cooldown + Given expiry checking is available + And learnings have expired + When the follow-up pipeline runs + Then expired learnings are flagged + + Scenario: expiry check is skipped when the knowledge store lacks the capability + Given expiry checking is not available + When the follow-up pipeline runs + Then no expiry check occurs + + Scenario: expiry check failure does not abort the pipeline + Given expiry checking is available + And the expiry check will fail with an internal error + When the follow-up pipeline runs + Then a warning is logged about expiry check failure + And cooldown continues normally + + # ── Friction analysis ──────────────────────────────────────── + + Scenario: friction points are resolved for each bet run + Given friction analysis is enabled + And the cycle has bets with runs "run-1" and "run-2" + When the follow-up pipeline runs + Then friction is analyzed for run "run-1" + And friction is analyzed for run "run-2" + + Scenario: friction analysis is skipped when not enabled + Given friction analysis is not enabled + And the cycle has bets with runs "run-1" and "run-2" + When the follow-up pipeline runs + Then no friction analysis occurs + + # ── Pipeline ordering ───────────────────────────────────────── + + Scenario: calibration runs after prediction matching to use its results + Given prediction matching is enabled + And calibration detection is enabled + And the cycle has bets with runs "run-1" + When the follow-up pipeline runs + Then predictions are matched before calibration is checked for run "run-1" + + # ── Per-run error isolation ────────────────────────────────── + + Scenario: a failing run does not prevent other runs from being analyzed + Given prediction matching is enabled + And the cycle has bets with runs "run-1" and "run-2" + And prediction matching will fail for run "run-1" + When the follow-up pipeline runs + Then 
predictions are matched for run "run-2" + And a warning is logged about the run failure + + Scenario: bets without a run are silently skipped + Given prediction matching is enabled + And the cycle has a bet without a run + When the follow-up pipeline runs + Then no prediction matching occurs diff --git a/src/features/cycle-management/cooldown-follow-up-runner.steps.ts b/src/features/cycle-management/cooldown-follow-up-runner.steps.ts new file mode 100644 index 0000000..d70ca1c --- /dev/null +++ b/src/features/cycle-management/cooldown-follow-up-runner.steps.ts @@ -0,0 +1,428 @@ +import { After, Given, Then, When, QuickPickleWorld } from 'quickpickle'; +import { expect, vi } from 'vitest'; +import { logger } from '@shared/lib/logger.js'; +import { CooldownFollowUpRunner, type CooldownFollowUpDeps } from './cooldown-follow-up-runner.js'; +import type { PredictionMatcher } from '@features/self-improvement/prediction-matcher.js'; +import type { CalibrationDetector } from '@features/self-improvement/calibration-detector.js'; +import type { HierarchicalPromoter } from '@infra/knowledge/hierarchical-promoter.js'; +import type { FrictionAnalyzer } from '@features/self-improvement/friction-analyzer.js'; +import type { Cycle } from '@domain/types/cycle.js'; +import type { Learning } from '@domain/types/learning.js'; + +type MatchFn = PredictionMatcher['match']; +type DetectFn = CalibrationDetector['detect']; +type AnalyzeFn = FrictionAnalyzer['analyze']; +type PromoteStepToFlavorFn = HierarchicalPromoter['promoteStepToFlavor']; +type PromoteFlavorToStageFn = HierarchicalPromoter['promoteFlavorToStage']; +type PromoteStageToCategoryFn = HierarchicalPromoter['promoteStageToCategory']; + +// -- World ------------------------------------------------------- + +interface CooldownFollowUpRunnerWorld extends QuickPickleWorld { + predictionMatcherSpy?: { match: ReturnType> }; + calibrationDetectorSpy?: { detect: ReturnType> }; + hierarchicalPromoterSpy?: { + promoteStepToFlavor: 
ReturnType>; + promoteFlavorToStage: ReturnType>; + promoteStageToCategory: ReturnType>; + }; + frictionAnalyzerSpy?: { analyze: ReturnType> }; + knowledgeStoreSpy: { + query: ReturnType; + checkExpiry?: ReturnType; + }; + cycle: Cycle; + runner?: CooldownFollowUpRunner; + loggerWarnSpy: ReturnType; + loggerDebugSpy: ReturnType; + lastError?: Error; +} + +// -- Helpers ----------------------------------------------------- + +function buildCycle(bets: { runId?: string }[]): Cycle { + return { + id: 'cycle-1', + name: 'Test Cycle', + budget: {}, + bets: bets.map((b, i) => ({ + id: `bet-${i + 1}`, + description: `Bet ${i + 1}`, + appetite: 1, + issueRefs: [], + outcome: 'pending' as const, + createdAt: new Date().toISOString(), + updatedAt: new Date().toISOString(), + ...b, + })), + pipelineMappings: [], + state: 'cooldown' as const, + cooldownReserve: 10, + createdAt: new Date().toISOString(), + updatedAt: new Date().toISOString(), + } as Cycle; +} + +function buildRunner(world: CooldownFollowUpRunnerWorld): CooldownFollowUpRunner { + const deps: CooldownFollowUpDeps = { + predictionMatcher: world.predictionMatcherSpy ?? null, + calibrationDetector: world.calibrationDetectorSpy ?? null, + hierarchicalPromoter: world.hierarchicalPromoterSpy ?? { + promoteStepToFlavor: vi.fn().mockReturnValue({ learnings: [], events: [] }), + promoteFlavorToStage: vi.fn().mockReturnValue({ learnings: [], events: [] }), + promoteStageToCategory: vi.fn(), + }, + frictionAnalyzer: world.frictionAnalyzerSpy ?? 
null, + knowledgeStore: world.knowledgeStoreSpy as unknown as CooldownFollowUpDeps['knowledgeStore'], + }; + return new CooldownFollowUpRunner(deps); +} + +// -- Background -------------------------------------------------- + +Given( + 'the follow-up pipeline environment is ready', + (world: CooldownFollowUpRunnerWorld) => { + world.knowledgeStoreSpy = { query: vi.fn().mockReturnValue([]) }; + world.cycle = buildCycle([]); + world.loggerWarnSpy = vi.spyOn(logger, 'warn').mockImplementation(() => {}); + world.loggerDebugSpy = vi.spyOn(logger, 'debug').mockImplementation(() => {}); + }, +); + +// -- Given: prediction matching ---------------------------------- + +Given( + 'prediction matching is enabled', + (world: CooldownFollowUpRunnerWorld) => { + world.predictionMatcherSpy = { match: vi.fn() }; + }, +); + +Given( + 'prediction matching is not enabled', + (_world: CooldownFollowUpRunnerWorld) => { + // predictionMatcherSpy left undefined -> null dep + }, +); + +Given( + 'prediction matching will fail for run {string}', + (world: CooldownFollowUpRunnerWorld, failRunId: string) => { + world.predictionMatcherSpy!.match.mockImplementation((runId: string) => { + if (runId === failRunId) throw new Error(`Prediction matching failed for ${runId}`); + return { runId, matched: [], unmatched: [], reflectionsWritten: 0 }; + }); + }, +); + +// -- Given: calibration detection -------------------------------- + +Given( + 'calibration detection is enabled', + (world: CooldownFollowUpRunnerWorld) => { + world.calibrationDetectorSpy = { detect: vi.fn() }; + }, +); + +Given( + 'calibration detection is not enabled', + (_world: CooldownFollowUpRunnerWorld) => { + // calibrationDetectorSpy left undefined -> null dep + }, +); + +// -- Given: hierarchical promotion ------------------------------- + +Given( + 'hierarchical promotion is enabled', + (world: CooldownFollowUpRunnerWorld) => { + world.hierarchicalPromoterSpy = { + promoteStepToFlavor: vi.fn().mockReturnValue({ learnings: [{ 
id: 'flavor-1' } as Learning], events: [] }), + promoteFlavorToStage: vi.fn().mockReturnValue({ learnings: [{ id: 'stage-1' } as Learning], events: [] }), + promoteStageToCategory: vi.fn(), + }; + }, +); + +Given( + 'the knowledge store contains step-tier learnings', + (world: CooldownFollowUpRunnerWorld) => { + world.knowledgeStoreSpy.query.mockReturnValue([{ id: 'step-1', tier: 'step' } as Learning]); + }, +); + +Given( + 'hierarchical promotion will fail with an internal error', + (world: CooldownFollowUpRunnerWorld) => { + world.hierarchicalPromoterSpy!.promoteStepToFlavor.mockImplementation(() => { + throw new Error('Simulated promotion failure'); + }); + }, +); + +// -- Given: expiry checking -------------------------------------- + +Given( + 'expiry checking is available', + (world: CooldownFollowUpRunnerWorld) => { + world.knowledgeStoreSpy.checkExpiry = vi.fn().mockReturnValue({ archived: [], flaggedStale: [] }); + }, +); + +Given( + 'learnings have expired', + (world: CooldownFollowUpRunnerWorld) => { + world.knowledgeStoreSpy.checkExpiry!.mockReturnValue({ + archived: [{ id: 'expired-1' } as Learning], + flaggedStale: [{ id: 'stale-1' } as Learning], + }); + }, +); + +Given( + 'expiry checking is not available', + (_world: CooldownFollowUpRunnerWorld) => { + // checkExpiry left undefined on knowledgeStoreSpy - duck-type check will skip + }, +); + +Given( + 'the expiry check will fail with an internal error', + (world: CooldownFollowUpRunnerWorld) => { + world.knowledgeStoreSpy.checkExpiry!.mockImplementation(() => { + throw new Error('Simulated expiry failure'); + }); + }, +); + +// -- Given: friction analysis ------------------------------------ + +Given( + 'friction analysis is enabled', + (world: CooldownFollowUpRunnerWorld) => { + world.frictionAnalyzerSpy = { analyze: vi.fn() }; + }, +); + +Given( + 'friction analysis is not enabled', + (_world: CooldownFollowUpRunnerWorld) => { + // frictionAnalyzerSpy left undefined -> null dep + }, +); + +// -- 
Given: cycle bets ------------------------------------------- + +Given( + 'the cycle has bets with runs {string} and {string}', + (world: CooldownFollowUpRunnerWorld, runId1: string, runId2: string) => { + world.cycle = buildCycle([{ runId: runId1 }, { runId: runId2 }]); + }, +); + +Given( + 'the cycle has bets with runs {string}', + (world: CooldownFollowUpRunnerWorld, runId: string) => { + world.cycle = buildCycle([{ runId }]); + }, +); + +Given( + 'the cycle has a bet without a run', + (world: CooldownFollowUpRunnerWorld) => { + world.cycle = buildCycle([{}]); // no runId + }, +); + +// -- When -------------------------------------------------------- + +When( + 'the follow-up pipeline runs', + (world: CooldownFollowUpRunnerWorld) => { + world.runner = buildRunner(world); + try { + world.runner.run(world.cycle); + } catch (err) { + world.lastError = err as Error; + } + }, +); + +// -- Then: prediction matching assertions ------------------------ + +Then( + 'predictions are matched for run {string}', + (world: CooldownFollowUpRunnerWorld, runId: string) => { + expect(world.lastError).toBeUndefined(); + expect(world.predictionMatcherSpy).toBeDefined(); + const calls = world.predictionMatcherSpy!.match.mock.calls as [string][]; + const match = calls.find(([id]) => id === runId); + expect(match).toBeDefined(); + }, +); + +Then( + 'no prediction matching occurs', + (world: CooldownFollowUpRunnerWorld) => { + expect(world.lastError).toBeUndefined(); + if (world.predictionMatcherSpy) { + expect(world.predictionMatcherSpy.match).not.toHaveBeenCalled(); + } + }, +); + +// -- Then: calibration detection assertions ---------------------- + +Then( + 'calibration is checked for run {string}', + (world: CooldownFollowUpRunnerWorld, runId: string) => { + expect(world.lastError).toBeUndefined(); + expect(world.calibrationDetectorSpy).toBeDefined(); + const calls = world.calibrationDetectorSpy!.detect.mock.calls as [string][]; + const match = calls.find(([id]) => id === runId); + 
expect(match).toBeDefined(); + }, +); + +Then( + 'no calibration detection occurs', + (world: CooldownFollowUpRunnerWorld) => { + expect(world.lastError).toBeUndefined(); + if (world.calibrationDetectorSpy) { + expect(world.calibrationDetectorSpy.detect).not.toHaveBeenCalled(); + } + }, +); + +// -- Then: hierarchical promotion assertions --------------------- + +Then( + 'step learnings are promoted to flavor tier', + (world: CooldownFollowUpRunnerWorld) => { + expect(world.lastError).toBeUndefined(); + expect(world.hierarchicalPromoterSpy!.promoteStepToFlavor).toHaveBeenCalled(); + }, +); + +Then( + 'flavor learnings are promoted to stage tier', + (world: CooldownFollowUpRunnerWorld) => { + expect(world.lastError).toBeUndefined(); + expect(world.hierarchicalPromoterSpy!.promoteFlavorToStage).toHaveBeenCalled(); + }, +); + +Then( + 'stage learnings are promoted to category tier', + (world: CooldownFollowUpRunnerWorld) => { + expect(world.lastError).toBeUndefined(); + expect(world.hierarchicalPromoterSpy!.promoteStageToCategory).toHaveBeenCalled(); + }, +); + +// -- Then: expiry check assertions ------------------------------- + +Then( + 'expired learnings are flagged', + (world: CooldownFollowUpRunnerWorld) => { + expect(world.lastError).toBeUndefined(); + expect(world.knowledgeStoreSpy.checkExpiry).toHaveBeenCalled(); + }, +); + +Then( + 'no expiry check occurs', + (world: CooldownFollowUpRunnerWorld) => { + expect(world.lastError).toBeUndefined(); + // checkExpiry is undefined on the spy - duck-type check should have skipped it + if (world.knowledgeStoreSpy.checkExpiry) { + expect(world.knowledgeStoreSpy.checkExpiry).not.toHaveBeenCalled(); + } + }, +); + +// -- Then: friction analysis assertions -------------------------- + +Then( + 'friction is analyzed for run {string}', + (world: CooldownFollowUpRunnerWorld, runId: string) => { + expect(world.lastError).toBeUndefined(); + expect(world.frictionAnalyzerSpy).toBeDefined(); + const calls = 
world.frictionAnalyzerSpy!.analyze.mock.calls as [string][]; + const match = calls.find(([id]) => id === runId); + expect(match).toBeDefined(); + }, +); + +Then( + 'no friction analysis occurs', + (world: CooldownFollowUpRunnerWorld) => { + expect(world.lastError).toBeUndefined(); + if (world.frictionAnalyzerSpy) { + expect(world.frictionAnalyzerSpy.analyze).not.toHaveBeenCalled(); + } + }, +); + +// -- Then: pipeline ordering assertions -------------------------- + +Then( + 'predictions are matched before calibration is checked for run {string}', + (world: CooldownFollowUpRunnerWorld, runId: string) => { + expect(world.lastError).toBeUndefined(); + expect(world.predictionMatcherSpy).toBeDefined(); + expect(world.calibrationDetectorSpy).toBeDefined(); + + const matchCalls = world.predictionMatcherSpy!.match.mock.calls as [string][]; + const detectCalls = world.calibrationDetectorSpy!.detect.mock.calls as [string][]; + const matchIdx = matchCalls.findIndex(([id]) => id === runId); + const detectIdx = detectCalls.findIndex(([id]) => id === runId); + expect(matchIdx).toBeGreaterThanOrEqual(0); + expect(detectIdx).toBeGreaterThanOrEqual(0); + + // Verify ordering via invocation call order + const matchOrder = world.predictionMatcherSpy!.match.mock.invocationCallOrder[matchIdx]!; + const detectOrder = world.calibrationDetectorSpy!.detect.mock.invocationCallOrder[detectIdx]!; + expect(matchOrder).toBeLessThan(detectOrder); + }, +); + +// -- Then: safety assertions ------------------------------------- + +Then( + 'a warning is logged about hierarchical promotion failure', + (world: CooldownFollowUpRunnerWorld) => { + expect(world.loggerWarnSpy).toHaveBeenCalled(); + const msg = world.loggerWarnSpy.mock.calls[0]![0] as string; + expect(msg).toContain('Hierarchical learning promotion failed'); + }, +); + +Then( + 'a warning is logged about expiry check failure', + (world: CooldownFollowUpRunnerWorld) => { + expect(world.loggerWarnSpy).toHaveBeenCalled(); + const msgs = 
world.loggerWarnSpy.mock.calls.map((c: unknown[]) => c[0] as string); + const expiryMsg = msgs.find((m) => m.includes('Learning expiry check failed')); + expect(expiryMsg).toBeDefined(); + }, +); + +Then( + 'a warning is logged about the run failure', + (world: CooldownFollowUpRunnerWorld) => { + expect(world.loggerWarnSpy).toHaveBeenCalled(); + const msgs = world.loggerWarnSpy.mock.calls.map((c: unknown[]) => c[0] as string); + const failMsg = msgs.find((m) => m.includes('failed for run')); + expect(failMsg).toBeDefined(); + }, +); + +// -- Cleanup ----------------------------------------------------- + +After((_world: CooldownFollowUpRunnerWorld) => { + vi.restoreAllMocks(); +}); + +// 'cooldown continues normally' step is shared - defined in bridge-run-syncer.steps.ts diff --git a/src/features/cycle-management/cooldown-follow-up-runner.test.ts b/src/features/cycle-management/cooldown-follow-up-runner.test.ts new file mode 100644 index 0000000..52abe9a --- /dev/null +++ b/src/features/cycle-management/cooldown-follow-up-runner.test.ts @@ -0,0 +1,337 @@ +import { vi } from 'vitest'; +import { logger } from '@shared/lib/logger.js'; +import { CooldownFollowUpRunner, type CooldownFollowUpDeps } from './cooldown-follow-up-runner.js'; +import type { PredictionMatcher } from '@features/self-improvement/prediction-matcher.js'; +import type { CalibrationDetector } from '@features/self-improvement/calibration-detector.js'; +import type { HierarchicalPromoter } from '@infra/knowledge/hierarchical-promoter.js'; +import type { FrictionAnalyzer } from '@features/self-improvement/friction-analyzer.js'; +import type { Cycle } from '@domain/types/cycle.js'; +import type { Learning } from '@domain/types/learning.js'; + +type MatchFn = PredictionMatcher['match']; +type DetectFn = CalibrationDetector['detect']; +type AnalyzeFn = FrictionAnalyzer['analyze']; +type PromoteStepToFlavorFn = HierarchicalPromoter['promoteStepToFlavor']; +type PromoteFlavorToStageFn = 
HierarchicalPromoter['promoteFlavorToStage']; +type PromoteStageToCategoryFn = HierarchicalPromoter['promoteStageToCategory']; + +function makeCycle(bets: { runId?: string }[]): Cycle { + return { + id: 'cycle-1', + name: 'Test', + budget: {}, + bets: bets.map((b, i) => ({ + id: `bet-${i}`, + description: `Bet ${i}`, + appetite: 1, + issueRefs: [], + outcome: 'pending' as const, + createdAt: new Date().toISOString(), + updatedAt: new Date().toISOString(), + ...b, + })), + pipelineMappings: [], + state: 'cooldown' as const, + cooldownReserve: 10, + createdAt: new Date().toISOString(), + updatedAt: new Date().toISOString(), + } as Cycle; +} + +function makePromoter() { + return { + promoteStepToFlavor: vi.fn().mockReturnValue({ learnings: [{ id: 'f1' } as Learning], events: [] }), + promoteFlavorToStage: vi.fn().mockReturnValue({ learnings: [{ id: 's1' } as Learning], events: [] }), + promoteStageToCategory: vi.fn(), + }; +} + +function makeDeps(overrides: Partial = {}): CooldownFollowUpDeps { + return { + predictionMatcher: null, + calibrationDetector: null, + hierarchicalPromoter: makePromoter(), + frictionAnalyzer: null, + knowledgeStore: { query: vi.fn().mockReturnValue([]) }, + ...overrides, + }; +} + +describe('CooldownFollowUpRunner', () => { + let warnSpy: ReturnType; + let debugSpy: ReturnType; + + beforeEach(() => { + warnSpy = vi.spyOn(logger, 'warn').mockImplementation(() => {}); + debugSpy = vi.spyOn(logger, 'debug').mockImplementation(() => {}); + }); + + afterEach(() => { + vi.restoreAllMocks(); + }); + + describe('run() orchestration', () => { + it('calls all enabled analyses in order', () => { + const matchSpy = vi.fn(); + const detectSpy = vi.fn(); + const analyzeSpy = vi.fn(); + const promoter = makePromoter(); + const querySpy = vi.fn().mockReturnValue([{ id: 'l1' } as Learning]); + + const runner = new CooldownFollowUpRunner({ + predictionMatcher: { match: matchSpy }, + calibrationDetector: { detect: detectSpy }, + hierarchicalPromoter: promoter, 
+ frictionAnalyzer: { analyze: analyzeSpy }, + knowledgeStore: { query: querySpy }, + }); + + const cycle = makeCycle([{ runId: 'r1' }]); + runner.run(cycle); + + expect(matchSpy).toHaveBeenCalledWith('r1'); + expect(detectSpy).toHaveBeenCalledWith('r1'); + expect(promoter.promoteStepToFlavor).toHaveBeenCalled(); + expect(analyzeSpy).toHaveBeenCalledWith('r1'); + }); + + it('runs prediction matching before calibration detection', () => { + const matchSpy = vi.fn(); + const detectSpy = vi.fn(); + + const runner = new CooldownFollowUpRunner(makeDeps({ + predictionMatcher: { match: matchSpy }, + calibrationDetector: { detect: detectSpy }, + })); + + runner.run(makeCycle([{ runId: 'r1' }])); + + const matchOrder = matchSpy.mock.invocationCallOrder[0]!; + const detectOrder = detectSpy.mock.invocationCallOrder[0]!; + expect(matchOrder).toBeLessThan(detectOrder); + }); + + it('runs for each bet with a runId', () => { + const matchSpy = vi.fn(); + const runner = new CooldownFollowUpRunner(makeDeps({ + predictionMatcher: { match: matchSpy }, + })); + + runner.run(makeCycle([{ runId: 'r1' }, { runId: 'r2' }, {}])); + + expect(matchSpy).toHaveBeenCalledTimes(2); + expect(matchSpy).toHaveBeenCalledWith('r1'); + expect(matchSpy).toHaveBeenCalledWith('r2'); + }); + + it('skips bets without runId', () => { + const matchSpy = vi.fn(); + const runner = new CooldownFollowUpRunner(makeDeps({ + predictionMatcher: { match: matchSpy }, + })); + + runner.run(makeCycle([{}])); + + expect(matchSpy).not.toHaveBeenCalled(); + }); + }); + + describe('prediction matching', () => { + it('no-ops when predictionMatcher is null', () => { + const runner = new CooldownFollowUpRunner(makeDeps()); + runner.run(makeCycle([{ runId: 'r1' }])); + // Should not throw + }); + + it('isolates per-run failures', () => { + const matchSpy = vi.fn().mockImplementation((runId: string) => { + if (runId === 'r1') throw new Error('match broke'); + }); + const runner = new CooldownFollowUpRunner(makeDeps({ + 
predictionMatcher: { match: matchSpy }, + })); + + runner.run(makeCycle([{ runId: 'r1' }, { runId: 'r2' }])); + + expect(matchSpy).toHaveBeenCalledTimes(2); + expect(warnSpy).toHaveBeenCalledWith(expect.stringContaining('Prediction matching failed for run r1')); + }); + }); + + describe('calibration detection', () => { + it('no-ops when calibrationDetector is null', () => { + const runner = new CooldownFollowUpRunner(makeDeps()); + runner.run(makeCycle([{ runId: 'r1' }])); + }); + + it('detects for each bet run', () => { + const detectSpy = vi.fn(); + const runner = new CooldownFollowUpRunner(makeDeps({ + calibrationDetector: { detect: detectSpy }, + })); + + runner.run(makeCycle([{ runId: 'r1' }, { runId: 'r2' }])); + + expect(detectSpy).toHaveBeenCalledWith('r1'); + expect(detectSpy).toHaveBeenCalledWith('r2'); + }); + }); + + describe('hierarchical promotion', () => { + it('promotes step -> flavor -> stage -> category', () => { + const promoter = makePromoter(); + const querySpy = vi.fn().mockReturnValue([{ id: 'step-1' } as Learning]); + const runner = new CooldownFollowUpRunner(makeDeps({ + hierarchicalPromoter: promoter, + knowledgeStore: { query: querySpy }, + })); + + runner.run(makeCycle([])); + + expect(querySpy).toHaveBeenCalledWith({ tier: 'step' }); + expect(promoter.promoteStepToFlavor).toHaveBeenCalledWith( + [{ id: 'step-1' }], + 'cooldown-retrospective', + ); + expect(promoter.promoteFlavorToStage).toHaveBeenCalledWith( + [{ id: 'f1' }], + 'cooldown', + ); + expect(promoter.promoteStageToCategory).toHaveBeenCalledWith([{ id: 's1' }]); + }); + + it('swallows promotion errors and logs warning', () => { + const promoter = makePromoter(); + promoter.promoteStepToFlavor.mockImplementation(() => { + throw new Error('promotion broke'); + }); + const runner = new CooldownFollowUpRunner(makeDeps({ hierarchicalPromoter: promoter })); + + runner.run(makeCycle([])); + + expect(warnSpy).toHaveBeenCalledWith(expect.stringContaining('Hierarchical learning 
promotion failed')); + }); + + it('logs non-Error throws as strings', () => { + const promoter = makePromoter(); + promoter.promoteStepToFlavor.mockImplementation(() => { + throw 'string error'; // eslint-disable-line no-throw-literal + }); + const runner = new CooldownFollowUpRunner(makeDeps({ hierarchicalPromoter: promoter })); + + runner.run(makeCycle([])); + + expect(warnSpy).toHaveBeenCalledWith(expect.stringContaining('string error')); + }); + }); + + describe('expiry check', () => { + it('calls checkExpiry when available', () => { + const checkExpiry = vi.fn().mockReturnValue({ archived: [], flaggedStale: [] }); + const runner = new CooldownFollowUpRunner(makeDeps({ + knowledgeStore: { query: vi.fn().mockReturnValue([]), checkExpiry }, + })); + + runner.run(makeCycle([])); + + expect(checkExpiry).toHaveBeenCalled(); + }); + + it('skips when checkExpiry is not a function', () => { + const runner = new CooldownFollowUpRunner(makeDeps({ + knowledgeStore: { query: vi.fn().mockReturnValue([]) }, + })); + + // Should not throw — duck-type check skips + runner.run(makeCycle([])); + }); + + it('logs debug messages for expiry results', () => { + const checkExpiry = vi.fn().mockReturnValue({ + archived: [{ id: 'a1' }], + flaggedStale: [{ id: 's1' }], + }); + const runner = new CooldownFollowUpRunner(makeDeps({ + knowledgeStore: { query: vi.fn().mockReturnValue([]), checkExpiry }, + })); + + runner.run(makeCycle([])); + + expect(debugSpy).toHaveBeenCalled(); + }); + + it('swallows expiry errors and logs warning', () => { + const checkExpiry = vi.fn().mockImplementation(() => { + throw new Error('expiry broke'); + }); + const runner = new CooldownFollowUpRunner(makeDeps({ + knowledgeStore: { query: vi.fn().mockReturnValue([]), checkExpiry }, + })); + + runner.run(makeCycle([])); + + expect(warnSpy).toHaveBeenCalledWith(expect.stringContaining('Learning expiry check failed')); + }); + + it('logs non-Error throws as strings in expiry', () => { + const checkExpiry = 
vi.fn().mockImplementation(() => { + throw 42; // eslint-disable-line no-throw-literal + }); + const runner = new CooldownFollowUpRunner(makeDeps({ + knowledgeStore: { query: vi.fn().mockReturnValue([]), checkExpiry }, + })); + + runner.run(makeCycle([])); + + expect(warnSpy).toHaveBeenCalledWith(expect.stringContaining('42')); + }); + }); + + describe('friction analysis', () => { + it('no-ops when frictionAnalyzer is null', () => { + const runner = new CooldownFollowUpRunner(makeDeps()); + runner.run(makeCycle([{ runId: 'r1' }])); + }); + + it('analyzes friction for each bet run', () => { + const analyzeSpy = vi.fn(); + const runner = new CooldownFollowUpRunner(makeDeps({ + frictionAnalyzer: { analyze: analyzeSpy }, + })); + + runner.run(makeCycle([{ runId: 'r1' }, { runId: 'r2' }])); + + expect(analyzeSpy).toHaveBeenCalledWith('r1'); + expect(analyzeSpy).toHaveBeenCalledWith('r2'); + }); + }); + + describe('per-run error isolation', () => { + it('continues after a run failure', () => { + const matchSpy = vi.fn().mockImplementation((runId: string) => { + if (runId === 'r1') throw new Error('run broke'); + }); + const runner = new CooldownFollowUpRunner(makeDeps({ + predictionMatcher: { match: matchSpy }, + })); + + runner.run(makeCycle([{ runId: 'r1' }, { runId: 'r2' }])); + + expect(matchSpy).toHaveBeenCalledTimes(2); + expect(warnSpy).toHaveBeenCalledTimes(1); + }); + + it('logs non-Error throws in per-run handler', () => { + const matchSpy = vi.fn().mockImplementation(() => { + throw 'string run error'; // eslint-disable-line no-throw-literal + }); + const runner = new CooldownFollowUpRunner(makeDeps({ + predictionMatcher: { match: matchSpy }, + })); + + runner.run(makeCycle([{ runId: 'r1' }])); + + expect(warnSpy).toHaveBeenCalledWith(expect.stringContaining('string run error')); + }); + }); +}); diff --git a/src/features/cycle-management/cooldown-follow-up-runner.ts b/src/features/cycle-management/cooldown-follow-up-runner.ts new file mode 100644 index 
0000000..5afd330
--- /dev/null
+++ b/src/features/cycle-management/cooldown-follow-up-runner.ts
@@ -0,0 +1,165 @@
+import type { PredictionMatcher } from '@features/self-improvement/prediction-matcher.js';
+import type { CalibrationDetector } from '@features/self-improvement/calibration-detector.js';
+import type { HierarchicalPromoter } from '@infra/knowledge/hierarchical-promoter.js';
+import type { FrictionAnalyzer } from '@features/self-improvement/friction-analyzer.js';
+import type { IKnowledgeStore } from '@domain/ports/knowledge-store.js';
+import type { Cycle } from '@domain/types/cycle.js';
+import { logger } from '@shared/lib/logger.js';
+import { buildExpiryCheckMessages } from './cooldown-session.helpers.js';
+
+/**
+ * Dependencies injected into CooldownFollowUpRunner for testability.
+ *
+ * The matcher, detector and analyzer may be null, in which case the
+ * corresponding analysis is skipped; `checkExpiry` is optional on the store.
+ */
+export interface CooldownFollowUpDeps {
+  predictionMatcher: Pick<PredictionMatcher, 'match'> | null;
+  calibrationDetector: Pick<CalibrationDetector, 'detect'> | null;
+  hierarchicalPromoter: Pick<
+    HierarchicalPromoter,
+    'promoteStepToFlavor' | 'promoteFlavorToStage' | 'promoteStageToCategory'
+  >;
+  frictionAnalyzer: Pick<FrictionAnalyzer, 'analyze'> | null;
+  knowledgeStore: Pick<IKnowledgeStore, 'query'> & { checkExpiry?: IKnowledgeStore['checkExpiry'] };
+}
+
+/**
+ * Render a thrown value as a log-friendly message.
+ *
+ * Error instances contribute their `message`; anything else (strings,
+ * numbers, ...) is converted with `String()`.
+ */
+function describeError(err: unknown): string {
+  return err instanceof Error ? err.message : String(err);
+}
+
+/**
+ * Runs follow-up analyses during cooldown to improve the knowledge system.
+ *
+ * Extracted from CooldownSession to isolate the follow-up pipeline
+ * (prediction matching, calibration detection, hierarchical promotion,
+ * expiry checking, friction analysis) from the cooldown orchestration logic.
+ */
+export class CooldownFollowUpRunner {
+  constructor(private readonly deps: CooldownFollowUpDeps) {}
+
+  /**
+   * Execute the full follow-up pipeline for a cycle.
+   *
+   * Analyses run in a fixed order: prediction matching → calibration detection
+   * → hierarchical promotion → expiry check → friction analysis.
+   * Calibration detection must run after prediction matching because it reads
+   * validation reflections that prediction matching produces.
+   *
+   * Non-critical: individual analysis failures are logged as warnings
+   * and do not abort the pipeline or the cooldown.
+   */
+  run(cycle: Cycle): void {
+    this.runPredictionMatching(cycle);
+    this.runCalibrationDetection(cycle);
+    this.runHierarchicalPromotion();
+    this.runExpiryCheck();
+    this.runFrictionAnalysis(cycle);
+  }
+
+  /**
+   * For each bet with a runId, match predictions to outcomes.
+   * No-op when no prediction matcher was injected.
+   */
+  private runPredictionMatching(cycle: Cycle): void {
+    // Narrow once into a local so the callback needs no non-null assertion.
+    const matcher = this.deps.predictionMatcher;
+    // Stryker disable next-line ConditionalExpression: guard redundant with catch in runForBetRun
+    if (!matcher) return;
+    // Stryker disable next-line StringLiteral: presentation text — label for error logging
+    this.runForEachBetRun(cycle, (runId) => matcher.match(runId), 'Prediction matching');
+  }
+
+  /**
+   * For each bet with a runId, detect calibration biases.
+   * Must run after runPredictionMatching — reads validation reflections it produces.
+   * No-op when no calibration detector was injected.
+   */
+  private runCalibrationDetection(cycle: Cycle): void {
+    const detector = this.deps.calibrationDetector;
+    // Stryker disable next-line ConditionalExpression: guard redundant with catch in runForBetRun
+    if (!detector) return;
+    // Stryker disable next-line StringLiteral: presentation text — label for error logging
+    this.runForEachBetRun(cycle, (runId) => detector.detect(runId), 'Calibration detection');
+  }
+
+  /**
+   * Promote step-tier learnings up through flavor → stage → category.
+   * Non-critical: errors are logged and swallowed.
+   */
+  private runHierarchicalPromotion(): void {
+    try {
+      // Stryker disable next-line ObjectLiteral: tier filter is tested via hierarchical promotion integration
+      const stepLearnings = this.deps.knowledgeStore.query({ tier: 'step' });
+      const { learnings: flavorLearnings } = this.deps.hierarchicalPromoter.promoteStepToFlavor(stepLearnings, 'cooldown-retrospective');
+      const { learnings: stageLearnings } = this.deps.hierarchicalPromoter.promoteFlavorToStage(flavorLearnings, 'cooldown');
+      this.deps.hierarchicalPromoter.promoteStageToCategory(stageLearnings);
+      // Stryker disable next-line all: catch block is pure error-reporting — non-critical logging
+    } catch (err) {
+      logger.warn(`Hierarchical learning promotion failed: ${describeError(err)}`);
+    }
+  }
+
+  /**
+   * Run the knowledge store's expiry check, when it provides one, and log
+   * each resulting message at debug level.
+   * Non-critical: errors are logged and swallowed.
+   */
+  private runExpiryCheck(): void {
+    try {
+      // Stryker disable next-line ConditionalExpression: guard redundant with catch — checkExpiry absence is swallowed
+      if (typeof this.deps.knowledgeStore.checkExpiry !== 'function') return;
+      const result = this.deps.knowledgeStore.checkExpiry();
+      for (const message of buildExpiryCheckMessages(result)) {
+        logger.debug(message);
+      }
+      // Stryker disable next-line all: catch block is pure error-reporting — non-critical logging
+    } catch (err) {
+      logger.warn(`Learning expiry check failed: ${describeError(err)}`);
+    }
+  }
+
+  /**
+   * For each bet with a runId, run the friction analyzer.
+   * No-op when no friction analyzer was injected.
+   */
+  private runFrictionAnalysis(cycle: Cycle): void {
+    const analyzer = this.deps.frictionAnalyzer;
+    // Stryker disable next-line ConditionalExpression: guard redundant with catch in runForBetRun
+    if (!analyzer) return;
+    // Stryker disable next-line StringLiteral: presentation text — label for error logging
+    this.runForEachBetRun(cycle, (runId) => analyzer.analyze(runId), 'Friction analysis');
+  }
+
+  /** Invoke `runner` once per bet that has a runId; bets without one are skipped. */
+  private runForEachBetRun(
+    cycle: Cycle,
+    runner: (runId: string) => void,
+    label: string,
+  ): void {
+    for (const bet of cycle.bets) {
+      if (!bet.runId) continue;
+      this.runForBetRun(bet.runId, runner, label);
+    }
+  }
+
+  /** Run one analysis for one run; a throw is logged as a warning and swallowed. */
+  private runForBetRun(
+    runId: string,
+    runner: (runId: string) => void,
+    label: string,
+  ): void {
+    try {
+      runner(runId);
+      // Stryker disable next-line all: catch block is pure error-reporting — non-critical logging
+    } catch (err) {
+      logger.warn(`${label} failed for run ${runId}: ${describeError(err)}`);
+    }
+  }
+}
diff --git a/src/features/cycle-management/cooldown-session.ts b/src/features/cycle-management/cooldown-session.ts
index 8b192de..d7839cb 100644
--- a/src/features/cycle-management/cooldown-session.ts
+++ b/src/features/cycle-management/cooldown-session.ts
@@ -33,10 +33,10 @@ import type { BeltCalculator } from '@features/belt/belt-calculator.js';
 import type { BeltComputeResult } from '@features/belt/belt-calculator.js';
 import type { KataAgentConfidenceCalculator } from '@features/kata-agent/kata-agent-confidence-calculator.js';
 import { CooldownBeltComputer } from './cooldown-belt-computer.js';
+import { CooldownFollowUpRunner } from './cooldown-follow-up-runner.js';
 import {
   buildAgentPerspectiveFromProposals,
   buildCooldownBudgetUsage,
-  buildExpiryCheckMessages,
   buildCooldownLearningDrafts,
   buildDiaryBetOutcomesFromCycleBets,
   buildDojoSessionBuildRequest,
@@ -262,21 +262,14 @@ export interface CooldownPrepareResult {
 export class CooldownSession {
   private readonly deps: CooldownSessionDeps;
   private readonly proposalGenerator: Pick;
-  private readonly predictionMatcher: Pick | null;
-  private readonly calibrationDetector: Pick | null;
-  private readonly hierarchicalPromoter: Pick;
-  private readonly frictionAnalyzer: Pick | null;
   private readonly _nextKeikoProposalGenerator: Pick | null;
   private readonly bridgeRunSyncer: BridgeRunSyncer;
   private readonly beltComputer: CooldownBeltComputer;
+  private readonly followUpRunner: CooldownFollowUpRunner;
 
   constructor(deps: CooldownSessionDeps) {
     this.deps = deps;
     this.proposalGenerator = this.resolveProposalGenerator(deps);
-    this.predictionMatcher = this.resolvePredictionMatcher(deps);
-    this.calibrationDetector = this.resolveCalibrationDetector(deps);
-    this.hierarchicalPromoter = this.resolveHierarchicalPromoter(deps);
-    this.frictionAnalyzer = this.resolveFrictionAnalyzer(deps);
     this._nextKeikoProposalGenerator = this.resolveNextKeikoProposalGenerator(deps);
this.bridgeRunSyncer = new BridgeRunSyncer({ bridgeRunsDir: deps.bridgeRunsDir, @@ -291,6 +284,13 @@ export class CooldownSession { agentDir: deps.agentDir, katakaDir: deps.katakaDir, }); + this.followUpRunner = new CooldownFollowUpRunner({ + predictionMatcher: this.resolvePredictionMatcher(deps), + calibrationDetector: this.resolveCalibrationDetector(deps), + hierarchicalPromoter: this.resolveHierarchicalPromoter(deps), + frictionAnalyzer: this.resolveFrictionAnalyzer(deps), + knowledgeStore: deps.knowledgeStore, + }); } private resolveProposalGenerator(deps: CooldownSessionDeps): Pick { @@ -408,13 +408,6 @@ export class CooldownSession { this.bridgeRunSyncer.recordBetOutcomes(cycleId, betOutcomes); } - private runCooldownFollowUps(cycle: Cycle): void { - this.runPredictionMatching(cycle); - this.runCalibrationDetection(cycle); - this.runHierarchicalPromotion(); - this.runExpiryCheck(); - this.runFrictionAnalysis(cycle); - } private writeRunDiary(input: { cycleId: string; @@ -553,7 +546,7 @@ export class CooldownSession { try { const phase = this.buildCooldownPhase(cycleId, betOutcomes); - this.runCooldownFollowUps(phase.cycle); + this.followUpRunner.run(phase.cycle); const beltResult = this.beltComputer.compute(); this.beltComputer.computeAgentConfidence(); this.writeRunDiary({ @@ -609,7 +602,7 @@ export class CooldownSession { try { const phase = this.buildCooldownPhase(cycleId, betOutcomes); - this.runCooldownFollowUps(phase.cycle); + this.followUpRunner.run(phase.cycle); const effectiveDepth = depth ?? this.deps.synthesisDepth ?? 'standard'; const { synthesisInputId, synthesisInputPath } = this.writeSynthesisInput( cycleId, @@ -990,102 +983,6 @@ export class CooldownSession { return filterExecutionHistoryForCycle(allEntries, cycleId); } - /** - * For each bet with a runId, run PredictionMatcher to match predictions to outcomes. - * Writes validation/unmatched reflections to the run's JSONL file. 
- * No-op when runsDir is absent or no prediction matcher is available. - */ - private runPredictionMatching(cycle: Cycle): void { - // Stryker disable next-line ConditionalExpression: guard redundant with catch in runForBetRun - if (!this.predictionMatcher) return; - // Stryker disable next-line StringLiteral: presentation text — label for error logging - this.runForEachBetRun(cycle, (runId) => this.predictionMatcher!.match(runId), 'Prediction matching'); - } - - /** - * For each bet with a runId, run CalibrationDetector to detect systematic prediction biases. - * Writes CalibrationReflections to the run's JSONL file. - * Must run after runPredictionMatching (reads validation reflections it produces). - * No-op when runsDir is absent or no calibration detector is available. - */ - private runCalibrationDetection(cycle: Cycle): void { - // Stryker disable next-line ConditionalExpression: guard redundant with catch in runForBetRun - if (!this.calibrationDetector) return; - // Stryker disable next-line StringLiteral: presentation text — label for error logging - this.runForEachBetRun(cycle, (runId) => this.calibrationDetector!.detect(runId), 'Calibration detection'); - } - - private runForEachBetRun( - cycle: Cycle, - runner: (runId: string) => void, - label: string, - ): void { - for (const bet of cycle.bets) { - if (!bet.runId) continue; - this.runForBetRun(bet.runId, runner, label); - } - } - - private runForBetRun( - runId: string, - runner: (runId: string) => void, - label: string, - ): void { - try { - runner(runId); - // Stryker disable next-line all: catch block is pure error-reporting — non-critical logging - } catch (err) { - logger.warn(`${label} failed for run ${runId}: ${err instanceof Error ? err.message : String(err)}`); - } - } - - /** - * Promote step-tier learnings up through flavor → stage → category. - * Non-critical: errors are logged and swallowed. 
- */ - private runHierarchicalPromotion(): void { - try { - // Stryker disable next-line ObjectLiteral: tier filter is tested via hierarchical promotion integration - const stepLearnings = this.deps.knowledgeStore.query({ tier: 'step' }); - const { learnings: flavorLearnings } = this.hierarchicalPromoter.promoteStepToFlavor(stepLearnings, 'cooldown-retrospective'); - const { learnings: stageLearnings } = this.hierarchicalPromoter.promoteFlavorToStage(flavorLearnings, 'cooldown'); - this.hierarchicalPromoter.promoteStageToCategory(stageLearnings); - // Stryker disable next-line all: catch block is pure error-reporting — non-critical logging - } catch (err) { - logger.warn(`Hierarchical learning promotion failed: ${err instanceof Error ? err.message : String(err)}`); - } - } - - /** - * Scan all learnings for expiry: auto-archives expired operational ones, - * flags stale strategic ones. Non-critical: errors are logged and swallowed. - */ - private runExpiryCheck(): void { - try { - // Stryker disable next-line ConditionalExpression: guard redundant with catch — checkExpiry absence is swallowed - if (typeof (this.deps.knowledgeStore as { checkExpiry?: unknown }).checkExpiry !== 'function') return; - const result = this.deps.knowledgeStore.checkExpiry(); - for (const message of buildExpiryCheckMessages(result)) { - logger.debug(message); - } - // Stryker disable next-line all: catch block is pure error-reporting — non-critical logging - } catch (err) { - logger.warn(`Learning expiry check failed: ${err instanceof Error ? err.message : String(err)}`); - } - } - - /** - * For each bet with a runId, run FrictionAnalyzer to resolve contradiction observations. - * Writes ResolutionReflections and optionally archives/captures learnings. - * No-op when runsDir is absent or no friction analyzer is available. 
- */ - private runFrictionAnalysis(cycle: Cycle): void { - // Stryker disable next-line ConditionalExpression: guard redundant with catch in runForBetRun - if (!this.frictionAnalyzer) return; - // Stryker disable next-line StringLiteral: presentation text — label for error logging - this.runForEachBetRun(cycle, (runId) => this.frictionAnalyzer!.analyze(runId), 'Friction analysis'); - } - private writeDiaryEntry(input: { cycleId: string; cycleName?: string; diff --git a/stryker.config.mjs b/stryker.config.mjs index a1222fe..0b4c386 100644 --- a/stryker.config.mjs +++ b/stryker.config.mjs @@ -16,6 +16,7 @@ export default { 'src/cli/commands/execute.ts', 'src/features/cycle-management/bridge-run-syncer.ts', 'src/features/cycle-management/cooldown-belt-computer.ts', + 'src/features/cycle-management/cooldown-follow-up-runner.ts', 'src/features/cycle-management/cooldown-session.ts', 'src/features/kata-agent/kata-agent-confidence-calculator.ts', 'src/features/kata-agent/kata-agent-observability-aggregator.ts',