/** USD price per one million tokens for a single model. */
export interface ModelPricing {
  inputPer1M: number;
  outputPer1M: number;
}

/** Input/output token counts derived from a step's wall-clock duration. */
export interface TokenEstimate {
  inputTokens: number;
  outputTokens: number;
}

// Standard per-1M token pricing for the short model aliases used by the CLI,
// plus a few full IDs already present elsewhere in the repo.
export const MODEL_PRICING: Record<string, ModelPricing> = {
  '2.5-pro': { inputPer1M: 1.25, outputPer1M: 10 },
  'claude-opus-4': { inputPer1M: 15, outputPer1M: 75 },
  'claude-opus-4-20250514': { inputPer1M: 15, outputPer1M: 75 },
  'claude-sonnet-4': { inputPer1M: 3, outputPer1M: 15 },
  'claude-sonnet-4-20250514': { inputPer1M: 3, outputPer1M: 15 },
  'gemini-2.5-pro': { inputPer1M: 1.25, outputPer1M: 10 },
  o3: { inputPer1M: 1, outputPer1M: 4 },
  'openai/o3': { inputPer1M: 1, outputPer1M: 4 },
  'opus-4': { inputPer1M: 15, outputPer1M: 75 },
  'sonnet-4': { inputPer1M: 3, outputPer1M: 15 },
};

/** Model assumed for a CLI family when the agent name does not identify one. */
export const CLI_DEFAULT_MODEL = {
  claude: 'opus-4',
  codex: 'o3',
  gemini: '2.5-pro',
  aider: 'sonnet-4',
} as const;

// Heuristic throughput used to turn a duration into token counts.
const INPUT_TOKENS_PER_SECOND = 200;
const OUTPUT_TOKENS_PER_SECOND = 75;

/**
 * Collapses the known long-form model IDs onto the short alias used as the
 * canonical MODEL_PRICING key. Unknown IDs are returned trimmed and lowercased.
 */
function normalizeModel(model: string): string {
  const normalized = model.trim().toLowerCase();

  switch (normalized) {
    case 'gemini-2.5-pro':
    case '2.5-pro':
      return '2.5-pro';
    case 'claude-opus-4':
    case 'claude-opus-4-20250514':
    case 'opus-4':
      return 'opus-4';
    case 'claude-sonnet-4':
    case 'claude-sonnet-4-20250514':
    case 'sonnet-4':
      return 'sonnet-4';
    case 'openai/o3':
    case 'o3':
      return 'o3';
    default:
      return normalized;
  }
}

/**
 * Estimates token usage from how long a step ran.
 *
 * @param durationMs Step duration in milliseconds.
 * @returns Rounded token counts; both are 0 when the duration is not a finite
 *   positive number.
 */
export function estimateTokensFromDuration(durationMs: number): TokenEstimate {
  if (!Number.isFinite(durationMs) || durationMs <= 0) {
    return { inputTokens: 0, outputTokens: 0 };
  }

  const seconds = durationMs / 1_000;

  return {
    inputTokens: Math.round(seconds * INPUT_TOKENS_PER_SECOND),
    outputTokens: Math.round(seconds * OUTPUT_TOKENS_PER_SECOND),
  };
}

/**
 * Estimates the USD cost of a call.
 *
 * @param model Model ID or alias; matched case-insensitively after trimming.
 * @param inputTokens Input token count; negative or non-finite values count as 0.
 * @param outputTokens Output token count; negative or non-finite values count as 0.
 * @returns Cost rounded to six decimal places, or 0 when the model has no
 *   pricing entry.
 */
export function estimateCost(model: string, inputTokens: number, outputTokens: number): number {
  const key = normalizeModel(model);
  // Own-property lookup: a plain index would resolve names such as
  // "constructor" or "__proto__" to inherited members and produce NaN.
  const pricing = Object.prototype.hasOwnProperty.call(MODEL_PRICING, key) ? MODEL_PRICING[key] : undefined;
  if (!pricing) {
    return 0;
  }

  const safeInputTokens = Number.isFinite(inputTokens) ? Math.max(0, inputTokens) : 0;
  const safeOutputTokens = Number.isFinite(outputTokens) ? Math.max(0, outputTokens) : 0;
  const total =
    (safeInputTokens / 1_000_000) * pricing.inputPer1M +
    (safeOutputTokens / 1_000_000) * pricing.outputPer1M;

  return Math.round(total * 1_000_000) / 1_000_000;
}
import path from 'node:path';

import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';

import type { StepCostRecord } from './types.js';

// Hoisted so the mock object exists before vi.mock's factory runs.
const fsMock = vi.hoisted(() => ({
  appendFileSync: vi.fn(),
  existsSync: vi.fn(),
  mkdirSync: vi.fn(),
  readFileSync: vi.fn(),
}));

// The tracker imports the default export of node:fs, so only `default` is replaced.
vi.mock('node:fs', () => ({
  default: fsMock,
}));

import { MODEL_PRICING, estimateCost, estimateTokensFromDuration } from './pricing.js';
import { CostTracker } from './tracker.js';

describe('pricing', () => {
  it('estimateTokensFromDuration returns reasonable values', () => {
    expect(estimateTokensFromDuration(-1)).toEqual({ inputTokens: 0, outputTokens: 0 });
    expect(estimateTokensFromDuration(0)).toEqual({ inputTokens: 0, outputTokens: 0 });
    // 1.5 s * 75 tokens/s = 112.5, which Math.round takes up to 113.
    expect(estimateTokensFromDuration(1_500)).toEqual({ inputTokens: 300, outputTokens: 113 });
    expect(estimateTokensFromDuration(2_000)).toEqual({ inputTokens: 400, outputTokens: 150 });
  });

  // Recomputes the expected cost from each table entry, so every priced model is covered.
  it.each(Object.entries(MODEL_PRICING))('estimateCost calculates correctly for %s', (model, pricing) => {
    const inputTokens = 123_456;
    const outputTokens = 78_900;
    const expected =
      Math.round(
        ((inputTokens / 1_000_000) * pricing.inputPer1M + (outputTokens / 1_000_000) * pricing.outputPer1M) *
          1_000_000
      ) / 1_000_000;

    expect(estimateCost(model, inputTokens, outputTokens)).toBe(expected);
  });
});

describe('CostTracker', () => {
  const usageFilePath = '/tmp/agent-relay/usage.jsonl';
  // In-memory stand-in for the filesystem: path -> file contents.
  let files: Map<string, string>;

  beforeEach(() => {
    files = new Map<string, string>();
    vi.clearAllMocks();
    // Fake timers make startedAt/endedAt and the derived duration deterministic.
    vi.useFakeTimers();
    vi.setSystemTime(new Date('2026-01-01T00:00:00.000Z'));

    fsMock.mkdirSync.mockImplementation(() => undefined);
    fsMock.appendFileSync.mockImplementation((filePath: string, data: string) => {
      const key = String(filePath);
      files.set(key, `${files.get(key) ?? ''}${String(data)}`);
    });
    fsMock.existsSync.mockImplementation((filePath: string) => files.has(String(filePath)));
    fsMock.readFileSync.mockImplementation((filePath: string) => files.get(String(filePath)) ?? '');
  });

  afterEach(() => {
    vi.useRealTimers();
  });

  it('stepStarted + stepCompleted produces valid record', () => {
    const tracker = new CostTracker({ usageFilePath });

    tracker.stepStarted('run-1', 'compile', 'worker-a', 'codex exec');
    // Advance the clock by two seconds so durationMs is 2000.
    vi.setSystemTime(new Date('2026-01-01T00:00:02.000Z'));

    const record = tracker.stepCompleted('run-1', 'compile', 0);

    expect(record).toEqual({
      runId: 'run-1',
      stepName: 'compile',
      agent: 'worker-a',
      cli: 'codex exec',
      model: 'o3',
      startedAt: '2026-01-01T00:00:00.000Z',
      endedAt: '2026-01-01T00:00:02.000Z',
      durationMs: 2_000,
      estimatedInputTokens: 400,
      estimatedOutputTokens: 150,
      estimatedCostUsd: 0.001,
    });
    expect(fsMock.mkdirSync).toHaveBeenCalledWith(path.dirname(usageFilePath), { recursive: true });
    expect(files.get(usageFilePath)).toBe(`${JSON.stringify(record)}\n`);
  });

  it('records are appended to usage file', () => {
    const tracker = new CostTracker({ usageFilePath });

    tracker.stepStarted('run-1', 'first-step', 'o3', 'codex');
    vi.setSystemTime(new Date('2026-01-01T00:00:01.000Z'));
    const firstRecord = tracker.stepCompleted('run-1', 'first-step', 0);

    tracker.stepStarted('run-1', 'second-step', 'o3', 'codex');
    vi.setSystemTime(new Date('2026-01-01T00:00:03.000Z'));
    const secondRecord = tracker.stepCompleted('run-1', 'second-step', 0);

    expect(fsMock.appendFileSync).toHaveBeenCalledTimes(2);
    expect(parseJsonLines(files.get(usageFilePath) ?? '')).toEqual([firstRecord, secondRecord]);
  });

  it('getRunSummary filters by runId', () => {
    const tracker = new CostTracker({ usageFilePath });
    const firstRecord = createRecord({
      runId: 'run-1',
      stepName: 'first-step',
      startedAt: '2026-01-01T00:00:01.000Z',
      endedAt: '2026-01-01T00:00:02.000Z',
      durationMs: 1_000,
      estimatedInputTokens: 200,
      estimatedOutputTokens: 75,
      estimatedCostUsd: 0.0005,
    });
    const secondRecord = createRecord({
      runId: 'run-2',
      stepName: 'other-run-step',
      startedAt: '2026-01-01T00:00:03.000Z',
      endedAt: '2026-01-01T00:00:05.000Z',
      durationMs: 2_000,
      estimatedInputTokens: 400,
      estimatedOutputTokens: 150,
      estimatedCostUsd: 0.001,
    });
    const thirdRecord = createRecord({
      runId: 'run-1',
      stepName: 'second-step',
      startedAt: '2026-01-01T00:00:06.000Z',
      endedAt: '2026-01-01T00:00:08.000Z',
      durationMs: 2_000,
      estimatedInputTokens: 400,
      estimatedOutputTokens: 150,
      estimatedCostUsd: 0.001,
    });

    // Seeded out of order, with a parseable line that is not a cost record,
    // to exercise sorting by startedAt and shape validation.
    files.set(
      usageFilePath,
      [
        JSON.stringify(thirdRecord),
        '{"broken":true}',
        JSON.stringify(secondRecord),
        JSON.stringify(firstRecord),
      ].join('\n')
    );

    expect(tracker.getRunSummary('run-1')).toEqual({
      runId: 'run-1',
      totalCostUsd: 0.0015,
      totalDurationMs: 3_000,
      steps: [firstRecord, thirdRecord],
    });
  });
});

/** Builds a complete StepCostRecord fixture, letting a test override individual fields. */
function createRecord(overrides: Partial<StepCostRecord> = {}): StepCostRecord {
  return {
    runId: 'run-1',
    stepName: 'step',
    agent: 'worker-a',
    cli: 'codex exec',
    model: 'o3',
    startedAt: '2026-01-01T00:00:00.000Z',
    endedAt: '2026-01-01T00:00:01.000Z',
    durationMs: 1_000,
    estimatedInputTokens: 200,
    estimatedOutputTokens: 75,
    estimatedCostUsd: 0.0005,
    ...overrides,
  };
}

/** Parses JSONL text into records, ignoring empty lines. */
function parseJsonLines(contents: string): StepCostRecord[] {
  return contents
    .trim()
    .split('\n')
    .filter(Boolean)
    .map((line) => JSON.parse(line) as StepCostRecord);
}
import fs from 'node:fs';
import os from 'node:os';
import path from 'node:path';

import { CLI_DEFAULT_MODEL, MODEL_PRICING, estimateCost, estimateTokensFromDuration } from './pricing.js';
import type { CostTrackerOptions, RunCostSummary, StepCostRecord } from './types.js';

/** In-memory state for a step that has started but not yet completed. */
interface StartedStep {
  runId: string;
  stepName: string;
  agent: string;
  cli: string;
  model: string;
  startedAt: string;
  startedAtMs: number;
}

const DEFAULT_USAGE_FILE_PATH = path.join(os.homedir(), '.agent-relay', 'usage.jsonl');

/**
 * Records per-step duration and an estimated cost for workflow runs,
 * persisting one JSON object per line to a usage file.
 */
export class CostTracker {
  private readonly usageFilePath: string;
  private readonly startedSteps = new Map<string, StartedStep>();

  /**
   * @param options.usageFilePath Destination JSONL file; a leading `~/` is
   *   expanded to the home directory. Defaults to `~/.agent-relay/usage.jsonl`.
   */
  constructor(options: CostTrackerOptions = {}) {
    this.usageFilePath = resolveUsageFilePath(options.usageFilePath);
  }

  /**
   * Marks a step as started. Starting the same run/step pair again replaces
   * the previous start entry.
   */
  stepStarted(runId: string, stepName: string, agent: string, cli: string): void {
    const startedAt = new Date();

    this.startedSteps.set(this.getStepKey(runId, stepName), {
      runId,
      stepName,
      agent,
      cli,
      model: resolveModel(agent, cli),
      startedAt: startedAt.toISOString(),
      startedAtMs: startedAt.getTime(),
    });
  }

  /**
   * Completes a started step, appends its record to the usage file and
   * returns it.
   *
   * @param _exitCode Currently unused.
   * @throws Error when no matching `stepStarted` call was recorded.
   */
  stepCompleted(runId: string, stepName: string, _exitCode: number): StepCostRecord {
    const stepKey = this.getStepKey(runId, stepName);
    const startedStep = this.startedSteps.get(stepKey);

    if (!startedStep) {
      throw new Error(`No started step found for run "${runId}" and step "${stepName}"`);
    }

    const endedAt = new Date();
    // Clamp so a clock that moved backwards never yields a negative duration.
    const durationMs = Math.max(0, endedAt.getTime() - startedStep.startedAtMs);
    const tokenEstimate = estimateTokensFromDuration(durationMs);
    const record: StepCostRecord = {
      runId: startedStep.runId,
      stepName: startedStep.stepName,
      agent: startedStep.agent,
      cli: startedStep.cli,
      model: startedStep.model,
      startedAt: startedStep.startedAt,
      endedAt: endedAt.toISOString(),
      durationMs,
      estimatedInputTokens: tokenEstimate.inputTokens,
      estimatedOutputTokens: tokenEstimate.outputTokens,
      estimatedCostUsd: estimateCost(startedStep.model, tokenEstimate.inputTokens, tokenEstimate.outputTokens),
    };

    // The start entry is removed only after the write succeeds.
    this.appendRecord(record);
    this.startedSteps.delete(stepKey);

    return record;
  }

  /**
   * Reads the usage file and aggregates every valid record for `runId`,
   * ordered by `startedAt`.
   */
  getRunSummary(runId: string): RunCostSummary {
    const steps = this.readRecords()
      .filter((record) => record.runId === runId)
      .sort((left, right) => left.startedAt.localeCompare(right.startedAt));

    const totalCostUsd = roundUsd(steps.reduce((sum, step) => sum + step.estimatedCostUsd, 0));
    const totalDurationMs = steps.reduce((sum, step) => sum + step.durationMs, 0);

    return {
      runId,
      totalCostUsd,
      totalDurationMs,
      steps,
    };
  }

  /** Appends one record as a JSON line, creating the parent directory if needed. */
  private appendRecord(record: StepCostRecord): void {
    fs.mkdirSync(path.dirname(this.usageFilePath), { recursive: true });
    fs.appendFileSync(this.usageFilePath, `${JSON.stringify(record)}\n`, 'utf8');
  }

  private getStepKey(runId: string, stepName: string): string {
    return `${runId}:${stepName}`;
  }

  /** Loads every well-formed record from the usage file; missing file yields []. */
  private readRecords(): StepCostRecord[] {
    if (!fs.existsSync(this.usageFilePath)) {
      return [];
    }

    const raw = fs.readFileSync(this.usageFilePath, 'utf8').trim();
    if (!raw) {
      return [];
    }

    const records: StepCostRecord[] = [];

    for (const line of raw.split('\n')) {
      try {
        const parsed = JSON.parse(line) as unknown;
        if (isStepCostRecord(parsed)) {
          records.push(parsed);
        }
      } catch {
        // Ignore malformed lines so older or partial records do not block summaries.
      }
    }

    return records;
  }
}

/** True when `key` is an own (non-inherited) property of `table`. */
function hasOwnKey(table: object, key: string): boolean {
  return Object.prototype.hasOwnProperty.call(table, key);
}

/** Expands a leading `~` and falls back to the default usage file path. */
function resolveUsageFilePath(usageFilePath?: string): string {
  if (!usageFilePath) {
    return DEFAULT_USAGE_FILE_PATH;
  }

  // NOTE(review): a bare "~" resolves to the home directory itself, which is
  // not a writable file path — confirm whether this input should be supported.
  if (usageFilePath === '~') {
    return os.homedir();
  }

  if (/^~[\\/]/.test(usageFilePath)) {
    return path.join(os.homedir(), usageFilePath.slice(2));
  }

  return usageFilePath;
}

/**
 * Picks the model to price a step with: the agent name when it is itself a
 * priced model ID, otherwise the CLI family's default model, otherwise the
 * normalized agent name (or `'unknown'` when that is empty).
 */
function resolveModel(agent: string, cli: string): string {
  const normalizedAgent = normalizeValue(agent);
  // Own-property checks: the `in` operator also matches inherited keys such
  // as "constructor", which would return a non-string "model".
  if (normalizedAgent && hasOwnKey(MODEL_PRICING, normalizedAgent)) {
    return normalizedAgent;
  }

  const normalizedCli = normalizeCli(cli);
  if (normalizedCli && hasOwnKey(CLI_DEFAULT_MODEL, normalizedCli)) {
    return CLI_DEFAULT_MODEL[normalizedCli as keyof typeof CLI_DEFAULT_MODEL];
  }

  return normalizedAgent || 'unknown';
}

/**
 * Reduces a CLI command line to its family name: first whitespace-separated
 * word, directory stripped, anything after the first `:` dropped.
 */
function normalizeCli(cli: string): string {
  const normalized = normalizeValue(cli);
  if (!normalized) {
    return '';
  }

  const command = normalized.split(/\s+/, 1)[0] ?? '';
  const binary = path.basename(command);
  const family = binary.split(':', 1)[0] ?? '';

  return family;
}

function normalizeValue(value: string): string {
  return value.trim().toLowerCase();
}

/** Rounds a USD amount to six decimal places. */
function roundUsd(value: number): number {
  return Math.round(value * 1_000_000) / 1_000_000;
}

/** Runtime shape check for a value parsed from the usage file. */
function isStepCostRecord(value: unknown): value is StepCostRecord {
  if (!value || typeof value !== 'object') {
    return false;
  }

  const record = value as Partial<StepCostRecord>;

  return (
    typeof record.runId === 'string' &&
    typeof record.stepName === 'string' &&
    typeof record.agent === 'string' &&
    typeof record.cli === 'string' &&
    typeof record.model === 'string' &&
    typeof record.startedAt === 'string' &&
    typeof record.endedAt === 'string' &&
    typeof record.durationMs === 'number' &&
    typeof record.estimatedInputTokens === 'number' &&
    typeof record.estimatedOutputTokens === 'number' &&
    typeof record.estimatedCostUsd === 'number'
  );
}
100644 index 000000000..3ce69d42b --- /dev/null +++ b/src/cost/types.ts @@ -0,0 +1,24 @@ +export interface StepCostRecord { + runId: string; + stepName: string; + agent: string; + cli: string; + model: string; + startedAt: string; + endedAt: string; + durationMs: number; + estimatedInputTokens: number; + estimatedOutputTokens: number; + estimatedCostUsd: number; +} + +export interface RunCostSummary { + runId: string; + totalCostUsd: number; + totalDurationMs: number; + steps: StepCostRecord[]; +} + +export interface CostTrackerOptions { + usageFilePath?: string; +} From 2e03582ae6a7b65e719b214792d11bab74155a24 Mon Sep 17 00:00:00 2001 From: Khaliq Date: Tue, 31 Mar 2026 15:00:22 +0200 Subject: [PATCH 2/4] fix: use @agent-relay/config registry for model pricing Import DefaultModels from the generated CLI registry instead of hardcoding model IDs. Covers all CLIs (claude, codex, gemini, cursor, droid, opencode) with proper pricing and model normalization for reasoning effort suffixes and openai/ prefix variations. Co-Authored-By: Claude Opus 4.6 (1M context) --- src/cost/pricing.ts | 117 ++++++++++++++++++++++++++++++-------------- 1 file changed, 80 insertions(+), 37 deletions(-) diff --git a/src/cost/pricing.ts b/src/cost/pricing.ts index 32afcfb3b..43f0d11b1 100644 --- a/src/cost/pricing.ts +++ b/src/cost/pricing.ts @@ -1,3 +1,5 @@ +import { DefaultModels } from '@agent-relay/config'; + export interface ModelPricing { inputPer1M: number; outputPer1M: number; @@ -8,52 +10,93 @@ export interface TokenEstimate { outputTokens: number; } -// Standard per-1M token pricing for the short model aliases used by the CLI, -// plus a few full IDs already present elsewhere in the repo. +// Per-1M token pricing. Model IDs match @agent-relay/config cli-registry. +// Pricing is approximate and should be updated as providers change rates. 
// Per-1M token pricing. Model IDs match @agent-relay/config cli-registry.
// Pricing is approximate and should be updated as providers change rates.
export const MODEL_PRICING: Record<string, ModelPricing> = {
  // ── Anthropic (Claude Code, Cursor, Droid) ──────────────────────
  'sonnet': { inputPer1M: 3, outputPer1M: 15 },
  'opus': { inputPer1M: 15, outputPer1M: 75 },
  'haiku': { inputPer1M: 0.80, outputPer1M: 4 },
  'opus-4.6': { inputPer1M: 15, outputPer1M: 75 },
  'opus-4.6-fast': { inputPer1M: 15, outputPer1M: 75 },
  'opus-4.6-thinking': { inputPer1M: 15, outputPer1M: 75 },
  'opus-4.5': { inputPer1M: 15, outputPer1M: 75 },
  'opus-4.5-thinking': { inputPer1M: 15, outputPer1M: 75 },
  'sonnet-4.6': { inputPer1M: 3, outputPer1M: 15 },
  'sonnet-4.6-thinking': { inputPer1M: 3, outputPer1M: 15 },
  'sonnet-4.5': { inputPer1M: 3, outputPer1M: 15 },
  'sonnet-4.5-thinking': { inputPer1M: 3, outputPer1M: 15 },
  'haiku-4.5': { inputPer1M: 0.80, outputPer1M: 4 },

  // ── OpenAI (Codex, Cursor, OpenCode) ────────────────────────────
  'gpt-5.4': { inputPer1M: 2.50, outputPer1M: 10 },
  'gpt-5.3-codex': { inputPer1M: 2.50, outputPer1M: 10 },
  'gpt-5.3-codex-spark': { inputPer1M: 1.50, outputPer1M: 6 },
  'gpt-5.2-codex': { inputPer1M: 2.50, outputPer1M: 10 },
  'gpt-5.2': { inputPer1M: 2.50, outputPer1M: 10 },
  'gpt-5.1-codex-max': { inputPer1M: 2.50, outputPer1M: 10 },
  'gpt-5.1-codex-mini': { inputPer1M: 0.75, outputPer1M: 3 },
  'openai/gpt-5.2': { inputPer1M: 2.50, outputPer1M: 10 },
  'openai/gpt-5.4': { inputPer1M: 2.50, outputPer1M: 10 },
  'openai/o3': { inputPer1M: 1, outputPer1M: 4 },
  'openai/o3-mini': { inputPer1M: 0.55, outputPer1M: 2.20 },
  'openai/o4-mini': { inputPer1M: 0.55, outputPer1M: 2.20 },

  // ── Google (Gemini CLI) ─────────────────────────────────────────
  'gemini-3.1-pro-preview': { inputPer1M: 1.25, outputPer1M: 10 },
  'gemini-3-flash-preview': { inputPer1M: 0.15, outputPer1M: 0.60 },
  'gemini-2.5-pro': { inputPer1M: 1.25, outputPer1M: 10 },
  'gemini-2.5-flash': { inputPer1M: 0.15, outputPer1M: 0.60 },
  'gemini-2.5-flash-lite': { inputPer1M: 0.075, outputPer1M: 0.30 },

  // ── Cursor composite models ─────────────────────────────────────
  'composer-1.5': { inputPer1M: 3, outputPer1M: 15 },
  'composer-1': { inputPer1M: 3, outputPer1M: 15 },

  // ── Droid ───────────────────────────────────────────────────────
  'droid-core-glm-4.7': { inputPer1M: 0.50, outputPer1M: 2 },
};

// CLI → default model mapping from the registry
export const CLI_DEFAULT_MODEL: Record<string, string> = { ...DefaultModels };

// Rough token estimation from step duration.
// Heuristic: ~200 input tokens/sec (context loading) + ~75 output tokens/sec (generation).
const INPUT_TOKENS_PER_SECOND = 200;
const OUTPUT_TOKENS_PER_SECOND = 75;

// Reasoning-effort suffixes appended to Cursor model IDs. The "-fast" forms
// come first so the longest suffix is the one that gets stripped.
const EFFORT_SUFFIXES = [
  '-xhigh-fast',
  '-xhigh',
  '-high-fast',
  '-high',
  '-medium-fast',
  '-medium',
  '-low-fast',
  '-low',
] as const;

/** True when `id` is an own key of MODEL_PRICING (inherited names never match). */
function hasPricing(id: string): boolean {
  return Object.prototype.hasOwnProperty.call(MODEL_PRICING, id);
}

/**
 * Finds the MODEL_PRICING key for `id`, trying the ID as given, then with an
 * `openai/` prefix removed, then with one added. Returns undefined on no match.
 */
function matchPricingKey(id: string): string | undefined {
  if (hasPricing(id)) return id;

  if (id.startsWith('openai/')) {
    const stripped = id.slice('openai/'.length);
    if (hasPricing(stripped)) return stripped;
  } else if (!id.includes('/') && hasPricing(`openai/${id}`)) {
    return `openai/${id}`;
  }

  return undefined;
}

/**
 * Normalize model ID to match MODEL_PRICING keys.
 * Handles common aliases and prefix variations.
 *
 * Effort-suffix stripping and `openai/` prefix handling are combined, so an ID
 * such as "openai/gpt-5.3-codex-xhigh" resolves to "gpt-5.3-codex". IDs with no
 * pricing entry are returned trimmed and lowercased.
 */
function normalizeModel(model: string): string {
  const m = model.trim().toLowerCase();

  // Strip reasoning effort suffixes from Cursor model IDs
  // e.g. "gpt-5.4-xhigh" → "gpt-5.4", "gpt-5.3-codex-xhigh-fast" → "gpt-5.3-codex"
  if (m.includes('gpt-')) {
    const suffix = EFFORT_SUFFIXES.find((candidate) => m.endsWith(candidate));
    if (suffix) {
      const match = matchPricingKey(m.slice(0, -suffix.length));
      if (match) return match;
    }
  }

  return matchPricingKey(m) ?? m;
}
import { createHash } from 'node:crypto';
import { EventEmitter } from 'node:events';
import type { ClientRequest } from 'node:http';
import path from 'node:path';
import { Readable, Writable } from 'node:stream';
import { afterEach, beforeAll, beforeEach, describe, expect, it, vi } from 'vitest';

// Everything referenced from a vi.mock factory is created with vi.hoisted so
// it exists before the hoisted mock factories run.
const TEST_HOME = vi.hoisted(() => '/tmp/agent-relay-relayfile-binary-test-home');
const ORIGINAL_RELAYFILE_ROOT = process.env.RELAYFILE_ROOT;
const platformMock = vi.hoisted(() => vi.fn(() => 'linux'));
const archMock = vi.hoisted(() => vi.fn(() => 'x64'));
const homedirMock = vi.hoisted(() => vi.fn(() => '/tmp/agent-relay-relayfile-binary-test-home'));
const httpsGetMock = vi.hoisted(() => vi.fn());
const fsMocks = vi.hoisted(() => ({
  accessSync: vi.fn(),
  chmodSync: vi.fn(),
  createWriteStream: vi.fn(),
  existsSync: vi.fn(),
  mkdirSync: vi.fn(),
  readFileSync: vi.fn(),
  renameSync: vi.fn(),
  rmSync: vi.fn(),
  writeFileSync: vi.fn(),
}));

vi.mock('node:os', async () => {
  const actual = await vi.importActual<typeof import('node:os')>('node:os');
  return {
    ...actual,
    arch: archMock,
    homedir: homedirMock,
    platform: platformMock,
    default: {
      ...actual,
      arch: archMock,
      homedir: homedirMock,
      platform: platformMock,
    },
  };
});

vi.mock('node:https', async () => {
  const actual = await vi.importActual<typeof import('node:https')>('node:https');
  return {
    ...actual,
    get: httpsGetMock,
    default: {
      ...actual,
      get: httpsGetMock,
    },
  };
});

// NOTE(review): unlike the os/https mocks this one does not override `default`;
// that only works if the module under test uses named imports — confirm.
vi.mock('node:fs', async () => {
  const actual = await vi.importActual<typeof import('node:fs')>('node:fs');
  return {
    ...actual,
    accessSync: fsMocks.accessSync,
    chmodSync: fsMocks.chmodSync,
    createWriteStream: fsMocks.createWriteStream,
    existsSync: fsMocks.existsSync,
    mkdirSync: fsMocks.mkdirSync,
    readFileSync: fsMocks.readFileSync,
    renameSync: fsMocks.renameSync,
    rmSync: fsMocks.rmSync,
    writeFileSync: fsMocks.writeFileSync,
  };
});

import { ensureRelayfileMountBinary } from './relayfile-binary.js';

/** One canned HTTPS response; `url`, when set, is asserted against the request. */
type QueuedResponse = {
  body?: Buffer | string;
  headers?: Record<string, string>;
  statusCode?: number;
  url?: RegExp | string;
};

let realFs: typeof import('node:fs');
let requestedUrls: string[] = [];
let queuedResponses: QueuedResponse[] = [];

/** Expected cache locations, under RELAYFILE_ROOT/bin or ~/.agent-relay/bin. */
function getCachePaths(relayfileRoot?: string) {
  const cacheDir = relayfileRoot ? path.join(relayfileRoot, 'bin') : path.join(TEST_HOME, '.agent-relay', 'bin');
  return {
    cacheDir,
    cachePath: path.join(cacheDir, 'relayfile-mount'),
    versionPath: path.join(cacheDir, 'relayfile-mount.version'),
  };
}

/** Queues the response returned by the next mocked https.get call (FIFO). */
function queueResponse(response: QueuedResponse): void {
  queuedResponses.push(response);
}

function sha256(value: Buffer | string): string {
  return createHash('sha256').update(value).digest('hex');
}

beforeAll(async () => {
  realFs = await vi.importActual<typeof import('node:fs')>('node:fs');
});

beforeEach(() => {
  requestedUrls = [];
  queuedResponses = [];
  // Start every test from an empty fake home directory on the real filesystem.
  realFs.rmSync(TEST_HOME, { recursive: true, force: true });
  realFs.mkdirSync(TEST_HOME, { recursive: true });
  delete process.env.RELAYFILE_ROOT;

  platformMock.mockReset();
  archMock.mockReset();
  homedirMock.mockReset();
  httpsGetMock.mockReset();
  Object.values(fsMocks).forEach((mock) => mock.mockReset());

  platformMock.mockReturnValue('linux');
  archMock.mockReturnValue('x64');
  homedirMock.mockReturnValue(TEST_HOME);

  // fs mocks delegate to the real implementation, except createWriteStream.
  fsMocks.accessSync.mockImplementation(realFs.accessSync as any);
  fsMocks.chmodSync.mockImplementation(realFs.chmodSync as any);
  fsMocks.createWriteStream.mockImplementation((filePath: string, options?: { mode?: number }) => {
    // Buffer all chunks and write the file once when the stream finishes.
    const chunks: Buffer[] = [];
    const stream = new Writable({
      final(callback) {
        realFs.writeFileSync(filePath, Buffer.concat(chunks), { mode: options?.mode });
        callback();
      },
      write(chunk, _encoding, callback) {
        // Buffer.from copies Buffers and converts strings alike.
        chunks.push(Buffer.from(chunk));
        callback();
      },
    }) as Writable & { close: (callback: () => void) => void };

    stream.close = (callback: () => void) => {
      callback();
    };

    return stream as any;
  });
  fsMocks.existsSync.mockImplementation(realFs.existsSync as any);
  fsMocks.mkdirSync.mockImplementation(realFs.mkdirSync as any);
  fsMocks.readFileSync.mockImplementation(realFs.readFileSync as any);
  fsMocks.renameSync.mockImplementation(realFs.renameSync as any);
  fsMocks.rmSync.mockImplementation(realFs.rmSync as any);
  fsMocks.writeFileSync.mockImplementation(realFs.writeFileSync as any);

  httpsGetMock.mockImplementation((url: string | URL, callback: (res: Readable) => void) => {
    const currentUrl = String(url);
    requestedUrls.push(currentUrl);

    const nextResponse = queuedResponses.shift();
    if (!nextResponse) {
      throw new Error(`Unexpected https.get call for ${currentUrl}`);
    }

    if (typeof nextResponse.url === 'string') {
      expect(currentUrl).toBe(nextResponse.url);
    } else if (nextResponse.url) {
      expect(currentUrl).toMatch(nextResponse.url);
    }

    const response = Readable.from(nextResponse.body === undefined ? [] : [nextResponse.body]) as Readable & {
      headers: Record<string, string>;
      statusCode?: number;
    };
    response.statusCode = nextResponse.statusCode ?? 200;
    response.headers = nextResponse.headers ?? {};

    const request = new EventEmitter() as ClientRequest;
    // Deliver the response asynchronously, after the caller has the request object.
    queueMicrotask(() => {
      callback(response);
    });

    return request;
  });
});

afterEach(() => {
  realFs.rmSync(TEST_HOME, { recursive: true, force: true });
  // Restore the environment variable exactly as it was before the suite ran.
  if (ORIGINAL_RELAYFILE_ROOT === undefined) {
    delete process.env.RELAYFILE_ROOT;
  } else {
    process.env.RELAYFILE_ROOT = ORIGINAL_RELAYFILE_ROOT;
  }
});

describe('ensureRelayfileMountBinary', () => {
  it('downloads the platform-specific binary and writes it to the cache', async () => {
    const binaryName = 'relayfile-mount-linux-amd64';
    queueResponse({
      body: 'relayfile-binary',
      url: /\/relayfile-mount-linux-amd64$/,
    });
    queueResponse({
      body: `${sha256('relayfile-binary')} ${binaryName}\n`,
      url: /\/checksums\.txt$/,
    });

    const installedPath = await ensureRelayfileMountBinary();
    const { cachePath, versionPath } = getCachePaths();

    expect(installedPath).toBe(cachePath);
    expect(requestedUrls).toHaveLength(2);
    expect(requestedUrls[0]).toMatch(/\/relayfile-mount-linux-amd64$/);
    expect(requestedUrls[1]).toMatch(/\/checksums\.txt$/);
    expect(realFs.readFileSync(cachePath, 'utf8')).toBe('relayfile-binary');
    expect(realFs.readFileSync(versionPath, 'utf8')).toBe('0.1.6\n');
  });

  it('reuses the cached binary when the version matches', async () => {
    const { cacheDir, cachePath, versionPath } = getCachePaths();
    realFs.mkdirSync(cacheDir, { recursive: true });
    realFs.writeFileSync(cachePath, 'cached-binary', 'utf8');
    realFs.chmodSync(cachePath, 0o755);
    realFs.writeFileSync(versionPath, '0.1.6\n', 'utf8');

    await expect(ensureRelayfileMountBinary()).resolves.toBe(cachePath);
    expect(httpsGetMock).not.toHaveBeenCalled();
    expect(realFs.readFileSync(cachePath, 'utf8')).toBe('cached-binary');
  });

  it('installs the binary under RELAYFILE_ROOT/bin when overridden', async () => {
    const relayfileRoot = path.join(TEST_HOME, 'custom-relayfile');
    const binaryName = 'relayfile-mount-linux-amd64';
    process.env.RELAYFILE_ROOT = relayfileRoot;
    queueResponse({
      body: 'relayfile-binary',
      url: /\/relayfile-mount-linux-amd64$/,
    });
    queueResponse({
      body: `${sha256('relayfile-binary')} ${binaryName}\n`,
      url: /\/checksums\.txt$/,
    });

    const installedPath = await ensureRelayfileMountBinary();
    const { cachePath, versionPath } = getCachePaths(relayfileRoot);

    expect(installedPath).toBe(cachePath);
    expect(realFs.readFileSync(cachePath, 'utf8')).toBe('relayfile-binary');
    expect(realFs.readFileSync(versionPath, 'utf8')).toBe('0.1.6\n');
    // Nothing may be written to the default cache location.
    expect(realFs.existsSync(getCachePaths().cachePath)).toBe(false);
  });

  it('throws when the downloaded binary checksum does not match', async () => {
    const binaryName = 'relayfile-mount-linux-amd64';
    const { cacheDir, cachePath, versionPath } = getCachePaths();
    queueResponse({
      body: 'corrupt-binary',
      url: /\/relayfile-mount-linux-amd64$/,
    });
    queueResponse({
      body: `${'0'.repeat(64)} ${binaryName}\n`,
      url: /\/checksums\.txt$/,
    });

    await expect(ensureRelayfileMountBinary()).rejects.toThrow(
      `Checksum mismatch for ${binaryName}: expected ${'0'.repeat(64)}, got ${sha256('corrupt-binary')}`
    );

    // A failed verification must leave no binary, version file or temp download behind.
    expect(realFs.existsSync(cachePath)).toBe(false);
    expect(realFs.existsSync(versionPath)).toBe(false);
    expect(realFs.existsSync(cacheDir) ? realFs.readdirSync(cacheDir).filter((entry) => entry.includes('.download')) : []).toEqual([]);
  });
});
path.join(process.env.RELAYFILE_ROOT, 'bin', 'relayfile-mount') + : await ensureRelayfileMountBinary(); if (!existsSync(mountBin)) { throw new Error(`missing relayfile mount binary: ${mountBin}`);