From 70cc240a91485452ef892b051f66785282f3b4b8 Mon Sep 17 00:00:00 2001 From: Vadym Date: Sun, 5 Apr 2026 07:11:42 +0000 Subject: [PATCH] fix(routing): strip unsupported params when routing to Haiku Requests originating from Opus (with 1M context, effort, thinking) were failing with 400 errors when the proxy routed them to Haiku. Three issues fixed: - context-1m beta header: only stripped for Sonnet, now stripped for all non-Opus models (Haiku rejects it too) - output_config.effort: Claude Code sends {"effort":"high"} inside output_config which Haiku does not support. Strip it in both native and cascade handlers. - cascade handler: was passing raw ctx (with context-1m) instead of stripped ctx to forwardNativeAnthropicRequest. Also uses copy-on-write for output_config so cascade escalation to Sonnet/Opus retains effort. Co-Authored-By: Claude Opus 4.6 (1M context) --- __tests__/strip-unsupported-params.test.ts | 34 +++++++++++ src/standalone-proxy.ts | 68 ++++++++++++++++++---- 2 files changed, 90 insertions(+), 12 deletions(-) diff --git a/__tests__/strip-unsupported-params.test.ts b/__tests__/strip-unsupported-params.test.ts index 0ad9fef..2c77fca 100644 --- a/__tests__/strip-unsupported-params.test.ts +++ b/__tests__/strip-unsupported-params.test.ts @@ -67,6 +67,40 @@ describe('thinking stripping for Haiku models', () => { }); }); +describe('effort / output_config.effort stripping for Haiku models', () => { + it('strips top-level effort when model is Haiku', () => { + const content = getDistContent(); + // The stripping logic must check isHaikuModel and reference effort + expect(content).toMatch(/isHaikuModel.*effort|effort.*isHaikuModel/s); + }); + + it('strips output_config.effort when model is Haiku', () => { + const content = getDistContent(); + expect(content).toContain('output_config'); + // Must reference effort inside output_config handling near isHaikuModel + 
expect(content).toMatch(/output_config.*effort.*isHaikuModel|isHaikuModel[\s\S]*?output_config[\s\S]*?effort/); + }); + + it('logs when output_config.effort is stripped', () => { + const content = getDistContent(); + expect(content).toContain('output_config.effort'); + expect(content).toContain('Haiku does not support effort'); + }); +}); + +describe('context-1m beta header stripping for non-Opus models', () => { + it('strips context-1m for non-Opus models (not just Sonnet)', () => { + const content = getDistContent(); + // The condition must use !...includes("opus"), not includes("sonnet") + expect(content).toMatch(/!targetModel\.includes\(['"]opus['"]\).*context-1m/); + }); + + it('strips context-1m in cascade handler for non-Opus models', () => { + const content = getDistContent(); + expect(content).toMatch(/!resolved\.model\.includes\(['"]opus['"]\).*context-1m/); + }); +}); + describe('OAT beta flag stripping in header builders', () => { it('filters OAT_UNSUPPORTED_BETA_FLAGS in buildAnthropicHeadersWithAuth', () => { const content = getDistContent(); diff --git a/src/standalone-proxy.ts b/src/standalone-proxy.ts index 31f9ff7..263db4a 100644 --- a/src/standalone-proxy.ts +++ b/src/standalone-proxy.ts @@ -5360,11 +5360,10 @@ export async function startProxy(config: ProxyConfig = {}): Promise } } - // Strip 1M context beta header when routing to Sonnet. - // Sonnet 1M requires "extra usage" on Max plan; without it Anthropic rejects the request. - // Opus 1M is included in Max plan. Stripping the beta falls back to Sonnet's 200K window. + // Strip 1M context beta header when routing to non-Opus models. + // Only Opus supports 1M context on Max plan; Sonnet/Haiku reject the beta header. 
let effectiveCtx = ctx; - if (targetModel.includes('sonnet') && ctx.betaHeaders?.includes('context-1m')) { + if (!targetModel.includes('opus') && ctx.betaHeaders?.includes('context-1m')) { effectiveCtx = { ...ctx, betaHeaders: ctx.betaHeaders @@ -5373,7 +5372,7 @@ export async function startProxy(config: ProxyConfig = {}): Promise .filter(b => !b.startsWith('context-1m')) .join(',') || undefined, }; - log(`Stripped 1M context beta from Sonnet request (requires extra usage on Max plan)`); + log(`Stripped 1M context beta from ${targetModel} request (only Opus supports 1M context)`); } if ( @@ -5571,22 +5570,53 @@ export async function startProxy(config: ProxyConfig = {}): Promise _strippedThinking = true; log(`Stripped thinking from request (${resolved.model} does not support extended thinking, originally requested: ${requestedModel})`); } + if (isHaikuModel(resolved.model)) { + if ('effort' in attemptBody) { + delete attemptBody.effort; + log(`Stripped effort param from ${resolved.model} request (Haiku does not support effort)`); + } + // Copy-on-write: don't mutate the shared nested object so cascade + // escalation to Sonnet/Opus still sees the original output_config. 
+ const cascOutputConfig = attemptBody['output_config'] as Record | undefined; + if (cascOutputConfig && 'effort' in cascOutputConfig) { + const { effort: _e, ...restConfig } = cascOutputConfig; + if (Object.keys(restConfig).length === 0) { + delete attemptBody['output_config']; + } else { + attemptBody['output_config'] = restConfig; + } + log(`Stripped output_config.effort from ${resolved.model} cascade request (Haiku does not support effort)`); + } + } + // Strip 1M context beta for non-Opus models in cascade + let cascadeCtx = ctx; + if (!resolved.model.includes('opus') && ctx.betaHeaders?.includes('context-1m')) { + cascadeCtx = { + ...ctx, + betaHeaders: ctx.betaHeaders + .split(',') + .map(b => b.trim()) + .filter(b => !b.startsWith('context-1m')) + .join(',') || undefined, + }; + log(`Stripped 1M context beta from ${resolved.model} cascade request (only Opus supports 1M context)`); + } // Hybrid auth: use MAX token for Opus models, API key for others const modelAuth = getAuthForModel(resolved.model, proxyConfig.auth, useAnthropicEnvKey); if (modelAuth.isMax) { log(`Using MAX token for ${resolved.model}`); } // Log OAT beta flag stripping if applicable - const cascadeEffectiveToken = ctx.authHeader?.replace(/^Bearer\s+/i, '') ?? ctx.apiKeyHeader ?? modelAuth.apiKey ?? ''; - const cascadeLocalStrippedBeta = cascadeEffectiveToken.startsWith('sk-ant-oat') && ctx.betaHeaders - ? ctx.betaHeaders.split(',').map(b => b.trim()).filter(b => OAT_UNSUPPORTED_BETA_FLAGS.has(b)) + const cascadeEffectiveToken = cascadeCtx.authHeader?.replace(/^Bearer\s+/i, '') ?? cascadeCtx.apiKeyHeader ?? modelAuth.apiKey ?? ''; + const cascadeLocalStrippedBeta = cascadeEffectiveToken.startsWith('sk-ant-oat') && cascadeCtx.betaHeaders + ? 
cascadeCtx.betaHeaders.split(',').map(b => b.trim()).filter(b => OAT_UNSUPPORTED_BETA_FLAGS.has(b)) : []; if (cascadeLocalStrippedBeta.length > 0) { _strippedBetaFlags = cascadeLocalStrippedBeta; log(`Stripped OAT-unsupported beta flags from request: ${cascadeLocalStrippedBeta.join(', ')}`); } const isCascadeRerouted = resolved.model !== originalModel; - const providerResponse = await forwardNativeAnthropicRequest(attemptBody, ctx, modelAuth.apiKey, modelAuth.isMax, isCascadeRerouted); + const providerResponse = await forwardNativeAnthropicRequest(attemptBody, cascadeCtx, modelAuth.apiKey, modelAuth.isMax, isCascadeRerouted); const responseData = (await providerResponse.json()) as Record; if (!providerResponse.ok) { if (proxyConfig.reliability?.cooldowns?.enabled) { @@ -5643,6 +5673,20 @@ export async function startProxy(config: ProxyConfig = {}): Promise _strippedThinking = true; log(`Stripped thinking from request (${finalModel} does not support extended thinking, originally requested: ${requestedModel})`); } + if (isHaikuModel(finalModel)) { + if ('effort' in _nativeReqBody) { + delete _nativeReqBody.effort; + log(`Stripped effort param from ${finalModel} request (Haiku does not support effort)`); + } + const outputConfig = _nativeReqBody['output_config'] as Record | undefined; + if (outputConfig && 'effort' in outputConfig) { + // Copy-on-write, mirroring the cascade handler: rebuild output_config instead of deleting from the nested object, which may be shared with the caller's original body. + const { effort: _e, ...restConfig } = outputConfig; + if (Object.keys(restConfig).length === 0) delete _nativeReqBody['output_config']; + else _nativeReqBody['output_config'] = restConfig; + log(`Stripped output_config.effort from ${finalModel} request (Haiku does not support effort)`); + } + } // Log OAT beta flag stripping if applicable const _nativeEffectiveToken = _poolSelectedToken @@ -6480,9 +6524,9 @@ export async function startProxy(config: ProxyConfig = {}): Promise } } - // Strip 1M context beta header when routing to Sonnet (same as native handler above) + // Strip 1M context beta header when routing to non-Opus models (same as native handler above) let effectiveCtx = ctx; - if 
(targetModel.includes('sonnet') && ctx.betaHeaders?.includes('context-1m')) { + if (!targetModel.includes('opus') && ctx.betaHeaders?.includes('context-1m')) { effectiveCtx = { ...ctx, betaHeaders: ctx.betaHeaders @@ -6491,7 +6535,7 @@ export async function startProxy(config: ProxyConfig = {}): Promise .filter(b => !b.startsWith('context-1m')) .join(',') || undefined, }; - log(`Stripped 1M context beta from Sonnet request (requires extra usage on Max plan)`); + log(`Stripped 1M context beta from ${targetModel} request (only Opus supports 1M context)`); } // ── Ollama routing: intercept before cloud dispatch ──