Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 34 additions & 0 deletions __tests__/strip-unsupported-params.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,40 @@ describe('thinking stripping for Haiku models', () => {
});
});

/**
 * Static checks on the text returned by getDistContent(): the Haiku-specific
 * stripping of `effort` / `output_config.effort` must be present in it.
 * Nothing is executed here; every assertion is a text or regex match.
 */
describe('effort / output_config.effort stripping for Haiku models', () => {
  it('strips top-level effort when model is Haiku', () => {
    // The dotAll (`s`) flag lets `.` cross newlines, so the Haiku guard and
    // the effort reference may appear on different lines, in either order.
    const haikuEffortPattern = /isHaikuModel.*effort|effort.*isHaikuModel/s;
    expect(getDistContent()).toMatch(haikuEffortPattern);
  });

  it('strips output_config.effort when model is Haiku', () => {
    const distText = getDistContent();
    // Either output_config…effort…isHaikuModel on a single line, or
    // isHaikuModel followed (lazily, across lines) by output_config and effort.
    const nestedEffortPattern =
      /output_config.*effort.*isHaikuModel|isHaikuModel[\s\S]*?output_config[\s\S]*?effort/;

    expect(distText).toContain('output_config');
    expect(distText).toMatch(nestedEffortPattern);
  });

  it('logs when output_config.effort is stripped', () => {
    const distText = getDistContent();
    // Both fragments of the log message must be present in the output.
    for (const fragment of ['output_config.effort', 'Haiku does not support effort']) {
      expect(distText).toContain(fragment);
    }
  });
});

/**
 * Static checks on the text returned by getDistContent(): the context-1m beta
 * must be stripped based on a negated "opus" check rather than a "sonnet" check.
 * The patterns have no dotAll flag, so the negated check and `context-1m`
 * have to appear on the same line of the inspected text.
 */
describe('context-1m beta header stripping for non-Opus models', () => {
  it('strips context-1m for non-Opus models (not just Sonnet)', () => {
    // Condition must be !targetModel.includes("opus"), not includes("sonnet").
    const nativeHandlerPattern = /!targetModel\.includes\(['"]opus['"]\).*context-1m/;
    expect(getDistContent()).toMatch(nativeHandlerPattern);
  });

  it('strips context-1m in cascade handler for non-Opus models', () => {
    // Same negated check, keyed on the cascade-resolved model.
    const cascadeHandlerPattern = /!resolved\.model\.includes\(['"]opus['"]\).*context-1m/;
    expect(getDistContent()).toMatch(cascadeHandlerPattern);
  });
});

describe('OAT beta flag stripping in header builders', () => {
it('filters OAT_UNSUPPORTED_BETA_FLAGS in buildAnthropicHeadersWithAuth', () => {
const content = getDistContent();
Expand Down
68 changes: 56 additions & 12 deletions src/standalone-proxy.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5360,11 +5360,10 @@ export async function startProxy(config: ProxyConfig = {}): Promise<http.Server>
}
}

// Strip 1M context beta header when routing to Sonnet.
// Sonnet 1M requires "extra usage" on Max plan; without it Anthropic rejects the request.
// Opus 1M is included in Max plan. Stripping the beta falls back to Sonnet's 200K window.
// Strip 1M context beta header when routing to non-Opus models.
// Only Opus supports 1M context on Max plan; Sonnet/Haiku reject the beta header.
let effectiveCtx = ctx;
if (targetModel.includes('sonnet') && ctx.betaHeaders?.includes('context-1m')) {
if (!targetModel.includes('opus') && ctx.betaHeaders?.includes('context-1m')) {
effectiveCtx = {
...ctx,
betaHeaders: ctx.betaHeaders
Expand All @@ -5373,7 +5372,7 @@ export async function startProxy(config: ProxyConfig = {}): Promise<http.Server>
.filter(b => !b.startsWith('context-1m'))
.join(',') || undefined,
};
log(`Stripped 1M context beta from Sonnet request (requires extra usage on Max plan)`);
log(`Stripped 1M context beta from ${targetModel} request (only Opus supports 1M context)`);
}

if (
Expand Down Expand Up @@ -5571,22 +5570,53 @@ export async function startProxy(config: ProxyConfig = {}): Promise<http.Server>
_strippedThinking = true;
log(`Stripped thinking from request (${resolved.model} does not support extended thinking, originally requested: ${requestedModel})`);
}
// When the cascade resolves to a Haiku model, remove `effort` both at the top
// level of the attempt body and inside its `output_config` object.
if (isHaikuModel(resolved.model)) {
  if ('effort' in attemptBody) {
    delete attemptBody.effort;
    log(`Stripped effort param from ${resolved.model} request (Haiku does not support effort)`);
  }

  const nestedConfig = attemptBody['output_config'] as Record<string, unknown> | undefined;
  if (nestedConfig && 'effort' in nestedConfig) {
    // Work on a copy: the nested object itself is left untouched so a later
    // cascade escalation to Sonnet/Opus still sees the original output_config.
    const configWithoutEffort: Record<string, unknown> = { ...nestedConfig };
    delete configWithoutEffort.effort;

    if (Object.keys(configWithoutEffort).length > 0) {
      attemptBody['output_config'] = configWithoutEffort;
    } else {
      // Nothing but effort was configured, so drop the whole key.
      delete attemptBody['output_config'];
    }
    log(`Stripped output_config.effort from ${resolved.model} cascade request (Haiku does not support effort)`);
  }
}
// For cascade targets whose model id does not contain "opus", forward a copy
// of ctx whose beta list has every entry starting with "context-1m" removed.
let cascadeCtx = ctx;
if (!resolved.model.includes('opus') && ctx.betaHeaders?.includes('context-1m')) {
  const keptBetaFlags = ctx.betaHeaders
    .split(',')
    .map(flag => flag.trim())
    .filter(flag => !flag.startsWith('context-1m'));
  // An empty joined string collapses to undefined rather than an empty header value.
  const rejoinedBetaFlags = keptBetaFlags.join(',') || undefined;
  cascadeCtx = { ...ctx, betaHeaders: rejoinedBetaFlags };
  log(`Stripped 1M context beta from ${resolved.model} cascade request (only Opus supports 1M context)`);
}
// Hybrid auth: use MAX token for Opus models, API key for others
// NOTE(review): getAuthForModel is defined outside this view; the selection
// rule above is taken from the existing comment — confirm against the helper.
// The visible code only reads the result's `isMax` and `apiKey` fields.
const modelAuth = getAuthForModel(resolved.model, proxyConfig.auth, useAnthropicEnvKey);
if (modelAuth.isMax) {
// Log-only branch: records that the MAX token was chosen for this model.
log(`Using MAX token for ${resolved.model}`);
}
// Log OAT beta flag stripping if applicable
const cascadeEffectiveToken = ctx.authHeader?.replace(/^Bearer\s+/i, '') ?? ctx.apiKeyHeader ?? modelAuth.apiKey ?? '';
const cascadeLocalStrippedBeta = cascadeEffectiveToken.startsWith('sk-ant-oat') && ctx.betaHeaders
? ctx.betaHeaders.split(',').map(b => b.trim()).filter(b => OAT_UNSUPPORTED_BETA_FLAGS.has(b))
const cascadeEffectiveToken = cascadeCtx.authHeader?.replace(/^Bearer\s+/i, '') ?? cascadeCtx.apiKeyHeader ?? modelAuth.apiKey ?? '';
const cascadeLocalStrippedBeta = cascadeEffectiveToken.startsWith('sk-ant-oat') && cascadeCtx.betaHeaders
? cascadeCtx.betaHeaders.split(',').map(b => b.trim()).filter(b => OAT_UNSUPPORTED_BETA_FLAGS.has(b))
: [];
if (cascadeLocalStrippedBeta.length > 0) {
_strippedBetaFlags = cascadeLocalStrippedBeta;
log(`Stripped OAT-unsupported beta flags from request: ${cascadeLocalStrippedBeta.join(', ')}`);
}
const isCascadeRerouted = resolved.model !== originalModel;
const providerResponse = await forwardNativeAnthropicRequest(attemptBody, ctx, modelAuth.apiKey, modelAuth.isMax, isCascadeRerouted);
const providerResponse = await forwardNativeAnthropicRequest(attemptBody, cascadeCtx, modelAuth.apiKey, modelAuth.isMax, isCascadeRerouted);
const responseData = (await providerResponse.json()) as Record<string, unknown>;
if (!providerResponse.ok) {
if (proxyConfig.reliability?.cooldowns?.enabled) {
Expand Down Expand Up @@ -5643,6 +5673,20 @@ export async function startProxy(config: ProxyConfig = {}): Promise<http.Server>
_strippedThinking = true;
log(`Stripped thinking from request (${finalModel} does not support extended thinking, originally requested: ${requestedModel})`);
}
// When the final model is a Haiku model, remove `effort` both at the top level
// of the native request body and inside its `output_config` object (the log
// messages below state that Haiku does not support effort).
if (isHaikuModel(finalModel)) {
  if ('effort' in _nativeReqBody) {
    delete _nativeReqBody.effort;
    log(`Stripped effort param from ${finalModel} request (Haiku does not support effort)`);
  }
  const outputConfig = _nativeReqBody['output_config'] as Record<string, unknown> | undefined;
  if (outputConfig && 'effort' in outputConfig) {
    // Build a replacement object instead of deleting from outputConfig in
    // place, mirroring the cascade handler: any other holder of the original
    // nested object (e.g. the parsed request body, if _nativeReqBody is a
    // shallow copy of it — TODO confirm) keeps its output_config intact.
    const { effort: _droppedEffort, ...remainingConfig } = outputConfig;
    if (Object.keys(remainingConfig).length === 0) {
      // Nothing but effort was configured, so drop the whole key.
      delete _nativeReqBody['output_config'];
    } else {
      _nativeReqBody['output_config'] = remainingConfig;
    }
    log(`Stripped output_config.effort from ${finalModel} request (Haiku does not support effort)`);
  }
}

// Log OAT beta flag stripping if applicable
const _nativeEffectiveToken = _poolSelectedToken
Expand Down Expand Up @@ -6480,9 +6524,9 @@ export async function startProxy(config: ProxyConfig = {}): Promise<http.Server>
}
}

// Strip 1M context beta header when routing to Sonnet (same as native handler above)
// Strip 1M context beta header when routing to non-Opus models (same as native handler above)
let effectiveCtx = ctx;
if (targetModel.includes('sonnet') && ctx.betaHeaders?.includes('context-1m')) {
if (!targetModel.includes('opus') && ctx.betaHeaders?.includes('context-1m')) {
effectiveCtx = {
...ctx,
betaHeaders: ctx.betaHeaders
Expand All @@ -6491,7 +6535,7 @@ export async function startProxy(config: ProxyConfig = {}): Promise<http.Server>
.filter(b => !b.startsWith('context-1m'))
.join(',') || undefined,
};
log(`Stripped 1M context beta from Sonnet request (requires extra usage on Max plan)`);
log(`Stripped 1M context beta from ${targetModel} request (only Opus supports 1M context)`);
}

// ── Ollama routing: intercept before cloud dispatch ──
Expand Down