From 27fbaa751816c22bf6c980f15780dd6a99ffd241 Mon Sep 17 00:00:00 2001
From: Thomas BOUQUET-GASPAROUX
Date: Fri, 3 Apr 2026 15:59:01 +0200
Subject: [PATCH] fix: add per-model context window mapping

Replace the binary 200K/1M heuristic with a model-aware lookup table.
Handles GPT (128K), Gemini (1M), and all Claude variants correctly.
Falls back to 200K for unknown models, and still auto-detects 1M when
token usage exceeds 200K.
---
 src/collector/claude.rs | 29 +++++++++++++++++++++++++----
 1 file changed, 25 insertions(+), 4 deletions(-)

diff --git a/src/collector/claude.rs b/src/collector/claude.rs
index 5760e71..ebcccfb 100644
--- a/src/collector/claude.rs
+++ b/src/collector/claude.rs
@@ -751,7 +751,22 @@ fn truncate(s: &str, max: usize) -> String {
 }
 
 fn context_window_for_model(model: &str, last_context_tokens: u64) -> u64 {
+    // Explicit 1M context suffix or token usage proves 1M window
     if model.contains("[1m]") || last_context_tokens > 200_000 {
+        return 1_000_000;
+    }
+
+    // Per-model defaults based on published context window sizes
+    let lower = model.to_ascii_lowercase();
+    if lower.contains("haiku") {
+        200_000
+    } else if lower.contains("sonnet") {
+        200_000
+    } else if lower.contains("opus") {
+        200_000
+    } else if lower.contains("gpt") || lower.contains("o1") || lower.contains("o3") || lower.contains("o4") {
+        128_000
+    } else if lower.contains("gemini") {
         1_000_000
     } else {
         200_000
@@ -862,14 +877,20 @@ mod tests {
 
     #[test]
     fn test_context_window_for_model() {
-        // Base model with low token usage → 200K
+        // Claude models default to 200K
         assert_eq!(context_window_for_model("claude-opus-4-6", 50_000), 200_000);
-        // Explicit [1m] suffix → 1M regardless of token count
-        assert_eq!(context_window_for_model("claude-opus-4-6[1m]", 0), 1_000_000);
         assert_eq!(context_window_for_model("claude-sonnet-4-6", 100_000), 200_000);
-        assert_eq!(context_window_for_model("unknown-model", 0), 200_000);
+        assert_eq!(context_window_for_model("claude-haiku-4-5", 10_000), 200_000);
+        // Explicit [1m] suffix → 1M
+        assert_eq!(context_window_for_model("claude-opus-4-6[1m]", 0), 1_000_000);
         // Token usage exceeds 200K → must be 1M window
         assert_eq!(context_window_for_model("claude-opus-4-6", 250_000), 1_000_000);
+        // Non-Claude models
+        assert_eq!(context_window_for_model("gpt-4o", 0), 128_000);
+        assert_eq!(context_window_for_model("o3-mini", 0), 128_000);
+        assert_eq!(context_window_for_model("gemini-2.5-pro", 0), 1_000_000);
+        // Unknown falls back to 200K
+        assert_eq!(context_window_for_model("unknown-model", 0), 200_000);
     }
 
     #[test]