From 27fbaa751816c22bf6c980f15780dd6a99ffd241 Mon Sep 17 00:00:00 2001
From: Thomas BOUQUET-GASPAROUX
Date: Fri, 3 Apr 2026 15:59:01 +0200
Subject: [PATCH] fix: add per-model context window mapping

Replace the binary 200K/1M heuristic with a model-aware lookup table.
Handles GPT (128K), Gemini (1M), and all Claude variants correctly.
Falls back to 200K for unknown models, and still auto-detects 1M when
token usage exceeds 200K.
---
 src/collector/claude.rs | 29 +++++++++++++++++++++++++----
 1 file changed, 25 insertions(+), 4 deletions(-)

diff --git a/src/collector/claude.rs b/src/collector/claude.rs
index 5760e71..ebcccfb 100644
--- a/src/collector/claude.rs
+++ b/src/collector/claude.rs
@@ -751,7 +751,22 @@ fn truncate(s: &str, max: usize) -> String {
 }
 
 fn context_window_for_model(model: &str, last_context_tokens: u64) -> u64 {
+    // Explicit 1M context suffix or token usage proves 1M window
     if model.contains("[1m]") || last_context_tokens > 200_000 {
+        return 1_000_000;
+    }
+
+    // Per-model defaults based on published context window sizes
+    let lower = model.to_ascii_lowercase();
+    if lower.contains("haiku") {
+        200_000
+    } else if lower.contains("sonnet") {
+        200_000
+    } else if lower.contains("opus") {
+        200_000
+    } else if lower.contains("gpt") || lower.contains("o1") || lower.contains("o3") || lower.contains("o4") {
+        128_000
+    } else if lower.contains("gemini") {
         1_000_000
     } else {
         200_000
@@ -862,14 +877,20 @@ mod tests {
 
     #[test]
     fn test_context_window_for_model() {
-        // Base model with low token usage → 200K
+        // Claude models default to 200K
         assert_eq!(context_window_for_model("claude-opus-4-6", 50_000), 200_000);
-        // Explicit [1m] suffix → 1M regardless of token count
-        assert_eq!(context_window_for_model("claude-opus-4-6[1m]", 0), 1_000_000);
         assert_eq!(context_window_for_model("claude-sonnet-4-6", 100_000), 200_000);
-        assert_eq!(context_window_for_model("unknown-model", 0), 200_000);
+        assert_eq!(context_window_for_model("claude-haiku-4-5", 10_000), 200_000);
+        // Explicit [1m] suffix → 1M
+        assert_eq!(context_window_for_model("claude-opus-4-6[1m]", 0), 1_000_000);
         // Token usage exceeds 200K → must be 1M window
         assert_eq!(context_window_for_model("claude-opus-4-6", 250_000), 1_000_000);
+        // Non-Claude models
+        assert_eq!(context_window_for_model("gpt-4o", 0), 128_000);
+        assert_eq!(context_window_for_model("o3-mini", 0), 128_000);
+        assert_eq!(context_window_for_model("gemini-2.5-pro", 0), 1_000_000);
+        // Unknown falls back to 200K
+        assert_eq!(context_window_for_model("unknown-model", 0), 200_000);
     }
 
     #[test]