From 331d70e8ca68451ed79434406d02b245cd9d4b3e Mon Sep 17 00:00:00 2001
From: vl3c <95963142+vl3c@users.noreply.github.com>
Date: Mon, 16 Feb 2026 22:24:30 +0200
Subject: [PATCH] Fix tool search failing with reasoning models

Reasoning models (o3, o4-mini, gpt-5*) reject the max_tokens parameter
in the Chat Completions API and require max_completion_tokens instead.
When the user's active model is a reasoning model, the tool search
service inherits that model and the API call fails with a 400 error,
preventing tool discovery entirely.

Tool search is a lightweight classification task that doesn't benefit
from reasoning. For OpenAI reasoning models, automatically downgrade to
gpt-4.1-mini which is cheaper, faster, and uses the standard max_tokens
parameter without issues.
---
 static/tool_search_service.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/static/tool_search_service.py b/static/tool_search_service.py
index f23cd943..c67864c8 100644
--- a/static/tool_search_service.py
+++ b/static/tool_search_service.py
@@ -171,9 +171,15 @@ def search_tools(
         # Clamp max_results to valid range
         max_results = max(1, min(20, max_results))
 
-        # Use provided model, instance default, or fallback to gpt-4.1-mini
+        # Use provided model, instance default, or fallback to gpt-4.1-mini.
+        # Tool search is a lightweight classification task — reasoning models
+        # are overkill and their max_completion_tokens budget includes internal
+        # reasoning tokens, so 500 tokens may not leave enough room for output.
+        # For OpenAI reasoning models, downgrade to gpt-4.1-mini automatically.
         if model is None:
             model = self.default_model or AIModel.from_identifier("gpt-4.1-mini")
+        if getattr(model, "is_reasoning_model", False) and getattr(model, "provider", "") == "openai":
+            model = AIModel.from_identifier("gpt-4.1-mini")
 
         # Build the prompt
         tool_descriptions = self.build_tool_descriptions()