From 331d70e8ca68451ed79434406d02b245cd9d4b3e Mon Sep 17 00:00:00 2001
From: vl3c <95963142+vl3c@users.noreply.github.com>
Date: Mon, 16 Feb 2026 22:24:30 +0200
Subject: [PATCH] Fix tool search failing with reasoning models

Reasoning models (o3, o4-mini, gpt-5*) reject the max_tokens parameter
in the Chat Completions API and require max_completion_tokens instead.
When the user's active model is a reasoning model, the tool search
service inherits that model and the API call fails with a 400 error,
preventing tool discovery entirely.

Tool search is a lightweight classification task that doesn't benefit
from reasoning. For OpenAI reasoning models, automatically downgrade to
gpt-4.1-mini which is cheaper, faster, and uses the standard max_tokens
parameter without issues.
---
 static/tool_search_service.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/static/tool_search_service.py b/static/tool_search_service.py
index f23cd943..c67864c8 100644
--- a/static/tool_search_service.py
+++ b/static/tool_search_service.py
@@ -171,9 +171,15 @@ def search_tools(
         # Clamp max_results to valid range
         max_results = max(1, min(20, max_results))
 
-        # Use provided model, instance default, or fallback to gpt-4.1-mini
+        # Use provided model, instance default, or fallback to gpt-4.1-mini.
+        # Tool search is a lightweight classification task — reasoning models
+        # are overkill and their max_completion_tokens budget includes internal
+        # reasoning tokens, so 500 tokens may not leave enough room for output.
+        # For OpenAI reasoning models, downgrade to gpt-4.1-mini automatically.
         if model is None:
             model = self.default_model or AIModel.from_identifier("gpt-4.1-mini")
+        if getattr(model, "is_reasoning_model", False) and getattr(model, "provider", "") == "openai":
+            model = AIModel.from_identifier("gpt-4.1-mini")
 
         # Build the prompt
         tool_descriptions = self.build_tool_descriptions()