72 changes: 60 additions & 12 deletions tiny_scientist/utils/llm.py
@@ -30,6 +30,7 @@
     "gpt-4o-mini-2024-07-18",
     "gpt-4o-2024-05-13",
     "gpt-4o-2024-08-06",
+    "gpt-5",
     "o1-preview-2024-09-12",
     "o1-mini-2024-09-12",
     "o1-2024-12-17",
@@ -142,6 +143,28 @@ def get_batch_responses_from_llm(
         output_tokens = getattr(response.usage, "completion_tokens", 0)
         if cost_tracker is not None:
             cost_tracker.add_cost(model, input_tokens, output_tokens, task_name)
+    elif model.startswith("gpt-5"):
+        new_msg_history = msg_history + [{"role": "user", "content": msg}]
+        response = client.chat.completions.create(
+            model=model,
+            messages=[
+                {"role": "system", "content": system_message},
+                *new_msg_history,
+            ],
+            max_completion_tokens=MAX_NUM_TOKENS,
+            n=n_responses,
+            stop=None,
+            seed=0,
+        )
+        content = [r.message.content for r in response.choices]
+        new_msg_history = [
+            new_msg_history + [{"role": "assistant", "content": c}] for c in content
+        ]
+        if hasattr(response, "usage"):
+            input_tokens = getattr(response.usage, "prompt_tokens", 0)
+            output_tokens = getattr(response.usage, "completion_tokens", 0)
+        if cost_tracker is not None:
+            cost_tracker.add_cost(model, input_tokens, output_tokens, task_name)
     elif any(
         model.startswith(prefix)
         for prefix in ["meta-llama/", "Qwen/", "deepseek-ai/", "mistralai/"]
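For reference, the new `gpt-5` branch mirrors the `o1` handling elsewhere in this file: it sends `max_completion_tokens` and omits the `temperature`/`max_tokens` pair used by the older chat models. A minimal standalone sketch of the same call shape, assuming `OPENAI_API_KEY` is set (prompt text and token limit are illustrative):

```python
from openai import OpenAI

client = OpenAI()  # reads OPENAI_API_KEY from the environment

# gpt-5-family request: note max_completion_tokens instead of max_tokens,
# and no temperature argument, matching the branch added in this diff.
response = client.chat.completions.create(
    model="gpt-5",
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Name one application of transformers."},
    ],
    max_completion_tokens=512,
    n=2,  # like n_responses above, this returns multiple choices
)
contents = [choice.message.content for choice in response.choices]
```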
@@ -282,6 +305,24 @@ def get_response_from_llm(
         if hasattr(response, "usage"):
             input_tokens = getattr(response.usage, "prompt_tokens", 0)
             output_tokens = getattr(response.usage, "completion_tokens", 0)
+    elif model.startswith("gpt-5"):
+        new_msg_history = msg_history + [{"role": "user", "content": msg}]
+        response = client.chat.completions.create(
+            model=model,
+            messages=[
+                {"role": "system", "content": system_message},
+                *new_msg_history,
+            ],
+            max_completion_tokens=MAX_NUM_TOKENS,
+            n=1,
+            stop=None,
+            seed=0,
+        )
+        content = response.choices[0].message.content
+        new_msg_history = new_msg_history + [{"role": "assistant", "content": content}]
+        if hasattr(response, "usage"):
+            input_tokens = getattr(response.usage, "prompt_tokens", 0)
+            output_tokens = getattr(response.usage, "completion_tokens", 0)
     elif model in ["o1-preview-2024-09-12", "o1-mini-2024-09-12"]:
         new_msg_history = msg_history + [{"role": "user", "content": msg}]
         response = client.chat.completions.create(
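The single-response variant is identical except for `n=1` and unpacking only `choices[0]`. A hedged usage sketch of the updated `get_response_from_llm` (keyword names follow the identifiers visible in this diff; the full signature and return shape live in `llm.py`, so treat the call below as illustrative):

```python
from openai import OpenAI
from tiny_scientist.utils.llm import get_response_from_llm

client = OpenAI()

# Assumed call/return shape: the branch's bookkeeping suggests the function
# returns the content plus the updated message history.
content, msg_history = get_response_from_llm(
    msg="Draft a one-sentence abstract about diffusion models.",
    client=client,
    model="gpt-5",
    system_message="You are a scientific writing assistant.",
    msg_history=[],
)
print(content)
```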
@@ -461,20 +502,27 @@ def get_batch_responses_from_llm_with_tools(
         "gpt" in model or model in ["o1-preview-2024-09-12", "o1-mini-2024-09-12"]
     ):
         new_msg_history = msg_history + [{"role": "user", "content": msg}]
+        kwargs = {
+            "model": model,
+            "messages": [
+                {"role": "system", "content": system_message},
+                *new_msg_history,
+            ],
+            "tools": tools,
+            "tool_choice": "auto",
+            "n": n_responses,
+            "stop": None,
+            "seed": 0,
+        }
+        if model.startswith("gpt-5"):
+            kwargs["max_completion_tokens"] = MAX_NUM_TOKENS
+        else:
+            kwargs["temperature"] = temperature
+            kwargs["max_tokens"] = MAX_NUM_TOKENS
+
         try:
             response = client.chat.completions.create(  # type: ignore[call-overload]
-                model=model,
-                messages=[
-                    {"role": "system", "content": system_message},
-                    *new_msg_history,
-                ],
-                tools=tools,
-                tool_choice="auto",  # Or specify a tool like {"type": "function", "function": {"name": "my_function"}}
-                temperature=temperature,
-                max_tokens=MAX_NUM_TOKENS,
-                n=n_responses,
-                stop=None,
-                seed=0,  # Seed might not be available for all models or with tool use
+                **kwargs
             )

             # Extract token usage for OpenAI
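Building the request as a `kwargs` dict keeps a single `create` call while switching the token-limit and sampling parameters per model family. A condensed sketch of that pattern as a standalone helper (hypothetical name and placeholder constant; the diff inlines this logic rather than factoring it out):

```python
from typing import Any

MAX_NUM_TOKENS = 4096  # placeholder; llm.py defines the real constant


def build_chat_kwargs(
    model: str,
    messages: list[dict[str, Any]],
    temperature: float,
) -> dict[str, Any]:
    """Assemble per-model kwargs: gpt-5 models use max_completion_tokens and
    take no temperature; older chat models keep the legacy parameters."""
    kwargs: dict[str, Any] = {"model": model, "messages": messages}
    if model.startswith("gpt-5"):
        kwargs["max_completion_tokens"] = MAX_NUM_TOKENS
    else:
        kwargs["temperature"] = temperature
        kwargs["max_tokens"] = MAX_NUM_TOKENS
    return kwargs
```

One design note: expanding `**kwargs` into `client.chat.completions.create` defeats the client's static overload resolution, which is presumably why the `# type: ignore[call-overload]` stays on the call.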
1 change: 1 addition & 0 deletions tiny_scientist/utils/pricing.py
@@ -7,6 +7,7 @@
     "gpt-3.5-turbo": (0.5, 1.5),
     "gpt-4o-mini": (0.15, 0.6),
     "gpt-4o": (2.5, 10),
+    "gpt-5": (1.25, 10),
     "o1-preview": (15, 60),
     "o1-mini": (1.1, 4.4),
     "o1": (15, 60),