diff --git a/.github/run-eval/resolve_model_config.py b/.github/run-eval/resolve_model_config.py
index c21ecc8888..bfde570044 100755
--- a/.github/run-eval/resolve_model_config.py
+++ b/.github/run-eval/resolve_model_config.py
@@ -132,6 +132,11 @@
         "display_name": "GPT-5.3 Codex",
         "llm_config": {"model": "litellm_proxy/gpt-5-3-codex"},
     },
+    "gpt-5.4-codex": {
+        "id": "gpt-5.4-codex",
+        "display_name": "GPT-5.4 Codex",
+        "llm_config": {"model": "litellm_proxy/gpt-5-4-codex"},
+    },
     "gpt-5.2-high-reasoning": {
         "id": "gpt-5.2-high-reasoning",
         "display_name": "GPT-5.2 High Reasoning",
diff --git a/openhands-sdk/openhands/sdk/llm/utils/model_prompt_spec.py b/openhands-sdk/openhands/sdk/llm/utils/model_prompt_spec.py
index da226703a3..a61185fb0f 100644
--- a/openhands-sdk/openhands/sdk/llm/utils/model_prompt_spec.py
+++ b/openhands-sdk/openhands/sdk/llm/utils/model_prompt_spec.py
@@ -38,7 +38,13 @@ class ModelPromptSpec(BaseModel):
         "openai_gpt": (
             (
                 "gpt-5-codex",
-                ("gpt-5-codex", "gpt-5.1-codex", "gpt-5.2-codex", "gpt-5.3-codex"),
+                (
+                    "gpt-5-codex",
+                    "gpt-5.1-codex",
+                    "gpt-5.2-codex",
+                    "gpt-5.3-codex",
+                    "gpt-5.4-codex",
+                ),
             ),
             ("gpt-5", ("gpt-5", "gpt-5.1", "gpt-5.2")),
         ),
diff --git a/tests/github_workflows/test_resolve_model_config.py b/tests/github_workflows/test_resolve_model_config.py
index 2ed496df68..e1b0ea0228 100644
--- a/tests/github_workflows/test_resolve_model_config.py
+++ b/tests/github_workflows/test_resolve_model_config.py
@@ -491,3 +491,12 @@ def test_models_importable_without_litellm():
         f"stderr: {result.stderr}"
     )
     assert "SUCCESS" in result.stdout
+
+
+def test_gpt_5_4_codex_config():
+    """Test that gpt-5.4-codex has correct configuration."""
+    model = MODELS["gpt-5.4-codex"]
+
+    assert model["id"] == "gpt-5.4-codex"
+    assert model["display_name"] == "GPT-5.4 Codex"
+    assert model["llm_config"]["model"] == "litellm_proxy/gpt-5-4-codex"
diff --git a/tests/sdk/llm/test_model_features.py b/tests/sdk/llm/test_model_features.py
index 9032c8dd39..297683abdb 100644
--- a/tests/sdk/llm/test_model_features.py
+++ b/tests/sdk/llm/test_model_features.py
@@ -39,6 +39,7 @@ def test_model_matches(name, pattern, expected):
         # GPT-5 family
         ("gpt-5.2", True),
         ("gpt-5.2-codex", True),
+        ("gpt-5.4-codex", True),
         ("gpt-4o", False),
         ("claude-3-5-sonnet", False),
         ("gemini-1.5-pro", False),