diff --git a/tests/entrypoints/openai/tool_parsers/common_tests.py b/tests/entrypoints/openai/tool_parsers/common_tests.py new file mode 100644 index 000000000000..c45a19efbe51 --- /dev/null +++ b/tests/entrypoints/openai/tool_parsers/common_tests.py @@ -0,0 +1,378 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project + +import json +from dataclasses import dataclass, field +from types import NoneType +from typing import Any + +import pytest + +from tests.entrypoints.openai.tool_parsers.utils import run_tool_extraction +from vllm.entrypoints.openai.tool_parsers import ToolParserManager +from vllm.transformers_utils.tokenizer import AnyTokenizer + + +@dataclass +class ToolParserTestConfig: + """Configuration for a tool parser's common tests. + + This dataclass contains all the test data and expected results needed + to run the common test suite for a parser. Each parser test file + creates one instance of this config with parser-specific values. + + Attributes: + parser_name: Name used with ToolParserManager (e.g., "mistral") + + Test data (model outputs): + no_tool_calls_output: Plain text without any tool syntax + single_tool_call_output: One tool call with simple arguments + parallel_tool_calls_output: Multiple tool calls in one response + various_data_types_output: Tool with various data types + empty_arguments_output: Tool call with no parameters + surrounding_text_output: Tool call mixed with regular text + escaped_strings_output: Tool call with escaped chars + malformed_input_outputs: List of invalid inputs + + Expected results: + single_tool_call_expected_name: Expected function name + single_tool_call_expected_args: Expected arguments dict + parallel_tool_calls_count: Number of tools in parallel test + parallel_tool_calls_names: Function names in order + single_tool_call_expected_content: Content field when tool called + parallel_tool_calls_expected_content: Content for parallel test + + xfail markers: + xfail_streaming: Mapping test name to xfail reason (streaming only) + xfail_nonstreaming: Mapping test name to xfail reason (non-streaming) + + Special flags: + allow_empty_or_json_empty_args: True if "" or "{}" both valid for empty args + supports_typed_arguments: True if the parser supports typed function arguments + """ + + # Parser identification + parser_name: str + + # Test data - model outputs for each common test + no_tool_calls_output: str + single_tool_call_output: str + parallel_tool_calls_output: str + various_data_types_output: str + empty_arguments_output: str + surrounding_text_output: str + escaped_strings_output: str + malformed_input_outputs: list[str] + + # Expected results for specific tests (optional overrides) + single_tool_call_expected_name: str = "get_weather" + single_tool_call_expected_args: dict[str, Any] = field( + default_factory=lambda: {"city": "Tokyo"} + ) + parallel_tool_calls_count: int = 2 + parallel_tool_calls_names: list[str] = field( + default_factory=lambda: ["get_weather", "get_time"] + ) + + # xfail configuration - maps test name to xfail reason + xfail_streaming: dict[str, str] = field(default_factory=dict) + xfail_nonstreaming: dict[str, str] = field(default_factory=dict) + + # Content expectations (some parsers strip content, others don't) + single_tool_call_expected_content: str | None = None + parallel_tool_calls_expected_content: str | None = None + + # Special assertions for edge cases + allow_empty_or_json_empty_args: bool = True # "{}" or "" for empty args + 
supports_typed_arguments: bool = True + + +class ToolParserTests: + """Mixin class providing common test suite for tool parsers. + + To use this mixin in a parser test file: + + 1. Create a test_config fixture that returns a ToolParserTestConfig instance + 2. Inherit from this class + 3. Add parser-specific tests as additional methods + + Example: + class TestMistralToolParser(ToolParserTests): + @pytest.fixture + def test_config(self) -> ToolParserTestConfig: + return ToolParserTestConfig( + parser_name="mistral", + no_tool_calls_output="Plain text...", + # ... other config ... + ) + + # Parser-specific tests + def test_mistral_specific_feature(self, tool_parser): + # Custom test logic + pass + """ + + @pytest.fixture + def test_config(self) -> ToolParserTestConfig: + """Override this to provide parser-specific configuration.""" + raise NotImplementedError( + "Subclass must provide test_config fixture returning ToolParserTestConfig" + ) + + @pytest.fixture + def tokenizer(self, default_tokenizer: AnyTokenizer) -> AnyTokenizer: + """Override this to provide parser-specific tokenizer.""" + return default_tokenizer + + @pytest.fixture + def tool_parser(self, test_config: ToolParserTestConfig, tokenizer: AnyTokenizer): + return ToolParserManager.get_tool_parser(test_config.parser_name)(tokenizer) + + @pytest.fixture(params=[True, False]) + def streaming(self, request: pytest.FixtureRequest) -> bool: + return request.param + + def test_no_tool_calls( + self, + request: pytest.FixtureRequest, + tool_parser: Any, + test_config: ToolParserTestConfig, + streaming: bool, + ): + """Verify parser handles plain text without tool syntax.""" + # Apply xfail markers if configured + test_name = "test_no_tool_calls" + self.apply_xfail_mark(request, test_config, test_name, streaming) + + content, tool_calls = run_tool_extraction( + tool_parser, test_config.no_tool_calls_output, streaming=streaming + ) + assert content == test_config.no_tool_calls_output, ( + f"Expected content to match input, got {content}" + ) + assert len(tool_calls) == 0, f"Expected no tool calls, got {len(tool_calls)}" + + def test_single_tool_call_simple_args( + self, + request: pytest.FixtureRequest, + tool_parser: Any, + test_config: ToolParserTestConfig, + streaming: bool, + ): + """Verify parser extracts one tool with simple arguments.""" + # Apply xfail markers if configured + test_name = "test_single_tool_call_simple_args" + self.apply_xfail_mark(request, test_config, test_name, streaming) + + content, tool_calls = run_tool_extraction( + tool_parser, test_config.single_tool_call_output, streaming=streaming + ) + + # Content check (some parsers strip it) + if test_config.single_tool_call_expected_content is not None: + assert content == test_config.single_tool_call_expected_content + + assert len(tool_calls) == 1, f"Expected 1 tool call, got {len(tool_calls)}" + assert tool_calls[0].type == "function" + assert tool_calls[0].function.name == test_config.single_tool_call_expected_name + + args = json.loads(tool_calls[0].function.arguments) + for key, value in test_config.single_tool_call_expected_args.items(): + assert args.get(key) == value, ( + f"Expected {key}={value}, got {args.get(key)}" + ) + + def test_parallel_tool_calls( + self, + request: pytest.FixtureRequest, + tool_parser: Any, + test_config: ToolParserTestConfig, + streaming: bool, + ): + """Verify parser handles multiple tools in one response.""" + # Apply xfail markers if configured + test_name = "test_parallel_tool_calls" + self.apply_xfail_mark(request, 
test_config, test_name, streaming) + + content, tool_calls = run_tool_extraction( + tool_parser, + test_config.parallel_tool_calls_output, + streaming=streaming, + ) + + assert len(tool_calls) == test_config.parallel_tool_calls_count, ( + f"Expected {test_config.parallel_tool_calls_count} " + f"tool calls, got {len(tool_calls)}" + ) + + # Verify tool names match expected + for i, expected_name in enumerate(test_config.parallel_tool_calls_names): + assert tool_calls[i].type == "function" + assert tool_calls[i].function.name == expected_name + + # Verify unique IDs + ids = [tc.id for tc in tool_calls] + assert len(ids) == len(set(ids)), "Tool call IDs should be unique" + + def test_various_data_types( + self, + request: pytest.FixtureRequest, + tool_parser: Any, + test_config: ToolParserTestConfig, + streaming: bool, + ): + """Verify parser handles all JSON types in arguments.""" + # Apply xfail markers if configured + test_name = "test_various_data_types" + self.apply_xfail_mark(request, test_config, test_name, streaming) + + content, tool_calls = run_tool_extraction( + tool_parser, + test_config.various_data_types_output, + streaming=streaming, + ) + assert len(tool_calls) == 1, f"Expected 1 tool call, got {len(tool_calls)}" + + args = json.loads(tool_calls[0].function.arguments) + # Verify all expected fields present + required_fields_types = { + "string_field": str, + "int_field": int, + "float_field": float, + "bool_field": bool, + "null_field": NoneType, + "array_field": list, + "object_field": dict, + } + for required_field, expected_type in required_fields_types.items(): + assert required_field in args, ( + f"Expected field '{required_field}' in arguments" + ) + if test_config.supports_typed_arguments: + found_type = type(args[required_field]) + assert found_type is expected_type, ( + f"Expected field '{required_field}' to have type {expected_type}, " + f"got {found_type}" + ) + + def test_empty_arguments( + self, + request: pytest.FixtureRequest, + tool_parser: Any, + test_config: ToolParserTestConfig, + streaming: bool, + ): + """Verify parser handles parameterless tool calls.""" + # Apply xfail markers if configured + test_name = "test_empty_arguments" + self.apply_xfail_mark(request, test_config, test_name, streaming) + + content, tool_calls = run_tool_extraction( + tool_parser, test_config.empty_arguments_output, streaming=streaming + ) + assert len(tool_calls) == 1, f"Expected 1 tool call, got {len(tool_calls)}" + + args = tool_calls[0].function.arguments + if test_config.allow_empty_or_json_empty_args: + assert args in ["{}", ""], f"Expected empty args, got {args}" + else: + assert args == "{}", f"Expected {{}}, got {args}" + + def test_surrounding_text( + self, + request: pytest.FixtureRequest, + tool_parser: Any, + test_config: ToolParserTestConfig, + streaming: bool, + ): + """Verify parser extracts tools from mixed content.""" + # Apply xfail markers if configured + test_name = "test_surrounding_text" + self.apply_xfail_mark(request, test_config, test_name, streaming) + + content, tool_calls = run_tool_extraction( + tool_parser, test_config.surrounding_text_output, streaming=streaming + ) + assert len(tool_calls) >= 1, ( + f"Expected at least 1 tool call, got {len(tool_calls)}" + ) + + def test_escaped_strings( + self, + request: pytest.FixtureRequest, + tool_parser: Any, + test_config: ToolParserTestConfig, + streaming: bool, + ): + """Verify parser handles escaped characters in arguments.""" + # Apply xfail markers if configured + test_name = "test_escaped_strings" + 
self.apply_xfail_mark(request, test_config, test_name, streaming) + + content, tool_calls = run_tool_extraction( + tool_parser, test_config.escaped_strings_output, streaming=streaming + ) + assert len(tool_calls) == 1, f"Expected 1 tool call, got {len(tool_calls)}" + + args = json.loads(tool_calls[0].function.arguments) + # At minimum, verify we can parse and have expected fields + # Exact escaping behavior varies by parser + assert len(args) > 0, "Expected some arguments with escaped strings" + + def test_malformed_input( + self, + request: pytest.FixtureRequest, + tool_parser: Any, + test_config: ToolParserTestConfig, + streaming: bool, + ): + """Verify parser gracefully handles invalid syntax.""" + # Apply xfail markers if configured + test_name = "test_malformed_input" + self.apply_xfail_mark(request, test_config, test_name, streaming) + + for malformed_input in test_config.malformed_input_outputs: + # Should not raise exception + content, tool_calls = run_tool_extraction( + tool_parser, malformed_input, streaming=streaming + ) + # Parser should handle gracefully (exact behavior varies) + + def test_streaming_reconstruction( + self, + request: pytest.FixtureRequest, + tool_parser: Any, + test_config: ToolParserTestConfig, + ): + """Verify streaming produces same result as non-streaming.""" + test_name = "test_streaming_reconstruction" + self.apply_xfail_mark(request, test_config, test_name, True) + + test_output = test_config.single_tool_call_output + + # Non-streaming result + content_non, tools_non = run_tool_extraction( + tool_parser, test_output, streaming=False + ) + + # Streaming result + content_stream, tools_stream = run_tool_extraction( + tool_parser, test_output, streaming=True + ) + + # Compare results + assert content_non == content_stream, "Content should match between modes" + assert len(tools_non) == len(tools_stream), "Tool count should match" + if len(tools_non) > 0: + assert tools_non[0].function.name == tools_stream[0].function.name + assert tools_non[0].function.arguments == tools_stream[0].function.arguments + + def apply_xfail_mark(self, request, test_config, test_name, streaming): + reason = None + if streaming and test_name in test_config.xfail_streaming: + reason = test_config.xfail_streaming[test_name] + elif not streaming and test_name in test_config.xfail_nonstreaming: + reason = test_config.xfail_nonstreaming[test_name] + if reason is not None: + mark = pytest.mark.xfail(reason=reason, strict=True) + request.node.add_marker(mark) diff --git a/tests/entrypoints/openai/tool_parsers/conftest.py b/tests/entrypoints/openai/tool_parsers/conftest.py index f2ac5e5b9a8f..4a1e7d08c2fa 100644 --- a/tests/entrypoints/openai/tool_parsers/conftest.py +++ b/tests/entrypoints/openai/tool_parsers/conftest.py @@ -7,6 +7,6 @@ from vllm.transformers_utils.tokenizer import AnyTokenizer -@pytest.fixture(scope="function") +@pytest.fixture(scope="module") def default_tokenizer() -> AnyTokenizer: return AutoTokenizer.from_pretrained("gpt2") diff --git a/tests/entrypoints/openai/tool_parsers/test_deepseekv3_tool_parser.py b/tests/entrypoints/openai/tool_parsers/test_deepseekv3_tool_parser.py new file mode 100644 index 000000000000..c130f446dddf --- /dev/null +++ b/tests/entrypoints/openai/tool_parsers/test_deepseekv3_tool_parser.py @@ -0,0 +1,92 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project + + +import pytest + +from tests.entrypoints.openai.tool_parsers.common_tests import ( + ToolParserTestConfig, + ToolParserTests, 
+) +from vllm.transformers_utils.tokenizer import AnyTokenizer, get_tokenizer + + +class TestDeepSeekV3ToolParser(ToolParserTests): + @pytest.fixture(scope="class") + def tokenizer(self) -> AnyTokenizer: + return get_tokenizer("deepseek-ai/DeepSeek-V3") + + @pytest.fixture + def test_config(self) -> ToolParserTestConfig: + return ToolParserTestConfig( + parser_name="deepseek_v3", + # Test data + no_tool_calls_output=( + "How can I help you today? I can check weather for you." + ), + single_tool_call_output="""<|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>get_weather +```json +{"city": "Tokyo", "unit": "celsius"} +```<|tool▁call▁end|><|tool▁calls▁end|>""", + parallel_tool_calls_output="""<|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>get_weather +```json +{"city": "Tokyo", "unit": "celsius"} +```<|tool▁call▁end|><|tool▁call▁begin|>function<|tool▁sep|>search_hotels +```json +{"location": "Tokyo", "check_in": "2025-01-15"} +```<|tool▁call▁end|><|tool▁calls▁end|>""", + various_data_types_output=( + """<|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>test_function +```json +""" + """{"string_field": "hello", "int_field": 42, "float_field": 3.14, """ + """"bool_field": true, "null_field": null, """ + """"array_field": ["a", "b", "c"], """ + """"object_field": {"nested": "value"}, """ + """"empty_array": [], "empty_object": {}} +```<|tool▁call▁end|><|tool▁calls▁end|>""" + ), + empty_arguments_output="""<|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>get_current_time +```json +{} +```<|tool▁call▁end|><|tool▁calls▁end|>""", + surrounding_text_output=( + """Let me check the weather for you.""" + """<|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>get_weather +```json +{"city": "Paris"} +```<|tool▁call▁end|><|tool▁calls▁end|>""" + ), + escaped_strings_output=( + """<|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>send_message +```json +""" + """{"text": "He said \\"hello\\"", "path": "C:\\\\Users\\\\file", """ + """"newline": "line1\\nline2"} +```<|tool▁call▁end|><|tool▁calls▁end|>""" + ), + malformed_input_outputs=[ + """<|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>get_weather +```json +{"city": "Tokyo" +```<|tool▁call▁end|><|tool▁calls▁end|>""", + """<|tool▁calls▁begin|>function<|tool▁sep|>get_weather +```json +{"city": "Tokyo"} +```<|tool▁calls▁end|>""", + ], + # Expected results + single_tool_call_expected_name="get_weather", + single_tool_call_expected_args={"city": "Tokyo", "unit": "celsius"}, + single_tool_call_expected_content=None, + parallel_tool_calls_count=2, + parallel_tool_calls_names=["get_weather", "search_hotels"], + # xfail markers + xfail_streaming={}, + xfail_nonstreaming={ + "test_malformed_input": ( + "Parser sets tools_called=True even when tool_calls is " + "empty (detects start token but fails to parse)" + ), + }, + ) diff --git a/tests/entrypoints/openai/tool_parsers/test_granite_20b_fc_tool_parser.py b/tests/entrypoints/openai/tool_parsers/test_granite_20b_fc_tool_parser.py new file mode 100644 index 000000000000..d25d2ffd2910 --- /dev/null +++ b/tests/entrypoints/openai/tool_parsers/test_granite_20b_fc_tool_parser.py @@ -0,0 +1,76 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project + +import pytest + +from tests.entrypoints.openai.tool_parsers.common_tests import ( + ToolParserTestConfig, + ToolParserTests, +) + + +class TestGranite20bFcToolParser(ToolParserTests): + @pytest.fixture + def test_config(self) -> ToolParserTestConfig: + 
return ToolParserTestConfig( + parser_name="granite-20b-fc", + # Test data + no_tool_calls_output="This is a regular response without any tool calls.", + single_tool_call_output=( + ' {"name": "get_weather", ' + '"arguments": {"city": "Tokyo"}}' + ), + parallel_tool_calls_output=( + ' {"name": "get_weather", ' + '"arguments": {"city": "Tokyo"}}\n' + ' {"name": "get_time", ' + '"arguments": {"timezone": "Asia/Tokyo"}}' + ), + various_data_types_output=""" { + "name": "test_function", + "arguments": { + "string_field": "hello", + "int_field": 42, + "float_field": 3.14, + "bool_field": true, + "null_field": null, + "array_field": ["a", "b", "c"], + "object_field": {"nested": "value"}, + "empty_array": [], + "empty_object": {} + } +}""", + empty_arguments_output=( + ' {"name": "refresh", "arguments": {}}' + ), + surrounding_text_output="""Let me check the weather for you. + {"name": "get_weather", "arguments": {"city": "Tokyo"}}""", + escaped_strings_output=""" { + "name": "test_function", + "arguments": { + "quoted": "He said \\"hello\\"", + "path": "C:\\\\Users\\\\file.txt", + "newline": "line1\\nline2", + "unicode": "emoji: 🎉" + } +}""", + malformed_input_outputs=[ + ' {"name": "func", "arguments": {', + ' [{"name": "func", "arguments": {}}]', + '{"name": "func", "arguments": {}}', + ' {"name": 123}', + ], + # Expected results + single_tool_call_expected_name="get_weather", + single_tool_call_expected_args={"city": "Tokyo"}, + single_tool_call_expected_content=None, + parallel_tool_calls_count=2, + parallel_tool_calls_names=["get_weather", "get_time"], + # xfail markers + xfail_streaming={ + "test_surrounding_text": ( + "Granite 20B FC streaming requires at start" + ), + }, + xfail_nonstreaming={}, + ) diff --git a/tests/entrypoints/openai/tool_parsers/test_granite_tool_parser.py b/tests/entrypoints/openai/tool_parsers/test_granite_tool_parser.py new file mode 100644 index 000000000000..3ebc03e821b4 --- /dev/null +++ b/tests/entrypoints/openai/tool_parsers/test_granite_tool_parser.py @@ -0,0 +1,118 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project + + +import pytest + +from tests.entrypoints.openai.tool_parsers.common_tests import ( + ToolParserTestConfig, + ToolParserTests, +) +from tests.entrypoints.openai.tool_parsers.utils import run_tool_extraction + + +class TestGraniteToolParser(ToolParserTests): + @pytest.fixture + def test_config(self) -> ToolParserTestConfig: + return ToolParserTestConfig( + parser_name="granite", + # Test data + no_tool_calls_output="This is a regular response without any tool calls.", + single_tool_call_output=( + '<|tool_call|> [{"name": "get_weather", ' + '"arguments": {"city": "Tokyo"}}]' + ), + parallel_tool_calls_output="""<|tool_call|> [ + {"name": "get_weather", "arguments": {"city": "Tokyo"}}, + {"name": "get_time", "arguments": {"timezone": "Asia/Tokyo"}} +]""", + various_data_types_output=""" [{ + "name": "test_function", + "arguments": { + "string_field": "hello", + "int_field": 42, + "float_field": 3.14, + "bool_field": true, + "null_field": null, + "array_field": ["a", "b", "c"], + "object_field": {"nested": "value"}, + "empty_array": [], + "empty_object": {} + } +}]""", + empty_arguments_output=( + '<|tool_call|> [{"name": "refresh", "arguments": {}}]' + ), + surrounding_text_output="""Let me check the weather for you. 
+<|tool_call|> [{"name": "get_weather", "arguments": {"city": "Tokyo"}}] +I'll get that information.""", + escaped_strings_output=""" [{ + "name": "test_function", + "arguments": { + "quoted": "He said \\"hello\\"", + "path": "C:\\\\Users\\\\file.txt", + "newline": "line1\\nline2", + "unicode": "emoji: 🎉" + } +}]""", + malformed_input_outputs=[ + '<|tool_call|> [{"name": "func", "arguments": {', + '<|tool_call|> {"name": "func", "arguments": {}}', # Not an array + '[{"name": "func", "arguments": "not a dict"}]', + 'Some text [{"name": "func"}]', # JSON but not tool call format + ], + # Expected results + single_tool_call_expected_name="get_weather", + single_tool_call_expected_args={"city": "Tokyo"}, + # Granite strips content when tool calls present + single_tool_call_expected_content=None, + parallel_tool_calls_count=2, + parallel_tool_calls_names=["get_weather", "get_time"], + # xfail markers + xfail_streaming={ + "test_malformed_input": ( + "Streaming mode incorrectly creates tool call from malformed JSON" + ), + "test_surrounding_text": ( + "Parser doesn't handle surrounding text correctly in streaming" + ), + "test_streaming_reconstruction": ( + "Streaming mode doesn't strip <|tool_call|> marker from content" + ), + }, + xfail_nonstreaming={ + "test_surrounding_text": ( + "Parser doesn't handle surrounding text correctly in non-streaming" + ), + }, + ) + + # Granite-Specific Tests + + @pytest.mark.parametrize("streaming", [True, False]) + def test_granite_token_prefix_format(self, tool_parser, streaming): + """Verify parser handles Granite 3.0 <|tool_call|> token format.""" + single_tool_call_token = ( + '<|tool_call|> [{"name": "get_weather", "arguments": {"city": "Tokyo"}}]' + ) + content, tool_calls = run_tool_extraction( + tool_parser, single_tool_call_token, streaming=streaming + ) + assert len(tool_calls) == 1, ( + f"Expected 1 tool call from token format, got {len(tool_calls)}" + ) + assert tool_calls[0].function.name == "get_weather" + + @pytest.mark.parametrize("streaming", [True, False]) + def test_granite_string_prefix_format(self, tool_parser, streaming): + """Verify parser handles Granite 3.1 string format.""" + single_tool_call_string = ( + ' [{"name": "get_weather", "arguments": {"city": "Tokyo"}}]' + ) + content, tool_calls = run_tool_extraction( + tool_parser, single_tool_call_string, streaming=streaming + ) + assert len(tool_calls) == 1, ( + f"Expected 1 tool call from string format, got {len(tool_calls)}" + ) + assert tool_calls[0].function.name == "get_weather" diff --git a/tests/entrypoints/openai/tool_parsers/test_internlm2_tool_parser.py b/tests/entrypoints/openai/tool_parsers/test_internlm2_tool_parser.py new file mode 100644 index 000000000000..3c9a9aaf993e --- /dev/null +++ b/tests/entrypoints/openai/tool_parsers/test_internlm2_tool_parser.py @@ -0,0 +1,122 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project + +from unittest.mock import MagicMock + +import pytest + +from tests.entrypoints.openai.tool_parsers.common_tests import ( + ToolParserTestConfig, + ToolParserTests, +) +from vllm.transformers_utils.tokenizer import AnyTokenizer + + +class TestInternLM2ToolParser(ToolParserTests): + @pytest.fixture + def tokenizer(self, default_tokenizer: AnyTokenizer) -> AnyTokenizer: + """Add some internlm2 specific tokens to the default vocab.""" + + tokenizer_vocab = default_tokenizer.get_vocab() + default_tokenizer.get_vocab = MagicMock() + tokenizer_vocab.update( + { + "<|action_start|>": 92540, + 
"<|plugin|>": 92541, + "<|action_end|>": 92542, + } + ) + default_tokenizer.get_vocab.return_value = tokenizer_vocab + return default_tokenizer + + @pytest.fixture + def test_config(self) -> ToolParserTestConfig: + return ToolParserTestConfig( + parser_name="internlm", + # Test data + no_tool_calls_output="This is a regular response without any tool calls.", + single_tool_call_output=( + '<|action_start|><|plugin|>{"name": "get_weather", ' + '"parameters": {"city": "Tokyo"}}<|action_end|>' + ), + # InternLM2 doesn't support parallel calls + parallel_tool_calls_output=( + '<|action_start|><|plugin|>{"name": "get_weather", ' + '"parameters": {"city": "Tokyo"}}<|action_end|>' + ), + various_data_types_output="""<|action_start|><|plugin|>{ + "name": "test_function", + "parameters": { + "string_field": "hello", + "int_field": 42, + "float_field": 3.14, + "bool_field": true, + "null_field": null, + "array_field": ["a", "b", "c"], + "object_field": {"nested": "value"}, + "empty_array": [], + "empty_object": {} + } +}<|action_end|>""", + empty_arguments_output=( + '<|action_start|><|plugin|>{"name": "refresh", ' + '"parameters": {}}<|action_end|>' + ), + surrounding_text_output=( + "Let me check the weather for you. " + '<|action_start|><|plugin|>{"name": "get_weather", ' + '"parameters": {"city": "Tokyo"}}<|action_end|>' + ), + escaped_strings_output="""<|action_start|><|plugin|>{ + "name": "test_function", + "parameters": { + "quoted": "He said \\"hello\\"", + "path": "C:\\\\Users\\\\file.txt", + "newline": "line1\\nline2", + "unicode": "emoji: 🎉" + } +}<|action_end|>""", + malformed_input_outputs=[ + '<|action_start|><|plugin|>{"name": "func", "parameters": {', + ( + '<|action_start|><|plugin|>{"name": "func", ' + '"parameters": "not a dict"}<|action_end|>' + ), + "<|action_start|><|plugin|>not json<|action_end|>", + "<|action_start|><|plugin|>", + '<|action_start|>{"name": "func"}', + ], + # Expected results + single_tool_call_expected_name="get_weather", + single_tool_call_expected_args={"city": "Tokyo"}, + single_tool_call_expected_content=None, + parallel_tool_calls_count=1, # InternLM2 only supports single tool calls + parallel_tool_calls_names=["get_weather"], + # Parser-specific settings + allow_empty_or_json_empty_args=True, + # xfail markers + xfail_streaming={ + "test_single_tool_call_simple_args": ( + "InternLM2 streaming not fully implemented" + ), + "test_parallel_tool_calls": ( + "InternLM2 streaming not fully implemented" + ), + "test_various_data_types": ( + "InternLM2 streaming not fully implemented" + ), + "test_empty_arguments": ("InternLM2 streaming not fully implemented"), + "test_surrounding_text": ("InternLM2 streaming not fully implemented"), + "test_escaped_strings": ("InternLM2 streaming not fully implemented"), + "test_streaming_reconstruction": ( + "InternLM2 streaming parser returns '<|action_start|' as " + "content instead of None - streaming/non-streaming inconsistency" + ), + }, + xfail_nonstreaming={ + "test_malformed_input": ( + "InternLM2 parser raises JSONDecodeError on malformed JSON " + "instead of gracefully handling it" + ), + }, + ) diff --git a/tests/entrypoints/openai/tool_parsers/test_longcat_tool_parser.py b/tests/entrypoints/openai/tool_parsers/test_longcat_tool_parser.py new file mode 100644 index 000000000000..f934c7f6c69b --- /dev/null +++ b/tests/entrypoints/openai/tool_parsers/test_longcat_tool_parser.py @@ -0,0 +1,101 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project + +from 
unittest.mock import MagicMock + +import pytest + +from tests.entrypoints.openai.tool_parsers.common_tests import ( + ToolParserTestConfig, + ToolParserTests, +) +from vllm.transformers_utils.tokenizer import AnyTokenizer + + +class TestLongCatToolParser(ToolParserTests): + @pytest.fixture + def tokenizer(self, default_tokenizer: AnyTokenizer) -> AnyTokenizer: + """Add some longcat specific tokens to the default vocab.""" + tokenizer = default_tokenizer + tokenizer_vocab = tokenizer.get_vocab() + tokenizer.get_vocab = MagicMock() + tokenizer_vocab.update( + { + "": 32000, + "": 32001, + } + ) + tokenizer.get_vocab.return_value = tokenizer_vocab + return tokenizer + + @pytest.fixture + def test_config(self) -> ToolParserTestConfig: + return ToolParserTestConfig( + parser_name="longcat", + # Test data + no_tool_calls_output="This is a regular response without any tool calls.", + single_tool_call_output=( + '{"name": "get_weather", ' + '"arguments": {"city": "Tokyo"}}' + ), + parallel_tool_calls_output=( + '{"name": "get_weather", ' + '"arguments": {"city": "Tokyo"}}\n' + '{"name": "get_time", ' + '"arguments": {"timezone": "Asia/Tokyo"}}' + ), + various_data_types_output="""{ + "name": "test_function", + "arguments": { + "string_field": "hello", + "int_field": 42, + "float_field": 3.14, + "bool_field": true, + "null_field": null, + "array_field": ["a", "b", "c"], + "object_field": {"nested": "value"}, + "empty_array": [], + "empty_object": {} + } +}""", + empty_arguments_output=( + '{"name": "refresh", "arguments": {}}' + "" + ), + surrounding_text_output=( + "Let me check the weather for you.\n" + '{"name": "get_weather", ' + '"arguments": {"city": "Tokyo"}}\n' + "Here is the result." + ), + escaped_strings_output="""{ + "name": "test_function", + "arguments": { + "quoted": "He said \\"hello\\"", + "path": "C:\\\\Users\\\\file.txt", + "newline": "line1\\nline2", + "unicode": "emoji: 🎉" + } +}""", + malformed_input_outputs=[ + '{"name": "func", "arguments": {', + ( + '{"name": "func", ' + '"arguments": "not a dict"}' + ), + "Some text with invalid json", + ], + # Expected results + single_tool_call_expected_name="get_weather", + single_tool_call_expected_args={"city": "Tokyo"}, + single_tool_call_expected_content=None, + parallel_tool_calls_count=2, + parallel_tool_calls_names=["get_weather", "get_time"], + # xfail markers + xfail_streaming={ + "test_malformed_input": "Streaming has complex buffering behavior", + }, + xfail_nonstreaming={}, + # Configuration + allow_empty_or_json_empty_args=True, + ) diff --git a/tests/entrypoints/openai/tool_parsers/test_phi4mini_tool_parser.py b/tests/entrypoints/openai/tool_parsers/test_phi4mini_tool_parser.py new file mode 100644 index 000000000000..85dfe2ffc5a4 --- /dev/null +++ b/tests/entrypoints/openai/tool_parsers/test_phi4mini_tool_parser.py @@ -0,0 +1,110 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project + +from unittest.mock import MagicMock + +import pytest + +from tests.entrypoints.openai.tool_parsers.common_tests import ( + ToolParserTestConfig, + ToolParserTests, +) +from vllm.transformers_utils.tokenizer import AnyTokenizer + + +class TestPhi4MiniToolParser(ToolParserTests): + @pytest.fixture + def tokenizer(self, default_tokenizer: AnyTokenizer) -> AnyTokenizer: + """Add some phi4mini specific tokens to the default vocab.""" + + tokenizer = default_tokenizer + tokenizer_vocab = tokenizer.get_vocab() + tokenizer.get_vocab = MagicMock() + tokenizer_vocab.update( + { + "functools": 
32000, + } + ) + tokenizer.get_vocab.return_value = tokenizer_vocab + return tokenizer + + @pytest.fixture + def test_config(self) -> ToolParserTestConfig: + return ToolParserTestConfig( + parser_name="phi4_mini_json", + # Test data + no_tool_calls_output="This is a regular response without any tool calls.", + single_tool_call_output=( + 'functools[{"name": "get_weather", "arguments": {"city": "Tokyo"}}]' + ), + parallel_tool_calls_output="""functools[ + {"name": "get_weather", "arguments": {"city": "Tokyo"}}, + {"name": "get_time", "arguments": {"timezone": "Asia/Tokyo"}} +]""", + various_data_types_output="""functools[{ + "name": "test_function", + "arguments": { + "string_field": "hello", + "int_field": 42, + "float_field": 3.14, + "bool_field": true, + "null_field": null, + "array_field": ["a", "b", "c"], + "object_field": {"nested": "value"}, + "empty_array": [], + "empty_object": {} + } +}]""", + empty_arguments_output='functools[{"name": "refresh", "arguments": {}}]', + surrounding_text_output="""Let me check the weather for you. +functools[{"name": "get_weather", "arguments": {"city": "Tokyo"}}] +Would you like to know more?""", + escaped_strings_output="""functools[{ + "name": "test_function", + "arguments": { + "quoted": "He said \\"hello\\"", + "path": "C:\\\\Users\\\\file.txt", + "newline": "line1\\nline2", + "unicode": "emoji: 🎉" + } +}]""", + malformed_input_outputs=[ + 'functools[{"name": "func", "arguments": {', + 'functools[{"name": "func", "arguments": "not a dict"}]', + 'functools{"name": "func"}', # Missing brackets + 'functools[{"name": "func"}]', # Missing arguments/parameters + "functools[] This is just text", # Empty functools + "functools[ This is just text ]", # functools with invalid JSON + ], + # Expected results + single_tool_call_expected_name="get_weather", + single_tool_call_expected_args={"city": "Tokyo"}, + # Phi-4 Mini strips content when tool calls present + single_tool_call_expected_content=None, + parallel_tool_calls_count=2, + parallel_tool_calls_names=["get_weather", "get_time"], + parallel_tool_calls_expected_content=None, + # xfail markers + xfail_streaming={ + "test_no_tool_calls": "Phi4 Mini streaming not implemented", + "test_single_tool_call_simple_args": ( + "Phi4 Mini streaming not implemented" + ), + "test_parallel_tool_calls": "Phi4 Mini streaming not implemented", + "test_various_data_types": "Phi4 Mini streaming not implemented", + "test_empty_arguments": "Phi4 Mini streaming not implemented", + "test_surrounding_text": "Phi4 Mini streaming not implemented", + "test_escaped_strings": "Phi4 Mini streaming not implemented", + "test_streaming_reconstruction": "Phi4 Mini streaming not implemented", + }, + xfail_nonstreaming={ + "test_various_data_types": ( + "Phi4MiniJsonToolParser regex has nesting limitations " + "with nested objects" + ), + "test_malformed_input": ( + "Phi4MiniJsonToolParser incorrectly sets " + "tools_called=True on empty array" + ), + }, + ) diff --git a/tests/entrypoints/openai/tool_parsers/test_qwen3xml_tool_parser.py b/tests/entrypoints/openai/tool_parsers/test_qwen3xml_tool_parser.py new file mode 100644 index 000000000000..7efa222a2031 --- /dev/null +++ b/tests/entrypoints/openai/tool_parsers/test_qwen3xml_tool_parser.py @@ -0,0 +1,75 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project + + +import pytest + +from tests.entrypoints.openai.tool_parsers.common_tests import ( + ToolParserTestConfig, + ToolParserTests, +) + + +class 
TestQwen3xmlToolParser(ToolParserTests): + @pytest.fixture + def test_config(self) -> ToolParserTestConfig: + return ToolParserTestConfig( + parser_name="qwen3_xml", + # Test data + no_tool_calls_output="This is a regular response without any tool calls.", + single_tool_call_output="\n\nTokyo\n\n", + parallel_tool_calls_output="\n\nTokyo\n\n\n\nAsia/Tokyo\n\n", + various_data_types_output=( + "\n\n" + "hello\n" + "42\n" + "3.14\n" + "true\n" + "null\n" + '["a", "b", "c"]\n' + '{"nested": "value"}\n' + "\n" + ), + empty_arguments_output="\n\n\n", + surrounding_text_output=( + "Let me check the weather for you.\n\n" + "\n\n" + "Tokyo\n" + "\n\n\n" + "I will get that information." + ), + escaped_strings_output=( + "\n\n" + 'He said "hello"\n' + "C:\\Users\\file.txt\n" + "line1\nline2\n" + "\n" + ), + malformed_input_outputs=[ + "", + "", + ], + # Expected results + single_tool_call_expected_name="get_weather", + single_tool_call_expected_args={"city": "Tokyo"}, + parallel_tool_calls_count=2, + parallel_tool_calls_names=["get_weather", "get_time"], + # xfail markers - Qwen3XML has systematic streaming issues + xfail_streaming={ + "test_single_tool_call_simple_args": ( + "Qwen3XML streaming has systematic issues" + ), + "test_parallel_tool_calls": "Qwen3XML streaming has systematic issues", + "test_various_data_types": "Qwen3XML streaming has systematic issues", + "test_empty_arguments": "Qwen3XML streaming has systematic issues", + "test_surrounding_text": "Qwen3XML streaming has systematic issues", + "test_escaped_strings": "Qwen3XML streaming has systematic issues", + "test_malformed_input": ( + "Qwen3XML parser is lenient with malformed input" + ), + "test_streaming_reconstruction": ( + "Qwen3XML streaming reconstruction has known issues" + ), + }, + supports_typed_arguments=False, + ) diff --git a/tests/entrypoints/openai/tool_parsers/test_step3_tool_parser.py b/tests/entrypoints/openai/tool_parsers/test_step3_tool_parser.py new file mode 100644 index 000000000000..4a77891ac8ad --- /dev/null +++ b/tests/entrypoints/openai/tool_parsers/test_step3_tool_parser.py @@ -0,0 +1,112 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project + + +import pytest + +from tests.entrypoints.openai.tool_parsers.common_tests import ( + ToolParserTestConfig, + ToolParserTests, +) +from vllm.transformers_utils.tokenizer import AnyTokenizer, get_tokenizer + + +class TestStep3ToolParser(ToolParserTests): + @pytest.fixture(scope="class") + def tokenizer(self) -> AnyTokenizer: + return get_tokenizer("stepfun-ai/step3") + + @pytest.fixture + def test_config(self) -> ToolParserTestConfig: + return ToolParserTestConfig( + parser_name="step3", + # Test data + no_tool_calls_output="This is a regular response without any tool calls.", + single_tool_call_output=( + "<|tool_calls_begin|><|tool_call_begin|>" + '' + 'Tokyo' + "<|tool_call_end|><|tool_calls_end|>" + ), + parallel_tool_calls_output=( + "<|tool_calls_begin|><|tool_call_begin|>" + '' + 'Tokyo' + "<|tool_call_end|><|tool_sep|>" + '<|tool_call_begin|>' + 'Asia/Tokyo' + "<|tool_call_end|><|tool_calls_end|>" + ), + various_data_types_output=( + "<|tool_calls_begin|><|tool_call_begin|>" + '' + 'hello' + '42' + '3.14' + 'true' + 'null' + '' + '["a", "b", "c"]' + '' + '{"nested": "value"}' + "<|tool_call_end|><|tool_calls_end|>" + ), + empty_arguments_output=( + "<|tool_calls_begin|><|tool_call_begin|>" + '' + "<|tool_call_end|><|tool_calls_end|>" + ), + surrounding_text_output=( + "Let me check the weather for 
you.\n\n" + "<|tool_calls_begin|><|tool_call_begin|>" + '' + 'Tokyo' + "<|tool_call_end|><|tool_calls_end|>\n\n" + "I'll get that information." + ), + escaped_strings_output=( + "<|tool_calls_begin|><|tool_call_begin|>" + '' + 'He said "hello"' + 'C:\\Users\\file.txt' + 'line1\nline2' + "<|tool_call_end|><|tool_calls_end|>" + ), + malformed_input_outputs=[ + ( + "<|tool_calls_begin|><|tool_call_begin|>" + '' + ), + ( + '<|tool_call_begin|>' + "<|tool_call_end|>" + ), + ], + # Expected results + single_tool_call_expected_name="get_weather", + single_tool_call_expected_args={"city": "Tokyo"}, + parallel_tool_calls_count=2, + parallel_tool_calls_names=["get_weather", "get_time"], + # xfail markers + xfail_nonstreaming={ + "test_single_tool_call_simple_args": ( + "Step3 parser non-streaming has bugs" + ), + "test_parallel_tool_calls": ("Step3 parser non-streaming has bugs"), + "test_various_data_types": "Step3 parser non-streaming has bugs", + "test_empty_arguments": "Step3 parser non-streaming has bugs", + "test_surrounding_text": "Step3 parser non-streaming has bugs", + "test_escaped_strings": "Step3 parser non-streaming has bugs", + }, + xfail_streaming={ + "test_parallel_tool_calls": ( + "Step3 parser has significant bugs in both streaming " + "and non-streaming" + ), + "test_streaming_reconstruction": ( + "Step3 parser non-streaming has bugs, so streaming " + "doesn't match non-streaming" + ), + }, + supports_typed_arguments=False, + )