From ac48ccaf8b201bf02f6ec2e652fc7d46f4d4a9df Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: Wed, 29 Oct 2025 15:43:54 +0100 Subject: [PATCH 1/4] Cleanup entrypoint test organisation Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- .buildkite/test-pipeline.yaml | 36 ++++++++++--------- tests/entrypoints/llm/test_collective_rpc.py | 4 +-- .../{ => openai}/test_chat_utils.py | 4 +-- .../{openai/tool_parsers => unit}/__init__.py | 0 .../test_api_server_process_manager.py | 0 tests/entrypoints/{ => unit}/test_context.py | 0 .../{ => unit}/test_harmony_utils.py | 0 tests/entrypoints/{ => unit}/test_renderer.py | 0 .../{ => unit}/test_ssl_cert_refresher.py | 0 .../entrypoints/unit/tool_parsers/__init__.py | 0 .../{openai => unit}/tool_parsers/conftest.py | 0 .../tool_parsers/test_hermes_tool_parser.py | 0 .../test_hunyuan_a13b_tool_parser.py | 0 .../test_llama3_json_tool_parser.py | 0 .../test_llama4_pythonic_tool_parser.py | 0 .../tool_parsers/test_olmo3_tool_parser.py | 0 .../tool_parsers/test_pythonic_tool_parser.py | 0 .../{openai => unit}/tool_parsers/utils.py | 0 18 files changed, 24 insertions(+), 20 deletions(-) rename tests/entrypoints/{ => openai}/test_chat_utils.py (99%) rename tests/entrypoints/{openai/tool_parsers => unit}/__init__.py (100%) rename tests/entrypoints/{ => unit}/test_api_server_process_manager.py (100%) rename tests/entrypoints/{ => unit}/test_context.py (100%) rename tests/entrypoints/{ => unit}/test_harmony_utils.py (100%) rename tests/entrypoints/{ => unit}/test_renderer.py (100%) rename tests/entrypoints/{ => unit}/test_ssl_cert_refresher.py (100%) create mode 100644 tests/entrypoints/unit/tool_parsers/__init__.py rename tests/entrypoints/{openai => unit}/tool_parsers/conftest.py (100%) rename tests/entrypoints/{openai => unit}/tool_parsers/test_hermes_tool_parser.py (100%) rename tests/entrypoints/{openai => unit}/tool_parsers/test_hunyuan_a13b_tool_parser.py (100%) 
rename tests/entrypoints/{openai => unit}/tool_parsers/test_llama3_json_tool_parser.py (100%) rename tests/entrypoints/{openai => unit}/tool_parsers/test_llama4_pythonic_tool_parser.py (100%) rename tests/entrypoints/{openai => unit}/tool_parsers/test_olmo3_tool_parser.py (100%) rename tests/entrypoints/{openai => unit}/tool_parsers/test_pythonic_tool_parser.py (100%) rename tests/entrypoints/{openai => unit}/tool_parsers/utils.py (100%) diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml index e166f320f9c3..6d5fb045d8f3 100644 --- a/.buildkite/test-pipeline.yaml +++ b/.buildkite/test-pipeline.yaml @@ -89,14 +89,10 @@ steps: torch_nightly: true source_file_dependencies: - vllm/ - - tests/basic_correctness/test_basic_correctness - - tests/basic_correctness/test_cpu_offload - - tests/basic_correctness/test_cumem.py + - tests/basic_correctness/ commands: - export VLLM_WORKER_MULTIPROC_METHOD=spawn - - pytest -v -s basic_correctness/test_cumem.py - - pytest -v -s basic_correctness/test_basic_correctness.py - - pytest -v -s basic_correctness/test_cpu_offload.py + - pytest -v -s basic_correctness - label: Entrypoints Unit Tests # 5min timeout_in_minutes: 10 @@ -104,10 +100,9 @@ steps: fast_check: true source_file_dependencies: - vllm/entrypoints - - tests/entrypoints/ + - tests/entrypoints/unit commands: - - pytest -v -s entrypoints/openai/tool_parsers - - pytest -v -s entrypoints/ --ignore=entrypoints/llm --ignore=entrypoints/openai --ignore=entrypoints/offline_mode --ignore=entrypoints/test_chat_utils.py --ignore=entrypoints/pooling + - pytest -v -s entrypoints/unit - label: Entrypoints Integration Test (LLM) # 30min timeout_in_minutes: 40 @@ -121,12 +116,12 @@ steps: - tests/entrypoints/offline_mode commands: - export VLLM_WORKER_MULTIPROC_METHOD=spawn - - pytest -v -s entrypoints/llm --ignore=entrypoints/llm/test_generate.py --ignore=entrypoints/llm/test_collective_rpc.py + - pytest -v -s entrypoints/llm 
--ignore=entrypoints/llm/test_generate.py - pytest -v -s entrypoints/llm/test_generate.py # it needs a clean process - pytest -v -s entrypoints/offline_mode # Needs to avoid interference with other tests -- label: Entrypoints Integration Test (API Server) # 100min - timeout_in_minutes: 130 +- label: Entrypoints Integration Test (API Server) %N # 50min each + timeout_in_minutes: 65 mirror_hardwares: [amdexperimental] working_dir: "/vllm-workspace/tests" fast_check: true @@ -134,12 +129,21 @@ steps: source_file_dependencies: - vllm/ - tests/entrypoints/openai - - tests/entrypoints/test_chat_utils commands: - export VLLM_WORKER_MULTIPROC_METHOD=spawn - - PYTHONPATH=/vllm-workspace pytest -v -s entrypoints/openai/test_collective_rpc.py # PYTHONPATH is needed to import custom Worker extension - - pytest -v -s entrypoints/openai --ignore=entrypoints/openai/test_chat_with_tool_reasoning.py --ignore=entrypoints/openai/test_oot_registration.py --ignore=entrypoints/openai/test_tensorizer_entrypoint.py --ignore=entrypoints/openai/correctness/ --ignore=entrypoints/openai/test_collective_rpc.py --ignore=entrypoints/openai/tool_parsers/ - - pytest -v -s entrypoints/test_chat_utils.py + # PYTHONPATH is needed to import custom Worker extension. YAML folds the indented continuation lines below into one command, so they must not end in a shell backslash. + - PYTHONPATH=/vllm-workspace pytest -v -s entrypoints/openai/test_collective_rpc.py + --shard-id=$$BUILDKITE_PARALLEL_JOB + --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT + - pytest -v -s entrypoints/openai + --ignore=entrypoints/openai/test_chat_with_tool_reasoning.py + --ignore=entrypoints/openai/test_oot_registration.py + --ignore=entrypoints/openai/test_tensorizer_entrypoint.py + --ignore=entrypoints/openai/correctness/ + --ignore=entrypoints/openai/test_collective_rpc.py + --shard-id=$$BUILDKITE_PARALLEL_JOB + --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT + parallelism: 2 - label: Entrypoints Integration Test (Pooling) timeout_in_minutes: 50 diff --git a/tests/entrypoints/llm/test_collective_rpc.py 
b/tests/entrypoints/llm/test_collective_rpc.py index 747676ac9567..6329542cc443 100644 --- a/tests/entrypoints/llm/test_collective_rpc.py +++ b/tests/entrypoints/llm/test_collective_rpc.py @@ -8,13 +8,13 @@ from ...utils import create_new_process_for_each_test +pytestmark = pytest.mark.multi_gpu_test(num_gpus=2) + @pytest.mark.parametrize("tp_size", [1, 2]) @pytest.mark.parametrize("backend", ["mp", "ray"]) @create_new_process_for_each_test() def test_collective_rpc(tp_size, backend, monkeypatch): - if torch.cuda.device_count() < tp_size: - pytest.skip(f"Not enough GPUs for tensor parallelism {tp_size}") if tp_size == 1 and backend == "ray": pytest.skip("Skip duplicate test case") if tp_size == 1: diff --git a/tests/entrypoints/test_chat_utils.py b/tests/entrypoints/openai/test_chat_utils.py similarity index 99% rename from tests/entrypoints/test_chat_utils.py rename to tests/entrypoints/openai/test_chat_utils.py index ca87b3e76b3f..b48fd21eb132 100644 --- a/tests/entrypoints/test_chat_utils.py +++ b/tests/entrypoints/openai/test_chat_utils.py @@ -31,8 +31,8 @@ from vllm.transformers_utils.tokenizer import get_tokenizer from vllm.transformers_utils.tokenizers.mistral import MistralTokenizer -from ..models.registry import HF_EXAMPLE_MODELS -from ..utils import VLLM_PATH +from ...models.registry import HF_EXAMPLE_MODELS +from ...utils import VLLM_PATH EXAMPLES_DIR = VLLM_PATH / "examples" diff --git a/tests/entrypoints/openai/tool_parsers/__init__.py b/tests/entrypoints/unit/__init__.py similarity index 100% rename from tests/entrypoints/openai/tool_parsers/__init__.py rename to tests/entrypoints/unit/__init__.py diff --git a/tests/entrypoints/test_api_server_process_manager.py b/tests/entrypoints/unit/test_api_server_process_manager.py similarity index 100% rename from tests/entrypoints/test_api_server_process_manager.py rename to tests/entrypoints/unit/test_api_server_process_manager.py diff --git a/tests/entrypoints/test_context.py 
b/tests/entrypoints/unit/test_context.py similarity index 100% rename from tests/entrypoints/test_context.py rename to tests/entrypoints/unit/test_context.py diff --git a/tests/entrypoints/test_harmony_utils.py b/tests/entrypoints/unit/test_harmony_utils.py similarity index 100% rename from tests/entrypoints/test_harmony_utils.py rename to tests/entrypoints/unit/test_harmony_utils.py diff --git a/tests/entrypoints/test_renderer.py b/tests/entrypoints/unit/test_renderer.py similarity index 100% rename from tests/entrypoints/test_renderer.py rename to tests/entrypoints/unit/test_renderer.py diff --git a/tests/entrypoints/test_ssl_cert_refresher.py b/tests/entrypoints/unit/test_ssl_cert_refresher.py similarity index 100% rename from tests/entrypoints/test_ssl_cert_refresher.py rename to tests/entrypoints/unit/test_ssl_cert_refresher.py diff --git a/tests/entrypoints/unit/tool_parsers/__init__.py b/tests/entrypoints/unit/tool_parsers/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tests/entrypoints/openai/tool_parsers/conftest.py b/tests/entrypoints/unit/tool_parsers/conftest.py similarity index 100% rename from tests/entrypoints/openai/tool_parsers/conftest.py rename to tests/entrypoints/unit/tool_parsers/conftest.py diff --git a/tests/entrypoints/openai/tool_parsers/test_hermes_tool_parser.py b/tests/entrypoints/unit/tool_parsers/test_hermes_tool_parser.py similarity index 100% rename from tests/entrypoints/openai/tool_parsers/test_hermes_tool_parser.py rename to tests/entrypoints/unit/tool_parsers/test_hermes_tool_parser.py diff --git a/tests/entrypoints/openai/tool_parsers/test_hunyuan_a13b_tool_parser.py b/tests/entrypoints/unit/tool_parsers/test_hunyuan_a13b_tool_parser.py similarity index 100% rename from tests/entrypoints/openai/tool_parsers/test_hunyuan_a13b_tool_parser.py rename to tests/entrypoints/unit/tool_parsers/test_hunyuan_a13b_tool_parser.py diff --git 
a/tests/entrypoints/openai/tool_parsers/test_llama3_json_tool_parser.py b/tests/entrypoints/unit/tool_parsers/test_llama3_json_tool_parser.py similarity index 100% rename from tests/entrypoints/openai/tool_parsers/test_llama3_json_tool_parser.py rename to tests/entrypoints/unit/tool_parsers/test_llama3_json_tool_parser.py diff --git a/tests/entrypoints/openai/tool_parsers/test_llama4_pythonic_tool_parser.py b/tests/entrypoints/unit/tool_parsers/test_llama4_pythonic_tool_parser.py similarity index 100% rename from tests/entrypoints/openai/tool_parsers/test_llama4_pythonic_tool_parser.py rename to tests/entrypoints/unit/tool_parsers/test_llama4_pythonic_tool_parser.py diff --git a/tests/entrypoints/openai/tool_parsers/test_olmo3_tool_parser.py b/tests/entrypoints/unit/tool_parsers/test_olmo3_tool_parser.py similarity index 100% rename from tests/entrypoints/openai/tool_parsers/test_olmo3_tool_parser.py rename to tests/entrypoints/unit/tool_parsers/test_olmo3_tool_parser.py diff --git a/tests/entrypoints/openai/tool_parsers/test_pythonic_tool_parser.py b/tests/entrypoints/unit/tool_parsers/test_pythonic_tool_parser.py similarity index 100% rename from tests/entrypoints/openai/tool_parsers/test_pythonic_tool_parser.py rename to tests/entrypoints/unit/tool_parsers/test_pythonic_tool_parser.py diff --git a/tests/entrypoints/openai/tool_parsers/utils.py b/tests/entrypoints/unit/tool_parsers/utils.py similarity index 100% rename from tests/entrypoints/openai/tool_parsers/utils.py rename to tests/entrypoints/unit/tool_parsers/utils.py From 04e0269c5e119b097b9493d2141a837e4c5aa955 Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: Wed, 29 Oct 2025 15:47:10 +0100 Subject: [PATCH 2/4] Remove unused import Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- tests/entrypoints/llm/test_collective_rpc.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/entrypoints/llm/test_collective_rpc.py 
b/tests/entrypoints/llm/test_collective_rpc.py index 6329542cc443..d9bf99cb8d35 100644 --- a/tests/entrypoints/llm/test_collective_rpc.py +++ b/tests/entrypoints/llm/test_collective_rpc.py @@ -2,7 +2,6 @@ # SPDX-FileCopyrightText: Copyright contributors to the vLLM project import pytest -import torch from vllm import LLM From fe584b35a5c9d25a80214d73c12b90e3bce7b9f9 Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: Wed, 29 Oct 2025 16:27:30 +0100 Subject: [PATCH 3/4] Move the non unit tests to `tool_use` Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- .../tool_parsers/test_hermes_tool_parser.py | 261 ----------------- tests/tool_use/test_hermes_tool_parser.py | 265 ++++++++++++++++++ 2 files changed, 265 insertions(+), 261 deletions(-) create mode 100644 tests/tool_use/test_hermes_tool_parser.py diff --git a/tests/entrypoints/unit/tool_parsers/test_hermes_tool_parser.py b/tests/entrypoints/unit/tool_parsers/test_hermes_tool_parser.py index 38008dafe32b..14c9db4adbae 100644 --- a/tests/entrypoints/unit/tool_parsers/test_hermes_tool_parser.py +++ b/tests/entrypoints/unit/tool_parsers/test_hermes_tool_parser.py @@ -1,273 +1,12 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project -import json - import pytest from vllm.entrypoints.openai.protocol import ChatCompletionRequest from vllm.entrypoints.openai.tool_parsers.hermes_tool_parser import Hermes2ProToolParser from vllm.transformers_utils.tokenizer import AnyTokenizer -from ....utils import RemoteOpenAIServer - -MODEL_NAME = "meta-llama/Llama-3.2-1B-Instruct" -LORA_MODEL = "minpeter/LoRA-Llama-3.2-1B-tool-vllm-ci" - -SERVER_ARGS = [ - "--enforce-eager", - "--enable-auto-tool-choice", - "--tool-call-parser", - "hermes", - "--enable-lora", - "--lora-modules", - f"{LORA_MODEL}={LORA_MODEL}", - "--tokenizer", - f"{LORA_MODEL}", -] - -TOOLS = [ - { - "type": "function", - "function": { - "name": 
"get_current_weather", - "description": "Get the current weather in a given location", - "parameters": { - "type": "object", - "properties": { - "location": { - "type": "string", - "description": "The city and state, e.g. San Francisco, CA", - }, - "unit": { - "type": "string", - "enum": ["celsius", "fahrenheit"], - }, - }, - "required": ["location"], - }, - }, - } -] - -PRODUCT_TOOLS = [ - { - "type": "function", - "function": { - "name": "get_product_info", - "description": "Get detailed information of a product based on its " - "product ID.", - "parameters": { - "type": "object", - "properties": { - "inserted": { - "type": "boolean", - "description": "inserted.", - }, - "product_id": { - "type": "integer", - "description": "The product ID of the product.", - }, - }, - "required": ["product_id", "inserted"], - }, - }, - } -] - -MESSAGES = [{"role": "user", "content": "What's the weather like in Boston?"}] - -PRODUCT_MESSAGES = [ - { - "role": "user", - "content": "Hi! Do you have any detailed information about the product id " - "7355608 and inserted true?", - } -] - - -@pytest.mark.asyncio -async def test_non_streaming_tool_call(): - """Test tool call in non-streaming mode.""" - with RemoteOpenAIServer(MODEL_NAME, SERVER_ARGS) as server: - client = server.get_async_client() - - response = await client.chat.completions.create( - model=LORA_MODEL, - messages=MESSAGES, - tools=TOOLS, - tool_choice="auto", - temperature=0.0, - ) - - assert response.choices - choice = response.choices[0] - message = choice.message - - assert choice.finish_reason == "tool_calls" - assert message.tool_calls is not None - - tool_call = message.tool_calls[0] - assert tool_call.type == "function" - assert tool_call.function.name == "get_current_weather" - - arguments = json.loads(tool_call.function.arguments) - assert "location" in arguments - assert "Boston" in arguments["location"] - print("\n[Non-Streaming Test Passed]") - print(f"Tool Call: {tool_call.function.name}") - 
print(f"Arguments: {arguments}") - - -@pytest.mark.asyncio -async def test_streaming_tool_call(): - """Test tool call in streaming mode.""" - with RemoteOpenAIServer(MODEL_NAME, SERVER_ARGS) as server: - client = server.get_async_client() - - stream = await client.chat.completions.create( - model=LORA_MODEL, - messages=MESSAGES, - tools=TOOLS, - tool_choice="auto", - temperature=0.0, - stream=True, - ) - - tool_call_chunks = {} - async for chunk in stream: - if not chunk.choices: - continue - - delta = chunk.choices[0].delta - if not delta or not delta.tool_calls: - continue - - for tool_chunk in delta.tool_calls: - index = tool_chunk.index - if index not in tool_call_chunks: - tool_call_chunks[index] = {"name": "", "arguments": ""} - - if tool_chunk.function.name: - tool_call_chunks[index]["name"] += tool_chunk.function.name - if tool_chunk.function.arguments: - tool_call_chunks[index]["arguments"] += ( - tool_chunk.function.arguments - ) - - assert len(tool_call_chunks) == 1 - reconstructed_tool_call = tool_call_chunks[0] - - assert reconstructed_tool_call["name"] == "get_current_weather" - - arguments = json.loads(reconstructed_tool_call["arguments"]) - assert "location" in arguments - assert "Boston" in arguments["location"] - print("\n[Streaming Test Passed]") - print(f"Reconstructed Tool Call: {reconstructed_tool_call['name']}") - print(f"Reconstructed Arguments: {arguments}") - - -@pytest.mark.asyncio -async def test_non_streaming_product_tool_call(): - """Test tool call integer and boolean parameters in non-streaming mode.""" - with RemoteOpenAIServer(MODEL_NAME, SERVER_ARGS) as server: - client = server.get_async_client() - - response = await client.chat.completions.create( - model=LORA_MODEL, - messages=PRODUCT_MESSAGES, - tools=PRODUCT_TOOLS, - tool_choice="auto", - temperature=0.66, - ) - - assert response.choices - choice = response.choices[0] - message = choice.message - - assert choice.finish_reason == "tool_calls" - assert message.tool_calls is not 
None - - tool_call = message.tool_calls[0] - assert tool_call.type == "function" - assert tool_call.function.name == "get_product_info" - - arguments = json.loads(tool_call.function.arguments) - assert "product_id" in arguments - assert "inserted" in arguments - - product_id = arguments.get("product_id") - inserted = arguments.get("inserted") - - assert isinstance(product_id, int) - assert product_id == 7355608 - assert isinstance(inserted, bool) - assert inserted is True - - print("\n[Non-Streaming Product Test Passed]") - print(f"Tool Call: {tool_call.function.name}") - print(f"Arguments: {arguments}") - - -@pytest.mark.asyncio -async def test_streaming_product_tool_call(): - """Test tool call integer and boolean parameters in streaming mode.""" - with RemoteOpenAIServer(MODEL_NAME, SERVER_ARGS) as server: - client = server.get_async_client() - - stream = await client.chat.completions.create( - model=LORA_MODEL, - messages=PRODUCT_MESSAGES, - tools=PRODUCT_TOOLS, - tool_choice="auto", - temperature=0.66, - stream=True, - ) - - tool_call_chunks = {} - async for chunk in stream: - if not chunk.choices: - continue - - delta = chunk.choices[0].delta - if not delta or not delta.tool_calls: - continue - - for tool_chunk in delta.tool_calls: - index = tool_chunk.index - if index not in tool_call_chunks: - tool_call_chunks[index] = {"name": "", "arguments": ""} - - if tool_chunk.function.name: - tool_call_chunks[index]["name"] += tool_chunk.function.name - if tool_chunk.function.arguments: - tool_call_chunks[index]["arguments"] += ( - tool_chunk.function.arguments - ) - - assert len(tool_call_chunks) == 1 - reconstructed_tool_call = tool_call_chunks[0] - - assert reconstructed_tool_call["name"] == "get_product_info" - - arguments = json.loads(reconstructed_tool_call["arguments"]) - assert "product_id" in arguments - assert "inserted" in arguments - - # Handle type coercion for streaming test as well - product_id = arguments.get("product_id") - inserted = 
arguments.get("inserted") - - assert isinstance(product_id, int) - assert product_id == 7355608 - assert isinstance(inserted, bool) - assert inserted is True - - print("\n[Streaming Product Test Passed]") - print(f"Reconstructed Tool Call: {reconstructed_tool_call['name']}") - print(f"Reconstructed Arguments: {arguments}") - @pytest.fixture def qwen_tokenizer() -> AnyTokenizer: diff --git a/tests/tool_use/test_hermes_tool_parser.py b/tests/tool_use/test_hermes_tool_parser.py new file mode 100644 index 000000000000..af103c7d06f6 --- /dev/null +++ b/tests/tool_use/test_hermes_tool_parser.py @@ -0,0 +1,265 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project + +import json + +import pytest + +from .utils import RemoteOpenAIServer + +MODEL_NAME = "meta-llama/Llama-3.2-1B-Instruct" +LORA_MODEL = "minpeter/LoRA-Llama-3.2-1B-tool-vllm-ci" + +SERVER_ARGS = [ + "--enforce-eager", + "--enable-auto-tool-choice", + "--tool-call-parser", + "hermes", + "--enable-lora", + "--lora-modules", + f"{LORA_MODEL}={LORA_MODEL}", + "--tokenizer", + f"{LORA_MODEL}", +] + +TOOLS = [ + { + "type": "function", + "function": { + "name": "get_current_weather", + "description": "Get the current weather in a given location", + "parameters": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "The city and state, e.g. 
San Francisco, CA", + }, + "unit": { + "type": "string", + "enum": ["celsius", "fahrenheit"], + }, + }, + "required": ["location"], + }, + }, + } +] + +PRODUCT_TOOLS = [ + { + "type": "function", + "function": { + "name": "get_product_info", + "description": "Get detailed information of a product based on its " + "product ID.", + "parameters": { + "type": "object", + "properties": { + "inserted": { + "type": "boolean", + "description": "inserted.", + }, + "product_id": { + "type": "integer", + "description": "The product ID of the product.", + }, + }, + "required": ["product_id", "inserted"], + }, + }, + } +] + +MESSAGES = [{"role": "user", "content": "What's the weather like in Boston?"}] + +PRODUCT_MESSAGES = [ + { + "role": "user", + "content": "Hi! Do you have any detailed information about the product id " + "7355608 and inserted true?", + } +] + + +@pytest.mark.asyncio +async def test_non_streaming_tool_call(): + """Test tool call in non-streaming mode.""" + with RemoteOpenAIServer(MODEL_NAME, SERVER_ARGS) as server: + client = server.get_async_client() + + response = await client.chat.completions.create( + model=LORA_MODEL, + messages=MESSAGES, + tools=TOOLS, + tool_choice="auto", + temperature=0.0, + ) + + assert response.choices + choice = response.choices[0] + message = choice.message + + assert choice.finish_reason == "tool_calls" + assert message.tool_calls is not None + + tool_call = message.tool_calls[0] + assert tool_call.type == "function" + assert tool_call.function.name == "get_current_weather" + + arguments = json.loads(tool_call.function.arguments) + assert "location" in arguments + assert "Boston" in arguments["location"] + print("\n[Non-Streaming Test Passed]") + print(f"Tool Call: {tool_call.function.name}") + print(f"Arguments: {arguments}") + + +@pytest.mark.asyncio +async def test_streaming_tool_call(): + """Test tool call in streaming mode.""" + with RemoteOpenAIServer(MODEL_NAME, SERVER_ARGS) as server: + client = 
server.get_async_client() + + stream = await client.chat.completions.create( + model=LORA_MODEL, + messages=MESSAGES, + tools=TOOLS, + tool_choice="auto", + temperature=0.0, + stream=True, + ) + + tool_call_chunks = {} + async for chunk in stream: + if not chunk.choices: + continue + + delta = chunk.choices[0].delta + if not delta or not delta.tool_calls: + continue + + for tool_chunk in delta.tool_calls: + index = tool_chunk.index + if index not in tool_call_chunks: + tool_call_chunks[index] = {"name": "", "arguments": ""} + + if tool_chunk.function.name: + tool_call_chunks[index]["name"] += tool_chunk.function.name + if tool_chunk.function.arguments: + tool_call_chunks[index]["arguments"] += ( + tool_chunk.function.arguments + ) + + assert len(tool_call_chunks) == 1 + reconstructed_tool_call = tool_call_chunks[0] + + assert reconstructed_tool_call["name"] == "get_current_weather" + + arguments = json.loads(reconstructed_tool_call["arguments"]) + assert "location" in arguments + assert "Boston" in arguments["location"] + print("\n[Streaming Test Passed]") + print(f"Reconstructed Tool Call: {reconstructed_tool_call['name']}") + print(f"Reconstructed Arguments: {arguments}") + + +@pytest.mark.asyncio +async def test_non_streaming_product_tool_call(): + """Test tool call integer and boolean parameters in non-streaming mode.""" + with RemoteOpenAIServer(MODEL_NAME, SERVER_ARGS) as server: + client = server.get_async_client() + + response = await client.chat.completions.create( + model=LORA_MODEL, + messages=PRODUCT_MESSAGES, + tools=PRODUCT_TOOLS, + tool_choice="auto", + temperature=0.66, + ) + + assert response.choices + choice = response.choices[0] + message = choice.message + + assert choice.finish_reason == "tool_calls" + assert message.tool_calls is not None + + tool_call = message.tool_calls[0] + assert tool_call.type == "function" + assert tool_call.function.name == "get_product_info" + + arguments = json.loads(tool_call.function.arguments) + assert 
"product_id" in arguments + assert "inserted" in arguments + + product_id = arguments.get("product_id") + inserted = arguments.get("inserted") + + assert isinstance(product_id, int) + assert product_id == 7355608 + assert isinstance(inserted, bool) + assert inserted is True + + print("\n[Non-Streaming Product Test Passed]") + print(f"Tool Call: {tool_call.function.name}") + print(f"Arguments: {arguments}") + + +@pytest.mark.asyncio +async def test_streaming_product_tool_call(): + """Test tool call integer and boolean parameters in streaming mode.""" + with RemoteOpenAIServer(MODEL_NAME, SERVER_ARGS) as server: + client = server.get_async_client() + + stream = await client.chat.completions.create( + model=LORA_MODEL, + messages=PRODUCT_MESSAGES, + tools=PRODUCT_TOOLS, + tool_choice="auto", + temperature=0.66, + stream=True, + ) + + tool_call_chunks = {} + async for chunk in stream: + if not chunk.choices: + continue + + delta = chunk.choices[0].delta + if not delta or not delta.tool_calls: + continue + + for tool_chunk in delta.tool_calls: + index = tool_chunk.index + if index not in tool_call_chunks: + tool_call_chunks[index] = {"name": "", "arguments": ""} + + if tool_chunk.function.name: + tool_call_chunks[index]["name"] += tool_chunk.function.name + if tool_chunk.function.arguments: + tool_call_chunks[index]["arguments"] += ( + tool_chunk.function.arguments + ) + + assert len(tool_call_chunks) == 1 + reconstructed_tool_call = tool_call_chunks[0] + + assert reconstructed_tool_call["name"] == "get_product_info" + + arguments = json.loads(reconstructed_tool_call["arguments"]) + assert "product_id" in arguments + assert "inserted" in arguments + + # Handle type coercion for streaming test as well + product_id = arguments.get("product_id") + inserted = arguments.get("inserted") + + assert isinstance(product_id, int) + assert product_id == 7355608 + assert isinstance(inserted, bool) + assert inserted is True + + print("\n[Streaming Product Test Passed]") + 
print(f"Reconstructed Tool Call: {reconstructed_tool_call['name']}") + print(f"Reconstructed Arguments: {arguments}") From 183ff2eda1ff274a21580d767f3e40e39be91407 Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: Wed, 29 Oct 2025 16:42:50 +0100 Subject: [PATCH 4/4] import from the correct utils Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- tests/tool_use/test_hermes_tool_parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/tool_use/test_hermes_tool_parser.py b/tests/tool_use/test_hermes_tool_parser.py index af103c7d06f6..e396ab5d8dbb 100644 --- a/tests/tool_use/test_hermes_tool_parser.py +++ b/tests/tool_use/test_hermes_tool_parser.py @@ -5,7 +5,7 @@ import pytest -from .utils import RemoteOpenAIServer +from ..utils import RemoteOpenAIServer MODEL_NAME = "meta-llama/Llama-3.2-1B-Instruct" LORA_MODEL = "minpeter/LoRA-Llama-3.2-1B-tool-vllm-ci"