From 32bfefa9053c311b59e9ba6cdf82884f1f19a180 Mon Sep 17 00:00:00 2001 From: chaunceyjiang Date: Tue, 17 Mar 2026 14:26:31 +0800 Subject: [PATCH 1/3] [Bugfix][ResponsesAPI] Fix crash when tool_choice=required exceeds max_output_tokens Signed-off-by: chaunceyjiang --- .../openai/responses/test_function_call.py | 27 +++++++++++++++++++ vllm/parser/abstract_parser.py | 23 +++++++++------- 2 files changed, 41 insertions(+), 9 deletions(-) diff --git a/tests/entrypoints/openai/responses/test_function_call.py b/tests/entrypoints/openai/responses/test_function_call.py index 0b8a2e6499d3..ed580e4bd9fa 100644 --- a/tests/entrypoints/openai/responses/test_function_call.py +++ b/tests/entrypoints/openai/responses/test_function_call.py @@ -134,6 +134,33 @@ async def test_function_tool_use( assert reasoning.type == "reasoning" +@pytest.mark.asyncio +@pytest.mark.parametrize("model_name", [MODEL_NAME]) +async def test_max_tokens_with_tool_choice_required( + client: openai.AsyncOpenAI, model_name: str +): + prompt = [ + { + "role": "user", + "content": "Can you tell me what the current weather is in Berlin and the " + "forecast for the next 5 days, in fahrenheit?", + }, + ] + response = await client.responses.create( + model=model_name, + input=prompt, + tools=tools, + tool_choice="required", + max_output_tokens=1, + ) + assert len(response.output) >= 1 + for out in response.output: + # When `tool_choice="required"` and the tokens of `tools` + # exceed `max_output_tokens`,`function_call` should be empty. + # This behavior should be consistent with OpenAI + assert out.type != "function_call" + + @pytest.mark.asyncio async def test_named_tool_use(client: openai.AsyncOpenAI): def get_weather(latitude: float, longitude: float) -> str: diff --git a/vllm/parser/abstract_parser.py b/vllm/parser/abstract_parser.py index aa145bab2121..0c1dda17b6a3 100644 --- a/vllm/parser/abstract_parser.py +++ b/vllm/parser/abstract_parser.py @@ -1,6 +1,7 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project +import contextlib import json from abc import abstractmethod from collections.abc import Sequence @@ -18,7 +19,7 @@ from openai.types.responses.response_reasoning_item import ( Content as ResponseReasoningTextContent, ) -from pydantic import TypeAdapter +from pydantic import TypeAdapter, ValidationError from vllm.entrypoints.chat_utils import make_tool_call_id from vllm.entrypoints.openai.chat_completion.protocol import ( @@ -422,15 +423,19 @@ def _parse_tool_calls( if request.tool_choice == "required": # Required tool calls - parse JSON - assert content is not None - tool_calls = TypeAdapter(list[FunctionDefinition]).validate_json(content) - function_calls.extend( - FunctionCall( - name=tool_call.name, - arguments=json.dumps(tool_call.parameters, ensure_ascii=False), + tool_calls = [] + with contextlib.suppress(ValidationError): + content = content or "" + tool_calls = TypeAdapter(list[FunctionDefinition]).validate_json( + content + ) + for tool_call in tool_calls: + function_calls.append( + FunctionCall( + name=tool_call.name, + arguments=json.dumps(tool_call.parameters, ensure_ascii=False), + ) ) - for tool_call in tool_calls - ) return function_calls, None # Clear content since tool is called. if ( From 3af2655ceaf2d28d5f7aedfc6475e3b6bcc4a41a Mon Sep 17 00:00:00 2001 From: chaunceyjiang Date: Tue, 17 Mar 2026 14:32:13 +0800 Subject: [PATCH 2/3] [Bugfix][ResponsesAPI] Fix crash when tool_choice=required exceeds max_output_tokens Signed-off-by: chaunceyjiang --- tests/entrypoints/openai/responses/test_function_call.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/entrypoints/openai/responses/test_function_call.py b/tests/entrypoints/openai/responses/test_function_call.py index ed580e4bd9fa..696b4c15db65 100644 --- a/tests/entrypoints/openai/responses/test_function_call.py +++ b/tests/entrypoints/openai/responses/test_function_call.py @@ -159,6 +159,7 @@ async def test_max_tokens_with_tool_choice_required( # exceed `max_output_tokens`,`function_call` should be empty. # This behavior should be consistent with OpenAI assert out.type != "function_call" + assert response.incomplete_details.reason == "max_output_tokens" @pytest.mark.asyncio From b663a2a16ad4e6a75f8384397e378635fc473442 Mon Sep 17 00:00:00 2001 From: chaunceyjiang Date: Tue, 17 Mar 2026 14:43:01 +0800 Subject: [PATCH 3/3] [Bugfix][ResponsesAPI] Fix crash when tool_choice=required exceeds max_output_tokens Signed-off-by: chaunceyjiang --- tests/entrypoints/openai/responses/test_function_call.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/entrypoints/openai/responses/test_function_call.py b/tests/entrypoints/openai/responses/test_function_call.py index 696b4c15db65..36627f92d7d7 100644 --- a/tests/entrypoints/openai/responses/test_function_call.py +++ b/tests/entrypoints/openai/responses/test_function_call.py @@ -151,7 +151,7 @@ async def test_max_tokens_with_tool_choice_required( input=prompt, tools=tools, tool_choice="required", - max_output_tokens=1, + max_output_tokens=10, ) assert len(response.output) >= 1 for out in response.output: