Skip to content

Commit d3aa127

Browse files
chaunceyjiang and khairulkabir1661
authored and committed
[Bugfix][ResponsesAPI] Fix crash when tool_choice=required exceeds max_output_tokens (vllm-project#37258)
Signed-off-by: chaunceyjiang <[email protected]>
1 parent 15aa6ff commit d3aa127

2 files changed

Lines changed: 42 additions & 9 deletions

File tree

tests/entrypoints/openai/responses/test_function_call.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,34 @@ async def test_function_tool_use(
134134
assert reasoning.type == "reasoning"
135135

136136

137+
@pytest.mark.asyncio
138+
@pytest.mark.parametrize("model_name", [MODEL_NAME])
139+
async def test_max_tokens_with_tool_choice_required(
140+
client: openai.AsyncOpenAI, model_name: str
141+
):
142+
prompt = [
143+
{
144+
"role": "user",
145+
"content": "Can you tell me what the current weather is in Berlin and the "
146+
"forecast for the next 5 days, in fahrenheit?",
147+
},
148+
]
149+
response = await client.responses.create(
150+
model=model_name,
151+
input=prompt,
152+
tools=tools,
153+
tool_choice="required",
154+
max_output_tokens=10,
155+
)
156+
assert len(response.output) >= 1
157+
for out in response.output:
158+
# When `tool_choice="required"` and the tokens of `tools`
159+
# exceed `max_output_tokens`,`function_call` should be empty.
160+
# This behavior should be consistent with OpenAI
161+
assert out.type != "function_call"
162+
assert response.incomplete_details.reason == "max_output_tokens"
163+
164+
137165
@pytest.mark.asyncio
138166
async def test_named_tool_use(client: openai.AsyncOpenAI):
139167
def get_weather(latitude: float, longitude: float) -> str:

vllm/parser/abstract_parser.py

Lines changed: 14 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
# SPDX-License-Identifier: Apache-2.0
22
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
33

4+
import contextlib
45
import json
56
from abc import abstractmethod
67
from collections.abc import Sequence
@@ -18,7 +19,7 @@
1819
from openai.types.responses.response_reasoning_item import (
1920
Content as ResponseReasoningTextContent,
2021
)
21-
from pydantic import TypeAdapter
22+
from pydantic import TypeAdapter, ValidationError
2223

2324
from vllm.entrypoints.chat_utils import make_tool_call_id
2425
from vllm.entrypoints.openai.chat_completion.protocol import (
@@ -422,15 +423,19 @@ def _parse_tool_calls(
422423

423424
if request.tool_choice == "required":
424425
# Required tool calls - parse JSON
425-
assert content is not None
426-
tool_calls = TypeAdapter(list[FunctionDefinition]).validate_json(content)
427-
function_calls.extend(
428-
FunctionCall(
429-
name=tool_call.name,
430-
arguments=json.dumps(tool_call.parameters, ensure_ascii=False),
426+
tool_calls = []
427+
with contextlib.suppress(ValidationError):
428+
content = content or ""
429+
tool_calls = TypeAdapter(list[FunctionDefinition]).validate_json(
430+
content
431+
)
432+
for tool_call in tool_calls:
433+
function_calls.append(
434+
FunctionCall(
435+
name=tool_call.name,
436+
arguments=json.dumps(tool_call.parameters, ensure_ascii=False),
437+
)
431438
)
432-
for tool_call in tool_calls
433-
)
434439
return function_calls, None # Clear content since tool is called.
435440

436441
if (

0 commit comments

Comments (0)