From 32bfefa9053c311b59e9ba6cdf82884f1f19a180 Mon Sep 17 00:00:00 2001
From: chaunceyjiang <chaunceyjiang@gmail.com>
Date: Tue, 17 Mar 2026 14:26:31 +0800
Subject: [PATCH 1/3] [Bugfix][ResponsesAPI] Fix crash when
 tool_choice=required output is truncated by max_output_tokens

Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com>
---
 .../openai/responses/test_function_call.py    | 27 +++++++++++++++++++
 vllm/parser/abstract_parser.py                | 23 +++++++++-------
 2 files changed, 41 insertions(+), 9 deletions(-)

diff --git a/tests/entrypoints/openai/responses/test_function_call.py b/tests/entrypoints/openai/responses/test_function_call.py
index 0b8a2e6499d3..ed580e4bd9fa 100644
--- a/tests/entrypoints/openai/responses/test_function_call.py
+++ b/tests/entrypoints/openai/responses/test_function_call.py
@@ -134,6 +134,33 @@ async def test_function_tool_use(
     assert reasoning.type == "reasoning"
 
 
+@pytest.mark.asyncio
+@pytest.mark.parametrize("model_name", [MODEL_NAME])
+async def test_max_tokens_with_tool_choice_required(
+    client: openai.AsyncOpenAI, model_name: str
+):
+    prompt = [
+        {
+            "role": "user",
+            "content": "Can you tell me what the current weather is in Berlin and the "
+            "forecast for the next 5 days, in fahrenheit?",
+        },
+    ]
+    response = await client.responses.create(
+        model=model_name,
+        input=prompt,
+        tools=tools,
+        tool_choice="required",
+        max_output_tokens=1,
+    )
+    assert len(response.output) >= 1
+    for out in response.output:
+        # When `tool_choice="required"` and the tokens of `tools`
+        # exceed `max_output_tokens`,`function_call` should be empty.
+        # This behavior should be consistent with OpenAI
+        assert out.type != "function_call"
+
+
 @pytest.mark.asyncio
 async def test_named_tool_use(client: openai.AsyncOpenAI):
     def get_weather(latitude: float, longitude: float) -> str:
diff --git a/vllm/parser/abstract_parser.py b/vllm/parser/abstract_parser.py
index aa145bab2121..0c1dda17b6a3 100644
--- a/vllm/parser/abstract_parser.py
+++ b/vllm/parser/abstract_parser.py
@@ -1,6 +1,7 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
+import contextlib
 import json
 from abc import abstractmethod
 from collections.abc import Sequence
@@ -18,7 +19,7 @@
 from openai.types.responses.response_reasoning_item import (
     Content as ResponseReasoningTextContent,
 )
-from pydantic import TypeAdapter
+from pydantic import TypeAdapter, ValidationError
 
 from vllm.entrypoints.chat_utils import make_tool_call_id
 from vllm.entrypoints.openai.chat_completion.protocol import (
@@ -422,15 +423,19 @@ def _parse_tool_calls(
 
         if request.tool_choice == "required":
             # Required tool calls - parse JSON
-            assert content is not None
-            tool_calls = TypeAdapter(list[FunctionDefinition]).validate_json(content)
-            function_calls.extend(
-                FunctionCall(
-                    name=tool_call.name,
-                    arguments=json.dumps(tool_call.parameters, ensure_ascii=False),
+            tool_calls = []
+            with contextlib.suppress(ValidationError):
+                content = content or ""
+                tool_calls = TypeAdapter(list[FunctionDefinition]).validate_json(
+                    content
+                )
+            for tool_call in tool_calls:
+                function_calls.append(
+                    FunctionCall(
+                        name=tool_call.name,
+                        arguments=json.dumps(tool_call.parameters, ensure_ascii=False),
+                    )
                 )
-                for tool_call in tool_calls
-            )
             return function_calls, None  # Clear content since tool is called.
 
         if (

From 3af2655ceaf2d28d5f7aedfc6475e3b6bcc4a41a Mon Sep 17 00:00:00 2001
From: chaunceyjiang <chaunceyjiang@gmail.com>
Date: Tue, 17 Mar 2026 14:32:13 +0800
Subject: [PATCH 2/3] [Bugfix][ResponsesAPI] Test: assert incomplete reason
 is max_output_tokens when tool_choice=required output is truncated

Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com>
---
 tests/entrypoints/openai/responses/test_function_call.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/entrypoints/openai/responses/test_function_call.py b/tests/entrypoints/openai/responses/test_function_call.py
index ed580e4bd9fa..696b4c15db65 100644
--- a/tests/entrypoints/openai/responses/test_function_call.py
+++ b/tests/entrypoints/openai/responses/test_function_call.py
@@ -159,6 +159,7 @@ async def test_max_tokens_with_tool_choice_required(
         # exceed `max_output_tokens`,`function_call` should be empty.
         # This behavior should be consistent with OpenAI
         assert out.type != "function_call"
+    assert response.incomplete_details.reason == "max_output_tokens"
 
 
 @pytest.mark.asyncio

From b663a2a16ad4e6a75f8384397e378635fc473442 Mon Sep 17 00:00:00 2001
From: chaunceyjiang <chaunceyjiang@gmail.com>
Date: Tue, 17 Mar 2026 14:43:01 +0800
Subject: [PATCH 3/3] [Bugfix][ResponsesAPI] Test: raise max_output_tokens
 from 1 to 10 in the tool_choice=required truncation test

Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com>
---
 tests/entrypoints/openai/responses/test_function_call.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/entrypoints/openai/responses/test_function_call.py b/tests/entrypoints/openai/responses/test_function_call.py
index 696b4c15db65..36627f92d7d7 100644
--- a/tests/entrypoints/openai/responses/test_function_call.py
+++ b/tests/entrypoints/openai/responses/test_function_call.py
@@ -151,7 +151,7 @@ async def test_max_tokens_with_tool_choice_required(
         input=prompt,
         tools=tools,
         tool_choice="required",
-        max_output_tokens=1,
+        max_output_tokens=10,
     )
     assert len(response.output) >= 1
     for out in response.output: