[Frontend] Align finish_reason with OpenAI when a tool is called (#25054)

n0gu-furiosa · chaunceyjiang · web-flow · commit 470ad118b623 · 2025-11-03T04:21:18.000Z
Signed-off-by: Sungyoon Jeong <sungyoon.jeong@furiosa.ai>
Co-authored-by: Chauncey <chaunceyjiang@gmail.com>
diff --git a/vllm/entrypoints/openai/serving_chat.py b/vllm/entrypoints/openai/serving_chat.py
@@ -1170,9 +1170,13 @@ async def chat_completion_stream_generator(
                             )
 
                         # Send the finish response for each request.n only once
+                        # In OpenAI's API, when a tool is called, the
+                        # finish_reason is:
+                        # "tool_calls" for "auto" or "required" tool calls,
+                        # and "stop" for named tool calls.
                         if (
                             auto_tools_called
-                            or tools_streamed[i]
+                            or (tools_streamed[i] and not tool_choice_function_name)
                             or (self.use_harmony and harmony_tools_streamed[i])
                         ):
                             finish_reason_ = "tool_calls"
@@ -1523,18 +1527,24 @@ async def chat_completion_full_generator(
                 message = ChatMessage(
                     role=role, reasoning_content=reasoning_content, content=content
                 )
+            # In OpenAI's API, when a tool is called, the finish_reason is:
+            # "tool_calls" for "auto" or "required" tool calls,
+            # and "stop" for named tool calls.
+            is_finish_reason_tool_calls = auto_tools_called or (
+                request.tool_choice
+                and request.tool_choice == "required"
+                and output.finish_reason == "stop"
+            )
 
             choice_data = ChatCompletionResponseChoice(
                 index=output.index,
                 message=message,
                 logprobs=logprobs,
-                finish_reason=(
-                    "tool_calls"
-                    if auto_tools_called
-                    else output.finish_reason
-                    if output.finish_reason
-                    else "stop"
-                ),
+                finish_reason="tool_calls"
+                if is_finish_reason_tool_calls
+                else output.finish_reason
+                if output.finish_reason
+                else "stop",
                 stop_reason=output.stop_reason,
                 token_ids=(
                     as_list(output.token_ids) if request.return_token_ids else None