From 6623be7ac6ea444c3498c4585e7199f4f2d3f3c9 Mon Sep 17 00:00:00 2001
From: Travis Johnson
Date: Thu, 3 Oct 2024 11:26:35 -0600
Subject: [PATCH] fix: do not use existence of tool_parser as proxy for
 tool_choice_auto

Signed-off-by: Travis Johnson
---
 vllm/entrypoints/openai/serving_chat.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/vllm/entrypoints/openai/serving_chat.py b/vllm/entrypoints/openai/serving_chat.py
index 41f131f56b51..1ef3471240b8 100644
--- a/vllm/entrypoints/openai/serving_chat.py
+++ b/vllm/entrypoints/openai/serving_chat.py
@@ -505,15 +505,18 @@ async def chat_completion_stream_generator(
                         # any tokens that were generated but previously
                         # matched by partial json parsing
                         # only happens if we are NOT using guided decoding
-                        if tool_parser:
+                        if tool_choice_auto:
+                            assert tool_parser is not None
                             index = len(
                                 tool_parser.prev_tool_call_arr) - 1 if len(
                                     tool_parser.prev_tool_call_arr) > 0 else 0
                         else:
                             index = 0
 
-                    if self._should_check_for_unstreamed_tool_arg_tokens(
-                            delta_message, output) and tool_parser:
+                    if tool_choice_auto and \
+                        self._should_check_for_unstreamed_tool_arg_tokens(
+                            delta_message, output):
+                        assert tool_parser is not None
                         # get the expected call based on partial JSON
                         # parsing which "autocompletes" the JSON
                         expected_call = json.dumps(