Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 8 additions & 12 deletions vllm_omni/entrypoints/openai/serving_chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -286,8 +286,7 @@ async def create_chat_completion(

generators.append(generator)
except ValueError as e:
# TODO: Use a vllm-specific Validation Error
return self.create_error_response(str(e))
return self.create_error_response(e)
Copy link

Copilot AI Jan 31, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Inconsistent error handling: While this PR updates some error handlers to pass exception objects directly (lines 289, 317, 1287, 1312), there are similar error handlers in the same file that still use str(e):

  • Line 634: self.create_streaming_error_response(str(e)) for RuntimeError
  • Line 646: self.create_streaming_error_response(str(e)) for Exception
  • Line 1485: self.create_error_response(str(e)) for RuntimeError
  • Line 1570: self.create_error_response(str(e)) for RuntimeError

For consistency with the upstream vLLM v0.14.0 alignment mentioned in the PR description, these should also be updated to pass the exception object directly (removing str() wrapper).

Copilot uses AI. Check for mistakes.
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Contributor Author

@ceanna93 ceanna93 Feb 1, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@hsliuustc0106 Thanks for letting me know! I've updated the remaining call sites in commit a85dad9.


assert len(generators) == 1
(result_generator,) = generators
Expand Down Expand Up @@ -315,8 +314,7 @@ async def create_chat_completion(
request_metadata,
)
except ValueError as e:
# TODO: Use a vllm-specific Validation Error
return self.create_error_response(str(e))
return self.create_error_response(e)

async def _preprocess_chat(
self,
Expand Down Expand Up @@ -633,7 +631,7 @@ async def chat_completion_stream_generator(
)
except RuntimeError as e:
logger.exception("Error in reasoning parser creation.")
data = self.create_streaming_error_response(str(e))
data = self.create_streaming_error_response(e)
yield f"data: {data}\n\n"
yield "data: [DONE]\n\n"
return
Expand All @@ -645,7 +643,7 @@ async def chat_completion_stream_generator(
tool_parsers = [None] * num_choices
except Exception as e:
logger.exception("Error in tool parser creation.")
data = self.create_streaming_error_response(str(e))
data = self.create_streaming_error_response(e)
yield f"data: {data}\n\n"
yield "data: [DONE]\n\n"
return
Expand Down Expand Up @@ -1285,9 +1283,8 @@ async def chat_completion_stream_generator(
)

except Exception as e:
# TODO: Use a vllm-specific Validation Error
logger.exception("Error in chat completion stream generator.")
data = self.create_streaming_error_response(str(e))
data = self.create_streaming_error_response(e)
yield f"data: {data}\n\n"
# Send the final done message after all response.n are finished
yield "data: [DONE]\n\n"
Expand All @@ -1312,8 +1309,7 @@ async def chat_completion_full_generator(
except asyncio.CancelledError:
return self.create_error_response("Client disconnected")
except ValueError as e:
# TODO: Use a vllm-specific Validation Error
return self.create_error_response(str(e))
return self.create_error_response(e)

assert final_outputs is not None

Expand Down Expand Up @@ -1486,7 +1482,7 @@ def _create_text_choice(
)
except RuntimeError as e:
logger.exception("Error in reasoning parser creation.")
return self.create_error_response(str(e))
return self.create_error_response(e)
# If the reasoning parser is enabled,
# tool calls are extracted exclusively from the content.
reasoning_content, content = reasoning_parser.extract_reasoning(output.text, request=request)
Expand Down Expand Up @@ -1571,7 +1567,7 @@ def _create_text_choice(
tool_parser = self.tool_parser(tokenizer)
except RuntimeError as e:
logger.exception("Error in tool parser creation.")
return self.create_error_response(str(e))
return self.create_error_response(e)

tool_call_info = tool_parser.extract_tool_calls(content if content is not None else "", request=request)
# In the OpenAI API the finish_reason is "tools_called"
Expand Down
2 changes: 1 addition & 1 deletion vllm_omni/entrypoints/openai/serving_speech.py
Original file line number Diff line number Diff line change
Expand Up @@ -279,7 +279,7 @@ async def create_speech(
except asyncio.CancelledError:
return self.create_error_response("Client disconnected")
except ValueError as e:
return self.create_error_response(str(e))
return self.create_error_response(e)
except Exception as e:
logger.exception("Speech generation failed: %s", e)
return self.create_error_response(f"Speech generation failed: {e}")
Loading