Skip to content

Commit 5efca07

Browse files
committed
refactor: optimize comments in streaming chat handler
Improve code comments based on PR feedback:
- Clarify role initialization purpose to prevent UnboundLocalError
- Consolidate split comment into single line for better readability

Signed-off-by: Pierre Le Guen <26087574+PierreLeGuen@users.noreply.github.com>
1 parent e5c8de3 commit 5efca07

1 file changed

Lines changed: 7 additions & 9 deletions

File tree

vllm_omni/entrypoints/openai/serving_chat.py

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -551,9 +551,9 @@ async def chat_completion_stream_generator(
551551
created_time = int(time.time())
552552
chunk_object_type: Final = "chat.completion.chunk"
553553
first_iteration_dict = {}
554-
assert hasattr(request, "modalities") and request.modalities is not None, (
555-
"Streaming request must specify output modalities"
556-
)
554+
assert (
555+
hasattr(request, "modalities") and request.modalities is not None
556+
), "Streaming request must specify output modalities"
557557
for modality in request.modalities:
558558
first_iteration_dict[modality] = True
559559

@@ -638,19 +638,17 @@ async def chat_completion_stream_generator(
638638
if res.encoder_prompt_token_ids is not None:
639639
num_prompt_tokens += len(res.encoder_prompt_token_ids)
640640

641-
# Get role for all modalities (text, audio, image)
641+
# Initialize role before conditional blocks to avoid UnboundLocalError
642+
# when handling audio/image responses
642643
role = self.get_chat_request_role(request)
643644

644645
# We need to do it here, because if there are exceptions in
645646
# the result_generator, it needs to be sent as the FIRST
646647
# response (by the try...catch).
647648
if first_iteration_dict[final_output_type] and final_output_type == "text":
648649
num_cached_tokens = res.num_cached_tokens
649-
# Send first response for each request.n (index) with
650-
# the role
651-
652-
# NOTE num_choices defaults to 1 so this usually executes
653-
# once per request
650+
# Send first response for each choice with role
651+
# NOTE: num_choices defaults to 1 so this usually executes once per request
654652
for i in range(num_choices):
655653
choice_data = ChatCompletionResponseStreamChoice(
656654
index=i,

0 commit comments

Comments (0)