@@ -101,7 +101,10 @@ async def chat_completion_stream_generator(
         role = self.get_chat_request_role(request)
         for i in range(request.n):
             choice_data = ChatCompletionResponseStreamChoice(
-                index=i, delta=DeltaMessage(role=role), finish_reason=None)
+                index=i,
+                delta=DeltaMessage(role=role),
+                logprobs=None,
+                finish_reason=None)
             chunk = ChatCompletionStreamResponse(id=request_id,
                                                  object=chunk_object_type,
                                                  created=created_time,
@@ -118,6 +121,7 @@ async def chat_completion_stream_generator(
                         "content") and request.messages[-1].get(
                             "role") == role:
                 last_msg_content = request.messages[-1]["content"]
+
             if last_msg_content:
                 for i in range(request.n):
                     choice_data = ChatCompletionResponseStreamChoice(
@@ -129,6 +133,7 @@ async def chat_completion_stream_generator(
                         object=chunk_object_type,
                         created=created_time,
                         choices=[choice_data],
+                        logprobs=None,
                         model=model_name)
                     data = chunk.model_dump_json(exclude_unset=True)
                     yield f"data: {data}\n\n"
@@ -145,15 +150,29 @@ async def chat_completion_stream_generator(
                 if finish_reason_sent[i]:
                     continue
 
+                delta_token_ids = output.token_ids[previous_num_tokens[i]:]
+                top_logprobs = output.logprobs[
+                    previous_num_tokens[i]:] if output.logprobs else None
+
+                if request.logprobs:
+                    logprobs = self._create_logprobs(
+                        token_ids=delta_token_ids,
+                        top_logprobs=top_logprobs,
+                        num_output_top_logprobs=request.logprobs,
+                        initial_text_offset=len(previous_texts[i]),
+                    )
+                else:
+                    logprobs = None
+
                 delta_text = output.text[len(previous_texts[i]):]
                 previous_texts[i] = output.text
                 previous_num_tokens[i] = len(output.token_ids)
-
                 if output.finish_reason is None:
                     # Send token-by-token response for each request.n
                     choice_data = ChatCompletionResponseStreamChoice(
                         index=i,
                         delta=DeltaMessage(content=delta_text),
+                        logprobs=logprobs,
                         finish_reason=None)
                     chunk = ChatCompletionStreamResponse(
                         id=request_id,
@@ -174,6 +193,7 @@ async def chat_completion_stream_generator(
                     choice_data = ChatCompletionResponseStreamChoice(
                         index=i,
                         delta=DeltaMessage(content=delta_text),
+                        logprobs=logprobs,
                         finish_reason=output.finish_reason)
                     chunk = ChatCompletionStreamResponse(
                         id=request_id,
@@ -208,11 +228,25 @@ async def chat_completion_full_generator(
         assert final_res is not None
 
         choices = []
+
         role = self.get_chat_request_role(request)
         for output in final_res.outputs:
+            token_ids = output.token_ids
+            top_logprobs = output.logprobs
+
+            if request.logprobs:
+                logprobs = self._create_logprobs(
+                    token_ids=token_ids,
+                    top_logprobs=top_logprobs,
+                    num_output_top_logprobs=request.logprobs,
+                )
+            else:
+                logprobs = None
+
             choice_data = ChatCompletionResponseChoice(
                 index=output.index,
                 message=ChatMessage(role=role, content=output.text),
+                logprobs=logprobs,
                 finish_reason=output.finish_reason,
             )
             choices.append(choice_data)