@@ -318,15 +318,16 @@ def _sanitize_generate_config(
         stream_options = generate_config.get("stream_options")
         generate_config.setdefault("stream_options", stream_options)
         generate_config.setdefault("ignore_eos", False)
-        response_format = generate_config.pop("response_format")
-        json_schema_config = response_format.pop("json_schema")
-        json_schema = None
-        if "schema_" in json_schema_config:
-            json_schema = json_schema_config.pop("schema_")
-        elif "schema" in json_schema_config:
-            json_schema = json_schema_config.pop("schema")
-        if json_schema:
-            generate_config.setdefault("json_schema", json.dumps(json_schema))  # type: ignore
+        response_format = generate_config.pop("response_format", None)
+        if response_format:
+            json_schema_config = response_format.pop("json_schema", None) or {}
+            json_schema = None
+            if "schema_" in json_schema_config:
+                json_schema = json_schema_config.pop("schema_")
+            elif "schema" in json_schema_config:
+                json_schema = json_schema_config.pop("schema")
+            if json_schema:
+                generate_config.setdefault("json_schema", json.dumps(json_schema))  # type: ignore

         return generate_config

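The guard above changes behavior for configs that omit response_format: previously the unconditional pop raised KeyError. A minimal standalone sketch of the sanitization path (the sanitize name here is hypothetical; the real logic is the method in this diff):

import json

def sanitize(generate_config: dict) -> dict:
    # Pop response_format if present; a missing key is now a no-op.
    response_format = generate_config.pop("response_format", None)
    if response_format:
        json_schema_config = response_format.pop("json_schema", None) or {}
        json_schema = None
        # Pydantic models may expose the schema field as "schema_" rather
        # than "schema", so both spellings are checked.
        if "schema_" in json_schema_config:
            json_schema = json_schema_config.pop("schema_")
        elif "schema" in json_schema_config:
            json_schema = json_schema_config.pop("schema")
        if json_schema:
            # Flatten into the key the backend expects, serialized as JSON.
            generate_config.setdefault("json_schema", json.dumps(json_schema))
    return generate_config

assert sanitize({"max_new_tokens": 32}) == {"max_new_tokens": 32}
assert sanitize(
    {"response_format": {"type": "json_object",
                         "json_schema": {"schema": {"type": "object"}}}}
) == {"json_schema": '{"type": "object"}'}
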
@@ -359,35 +360,47 @@ def match_json(

     @staticmethod
     def _convert_state_to_completion_chunk(
-        request_id: str, model: str, output_text: str
+        request_id: str, model: str, output_text: str, meta_info: Dict
     ) -> CompletionChunk:
+        finish_reason = meta_info.get("finish_reason", None)
+        if isinstance(finish_reason, dict) and "type" in finish_reason:
+            finish_reason = finish_reason["type"]
         choices: List[CompletionChoice] = [
             CompletionChoice(
                 text=output_text,
                 index=0,
                 logprobs=None,
-                finish_reason=None,
+                finish_reason=finish_reason,
             )
         ]
+        usage = CompletionUsage(
+            prompt_tokens=meta_info["prompt_tokens"],
+            completion_tokens=meta_info["completion_tokens"],
+            total_tokens=meta_info["prompt_tokens"] + meta_info["completion_tokens"],
+        )
         chunk = CompletionChunk(
             id=request_id,
             object="text_completion",
             created=int(time.time()),
             model=model,
             choices=choices,
+            usage=usage,
         )
         return chunk

     @staticmethod
     def _convert_state_to_completion(
         request_id: str, model: str, output_text: str, meta_info: Dict
     ) -> Completion:
+        finish_reason = meta_info.get("finish_reason", None)
+        if isinstance(finish_reason, dict) and "type" in finish_reason:
+            finish_reason = finish_reason["type"]
         choices = [
             CompletionChoice(
                 text=output_text,
                 index=0,
                 logprobs=None,
-                finish_reason=None,
+                finish_reason=finish_reason,
             )
         ]

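The backend can report finish_reason either as a plain string or as a structured dict; the dict shape with a "type" field is an assumption inferred from the isinstance check above. A minimal sketch of the normalization both converters now perform:

from typing import Dict, Optional

def normalize_finish_reason(meta_info: Dict) -> Optional[str]:
    finish_reason = meta_info.get("finish_reason", None)
    # Collapse the structured form to its "type" field, e.g. "stop" or "length".
    if isinstance(finish_reason, dict) and "type" in finish_reason:
        finish_reason = finish_reason["type"]
    return finish_reason

assert normalize_finish_reason({"finish_reason": "stop"}) == "stop"
assert normalize_finish_reason({"finish_reason": {"type": "length"}}) == "length"
assert normalize_finish_reason({}) is None
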
@@ -516,7 +529,10 @@ async def stream_results() -> AsyncGenerator[CompletionChunk, None]:
                 prompt, image_data, **sanitized_generate_config
             ):
                 chunk = self._convert_state_to_completion_chunk(
-                    request_id, self.model_uid, output_text=out
+                    request_id,
+                    self.model_uid,
+                    output_text=out,
+                    meta_info=meta_info,
                 )
                 complete_response += out
                 finish_reason = meta_info["finish_reason"]
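With meta_info threaded into the chunk converter, every streamed chunk now carries a usage block and a (possibly None) finish_reason. A hypothetical consumer, treating the chunk as the dict-like structure built above:

async def consume(stream) -> None:
    async for chunk in stream:
        choice = chunk["choices"][0]
        if choice["finish_reason"] is not None:
            usage = chunk["usage"]
            # total_tokens is prompt_tokens + completion_tokens, per the diff.
            print(
                f"finished ({choice['finish_reason']}): "
                f"{usage['total_tokens']} tokens total"
            )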