@@ -214,7 +214,6 @@ def _update_num_reasoning_tokens(self):
214214
215215 def append_output (self , output : RequestOutput ) -> None :
216216 output_token_ids = output .outputs [0 ].token_ids
217- self .parser = get_streamable_parser_for_assistant ()
218217 for token_id in output_token_ids :
219218 self .parser .process (token_id )
220219 # Check if the current token is part of reasoning content
@@ -519,7 +518,8 @@ def append_output(self, output: RequestOutput) -> None:
519518 # (finished=True), then the next token processed will mark the
520519 # beginning of a new message
521520 self .first_tok_of_message = output .finished
522- for tok in output .outputs [0 ].token_ids :
521+ token_ids = output .outputs [0 ].token_ids
522+ for tok in token_ids :
523523 self .parser .process (tok )
524524 self ._update_decode_token_usage (output )
525525
@@ -529,7 +529,9 @@ def append_output(self, output: RequestOutput) -> None:
529529 self .current_turn_metrics .reset ()
530530 # Check if the current token is part of reasoning content
531531 self ._update_num_reasoning_tokens ()
532- self .last_tok = tok
532+ # Only update last_tok if we actually processed tokens
533+ if token_ids :
534+ self .last_tok = tok
533535 if len (self ._messages ) - self .num_init_messages < len (self .parser .messages ):
534536 self ._messages .extend (
535537 self .parser .messages [len (self ._messages ) - self .num_init_messages :]
@@ -547,7 +549,8 @@ def append_tool_output(self, output: list[Message]) -> None:
547549 for tok in toks :
548550 self .parser .process (tok )
549551 self .last_tok = toks [- 1 ]
550- # TODO: add tool_output messages to self._messages
552+ # Add tool output messages to self._messages
553+ self ._messages .extend (output )
551554
552555 def is_expecting_start (self ) -> bool :
553556 return self .parser .state == StreamState .EXPECT_START
0 commit comments