@@ -765,18 +765,10 @@ def init_multi_step_from_lookahead_slots(self, num_lookahead_slots: int,
765765
def set_last_token_time(self, now: float) -> None:
    """Sets the last token time for Request level timings.

    Stores the time elapsed since the previously recorded token time in
    ``self.last_token_latency`` and then records ``now`` as the new
    ``self.metrics.last_token_time``.

    Args:
        now: Timestamp of the newest token, expressed in the same units
            as ``self.metrics.last_token_time``.

    Raises:
        AssertionError: If the sequence group is still in the prefill
            phase (``self.is_prefill()`` is true).
    """
    # If still in prefill phase, assertion fails.
    assert not self.is_prefill(), (
        "seq_group.set_last_token_time() should not be called "
        "if the seq_group is in prefill phase.")
    # The latency must be computed before the previous timestamp is
    # overwritten on the next line.
    self.last_token_latency = now - self.metrics.last_token_time
    self.metrics.last_token_time = now
782774
@@ -787,14 +779,6 @@ def get_last_token_latency(self) -> float:
787779 "if the seq_group is in prefill phase." )
788780 return self .last_token_latency
789781
790- def get_last_token_latency (self ) -> float :
791- """Returns the latency of the last token."""
792- if self .is_prefill ():
793- raise ValueError (
794- "seq_group.get_last_token_latency() should not be called "
795- "if the seq_group is in prefill phase." )
796- return self .last_token_latency
797-
798782 def maybe_set_first_token_time (self , time : float ) -> None :
799783 """Sets the first token time for Request level timings."""
800784 # Note: in a case where a sequence_group is swapped and
0 commit comments