File tree Expand file tree Collapse file tree
Original file line number | Diff line number | Diff line change
@@ -416,6 +416,12 @@ def pop_preallocated(self) -> List[DecodeRequest]:
416416
417417 return preallocated_reqs
418418
419+ @property
420+ def num_tokens_pre_allocated (self ):
421+ return sum (
422+ len (decode_req .req .fill_ids ) for decode_req in self .transfer_queue .queue
423+ )
424+
419425 def _allocatable_tokens (
420426 self , retractable_tokens : Optional [int ] = None , count_retracted : bool = True
421427 ) -> int :
Original file line number | Diff line number | Diff line change
@@ -707,9 +707,6 @@ def init_disaggregation(self):
707707 transfer_backend = self .transfer_backend ,
708708 )
709709
710- # Metric for pre-allocation
711- self .num_tokens_pre_allocated = 0
712-
713710 elif self .disaggregation_mode == DisaggregationMode .PREFILL :
714711 # *2 for the headroom.
715712 buffer_size = self .max_running_requests * 2
@@ -1372,7 +1369,7 @@ def log_decode_stats(
13721369 msg += f"accept len: { spec_accept_length :.2f} , "
13731370
13741371 if self .disaggregation_mode == DisaggregationMode .DECODE :
1375- msg += f"pre-allocated usage: { self .num_tokens_pre_allocated / self .max_total_num_tokens :.2f} , "
1372+ msg += f"pre-allocated usage: { self .disagg_decode_prealloc_queue . num_tokens_pre_allocated / self .max_total_num_tokens :.2f} , "
13761373 msg += f"#retracted-req: { len (self .disagg_decode_prealloc_queue .retracted_queue )} , "
13771374
13781375 msg += (
You can’t perform that action at this time.
0 commit comments