Skip to content

Commit 1450d23

Browse files
ZeldaHuangchenxijun1029
authored and committed
Fix num_tokens_pre_allocated in disaggregation log (sgl-project#7714)
1 parent 0354142 commit 1450d23

2 files changed

Lines changed: 7 additions & 4 deletions

File tree

python/sglang/srt/disaggregation/decode.py

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -416,6 +416,12 @@ def pop_preallocated(self) -> List[DecodeRequest]:
416416

417417
return preallocated_reqs
418418

419+
@property
420+
def num_tokens_pre_allocated(self):
421+
return sum(
422+
len(decode_req.req.fill_ids) for decode_req in self.transfer_queue.queue
423+
)
424+
419425
def _allocatable_tokens(
420426
self, retractable_tokens: Optional[int] = None, count_retracted: bool = True
421427
) -> int:

python/sglang/srt/managers/scheduler.py

Lines changed: 1 addition & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -707,9 +707,6 @@ def init_disaggregation(self):
707707
transfer_backend=self.transfer_backend,
708708
)
709709

710-
# Metric for pre-allocation
711-
self.num_tokens_pre_allocated = 0
712-
713710
elif self.disaggregation_mode == DisaggregationMode.PREFILL:
714711
# *2 for the headroom.
715712
buffer_size = self.max_running_requests * 2
@@ -1372,7 +1369,7 @@ def log_decode_stats(
13721369
msg += f"accept len: {spec_accept_length:.2f}, "
13731370

13741371
if self.disaggregation_mode == DisaggregationMode.DECODE:
1375-
msg += f"pre-allocated usage: {self.num_tokens_pre_allocated / self.max_total_num_tokens:.2f}, "
1372+
msg += f"pre-allocated usage: {self.disagg_decode_prealloc_queue.num_tokens_pre_allocated / self.max_total_num_tokens:.2f}, "
13761373
msg += f"#retracted-req: {len(self.disagg_decode_prealloc_queue.retracted_queue)}, "
13771374

13781375
msg += (

0 commit comments

Comments
 (0)