@@ -300,6 +300,7 @@ def add_request(
def abort_request(self, request_id: Union[str, Iterable[str]]) -> None:
    """Abort one or more requests and drop their per-request state.

    Args:
        request_id: A single request id, or an iterable of request ids.
    """
    # Normalise to a concrete sequence exactly once. A one-shot iterator
    # (e.g. a generator) would otherwise be exhausted by the scheduler
    # before the cleanup loop below could see any ids.
    if isinstance(request_id, str):
        request_ids = [request_id]
    else:
        request_ids = list(request_id)

    # NOTE(review): the scheduler is handed a list here, which satisfies
    # the Iterable[str] arm of the annotation this method forwards --
    # confirm finish_requests has no special handling for a bare str.
    self.scheduler.finish_requests(request_ids,
                                   RequestStatus.FINISHED_ABORTED)

    # _free_request treats its argument as a single dict key, so it must
    # be called per id: passing the whole collection would raise
    # TypeError for a list (unhashable) or silently free nothing for a
    # tuple.
    for req_id in request_ids:
        self._free_request(req_id)
303304
304305 def get_num_unfinished_requests (self ) -> int :
305306 """Gets the number of unfinished requests."""
@@ -361,6 +362,11 @@ def recv_from_detokenizer(self) -> List[RequestOutput]:
361362 num_reqs = len (detokenizer_output .req_ids )
362363 for i in range (num_reqs ):
363364 req_id = detokenizer_output .req_ids [i ]
365+ if req_id not in self .requests :
366+ # The request has been aborted while the detokenizer was
367+ # processing the outputs.
368+ continue
369+
364370 req = self .requests [req_id ]
365371 req .output_text += detokenizer_output .detokenized_texts [i ]
366372
@@ -373,9 +379,7 @@ def recv_from_detokenizer(self) -> List[RequestOutput]:
373379 req_outputs .append (req_output )
374380
375381 if finished :
376- del self .requests [req_id ]
377- del self .num_lagged_steps [req_id ]
378- del self .request_outputs [req_id ]
382+ self ._free_request (req_id )
379383 return req_outputs
380384
381385 def terminate_detokenizer (self ) -> None :
@@ -440,6 +444,11 @@ def _make_request_output(
440444 req_output .finished = finished
441445 return req_output
442446
447+ def _free_request (self , request_id : str ) -> None :
448+ self .requests .pop (request_id , None )
449+ self .num_lagged_steps .pop (request_id , None )
450+ self .request_outputs .pop (request_id , None )
451+
443452 def check_health (self ) -> None :
444453 if self .tokenizer :
445454 self .tokenizer .check_health ()
0 commit comments