 import asyncio
 import time
 from functools import partial
-from typing import (AsyncIterator, Callable, Dict, Iterable, List, Optional,
-                    Set, Tuple, Type, Union)
+from typing import (AsyncIterator, Callable, Dict, Iterable, List, Mapping,
+                    Optional, Set, Tuple, Type, Union)
 
 from transformers import PreTrainedTokenizer
 
@@ -151,7 +151,10 @@ def process_exception(self,
             logger.info("Finished request %s.", request_id)
         self.abort_request(request_id)
 
-    def add_request(self, request_id: str,
+    def add_request(self,
+                    request_id: str,
+                    *,
+                    verbose: bool = False,
                     **engine_add_request_kwargs) -> AsyncStream:
         """Add a request to be sent to the engine on the next background
         loop iteration."""
@@ -166,6 +169,9 @@ def add_request(self, request_id: str,
 
         self.new_requests_event.set()
 
+        if verbose:
+            logger.info("Added request %s.", request_id)
+
         return stream
 
     def abort_request(self, request_id: str, *, verbose: bool = False) -> None:
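
The new `verbose` flag is declared after a bare `*`, so it is keyword-only: existing positional callers of `add_request` keep their old meaning. A minimal standalone sketch of the pattern (the function below is illustrative, not the vLLM code above):

def add_request(request_id: str, *, verbose: bool = False) -> None:
    # Arguments after a bare `*` must be passed by name.
    if verbose:
        print(f"Added request {request_id}.")

add_request("cmpl-0", verbose=True)   # OK: prints "Added request cmpl-0."
# add_request("cmpl-0", True)         # TypeError: verbose is keyword-only
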
@@ -337,14 +343,14 @@ async def process_model_params_async(
         return params
 
     async def add_request_async(
-        self,
-        request_id: str,
-        inputs: PromptInputs,
-        params: Union[SamplingParams, PoolingParams],
-        arrival_time: Optional[float] = None,
-        lora_request: Optional[LoRARequest] = None,
-        trace_headers: Optional[Dict[str, str]] = None,
-        prompt_adapter_request: Optional[PromptAdapterRequest] = None
+        self,
+        request_id: str,
+        inputs: PromptInputs,
+        params: Union[SamplingParams, PoolingParams],
+        arrival_time: Optional[float] = None,
+        lora_request: Optional[LoRARequest] = None,
+        trace_headers: Optional[Mapping[str, str]] = None,
+        prompt_adapter_request: Optional[PromptAdapterRequest] = None,
     ) -> None:
         if lora_request is not None and not self.lora_config:
             raise ValueError(f"Got lora_request {lora_request} but LoRA is "
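
Note the `Dict[str, str]` -> `Mapping[str, str]` change on `trace_headers`, repeated in the hunks below: `Mapping` promises only read access, so type checkers also accept immutable or dict-like header containers. A small sketch of the difference (the function names and header value are hypothetical):

from types import MappingProxyType
from typing import Dict, Mapping, Optional

def wants_dict(headers: Optional[Dict[str, str]] = None) -> None: ...
def wants_mapping(headers: Optional[Mapping[str, str]] = None) -> None: ...

readonly = MappingProxyType({"traceparent": "00-abc"})  # dummy header value
wants_mapping(readonly)  # accepted: mappingproxy implements Mapping
wants_dict(readonly)     # flagged by mypy/pyright: not a dict
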
@@ -393,8 +399,6 @@ class AsyncLLMEngine:
             async frontend will be executed in a separate process as the
             model workers.
         log_requests: Whether to log the requests.
-        max_log_len: Maximum number of prompt characters or prompt ID numbers
-            being printed in log.
         start_engine_loop: If True, the background task to run the engine
             will be automatically started in the generate call.
         *args: Arguments for :class:`LLMEngine`.
@@ -408,13 +412,11 @@ def __init__(self,
                  engine_use_ray: bool,
                  *args,
                  log_requests: bool = True,
-                 max_log_len: Optional[int] = None,
                  start_engine_loop: bool = True,
                  **kwargs) -> None:
         self.worker_use_ray = worker_use_ray
         self.engine_use_ray = engine_use_ray
         self.log_requests = log_requests
-        self.max_log_len = max_log_len
         self.engine = self._init_engine(*args, **kwargs)
 
         self.background_loop: Optional[asyncio.Future] = None
@@ -508,7 +510,6 @@ def from_engine_args(
             executor_class=executor_class,
             log_requests=not engine_args.disable_log_requests,
             log_stats=not engine_args.disable_log_stats,
-            max_log_len=engine_args.max_log_len,
             start_engine_loop=start_engine_loop,
             usage_context=usage_context,
             stat_loggers=stat_loggers,
@@ -707,30 +708,9 @@ async def add_request(
         params: Union[SamplingParams, PoolingParams],
         arrival_time: Optional[float] = None,
         lora_request: Optional[LoRARequest] = None,
-        trace_headers: Optional[Dict[str, str]] = None,
+        trace_headers: Optional[Mapping[str, str]] = None,
         prompt_adapter_request: Optional[PromptAdapterRequest] = None
     ) -> AsyncStream:
-        if self.log_requests:
-            if isinstance(inputs, str):
-                shortened_prompt = inputs
-                shortened_token_ids = None
-            else:
-                shortened_prompt = inputs.get("prompt")
-                shortened_token_ids = inputs.get("prompt_token_ids")
-
-            max_log_len = self.max_log_len
-            if max_log_len is not None:
-                if shortened_prompt is not None:
-                    shortened_prompt = shortened_prompt[:max_log_len]
-                if shortened_token_ids is not None:
-                    shortened_token_ids = shortened_token_ids[:max_log_len]
-
-            logger.info(
-                "Received request %s: prompt: %r, "
-                "params: %s, prompt_token_ids: %s, "
-                "lora_request: %s.", request_id, shortened_prompt, params,
-                shortened_token_ids, lora_request)
-
         if not self.is_running:
             if self.start_engine_loop:
                 self.start_background_loop()
@@ -746,6 +726,7 @@ async def add_request(
 
         stream = self._request_tracker.add_request(
             request_id,
+            verbose=self.log_requests,
             inputs=inputs,
             params=params,
             arrival_time=arrival_time,
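
Together with the deletion above, this hunk moves per-request logging out of `AsyncLLMEngine.add_request` and into the tracker, gated by the engine's `log_requests` flag instead of a prompt-truncating `max_log_len`. A simplified, hypothetical sketch of the delegation (not the real classes):

class Tracker:
    def add_request(self, request_id: str, *, verbose: bool = False, **kw):
        if verbose:
            # Only the request id is logged; prompt text never reaches the log.
            print(f"Added request {request_id}.")

class Engine:
    def __init__(self, log_requests: bool = True):
        self.log_requests = log_requests
        self._tracker = Tracker()

    def add_request(self, request_id: str, **kw):
        self._tracker.add_request(request_id, verbose=self.log_requests, **kw)

Engine(log_requests=True).add_request("cmpl-1")  # prints "Added request cmpl-1."
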
@@ -761,7 +742,7 @@ async def generate(
         sampling_params: SamplingParams,
         request_id: str,
         lora_request: Optional[LoRARequest] = None,
-        trace_headers: Optional[Dict[str, str]] = None,
+        trace_headers: Optional[Mapping[str, str]] = None,
         prompt_adapter_request: Optional[PromptAdapterRequest] = None
     ) -> AsyncIterator[RequestOutput]:
         """Generate outputs for a request.
@@ -844,7 +825,7 @@ async def encode(
         pooling_params: PoolingParams,
         request_id: str,
         lora_request: Optional[LoRARequest] = None,
-        trace_headers: Optional[Dict[str, str]] = None,
+        trace_headers: Optional[Mapping[str, str]] = None,
     ) -> AsyncIterator[EmbeddingRequestOutput]:
         """Generate outputs for a request from an embedding model.
 
@@ -922,7 +903,7 @@ async def _process_request(
         params: Union[SamplingParams, PoolingParams],
         *,
         lora_request: Optional[LoRARequest] = None,
-        trace_headers: Optional[Dict[str, str]] = None,
+        trace_headers: Optional[Mapping[str, str]] = None,
         prompt_adapter_request: Optional[PromptAdapterRequest] = None,
     ) -> AsyncIterator[Union[RequestOutput, EmbeddingRequestOutput]]:
         """Common logic to process requests with SamplingParams or
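
Taken together, the change stops prompts from being echoed on every request: logging is controlled solely by `log_requests` and records only request ids. A hedged end-to-end sketch of the post-change API (model name, prompt, and request id are placeholders; assumes a vLLM build containing this commit):

import asyncio

from vllm import AsyncEngineArgs, AsyncLLMEngine, SamplingParams

# Placeholder model; any locally available model id works.
engine = AsyncLLMEngine.from_engine_args(
    AsyncEngineArgs(model="facebook/opt-125m"))

async def main() -> None:
    # With log_requests at its default (True), the tracker logs
    # "Added request demo-0." / "Finished request demo-0." rather than
    # dumping prompt text or token ids.
    final = None
    async for out in engine.generate("Hello, my name is",
                                     SamplingParams(max_tokens=16),
                                     request_id="demo-0"):
        final = out
    print(final.outputs[0].text)

asyncio.run(main())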