@@ -116,21 +116,19 @@ def log(self, stats: Stats) -> None:
         self._vllm_stat_logger.log(stats)
 
         # Then log TGIS specific ones
-        self.tgi_queue_size.set(stats.num_waiting + stats.num_swapped)
-        self.tgi_batch_current_size.set(stats.num_running)
-
-        for ttft in stats.time_to_first_tokens:
-            self.tgi_batch_inference_duration.labels({
-                "method": "prefill"
-            }).observe(ttft)
-        for tpot in stats.time_per_output_tokens:
-            self.tgi_batch_inference_duration.labels({
-                "method": "next_token"
-            }).observe(tpot)
-
-        # These metrics depend on open PR: https://github.com/vllm-project/vllm/pull/2764
-        if hasattr(stats, "num_prompt_tokens_lst"):
-            for input_len in stats.num_prompt_tokens_lst:
-                self.tgi_request_input_length.observe(input_len)
-            for output_len in stats.num_generation_tokens_lst:
-                self.tgi_request_generated_tokens.observe(output_len)
+        self.tgi_queue_size.set(stats.num_waiting_sys + stats.num_swapped_sys)
+        self.tgi_batch_current_size.set(stats.num_running_sys)
+
+        for ttft in stats.time_to_first_tokens_iter:
+            self.tgi_batch_inference_duration.labels(
+                {"method": "prefill"}
+            ).observe(ttft)
+        for tpot in stats.time_per_output_tokens_iter:
+            self.tgi_batch_inference_duration.labels(
+                {"method": "next_token"}
+            ).observe(tpot)
+
+        for input_len in stats.num_prompt_tokens_requests:
+            self.tgi_request_input_length.observe(input_len)
+        for output_len in stats.num_generation_tokens_requests:
+            self.tgi_request_generated_tokens.observe(output_len)
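The renames above follow vLLM's convention of suffixing `Stats` fields by scope: `_sys` for scheduler-level gauges sampled per iteration, `_iter` for per-iteration latency samples, and `_requests` for values recorded per finished request. Below is a minimal sketch of the shape this logger now consumes, using only the field names visible in the diff; the real `vllm.engine.metrics.Stats` dataclass carries additional fields (cache usage, end-to-end latencies, etc.):

```python
from dataclasses import dataclass, field
from typing import List

@dataclass
class Stats:
    # System-level gauges: a snapshot of the scheduler state.
    num_running_sys: int = 0
    num_waiting_sys: int = 0
    num_swapped_sys: int = 0
    # Per-iteration latency samples, in seconds.
    time_to_first_tokens_iter: List[float] = field(default_factory=list)
    time_per_output_tokens_iter: List[float] = field(default_factory=list)
    # Per-finished-request token counts.
    num_prompt_tokens_requests: List[int] = field(default_factory=list)
    num_generation_tokens_requests: List[int] = field(default_factory=list)
```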
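For reference, here is the labeled-histogram pattern those loops implement, illustrated with the standard `prometheus_client` API. The adapter's own metric wrapper may differ (the diff passes the label dict positionally rather than as a keyword), and the metric description and sample values are assumptions for the example:

```python
from prometheus_client import Histogram

# One histogram, partitioned by inference phase via the "method" label.
tgi_batch_inference_duration = Histogram(
    "tgi_batch_inference_duration",
    "Inference time per batch, by phase",  # assumed description
    labelnames=["method"],
)

# One observation per time-to-first-token sample (prefill phase)...
tgi_batch_inference_duration.labels(method="prefill").observe(0.42)
# ...and one per inter-token latency sample (decode phase).
tgi_batch_inference_duration.labels(method="next_token").observe(0.03)
```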