Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion vllm_omni/benchmarks/metrics/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,7 @@ def calculate_metrics(
total_input += input_requests[i].prompt_len
tpot = 0
if output_len > 1:
latency_minus_ttft = outputs[i].latency - outputs[i].ttft
latency_minus_ttft = outputs[i].text_latency - outputs[i].ttft
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The same

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

fixed

Copy link
Contributor Author

@kechengliu97 kechengliu97 Feb 12, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This attribute is pre-declared on the dataclass with a default value of 0.

tpot = latency_minus_ttft / (output_len - 1)
tpots.append(tpot)
# Note: if output_len <= 1, we regard tpot as 0 for goodput
Expand Down
6 changes: 4 additions & 2 deletions vllm_omni/benchmarks/patch/patch.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ class MixRequestFuncOutput(RequestFuncOutput):
audio_duration: float = 0.0
audio_frames: int = 0
audio_rtf: float = 0.0
text_latency: float = 0.0


async def async_request_openai_chat_omni_completions(
Expand Down Expand Up @@ -148,6 +149,7 @@ async def async_request_openai_chat_omni_completions(
output.itl.append(timestamp - most_recent_timestamp)
generated_text += content or ""
most_recent_timestamp = timestamp
output.text_latency = timestamp - st
elif modality == "audio":
if output.audio_ttfp == 0.0:
output.audio_ttfp = timestamp - st
Expand All @@ -161,8 +163,8 @@ async def async_request_openai_chat_omni_completions(
else:
generated_audio = generated_audio + seg

elif usage := data.get("usage"):
output.output_tokens = usage.get("completion_tokens")
if metrics := data.get("metrics"):
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Set default values to avoid possible error

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

fixed

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This expression is valid: the walrus operator `:=` first evaluates `data.get("metrics")` and then binds the result to the `metrics` variable. If the key is absent, the result is `None` (falsy), so the `if` body is simply skipped.

output.output_tokens = metrics.get("num_tokens_out")

output.latency = timestamp - st
output.generated_text = generated_text
Expand Down