Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 28 additions & 0 deletions test/srt/configs/llama_405b.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# Benchmark sweep for nvidia/Llama-3.1-405B-Instruct-FP8.
# Every task uses the same server command (--tp 8) and the same request
# shape (--random-input-len 8192, --random-output-len 1024,
# --random-range-ratio 1); only --max-concurrency and --num-prompts vary
# between tasks.
# All tasks name the same --output-file (llama_405b_results.jsonl).
tasks:
- name: sglang-8192-1024-concurrency1
server_cmd: python3 -m sglang.launch_server --model nvidia/Llama-3.1-405B-Instruct-FP8 --tp 8
client_cmd: python3 -m sglang.bench_serving --random-range-ratio 1 --random-input-len 8192 --random-output-len 1024 --max-concurrency 1 --num-prompts 5 --output-file llama_405b_results.jsonl

- name: sglang-8192-1024-concurrency2
server_cmd: python3 -m sglang.launch_server --model nvidia/Llama-3.1-405B-Instruct-FP8 --tp 8
client_cmd: python3 -m sglang.bench_serving --random-range-ratio 1 --random-input-len 8192 --random-output-len 1024 --max-concurrency 2 --num-prompts 10 --output-file llama_405b_results.jsonl

- name: sglang-8192-1024-concurrency4
server_cmd: python3 -m sglang.launch_server --model nvidia/Llama-3.1-405B-Instruct-FP8 --tp 8
client_cmd: python3 -m sglang.bench_serving --random-range-ratio 1 --random-input-len 8192 --random-output-len 1024 --max-concurrency 4 --num-prompts 20 --output-file llama_405b_results.jsonl

- name: sglang-8192-1024-concurrency8
server_cmd: python3 -m sglang.launch_server --model nvidia/Llama-3.1-405B-Instruct-FP8 --tp 8
client_cmd: python3 -m sglang.bench_serving --random-range-ratio 1 --random-input-len 8192 --random-output-len 1024 --max-concurrency 8 --num-prompts 32 --output-file llama_405b_results.jsonl

- name: sglang-8192-1024-concurrency16
server_cmd: python3 -m sglang.launch_server --model nvidia/Llama-3.1-405B-Instruct-FP8 --tp 8
client_cmd: python3 -m sglang.bench_serving --random-range-ratio 1 --random-input-len 8192 --random-output-len 1024 --max-concurrency 16 --num-prompts 48 --output-file llama_405b_results.jsonl

- name: sglang-8192-1024-concurrency24
server_cmd: python3 -m sglang.launch_server --model nvidia/Llama-3.1-405B-Instruct-FP8 --tp 8
client_cmd: python3 -m sglang.bench_serving --random-range-ratio 1 --random-input-len 8192 --random-output-len 1024 --max-concurrency 24 --num-prompts 72 --output-file llama_405b_results.jsonl

- name: sglang-8192-1024-concurrency32
server_cmd: python3 -m sglang.launch_server --model nvidia/Llama-3.1-405B-Instruct-FP8 --tp 8
client_cmd: python3 -m sglang.bench_serving --random-range-ratio 1 --random-input-len 8192 --random-output-len 1024 --max-concurrency 32 --num-prompts 96 --output-file llama_405b_results.jsonl
8 changes: 7 additions & 1 deletion test/srt/experiment_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -317,6 +317,11 @@ def format_results(results: List[TaskResult]) -> str:
return "\n".join(output)


def get_bool_env_var(name: str, default: str = "false") -> bool:
    """Interpret the environment variable ``name`` as a boolean flag.

    Args:
        name: Name of the environment variable to read.
        default: String used when the variable is not set.

    Returns:
        True when the value, ignoring case and surrounding whitespace,
        is ``"true"`` or ``"1"``; False for anything else.
    """
    value = os.getenv(name, default)
    # Strip first so values such as "true " or " 1" (easy to produce from
    # shell scripts and CI configuration) are not silently read as False.
    return value.strip().lower() in ("true", "1")


def write_in_github_step_summary(results: List[TaskResult]):
"""Write formatted results to GitHub step summary."""
if not os.environ.get("GITHUB_STEP_SUMMARY"):
Expand Down Expand Up @@ -349,7 +354,8 @@ def main():
result = runner.run_task(config)
results.append(result)

write_in_github_step_summary(results)
if get_bool_env_var("SGLANG_IS_IN_CI"):
write_in_github_step_summary(results)
except Exception as e:
logger.error(f"Error: {e}")
raise
Expand Down
46 changes: 46 additions & 0 deletions test/srt/parse_results.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
import json
import pandas as pd
import argparse
import os
from tabulate import tabulate

# Metrics copied verbatim from each benchmark record into the summary.
fields = [
    "max_concurrency",
    "output_throughput",
    "mean_ttft_ms",
    "median_ttft_ms",
    "p99_ttft_ms",
    "mean_tpot_ms",
    "median_tpot_ms",
    "p99_tpot_ms",
]


def summarize_record(data):
    """Build one summary row from a decoded benchmark record.

    Args:
        data: Mapping decoded from one JSONL line.

    Returns:
        A dict holding every key in ``fields`` (None when the record lacks
        it) plus ``per_user_throughput``, which is
        ``output_throughput / max_concurrency`` or None when either value
        is missing or ``max_concurrency`` is zero.
    """
    row = {field: data.get(field) for field in fields}
    max_conc = data.get("max_concurrency")
    out_tp = data.get("output_throughput")
    # Guard both operands: a record without output_throughput must yield
    # None instead of raising TypeError on ``None / int``.
    if max_conc and out_tp is not None:
        row["per_user_throughput"] = out_tp / max_conc
    else:
        row["per_user_throughput"] = None
    return row


def load_results(path):
    """Read a JSONL file and return one summary row per non-blank line.

    Args:
        path: Path to the JSONL file to read.

    Returns:
        A list of dicts as produced by ``summarize_record``.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    rows = []
    with open(path, "r", encoding="utf-8") as f:
        for line in f:
            # Blank lines (e.g. a trailing newline) are not records.
            if not line.strip():
                continue
            rows.append(summarize_record(json.loads(line)))
    return rows


def main():
    """Parse the input path from argv, write the CSV summary, print a table."""
    parser = argparse.ArgumentParser(
        description="Parse JSONL benchmark and summarize."
    )
    parser.add_argument("input_file", type=str, help="Path to input JSONL file")
    args = parser.parse_args()

    input_file = args.input_file
    base_name = os.path.splitext(os.path.basename(input_file))[0]
    output_file = f"{base_name}_summary.csv"

    # Passing the columns explicitly keeps the CSV header present even when
    # the input file contains no records.
    df = pd.DataFrame(
        load_results(input_file),
        columns=[*fields, "per_user_throughput"],
    )

    # Save to CSV
    df.to_csv(output_file, index=False)
    print(f"\nSaved summary to: {output_file}\n")

    # Print ASCII table
    print(tabulate(df, headers="keys", tablefmt="grid", floatfmt=".3f"))


if __name__ == "__main__":
    main()
Loading