From 6de056afd150b41cb5d34c5dbe33283c11a87039 Mon Sep 17 00:00:00 2001 From: Huy Do Date: Thu, 20 Feb 2025 15:25:09 -0800 Subject: [PATCH 1/7] Fix several issues on benchmark outputs Signed-off-by: Huy Do --- .../scripts/run-performance-benchmarks.sh | 3 +++ .buildkite/nightly-benchmarks/tests/throughput-tests.json | 2 +- benchmarks/benchmark_latency.py | 5 ++--- benchmarks/benchmark_serving.py | 5 ++--- benchmarks/benchmark_throughput.py | 5 ++--- benchmarks/benchmark_utils.py | 6 ++++++ 6 files changed, 16 insertions(+), 10 deletions(-) diff --git a/.buildkite/nightly-benchmarks/scripts/run-performance-benchmarks.sh b/.buildkite/nightly-benchmarks/scripts/run-performance-benchmarks.sh index 9425cb07ec01..f3152d297707 100644 --- a/.buildkite/nightly-benchmarks/scripts/run-performance-benchmarks.sh +++ b/.buildkite/nightly-benchmarks/scripts/run-performance-benchmarks.sh @@ -309,11 +309,14 @@ run_serving_tests() { new_test_name=$test_name"_qps_"$qps + # pass the tensor parallel size to the client so that it can be displayed + # on the benchmark dashboard client_command="python3 benchmark_serving.py \ --save-result \ --result-dir $RESULTS_FOLDER \ --result-filename ${new_test_name}.json \ --request-rate $qps \ + --tensor-parallel-size $tp \ $client_args" echo "Running test case $test_name with qps $qps" diff --git a/.buildkite/nightly-benchmarks/tests/throughput-tests.json b/.buildkite/nightly-benchmarks/tests/throughput-tests.json index 91ef6d16be63..9bc87cbcd2bc 100644 --- a/.buildkite/nightly-benchmarks/tests/throughput-tests.json +++ b/.buildkite/nightly-benchmarks/tests/throughput-tests.json @@ -32,4 +32,4 @@ "backend": "vllm" } } -] \ No newline at end of file +] diff --git a/benchmarks/benchmark_latency.py b/benchmarks/benchmark_latency.py index b041626550b5..e467b1cd131b 100644 --- a/benchmarks/benchmark_latency.py +++ b/benchmarks/benchmark_latency.py @@ -11,7 +11,7 @@ import numpy as np import torch -from benchmark_utils import convert_to_pytorch_benchmark_format +from benchmark_utils import convert_to_pytorch_benchmark_format, write_to_json from tqdm import tqdm from vllm import LLM, SamplingParams @@ -30,8 +30,7 @@ def save_to_pytorch_benchmark_format(args: argparse.Namespace, for k in ["avg_latency", "percentiles"]}) if pt_records: pt_file = f"{os.path.splitext(args.output_json)[0]}.pytorch.json" - with open(pt_file, "w") as f: - json.dump(pt_records, f) + write_to_json(pt_file, pt_records) def main(args: argparse.Namespace): diff --git a/benchmarks/benchmark_serving.py b/benchmarks/benchmark_serving.py index 9760737ccec3..098257496372 100644 --- a/benchmarks/benchmark_serving.py +++ b/benchmarks/benchmark_serving.py @@ -56,7 +56,7 @@ except ImportError: from argparse import ArgumentParser as FlexibleArgumentParser -from benchmark_utils import convert_to_pytorch_benchmark_format +from benchmark_utils import convert_to_pytorch_benchmark_format, write_to_json MILLISECONDS_TO_SECONDS_CONVERSION = 1000 @@ -841,8 +841,7 @@ def save_to_pytorch_benchmark_format(args: argparse.Namespace, if pt_records: # Don't use json suffix here as we don't want CI to pick it up pt_file = f"{os.path.splitext(file_name)[0]}.pytorch.json" - with open(pt_file, "w") as f: - json.dump(pt_records, f) + write_to_json(pt_file, pt_records) def main(args: argparse.Namespace): diff --git a/benchmarks/benchmark_throughput.py b/benchmarks/benchmark_throughput.py index f7d87f1b336f..4674516a7af4 100644 --- a/benchmarks/benchmark_throughput.py +++ b/benchmarks/benchmark_throughput.py @@ -11,7 +11,7 @@ import torch import uvloop -from benchmark_utils import convert_to_pytorch_benchmark_format +from benchmark_utils import convert_to_pytorch_benchmark_format, write_to_json from PIL import Image from tqdm import tqdm from transformers import (AutoModelForCausalLM, AutoTokenizer, @@ -355,8 +355,7 @@ def save_to_pytorch_benchmark_format(args: argparse.Namespace, if pt_records: # Don't use json suffix here as we don't want CI to pick it up pt_file = f"{os.path.splitext(args.output_json)[0]}.pytorch.json" - with open(pt_file, "w") as f: - json.dump(pt_records, f) + write_to_json(pt_file, pt_records) def main(args: argparse.Namespace): diff --git a/benchmarks/benchmark_utils.py b/benchmarks/benchmark_utils.py index 6f01cf20e17c..762c10b9f5c3 100644 --- a/benchmarks/benchmark_utils.py +++ b/benchmarks/benchmark_utils.py @@ -1,6 +1,7 @@ # SPDX-License-Identifier: Apache-2.0 import argparse +import json import os from typing import Any, Dict, List @@ -37,3 +38,8 @@ def convert_to_pytorch_benchmark_format(args: argparse.Namespace, records.append(record) return records + + +def write_to_json(filename: str, records: List) -> None: + with open(filename, "w") as f: + json.dump(records, f) From 0c5651b3580e8388ff8c188f4019cb487f5d1602 Mon Sep 17 00:00:00 2001 From: Huy Do Date: Thu, 20 Feb 2025 16:35:54 -0800 Subject: [PATCH 2/7] Add the option Signed-off-by: Huy Do --- benchmarks/benchmark_serving.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/benchmarks/benchmark_serving.py b/benchmarks/benchmark_serving.py index 098257496372..2515337ce603 100644 --- a/benchmarks/benchmark_serving.py +++ b/benchmarks/benchmark_serving.py @@ -1325,5 +1325,12 @@ def main(args: argparse.Namespace): "launching the server. For each request, the " "script chooses a LoRA module at random.") + parser.add_argument( + "--tensor-parallel-size", + type=int, + default=0, + help= + "The tensor parallel used by the server to display on the dashboard") + args = parser.parse_args() main(args) From b846138e23f0582a042d84487e42e315c038c557 Mon Sep 17 00:00:00 2001 From: Huy Do Date: Thu, 20 Feb 2025 17:58:06 -0800 Subject: [PATCH 3/7] Handle inf value Signed-off-by: Huy Do --- benchmarks/benchmark_utils.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/benchmarks/benchmark_utils.py b/benchmarks/benchmark_utils.py index 762c10b9f5c3..d09ea23f6a67 100644 --- a/benchmarks/benchmark_utils.py +++ b/benchmarks/benchmark_utils.py @@ -2,6 +2,7 @@ import argparse import json +import math import os from typing import Any, Dict, List @@ -40,6 +41,21 @@ def convert_to_pytorch_benchmark_format(args: argparse.Namespace, return records +class InfEncoder(json.JSONEncoder): + + def clear_inf(self, o: Any): + if isinstance(o, dict): + return {k: self.clear_inf(v) for k, v in o.items()} + elif isinstance(o, list): + return [self.clear_inf(v) for v in o] + elif isinstance(o, float) and math.isinf(o): + return "inf" + return o + + def encode(self, o: Any, *args, **kwargs) -> Any: + return super().encode(self.clear_inf(o), *args, **kwargs) + + def write_to_json(filename: str, records: List) -> None: with open(filename, "w") as f: - json.dump(records, f) + json.dump(records, f, cls=InfEncoder) From cdeff0ebaeab5851ff3afb63fb9ede3eaf797814 Mon Sep 17 00:00:00 2001 From: Huy Do Date: Thu, 20 Feb 2025 18:11:36 -0800 Subject: [PATCH 4/7] json.dump use iterencode Signed-off-by: Huy Do --- benchmarks/benchmark_utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/benchmarks/benchmark_utils.py b/benchmarks/benchmark_utils.py index d09ea23f6a67..eaa35f234033 100644 --- a/benchmarks/benchmark_utils.py +++ b/benchmarks/benchmark_utils.py @@ -52,8 +52,8 @@ def clear_inf(self, o: Any): return "inf" return o - def encode(self, o: Any, *args, **kwargs) -> Any: - return super().encode(self.clear_inf(o), *args, **kwargs) + def iterencode(self, o: Any, *args, **kwargs) -> Any: + return super().iterencode(self.clear_inf(o), *args, **kwargs) def write_to_json(filename: str, records: List) -> None: From e404a4210909cf3bfb78f9e683050036fbf94b38 Mon Sep 17 00:00:00 2001 From: Huy Do Date: Thu, 20 Feb 2025 18:22:36 -0800 Subject: [PATCH 5/7] Handle missing command file Signed-off-by: Huy Do --- .../convert-results-json-to-markdown.py | 27 ++++++++++++++----- 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/.buildkite/nightly-benchmarks/scripts/convert-results-json-to-markdown.py b/.buildkite/nightly-benchmarks/scripts/convert-results-json-to-markdown.py index e031686c7a29..1030ec24e8d7 100644 --- a/.buildkite/nightly-benchmarks/scripts/convert-results-json-to-markdown.py +++ b/.buildkite/nightly-benchmarks/scripts/convert-results-json-to-markdown.py @@ -84,8 +84,13 @@ def results_to_json(latency, throughput, serving): # this result is generated via `benchmark_serving.py` # attach the benchmarking command to raw_result - with open(test_file.with_suffix(".commands")) as f: - command = json.loads(f.read()) + try: + with open(test_file.with_suffix(".commands")) as f: + command = json.loads(f.read()) + except OSError as e: + print(e) + continue + raw_result.update(command) # update the test name of this result @@ -99,8 +104,13 @@ def results_to_json(latency, throughput, serving): # this result is generated via `benchmark_latency.py` # attach the benchmarking command to raw_result - with open(test_file.with_suffix(".commands")) as f: - command = json.loads(f.read()) + try: + with open(test_file.with_suffix(".commands")) as f: + command = json.loads(f.read()) + except OSError as e: + print(e) + continue + raw_result.update(command) # update the test name of this result @@ -121,8 +131,13 @@ def results_to_json(latency, throughput, serving): # this result is generated via `benchmark_throughput.py` # attach the benchmarking command to raw_result - with open(test_file.with_suffix(".commands")) as f: - command = json.loads(f.read()) + try: + with open(test_file.with_suffix(".commands")) as f: + command = json.loads(f.read()) + except OSError as e: + print(e) + continue + raw_result.update(command) # update the test name of this result From d0bdd26aeb977497d2f2ac3ce9d8f0426aa52452 Mon Sep 17 00:00:00 2001 From: Huy Do Date: Sat, 22 Feb 2025 18:18:50 -0800 Subject: [PATCH 6/7] Use --metadata Signed-off-by: Huy Do --- .../scripts/run-performance-benchmarks.sh | 2 +- benchmarks/benchmark_serving.py | 7 ------- benchmarks/benchmark_utils.py | 6 ++++++ 3 files changed, 7 insertions(+), 8 deletions(-) diff --git a/.buildkite/nightly-benchmarks/scripts/run-performance-benchmarks.sh b/.buildkite/nightly-benchmarks/scripts/run-performance-benchmarks.sh index f3152d297707..a3555f72a666 100644 --- a/.buildkite/nightly-benchmarks/scripts/run-performance-benchmarks.sh +++ b/.buildkite/nightly-benchmarks/scripts/run-performance-benchmarks.sh @@ -316,7 +316,7 @@ run_serving_tests() { --result-dir $RESULTS_FOLDER \ --result-filename ${new_test_name}.json \ --request-rate $qps \ - --tensor-parallel-size $tp \ + --metadata "tensor_parallel_size=$tp" \ $client_args" echo "Running test case $test_name with qps $qps" diff --git a/benchmarks/benchmark_serving.py b/benchmarks/benchmark_serving.py index 2515337ce603..098257496372 100644 --- a/benchmarks/benchmark_serving.py +++ b/benchmarks/benchmark_serving.py @@ -1325,12 +1325,5 @@ def main(args: argparse.Namespace): "launching the server. For each request, the " "script chooses a LoRA module at random.") - parser.add_argument( - "--tensor-parallel-size", - type=int, - default=0, - help= - "The tensor parallel used by the server to display on the dashboard") - args = parser.parse_args() main(args) diff --git a/benchmarks/benchmark_utils.py b/benchmarks/benchmark_utils.py index eaa35f234033..156817865907 100644 --- a/benchmarks/benchmark_utils.py +++ b/benchmarks/benchmark_utils.py @@ -36,6 +36,12 @@ def convert_to_pytorch_benchmark_format(args: argparse.Namespace, "extra_info": extra_info, }, } + + tp = record["benchmark"]["extra_info"]["args"].get("tensor_parallel_size", 0) + # Save tensor_parallel_size parameter if it's part of the metadata + if not tp and "tensor_parallel_size" in extra_info: + record["benchmark"]["extra_info"]["args"]["tensor_parallel_size"] = tp + records.append(record) return records From d5e27f138f23cc5bb7e956ef095d9e4b55e288c7 Mon Sep 17 00:00:00 2001 From: Huy Do Date: Sat, 22 Feb 2025 18:51:49 -0800 Subject: [PATCH 7/7] Fix lint Signed-off-by: Huy Do --- benchmarks/benchmark_utils.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/benchmarks/benchmark_utils.py b/benchmarks/benchmark_utils.py index 156817865907..ac0688ca013f 100644 --- a/benchmarks/benchmark_utils.py +++ b/benchmarks/benchmark_utils.py @@ -37,10 +37,12 @@ def convert_to_pytorch_benchmark_format(args: argparse.Namespace, }, } - tp = record["benchmark"]["extra_info"]["args"].get("tensor_parallel_size", 0) + tp = record["benchmark"]["extra_info"]["args"].get( + "tensor_parallel_size") # Save tensor_parallel_size parameter if it's part of the metadata if not tp and "tensor_parallel_size" in extra_info: - record["benchmark"]["extra_info"]["args"]["tensor_parallel_size"] = tp + record["benchmark"]["extra_info"]["args"][ + "tensor_parallel_size"] = extra_info["tensor_parallel_size"] records.append(record)