From 6de056afd150b41cb5d34c5dbe33283c11a87039 Mon Sep 17 00:00:00 2001
From: Huy Do <huydhn@gmail.com>
Date: Thu, 20 Feb 2025 15:25:09 -0800
Subject: [PATCH 1/7] Fix several issues on benchmark outputs

Signed-off-by: Huy Do <huydhn@gmail.com>
---
 .../scripts/run-performance-benchmarks.sh                   | 3 +++
 .buildkite/nightly-benchmarks/tests/throughput-tests.json   | 2 +-
 benchmarks/benchmark_latency.py                             | 5 ++---
 benchmarks/benchmark_serving.py                             | 5 ++---
 benchmarks/benchmark_throughput.py                          | 5 ++---
 benchmarks/benchmark_utils.py                               | 6 ++++++
 6 files changed, 16 insertions(+), 10 deletions(-)

diff --git a/.buildkite/nightly-benchmarks/scripts/run-performance-benchmarks.sh b/.buildkite/nightly-benchmarks/scripts/run-performance-benchmarks.sh
index 9425cb07ec01..f3152d297707 100644
--- a/.buildkite/nightly-benchmarks/scripts/run-performance-benchmarks.sh
+++ b/.buildkite/nightly-benchmarks/scripts/run-performance-benchmarks.sh
@@ -309,11 +309,14 @@ run_serving_tests() {
 
       new_test_name=$test_name"_qps_"$qps
 
+      # pass the tensor parallel size to the client so that it can be displayed
+      # on the benchmark dashboard
       client_command="python3 benchmark_serving.py \
         --save-result \
         --result-dir $RESULTS_FOLDER \
         --result-filename ${new_test_name}.json \
         --request-rate $qps \
+        --tensor-parallel-size $tp \
         $client_args"
 
       echo "Running test case $test_name with qps $qps"
diff --git a/.buildkite/nightly-benchmarks/tests/throughput-tests.json b/.buildkite/nightly-benchmarks/tests/throughput-tests.json
index 91ef6d16be63..9bc87cbcd2bc 100644
--- a/.buildkite/nightly-benchmarks/tests/throughput-tests.json
+++ b/.buildkite/nightly-benchmarks/tests/throughput-tests.json
@@ -32,4 +32,4 @@
             "backend": "vllm"
         }
     }
-]
\ No newline at end of file
+]
diff --git a/benchmarks/benchmark_latency.py b/benchmarks/benchmark_latency.py
index b041626550b5..e467b1cd131b 100644
--- a/benchmarks/benchmark_latency.py
+++ b/benchmarks/benchmark_latency.py
@@ -11,7 +11,7 @@
 
 import numpy as np
 import torch
-from benchmark_utils import convert_to_pytorch_benchmark_format
+from benchmark_utils import convert_to_pytorch_benchmark_format, write_to_json
 from tqdm import tqdm
 
 from vllm import LLM, SamplingParams
@@ -30,8 +30,7 @@ def save_to_pytorch_benchmark_format(args: argparse.Namespace,
                     for k in ["avg_latency", "percentiles"]})
     if pt_records:
         pt_file = f"{os.path.splitext(args.output_json)[0]}.pytorch.json"
-        with open(pt_file, "w") as f:
-            json.dump(pt_records, f)
+        write_to_json(pt_file, pt_records)
 
 
 def main(args: argparse.Namespace):
diff --git a/benchmarks/benchmark_serving.py b/benchmarks/benchmark_serving.py
index 9760737ccec3..098257496372 100644
--- a/benchmarks/benchmark_serving.py
+++ b/benchmarks/benchmark_serving.py
@@ -56,7 +56,7 @@
 except ImportError:
     from argparse import ArgumentParser as FlexibleArgumentParser
 
-from benchmark_utils import convert_to_pytorch_benchmark_format
+from benchmark_utils import convert_to_pytorch_benchmark_format, write_to_json
 
 MILLISECONDS_TO_SECONDS_CONVERSION = 1000
 
@@ -841,8 +841,7 @@ def save_to_pytorch_benchmark_format(args: argparse.Namespace,
     if pt_records:
         # Don't use json suffix here as we don't want CI to pick it up
         pt_file = f"{os.path.splitext(file_name)[0]}.pytorch.json"
-        with open(pt_file, "w") as f:
-            json.dump(pt_records, f)
+        write_to_json(pt_file, pt_records)
 
 
 def main(args: argparse.Namespace):
diff --git a/benchmarks/benchmark_throughput.py b/benchmarks/benchmark_throughput.py
index f7d87f1b336f..4674516a7af4 100644
--- a/benchmarks/benchmark_throughput.py
+++ b/benchmarks/benchmark_throughput.py
@@ -11,7 +11,7 @@
 
 import torch
 import uvloop
-from benchmark_utils import convert_to_pytorch_benchmark_format
+from benchmark_utils import convert_to_pytorch_benchmark_format, write_to_json
 from PIL import Image
 from tqdm import tqdm
 from transformers import (AutoModelForCausalLM, AutoTokenizer,
@@ -355,8 +355,7 @@ def save_to_pytorch_benchmark_format(args: argparse.Namespace,
     if pt_records:
         # Don't use json suffix here as we don't want CI to pick it up
         pt_file = f"{os.path.splitext(args.output_json)[0]}.pytorch.json"
-        with open(pt_file, "w") as f:
-            json.dump(pt_records, f)
+        write_to_json(pt_file, pt_records)
 
 
 def main(args: argparse.Namespace):
diff --git a/benchmarks/benchmark_utils.py b/benchmarks/benchmark_utils.py
index 6f01cf20e17c..762c10b9f5c3 100644
--- a/benchmarks/benchmark_utils.py
+++ b/benchmarks/benchmark_utils.py
@@ -1,6 +1,7 @@
 # SPDX-License-Identifier: Apache-2.0
 
 import argparse
+import json
 import os
 from typing import Any, Dict, List
 
@@ -37,3 +38,8 @@ def convert_to_pytorch_benchmark_format(args: argparse.Namespace,
         records.append(record)
 
     return records
+
+
+def write_to_json(filename: str, records: List) -> None:
+    with open(filename, "w") as f:
+        json.dump(records, f)

From 0c5651b3580e8388ff8c188f4019cb487f5d1602 Mon Sep 17 00:00:00 2001
From: Huy Do <huydhn@gmail.com>
Date: Thu, 20 Feb 2025 16:35:54 -0800
Subject: [PATCH 2/7] Add the --tensor-parallel-size option to benchmark_serving.py

Signed-off-by: Huy Do <huydhn@gmail.com>
---
 benchmarks/benchmark_serving.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/benchmarks/benchmark_serving.py b/benchmarks/benchmark_serving.py
index 098257496372..2515337ce603 100644
--- a/benchmarks/benchmark_serving.py
+++ b/benchmarks/benchmark_serving.py
@@ -1325,5 +1325,12 @@ def main(args: argparse.Namespace):
                         "launching the server. For each request, the "
                         "script chooses a LoRA module at random.")
 
+    parser.add_argument(
+        "--tensor-parallel-size",
+        type=int,
+        default=0,
+        help=
+        "The tensor parallel used by the server to display on the dashboard")
+
     args = parser.parse_args()
     main(args)

From b846138e23f0582a042d84487e42e315c038c557 Mon Sep 17 00:00:00 2001
From: Huy Do <huydhn@gmail.com>
Date: Thu, 20 Feb 2025 17:58:06 -0800
Subject: [PATCH 3/7] Handle inf value

Signed-off-by: Huy Do <huydhn@gmail.com>
---
 benchmarks/benchmark_utils.py | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/benchmarks/benchmark_utils.py b/benchmarks/benchmark_utils.py
index 762c10b9f5c3..d09ea23f6a67 100644
--- a/benchmarks/benchmark_utils.py
+++ b/benchmarks/benchmark_utils.py
@@ -2,6 +2,7 @@
 
 import argparse
 import json
+import math
 import os
 from typing import Any, Dict, List
 
@@ -40,6 +41,21 @@ def convert_to_pytorch_benchmark_format(args: argparse.Namespace,
     return records
 
 
+class InfEncoder(json.JSONEncoder):
+
+    def clear_inf(self, o: Any):
+        if isinstance(o, dict):
+            return {k: self.clear_inf(v) for k, v in o.items()}
+        elif isinstance(o, list):
+            return [self.clear_inf(v) for v in o]
+        elif isinstance(o, float) and math.isinf(o):
+            return "inf"
+        return o
+
+    def encode(self, o: Any, *args, **kwargs) -> Any:
+        return super().encode(self.clear_inf(o), *args, **kwargs)
+
+
 def write_to_json(filename: str, records: List) -> None:
     with open(filename, "w") as f:
-        json.dump(records, f)
+        json.dump(records, f, cls=InfEncoder)

From cdeff0ebaeab5851ff3afb63fb9ede3eaf797814 Mon Sep 17 00:00:00 2001
From: Huy Do <huydhn@gmail.com>
Date: Thu, 20 Feb 2025 18:11:36 -0800
Subject: [PATCH 4/7] Override iterencode, which json.dump uses, instead of encode

Signed-off-by: Huy Do <huydhn@gmail.com>
---
 benchmarks/benchmark_utils.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/benchmarks/benchmark_utils.py b/benchmarks/benchmark_utils.py
index d09ea23f6a67..eaa35f234033 100644
--- a/benchmarks/benchmark_utils.py
+++ b/benchmarks/benchmark_utils.py
@@ -52,8 +52,8 @@ def clear_inf(self, o: Any):
             return "inf"
         return o
 
-    def encode(self, o: Any, *args, **kwargs) -> Any:
-        return super().encode(self.clear_inf(o), *args, **kwargs)
+    def iterencode(self, o: Any, *args, **kwargs) -> Any:
+        return super().iterencode(self.clear_inf(o), *args, **kwargs)
 
 
 def write_to_json(filename: str, records: List) -> None:

From e404a4210909cf3bfb78f9e683050036fbf94b38 Mon Sep 17 00:00:00 2001
From: Huy Do <huydhn@gmail.com>
Date: Thu, 20 Feb 2025 18:22:36 -0800
Subject: [PATCH 5/7] Handle missing command file

Signed-off-by: Huy Do <huydhn@gmail.com>
---
 .../convert-results-json-to-markdown.py       | 27 ++++++++++++++-----
 1 file changed, 21 insertions(+), 6 deletions(-)

diff --git a/.buildkite/nightly-benchmarks/scripts/convert-results-json-to-markdown.py b/.buildkite/nightly-benchmarks/scripts/convert-results-json-to-markdown.py
index e031686c7a29..1030ec24e8d7 100644
--- a/.buildkite/nightly-benchmarks/scripts/convert-results-json-to-markdown.py
+++ b/.buildkite/nightly-benchmarks/scripts/convert-results-json-to-markdown.py
@@ -84,8 +84,13 @@ def results_to_json(latency, throughput, serving):
             # this result is generated via `benchmark_serving.py`
 
             # attach the benchmarking command to raw_result
-            with open(test_file.with_suffix(".commands")) as f:
-                command = json.loads(f.read())
+            try:
+                with open(test_file.with_suffix(".commands")) as f:
+                    command = json.loads(f.read())
+            except OSError as e:
+                print(e)
+                continue
+
             raw_result.update(command)
 
             # update the test name of this result
@@ -99,8 +104,13 @@ def results_to_json(latency, throughput, serving):
             # this result is generated via `benchmark_latency.py`
 
             # attach the benchmarking command to raw_result
-            with open(test_file.with_suffix(".commands")) as f:
-                command = json.loads(f.read())
+            try:
+                with open(test_file.with_suffix(".commands")) as f:
+                    command = json.loads(f.read())
+            except OSError as e:
+                print(e)
+                continue
+
             raw_result.update(command)
 
             # update the test name of this result
@@ -121,8 +131,13 @@ def results_to_json(latency, throughput, serving):
             # this result is generated via `benchmark_throughput.py`
 
             # attach the benchmarking command to raw_result
-            with open(test_file.with_suffix(".commands")) as f:
-                command = json.loads(f.read())
+            try:
+                with open(test_file.with_suffix(".commands")) as f:
+                    command = json.loads(f.read())
+            except OSError as e:
+                print(e)
+                continue
+
             raw_result.update(command)
 
             # update the test name of this result

From d0bdd26aeb977497d2f2ac3ce9d8f0426aa52452 Mon Sep 17 00:00:00 2001
From: Huy Do <huydhn@gmail.com>
Date: Sat, 22 Feb 2025 18:18:50 -0800
Subject: [PATCH 6/7] Pass tensor_parallel_size via --metadata instead of a dedicated flag

Signed-off-by: Huy Do <huydhn@gmail.com>
---
 .../scripts/run-performance-benchmarks.sh                  | 2 +-
 benchmarks/benchmark_serving.py                            | 7 -------
 benchmarks/benchmark_utils.py                              | 6 ++++++
 3 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/.buildkite/nightly-benchmarks/scripts/run-performance-benchmarks.sh b/.buildkite/nightly-benchmarks/scripts/run-performance-benchmarks.sh
index f3152d297707..a3555f72a666 100644
--- a/.buildkite/nightly-benchmarks/scripts/run-performance-benchmarks.sh
+++ b/.buildkite/nightly-benchmarks/scripts/run-performance-benchmarks.sh
@@ -316,7 +316,7 @@ run_serving_tests() {
         --result-dir $RESULTS_FOLDER \
         --result-filename ${new_test_name}.json \
         --request-rate $qps \
-        --tensor-parallel-size $tp \
+        --metadata "tensor_parallel_size=$tp" \
         $client_args"
 
       echo "Running test case $test_name with qps $qps"
diff --git a/benchmarks/benchmark_serving.py b/benchmarks/benchmark_serving.py
index 2515337ce603..098257496372 100644
--- a/benchmarks/benchmark_serving.py
+++ b/benchmarks/benchmark_serving.py
@@ -1325,12 +1325,5 @@ def main(args: argparse.Namespace):
                         "launching the server. For each request, the "
                         "script chooses a LoRA module at random.")
 
-    parser.add_argument(
-        "--tensor-parallel-size",
-        type=int,
-        default=0,
-        help=
-        "The tensor parallel used by the server to display on the dashboard")
-
     args = parser.parse_args()
     main(args)
diff --git a/benchmarks/benchmark_utils.py b/benchmarks/benchmark_utils.py
index eaa35f234033..156817865907 100644
--- a/benchmarks/benchmark_utils.py
+++ b/benchmarks/benchmark_utils.py
@@ -36,6 +36,12 @@ def convert_to_pytorch_benchmark_format(args: argparse.Namespace,
                 "extra_info": extra_info,
             },
         }
+
+        tp = record["benchmark"]["extra_info"]["args"].get("tensor_parallel_size", 0)
+        # Save tensor_parallel_size parameter if it's part of the metadata
+        if not tp and "tensor_parallel_size" in extra_info:
+            record["benchmark"]["extra_info"]["args"]["tensor_parallel_size"] = tp
+
         records.append(record)
 
     return records

From d5e27f138f23cc5bb7e956ef095d9e4b55e288c7 Mon Sep 17 00:00:00 2001
From: Huy Do <huydhn@gmail.com>
Date: Sat, 22 Feb 2025 18:51:49 -0800
Subject: [PATCH 7/7] Fix lint and save tensor_parallel_size from the metadata

Signed-off-by: Huy Do <huydhn@gmail.com>
---
 benchmarks/benchmark_utils.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/benchmarks/benchmark_utils.py b/benchmarks/benchmark_utils.py
index 156817865907..ac0688ca013f 100644
--- a/benchmarks/benchmark_utils.py
+++ b/benchmarks/benchmark_utils.py
@@ -37,10 +37,12 @@ def convert_to_pytorch_benchmark_format(args: argparse.Namespace,
             },
         }
 
-        tp = record["benchmark"]["extra_info"]["args"].get("tensor_parallel_size", 0)
+        tp = record["benchmark"]["extra_info"]["args"].get(
+            "tensor_parallel_size")
         # Save tensor_parallel_size parameter if it's part of the metadata
         if not tp and "tensor_parallel_size" in extra_info:
-            record["benchmark"]["extra_info"]["args"]["tensor_parallel_size"] = tp
+            record["benchmark"]["extra_info"]["args"][
+                "tensor_parallel_size"] = extra_info["tensor_parallel_size"]
 
         records.append(record)