-
-
Notifications
You must be signed in to change notification settings - Fork 11.7k
Run v1 benchmark and integrate with PyTorch OSS benchmark database #13068
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 17 commits
ce79bc5
d84671f
27caeb3
4ae88ea
c04e53f
10971bd
93c3b85
7cddf74
a14865c
19f436d
6874d65
826887c
42288cb
61ca7c2
0798703
a4e24e4
fd8fc67
85910f5
e05d7dd
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -29,4 +29,4 @@ | |
| "num-iters": 15 | ||
| } | ||
| } | ||
| ] | ||
| ] | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -56,6 +56,8 @@ | |
| except ImportError: | ||
| from argparse import ArgumentParser as FlexibleArgumentParser | ||
|
|
||
| from benchmark_utils import convert_to_pytorch_benchmark_format | ||
|
|
||
| MILLISECONDS_TO_SECONDS_CONVERSION = 1000 | ||
|
|
||
|
|
||
|
|
@@ -402,21 +404,21 @@ async def get_request( | |
| burstiness: float = 1.0, | ||
| ) -> AsyncGenerator[Tuple[str, int, int], None]: | ||
| """ | ||
| Asynchronously generates requests at a specified rate | ||
| Asynchronously generates requests at a specified rate | ||
| with OPTIONAL burstiness. | ||
|
|
||
| Args: | ||
| input_requests: | ||
| input_requests: | ||
| A list of input requests, each represented as a tuple. | ||
| request_rate: | ||
| request_rate: | ||
| The rate at which requests are generated (requests/s). | ||
| burstiness (optional): | ||
| The burstiness factor of the request generation. | ||
| burstiness (optional): | ||
| The burstiness factor of the request generation. | ||
| Only takes effect when request_rate is not inf. | ||
| Default value is 1, which follows a Poisson process. | ||
| Otherwise, the request intervals follow a gamma distribution. | ||
| A lower burstiness value (0 < burstiness < 1) results | ||
| in more bursty requests, while a higher burstiness value | ||
| A lower burstiness value (0 < burstiness < 1) results | ||
| in more bursty requests, while a higher burstiness value | ||
| (burstiness > 1) results in a more uniform arrival of requests. | ||
| """ | ||
| input_requests = iter(input_requests) | ||
|
|
@@ -817,6 +819,32 @@ def parse_goodput(slo_pairs): | |
| return goodput_config_dict | ||
|
|
||
|
|
||
| def save_to_pytorch_benchmark_format(args: argparse.Namespace, | ||
| results: Dict[str, Any], | ||
| file_name: str) -> None: | ||
| metrics = [ | ||
| "median_ttft_ms", "mean_ttft_ms", "std_ttft_ms", "p99_ttft_ms", | ||
| "mean_tpot_ms", "median_tpot_ms", "std_tpot_ms", "p99_tpot_ms", | ||
| "median_itl_ms", "mean_itl_ms", "std_itl_ms", "p99_itl_ms" | ||
| ] | ||
| # These raw data might be useful, but they are rather big. They can be added | ||
| # later if needed | ||
| ignored_metrics = ["ttfts", "itls", "generated_texts", "errors"] | ||
| pt_records = convert_to_pytorch_benchmark_format( | ||
| args=args, | ||
| metrics={k: [results[k]] | ||
| for k in metrics}, | ||
| extra_info={ | ||
| k: results[k] | ||
| for k in results if k not in metrics and k not in ignored_metrics | ||
| }) | ||
| if pt_records: | ||
| # Don't use json suffix here as we don't want CI to pick it up | ||
| pt_file = f"{os.path.splitext(file_name)[0]}.pytorch.json" | ||
| with open(pt_file, "w") as f: | ||
| json.dump(pt_records, f) | ||
|
|
||
|
|
||
| def main(args: argparse.Namespace): | ||
| print(args) | ||
| random.seed(args.seed) | ||
|
|
@@ -997,6 +1025,7 @@ def main(args: argparse.Namespace): | |
| file_name = os.path.join(args.result_dir, file_name) | ||
| with open(file_name, "w", encoding='utf-8') as outfile: | ||
| json.dump(result_json, outfile) | ||
| save_to_pytorch_benchmark_format(args, result_json, file_name) | ||
|
|
||
|
|
||
| if __name__ == "__main__": | ||
|
|
@@ -1014,7 +1043,7 @@ def main(args: argparse.Namespace): | |
| default=None, | ||
| help="Server or API base url if not using http host and port.", | ||
| ) | ||
| parser.add_argument("--host", type=str, default="localhost") | ||
| parser.add_argument("--host", type=str, default="127.0.0.1") | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Note: This change is needed to force the benchmark script to use ipv4. There is nothing wrong with resolving
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is fine by me, but can you add a comment here regarding this? |
||
| parser.add_argument("--port", type=int, default=8000) | ||
| parser.add_argument( | ||
| "--endpoint", | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,39 @@ | ||
| # SPDX-License-Identifier: Apache-2.0 | ||
|
|
||
| import argparse | ||
| import os | ||
| from typing import Any, Dict, List | ||
|
|
||
|
|
||
| def convert_to_pytorch_benchmark_format(args: argparse.Namespace, | ||
| metrics: Dict[str, List], | ||
| extra_info: Dict[str, Any]) -> List: | ||
| """ | ||
| Save the benchmark results in the format used by PyTorch OSS benchmark with | ||
| one metric per record | ||
| https://github.com/pytorch/pytorch/wiki/How-to-integrate-with-PyTorch-OSS-benchmark-database | ||
| """ | ||
| records = [] | ||
| if not os.environ.get("SAVE_TO_PYTORCH_BENCHMARK_FORMAT", False): | ||
| return records | ||
|
|
||
| for name, benchmark_values in metrics.items(): | ||
| record = { | ||
| "benchmark": { | ||
| "name": "vLLM benchmark", | ||
| "extra_info": { | ||
| "args": vars(args), | ||
| }, | ||
| }, | ||
| "model": { | ||
| "name": args.model, | ||
| }, | ||
| "metric": { | ||
| "name": name, | ||
| "benchmark_values": benchmark_values, | ||
| "extra_info": extra_info, | ||
| }, | ||
| } | ||
| records.append(record) | ||
|
|
||
| return records |
Uh oh!
There was an error while loading. Please reload this page.