From d5972fb6495659e9295f463f30cdc04adb3bb852 Mon Sep 17 00:00:00 2001
From: junq <22017000+QiJune@users.noreply.github.com>
Date: Mon, 22 Sep 2025 19:21:36 +0800
Subject: [PATCH] cherry pick 7725

Signed-off-by: junq <22017000+QiJune@users.noreply.github.com>
---
 .../bench/dataclasses/configuration.py | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/tensorrt_llm/bench/dataclasses/configuration.py b/tensorrt_llm/bench/dataclasses/configuration.py
index 6d8e703ee49..3b32626003e 100755
--- a/tensorrt_llm/bench/dataclasses/configuration.py
+++ b/tensorrt_llm/bench/dataclasses/configuration.py
@@ -90,15 +90,16 @@ def get_llm_args(self) -> Dict:
         if self.backend == "pytorch":
             cuda_graph_config = updated_llm_args.pop(
                 "cuda_graph_config", llm_args["cuda_graph_config"])
-            # Use runtime max_batch_size as cuda_graph_config.max_batch_size
-            # if both max_batch_size and batch_sizes are not set.
-            batch_sizes_set = cuda_graph_config.get("batch_sizes",
-                                                    None) is not None
-            max_batch_size_set = cuda_graph_config.get("max_batch_size",
-                                                       None) is not None
-            if not batch_sizes_set and not max_batch_size_set:
-                cuda_graph_config[
-                    "max_batch_size"] = self.settings_config.max_batch_size
+            if cuda_graph_config:
+                # Use runtime max_batch_size as cuda_graph_config.max_batch_size
+                # if both max_batch_size and batch_sizes are not set.
+                batch_sizes_set = cuda_graph_config.get("batch_sizes",
+                                                        None) is not None
+                max_batch_size_set = cuda_graph_config.get(
+                    "max_batch_size", None) is not None
+                if not batch_sizes_set and not max_batch_size_set:
+                    cuda_graph_config[
+                        "max_batch_size"] = self.settings_config.max_batch_size
             updated_llm_args["cuda_graph_config"] = cuda_graph_config
 
         return updated_llm_args
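
What the change does, in isolation: the defaulting logic is now guarded by
`if cuda_graph_config:`, so a None (or otherwise falsy) config no longer
reaches `.get(...)` and raises AttributeError. Below is a minimal
standalone sketch of the patched logic, not the actual configuration.py
code; the function name `apply_cuda_graph_batch_size`, its parameters, and
the `"some_option"` key are illustrative placeholders.

from typing import Any, Dict, Optional


def apply_cuda_graph_batch_size(
        cuda_graph_config: Optional[Dict[str, Any]],
        runtime_max_batch_size: int) -> Optional[Dict[str, Any]]:
    """Sketch of the patched defaulting step from get_llm_args().

    Pre-patch: cuda_graph_config.get(...) was called unconditionally, so
    a None config raised AttributeError. Post-patch: the truthiness guard
    skips the step when the config is None (or an empty dict, also falsy).
    """
    if cuda_graph_config:
        batch_sizes_set = cuda_graph_config.get("batch_sizes") is not None
        max_batch_size_set = cuda_graph_config.get("max_batch_size") is not None
        if not batch_sizes_set and not max_batch_size_set:
            # Neither an explicit batch-size list nor a cap was given:
            # fall back to the runtime max batch size.
            cuda_graph_config["max_batch_size"] = runtime_max_batch_size
    return cuda_graph_config


# None no longer raises; it simply passes through.
assert apply_cuda_graph_batch_size(None, 64) is None
# An empty dict is falsy, so it is skipped as well.
assert apply_cuda_graph_batch_size({}, 64) == {}
# "some_option" is a placeholder key: the default is applied because
# neither batch_sizes nor max_batch_size is set.
assert apply_cuda_graph_batch_size({"some_option": True}, 64) == {
    "some_option": True,
    "max_batch_size": 64,
}
# An explicit batch_sizes list suppresses the default.
assert apply_cuda_graph_batch_size({"batch_sizes": [1, 2, 4]}, 64) == {
    "batch_sizes": [1, 2, 4],
}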