Skip to content

Commit eac489f

Browse files
FrankD412 authored and NVShreyas committed
[fix] Fixes KV Cache overrides in trtllm-bench (NVIDIA#6103)
Signed-off-by: Frank Di Natale <[email protected]>
Signed-off-by: Shreyas Misra <[email protected]>
1 parent 271d4b0 commit eac489f

File tree

1 file changed

+4
-2
lines changed

1 file changed

+4
-2
lines changed

tensorrt_llm/bench/dataclasses/configuration.py

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -58,8 +58,6 @@ def get_llm_args(self) -> Dict:
5858
self.world_config.cluster_size,
5959
"trust_remote_code":
6060
True,
61-
"kv_cache_config":
62-
self.settings_config.get_kvcache_config(),
6361
"enable_chunked_prefill":
6462
self.settings_config.chunking,
6563
"extended_runtime_perf_knob_config":
@@ -82,6 +80,10 @@ def get_llm_args(self) -> Dict:
8280
if self.backend in backend_config_map:
8381
llm_args.update(backend_config_map[self.backend]())
8482

83+
kv_cache_config = self.settings_config.get_kvcache_config().__dict__
84+
backend_cache_config = llm_args.pop("kv_cache_config", {})
85+
llm_args["kv_cache_config"] = backend_cache_config | kv_cache_config
86+
8587
return update_llm_args_with_extra_options(llm_args,
8688
self.extra_llm_api_options)
8789

0 commit comments

Comments
 (0)