1 file changed, 6 insertions(+), 4 deletions(-)

@@ -383,11 +383,13 @@ def compile_or_warm_up_model(self) -> None:
383383 f"for non-torch memory, and { GiB (cuda_graph_memory_bytes )} "
384384 f"GiB for CUDAGraph memory. Replace gpu_memory_utilization "
385385 f"config with `--kv-cache-memory="
386- f"{ kv_cache_memory_bytes_to_requested_limit } ` to fit into "
387- f"requested memory, or `--kv-cache-memory="
388- f"{ kv_cache_memory_bytes_to_gpu_limit } ` to fully "
386+ f"{ kv_cache_memory_bytes_to_requested_limit } ` "
387+ f"({ GiB (kv_cache_memory_bytes_to_requested_limit )} GiB) to fit "
388+ f"into requested memory, or `--kv-cache-memory="
389+ f"{ kv_cache_memory_bytes_to_gpu_limit } ` "
390+ f"({ GiB (kv_cache_memory_bytes_to_gpu_limit )} GiB) to fully "
389391 f"utilize gpu memory. Current kv cache memory in use is "
390- f"{ int (self .available_kv_cache_memory_bytes )} bytes ." )
392+ f"{ GiB (self .available_kv_cache_memory_bytes )} GiB ." )
391393
392394 logger .debug (msg )
393395
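The change replaces raw byte counts in the log message with values passed through a GiB display helper, so the suggested `--kv-cache-memory` settings also show a human-readable gibibyte figure. The helper itself is defined elsewhere in the module and is not part of this diff; the sketch below is only an assumption of what such a bytes-to-GiB conversion typically looks like, not the project's actual implementation.

# Hypothetical sketch of a bytes-to-GiB display helper (assumed, not taken
# from the diff's module). Rounding keeps the error message readable.
GiB_bytes = 1 << 30  # 1 GiB = 2**30 bytes


def GiB(num_bytes: float) -> float:
    """Convert a byte count to gibibytes, rounded for log output."""
    return round(num_bytes / GiB_bytes, 2)


# Example: with this helper, 85_899_345_920 bytes renders as 80.0, so the
# message reads `--kv-cache-memory=85899345920` (80.0 GiB) instead of only
# showing the raw integer.
print(GiB(85_899_345_920))  # -> 80.0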