1 file changed: +5 −5 lines changed
```diff
@@ -35,7 +35,7 @@
 from vllm.test_utils import MODEL_WEIGHTS_S3_BUCKET, MODELS_ON_S3
 from vllm.transformers_utils.utils import check_gguf_file
 from vllm.usage.usage_lib import UsageContext
-from vllm.utils import FlexibleArgumentParser, is_in_ray_actor
+from vllm.utils import FlexibleArgumentParser, GiB_bytes, is_in_ray_actor

 # yapf: enable

@@ -1618,13 +1618,13 @@ def _set_default_args_v1(self, usage_context: UsageContext) -> None:
         # values for non-H100/H200 GPUs.
         try:
             from vllm.platforms import current_platform
-            device_name = current_platform.get_device_name().lower()
+            device_memory = current_platform.get_device_total_memory()
         except Exception:
             # This is only used to set default_max_num_batched_tokens
-            device_name = "no-device"
+            device_memory = 0

-        if "h100" in device_name or "h200" in device_name:
-            # For H100 and H200, we use larger default values.
+        if device_memory >= 70 * GiB_bytes:
+            # For GPUs like H100 and MI300x, use larger default values.
             default_max_num_batched_tokens = {
                 UsageContext.LLM_CLASS: 16384,
                 UsageContext.OPENAI_API_SERVER: 8192,
```
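In substance, the change replaces a device-name string match ("h100"/"h200") with a total-memory threshold: any accelerator reporting at least 70 GiB gets the larger `max_num_batched_tokens` defaults, which is how GPUs like AMD's MI300X now qualify without being name-checked. Below is a minimal, self-contained sketch of the new heuristic; the helper name `uses_large_defaults` is mine, not vLLM's, and the snippet runs even without a GPU (or without vLLM installed) because the `except Exception` branch also swallows a failed import.

```python
GiB_bytes = 1 << 30  # same value as the vllm.utils.GiB_bytes constant


def uses_large_defaults() -> bool:
    """True when the current device should get the larger scheduler defaults."""
    try:
        from vllm.platforms import current_platform
        device_memory = current_platform.get_device_total_memory()
    except Exception:
        # No usable device (or vllm not importable). This value is only used
        # to pick default_max_num_batched_tokens, so fall back to 0 and take
        # the conservative defaults.
        device_memory = 0
    return device_memory >= 70 * GiB_bytes


if __name__ == "__main__":
    # Prints True on an 80 GiB H100/H200 or a 192 GiB MI300X; prints False
    # on a 24 GiB consumer GPU or on a machine with no GPU at all.
    print(uses_large_defaults())
```

Keying off capacity rather than names also means future large-memory parts pick up the bigger defaults automatically; the 70 GiB cutoff presumably sits below the 80 GiB of H100/H200 while still excluding 48 GiB-class workstation cards.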