Skip to content

Commit 45dc43f

Browse files
author
Tom HENEAULT
committed
fix(mig): fallback gpu_memory_total value
1 parent 16267d4 commit 45dc43f

File tree

1 file changed

+19
-9
lines changed

1 file changed

+19
-9
lines changed

python/sglang/srt/utils.py

Lines changed: 19 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1259,15 +1259,25 @@ def get_hpu_memory_capacity():
12591259

12601260

12611261
def get_device_memory_capacity(device: str = None):
    """Return the total memory capacity of the active accelerator, if known.

    The backend is probed in this order: ROCm/HIP (``is_hip()``), CUDA
    (``torch.cuda.is_available()``), then Habana (``device == "hpu"``).
    If none of them matches, ``None`` is returned.

    When the backend-specific query raises ``ValueError``, the value of the
    ``SGLANG_GPU_MEMORY_TOTAL_FALLBACK`` environment variable is used instead
    (converted with ``float``).
    NOTE(review): the fallback is presumably expected in the same unit as the
    ``get_*_memory_capacity`` helpers return - confirm against those helpers.

    Args:
        device: Device type hint; only the value ``"hpu"`` is inspected here.

    Returns:
        The capacity reported by the matching helper, the parsed fallback
        value as a ``float``, or ``None`` when no backend matched.

    Raises:
        ValueError: If the capacity query fails and no usable fallback is
            configured (unset, empty, or not parseable as a number).
    """
    try:
        if is_hip():
            gpu_mem = get_amdgpu_memory_capacity()
        elif torch.cuda.is_available():
            gpu_mem = get_nvgpu_memory_capacity()
        elif device == "hpu":
            gpu_mem = get_hpu_memory_capacity()
        else:
            # GPU memory is not known yet or no GPU is available.
            gpu_mem = None
    except ValueError:
        fallback_value = os.environ.get("SGLANG_GPU_MEMORY_TOTAL_FALLBACK")
        if not fallback_value:
            # Unset and empty are treated alike: there is nothing to fall
            # back to, so surface the hint and propagate the original error
            # with its traceback intact.
            logger.warning(
                "Impossible to get the memory capacity, you might set it manually via SGLANG_GPU_MEMORY_TOTAL_FALLBACK environment"
            )
            raise
        try:
            gpu_mem = float(fallback_value)
        except ValueError as err:
            # Name the offending variable instead of leaking a bare
            # "could not convert string to float" from inside the handler.
            raise ValueError(
                "SGLANG_GPU_MEMORY_TOTAL_FALLBACK must be a number, "
                f"got {fallback_value!r}"
            ) from err

    return gpu_mem
12731283

0 commit comments

Comments
 (0)