Skip to content

Commit c17610e

Browse files
authored
[Bugfix] Only use triton_kernels for MXFP4 on SM90 and SM100 (#29339)
Signed-off-by: mgoin <[email protected]>
1 parent 71df2a5 commit c17610e

File tree

1 file changed: +9 additions, -6 deletions (lines changed)
  • vllm/model_executor/layers/quantization

1 file changed

+9
-6
lines changed

vllm/model_executor/layers/quantization/mxfp4.py

Lines changed: 9 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -132,12 +132,15 @@ def get_mxfp4_backend(with_lora_support: bool) -> Mxfp4Backend:
132  132    )
133  133
134  134    # If FlashInfer is not available, try either Marlin or Triton
135       - if (
136       -     envs.VLLM_MXFP4_USE_MARLIN
137       -     or current_platform.get_device_capability()[0] < 9
138       -     or not has_triton_kernels()
139       -     or not is_torch_equal_or_newer("2.8.0")
140       - ):
     135  + triton_kernels_supported = (
     136  +     has_triton_kernels()
     137  +     and is_torch_equal_or_newer("2.8.0")
     138  +     # NOTE: triton_kernels are only confirmed to work on SM90 and SM100
     139  +     # SM110 fails with this error: https://github.com/vllm-project/vllm/issues/29317
     140  +     # SM120 needs this fix: https://github.com/triton-lang/triton/pull/8498
     141  +     and (9, 0) <= current_platform.get_device_capability() < (11, 0)
     142  + )
     143  + if envs.VLLM_MXFP4_USE_MARLIN or not triton_kernels_supported:
141  144        logger.info_once("Using Marlin backend")
142  145        return Mxfp4Backend.MARLIN
143  146    else:

0 commit comments

Comments
 (0)