File tree Expand file tree Collapse file tree 1 file changed +9
-6
lines changed
vllm/model_executor/layers/quantization Expand file tree Collapse file tree 1 file changed +9
-6
lines changed Original file line number Diff line number Diff line change @@ -132,12 +132,15 @@ def get_mxfp4_backend(with_lora_support: bool) -> Mxfp4Backend:
132132 )
133133
134134 # If FlashInfer is not available, try either Marlin or Triton
135- if (
136- envs .VLLM_MXFP4_USE_MARLIN
137- or current_platform .get_device_capability ()[0 ] < 9
138- or not has_triton_kernels ()
139- or not is_torch_equal_or_newer ("2.8.0" )
140- ):
135+ triton_kernels_supported = (
136+ has_triton_kernels ()
137+ and is_torch_equal_or_newer ("2.8.0" )
138+ # NOTE: triton_kernels are only confirmed to work on SM90 and SM100
139+ # SM110 fails with this error: https://github.com/vllm-project/vllm/issues/29317
140+ # SM120 needs this fix: https://github.com/triton-lang/triton/pull/8498
141+ and (9 , 0 ) <= current_platform .get_device_capability () < (11 , 0 )
142+ )
143+ if envs .VLLM_MXFP4_USE_MARLIN or not triton_kernels_supported :
141144 logger .info_once ("Using Marlin backend" )
142145 return Mxfp4Backend .MARLIN
143146 else :
You can’t perform that action at this time.
0 commit comments