Merged
flashinfer/gemm/gemm_base.py (4 changes: 2 additions & 2 deletions)

```diff
@@ -3394,7 +3394,7 @@ def mm_fp4(
     return out


-@supported_compute_capability([89, 90, 100, 103, 120, 121])
+@supported_compute_capability([89, 90, 100, 103, 110, 120, 121])
```
Contributor comment (severity: high):

The corresponding test test_bmm_fp8 skips all tests on SM110 that use e5m2 dtypes. This suggests that the cudnn backend for bmm_fp8 might not support e5m2 on SM110. If this is the case, it would be better to add a check inside _cudnn_bmm_fp8_requirement to explicitly reject this combination and raise a ValueError, for consistency with _cutlass_bmm_fp8_requirement. This would make the API more robust. For example:

```python
from ..utils import get_compute_capability

# ... inside _cudnn_bmm_fp8_requirement
major, _ = get_compute_capability(A.device)
if major == 11 and (A.dtype == torch.float8_e5m2 or B.dtype == torch.float8_e5m2):
    raise ValueError("bmm_fp8 with e5m2 is not supported on SM110 for cudnn backend")
```
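The proposed guard can be exercised in isolation with a torch-free sketch. The function name, string dtype encoding, and plain-integer compute capability below are illustrative assumptions, not FlashInfer's actual API:

```python
# Hypothetical, torch-free sketch of the reviewer's proposed check.
# In the real code, A and B are torch tensors and the major version
# comes from get_compute_capability(A.device); here dtypes are plain
# strings and the major version is passed in directly so the logic
# can run without a GPU or torch installed.
def cudnn_bmm_fp8_requirement(major: int, a_dtype: str, b_dtype: str) -> bool:
    """Return True if supported; raise ValueError for rejected combinations."""
    if major == 11 and "float8_e5m2" in (a_dtype, b_dtype):
        raise ValueError(
            "bmm_fp8 with e5m2 is not supported on SM110 for cudnn backend"
        )
    return True
```

Raising a ValueError here mirrors _cutlass_bmm_fp8_requirement, so callers would get the same failure mode from every backend instead of a silent capability mismatch.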

```diff
 def _cudnn_bmm_fp8_requirement(
     A: torch.Tensor,
     B: torch.Tensor,
@@ -3408,7 +3408,7 @@ def _cudnn_bmm_fp8_requirement(
     return True
```


```diff
-@supported_compute_capability([89, 90, 100, 103, 120, 121])
+@supported_compute_capability([89, 90, 100, 103, 110, 120, 121])
```
Contributor comment (severity: high):

Similar to the cudnn backend, the test test_bmm_fp8 skips all tests on SM110 with e5m2 dtypes. If the cublas backend for bmm_fp8 also doesn't support e5m2 on SM110, this requirement function should be updated to raise a ValueError for this combination. This would improve API robustness and clarity. For example:

```python
from ..utils import get_compute_capability

# ... inside _cublas_bmm_fp8_requirement
major, _ = get_compute_capability(A.device)
if major == 11 and (A.dtype == torch.float8_e5m2 or B.dtype == torch.float8_e5m2):
    raise ValueError("bmm_fp8 with e5m2 is not supported on SM110 for cublas backend")
```

```diff
 def _cublas_bmm_fp8_requirement(
     A: torch.Tensor,
     B: torch.Tensor,
```
tests/gemm/test_bmm_fp8.py (7 changes: 6 additions & 1 deletion)
Expand Up @@ -29,7 +29,12 @@ def test_bmm_fp8(b, m, n, k, input_dtype, mat2_dtype, res_dtype, backend, auto_t
pytest.skip("Invalid combination: cutlass does not support e5m2")
if auto_tuning and backend != "cutlass":
pytest.skip("Invalid combination: auto_tuning only supported for cutlass")

if compute_capability[0] == 11 and (
input_dtype == torch.float8_e5m2 or mat2_dtype == torch.float8_e5m2
):
pytest.skip(
"Invalid combination: only cutlass supports SM110 which does not support e5m2"
)
Comment on lines +32 to +37
Contributor comment (severity: medium):

The skip message is a bit confusing. It says "only cutlass supports SM110", but this pull request appears to add SM110 support for cublas and cudnn backends as well. If the intention is that none of the backends support e5m2 on SM110, a clearer message would be helpful to avoid confusion.

Suggested change:

```diff
     if compute_capability[0] == 11 and (
         input_dtype == torch.float8_e5m2 or mat2_dtype == torch.float8_e5m2
     ):
         pytest.skip(
-            "Invalid combination: only cutlass supports SM110 which does not support e5m2"
+            "e5m2 is not supported on SM110 for bmm_fp8 by any of the available backends."
         )
```
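The skip conditions in this test can be factored into a single predicate, which makes the message wording easy to review in one place. The sketch below mirrors the three checks visible in the diff; the helper name, the string dtype encoding, and the exact condition for the cutlass/e5m2 branch (whose `if` line sits above the hunk) are assumptions for illustration:

```python
# Hypothetical helper mirroring the skip logic in test_bmm_fp8.
# Dtypes are modeled as plain strings instead of torch dtypes so the
# decision logic can be exercised without a GPU or torch installed.
E5M2 = "float8_e5m2"

def skip_reason(cc_major, input_dtype, mat2_dtype, backend, auto_tuning):
    """Return the skip message for an invalid combination, or None if it may run."""
    if backend == "cutlass" and E5M2 in (input_dtype, mat2_dtype):
        return "Invalid combination: cutlass does not support e5m2"
    if auto_tuning and backend != "cutlass":
        return "Invalid combination: auto_tuning only supported for cutlass"
    if cc_major == 11 and E5M2 in (input_dtype, mat2_dtype):
        return "e5m2 is not supported on SM110 for bmm_fp8 by any of the available backends."
    return None
```

Centralizing the messages this way would also have surfaced the confusing "only cutlass supports SM110" wording earlier, since all backend/dtype constraints sit side by side.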

```diff
     input = torch.randn([b, m, k], device="cuda", dtype=torch.bfloat16)
     input_fp8, input_inv_s = to_float8(input, dtype=input_dtype)
```