[ROCm] Disable Cudnn fusions (#358)

draganmladjenovic · web-flow · commit edab8b2b0b24 · 2025-09-19T11:42:31.000+01:00
diff --git a/xla/service/gpu/amdgpu_compiler.cc b/xla/service/gpu/amdgpu_compiler.cc
@@ -122,8 +122,9 @@ absl::Status AMDGPUCompiler::OptimizeHloConvolutionCanonicalization(
       stream_executor::RocmSolverContext::Create);
   pipeline.AddPass<ConvRewriter>(gpu_version);
   pipeline.AddPass<ConvPaddingLegalization>();
-  auto rcc = std::get<se::RocmComputeCapability>(gpu_version);
-  pipeline.AddPass<CudnnFusedConvRewriter>(rcc, dnn_version, toolkit_version);
+  // TODO(rocm): Re-enable CudnnFusedConvRewriter once #12613 is fixed.
+  // auto rcc = std::get<se::RocmComputeCapability>(gpu_version);
+  // pipeline.AddPass<CudnnFusedConvRewriter>(rcc, dnn_version, toolkit_version);
 
   // The conv padding/vectorization passes which we need to get rid of.  They
   // also leave behind unnecessary tuple/get-tuple-element pairs that
diff --git a/xla/service/gpu/transforms/BUILD b/xla/service/gpu/transforms/BUILD
@@ -1009,6 +1009,9 @@ cc_library(
 xla_test(
     name = "cudnn_fused_conv_rewriter_test",
     srcs = ["cudnn_fused_conv_rewriter_test.cc"],
+    tags = [
+        "cuda-only",  # TODO(rocm): Re-enable on ROCm once #12613 is fixed.
+    ],
     backend_tags = {
         "gpu_a100": [
             "noasan",
@@ -1017,8 +1020,7 @@ xla_test(
     },
     backends = [
         "gpu_a100",
-        "gpu_amd_any",
-    ] + if_oss(["gpu_any"]),
+    ],
     shard_count = 10,
     deps = [
         ":conv_rewriter",