ROCm · alekstheod · Sep 17, 2025 · Sep 18, 2025 · Sep 18, 2025 · Copilot
diff --git a/build_tools/rocm/tsan_ignore_list.txt b/build_tools/rocm/tsan_ignore_list.txt
@@ -15,17 +15,13 @@ race:xla::gpu::AllocateDestinationBuffer
 race:xla::LocalDeviceState::ThenRelease
 
 # To be fixed
-race:xla::LiteralBase::Piece::DeallocateBuffers
 race:xla::PjRtStreamExecutorLoadedExecutable::ExecuteHelper
 race:xla::PjRtStreamExecutorClient::BufferFromHostBufferInternal
 race:xla::HloRunnerPjRt::TransferLiteralsFromDevice
 race:xla::MutableLiteralBase::~MutableLiteralBase
 race:xla::MutableLiteralBase::PopulateR1<int>
-race:xla::xla::gpu::GpuCompiler::CompileSingleModule
 race:xla::LiteralBase::Piece::Storage::Storage
 race:xla::LocalClient::TransferFromOutfeedLocal
-race:llvm::cl::opt_storage<bool, false, false>::setValue<int>
-race:xla::gpu::(anonymous namespace)::RecoverExp2Pattern::initStaticsIfNeeded<std::tuple<mlir::Float32Type, mlir::FloatTF32Type, mlir::BFloat16Type>>
 race:lld::lldMain
 race:llvm::*
 race:xla::gpu::GpuExecutable::ExecuteAsyncOnStream
diff --git a/xla/service/gpu/amdgpu_compiler.cc b/xla/service/gpu/amdgpu_compiler.cc
@@ -122,8 +122,8 @@ absl::Status AMDGPUCompiler::OptimizeHloConvolutionCanonicalization(
       stream_executor::RocmSolverContext::Create);
   pipeline.AddPass<ConvRewriter>(gpu_version);
   pipeline.AddPass<ConvPaddingLegalization>();
-  auto rcc = std::get<se::RocmComputeCapability>(gpu_version);
-  pipeline.AddPass<CudnnFusedConvRewriter>(rcc, dnn_version, toolkit_version);
+  // auto rcc = std::get<se::RocmComputeCapability>(gpu_version);
+  // pipeline.AddPass<CudnnFusedConvRewriter>(rcc, dnn_version, toolkit_version);
 
   // The conv padding/vectorization passes which we need to get rid of.  They
   // also leave behind unnecessary tuple/get-tuple-element pairs that

diff --git a/xla/service/gpu/transforms/BUILD b/xla/service/gpu/transforms/BUILD
@@ -1009,6 +1009,9 @@ cc_library(
 xla_test(
     name = "cudnn_fused_conv_rewriter_test",
     srcs = ["cudnn_fused_conv_rewriter_test.cc"],
+    tags = [
+        "cuda-only",
+    ],
     backend_tags = {
         "gpu_a100": [
             "noasan",
@@ -1017,8 +1020,7 @@ xla_test(
     },
     backends = [
         "gpu_a100",
-        "gpu_amd_any",
-    ] + if_oss(["gpu_any"]),
+    ],
     shard_count = 10,
     deps = [
         ":conv_rewriter",