Skip to content

Commit 54403a0

Browse files
committed
fix test_memcpy_op
1 parent 2d222f4 commit 54403a0

File tree

3 files changed

+10
-7
lines changed

3 files changed

+10
-7
lines changed

paddle/fluid/operators/fused/CMakeLists.txt

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,10 @@ if (WITH_GPU OR WITH_ROCM)
3232
file(APPEND ${pybind_file} "USE_CUDA_ONLY_OP(fused_batch_norm_act);\n")
3333
endif()
3434
# conv_fusion_op needs cudnn 7 above
35-
op_library(conv_fusion_op)
36-
file(APPEND ${pybind_file} "USE_CUDA_ONLY_OP(conv2d_fusion);\n")
35+
if (NOT ${CUDNN_VERSION} VERSION_LESS 7100)
36+
op_library(conv_fusion_op)
37+
file(APPEND ${pybind_file} "USE_CUDA_ONLY_OP(conv2d_fusion);\n")
38+
endif()
3739
# fusion_transpose_flatten_concat_op
3840
# HIP not support cudnnTransformTensor
3941
if(NOT WITH_ROCM)

paddle/fluid/operators/fused/conv_fusion_op.cu

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -208,7 +208,6 @@ class CUDNNConvFusionOpKernel : public framework::OpKernel<T> {
208208
kNUM_CUDNN_FWD_ALGS, &find_count, &find_result,
209209
cudnn_workspace_ptr, workspace_size, false));
210210
};
211-
// if (!exhaustive_search && !deterministic) {
212211
workspace_handle.RunFuncSync(cudnn_find_func, workspace_size);
213212
algo = find_result.fwd_algo;
214213
VLOG(3) << "cuDNN forward algo " << algo;
@@ -244,15 +243,16 @@ class CUDNNConvFusionOpKernel : public framework::OpKernel<T> {
244243
PADDLE_ENFORCE_CUDA_SUCCESS(
245244
platform::dynload::cudnnSetConvolutionGroupCount(cudnn_conv_desc,
246245
groups));
247-
// Now only support NCHW
248-
std::vector<int> bias_dim = {
249-
1, static_cast<int>(transformed_output.dims()[1]), 1, 1};
246+
250247
cudnnTensorDescriptor_t cudnn_input_desc = input_desc.descriptor<T>(
251248
layout, framework::vectorize<int>(transformed_input.dims()));
252249
cudnnTensorDescriptor_t cudnn_output_desc = output_desc.descriptor<T>(
253250
layout, framework::vectorize<int>(transformed_output.dims()));
254251
cudnnFilterDescriptor_t cudnn_filter_desc = filter_desc.descriptor<T>(
255252
layout, framework::vectorize<int>(filter->dims()));
253+
// Now only support NCHW
254+
std::vector<int> bias_dim = {
255+
1, static_cast<int>(transformed_output.dims()[1]), 1, 1};
256256
cudnnTensorDescriptor_t cudnn_bias_desc =
257257
bias_desc.descriptor<T>(layout, bias_dim);
258258
cudnnActivationDescriptor_t cudnn_act_desc =
@@ -430,6 +430,7 @@ class CUDNNConvFusionOpKernel : public framework::OpKernel<T> {
430430
}
431431
};
432432
#endif
433+
433434
} // namespace operators
434435
} // namespace paddle
435436

paddle/fluid/operators/memcpy_op.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -139,7 +139,7 @@ REGISTER_OP_CPU_KERNEL_FUNCTOR(memcpy, float, ops::MemcpyKernel, double,
139139
ops::MemcpyKernel, plat::float16,
140140
ops::MemcpyKernel);
141141

142-
#ifdef PADDLE_WITH_CUDA
142+
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_ROCM)
143143
REGISTER_OP_CUDA_KERNEL_FUNCTOR(memcpy, float, ops::MemcpyKernel, double,
144144
ops::MemcpyKernel, int, ops::MemcpyKernel,
145145
int64_t, ops::MemcpyKernel, bool,

0 commit comments

Comments (0)