Skip to content

Commit 54403a0

Browse files
committed
fix test_memcpy_op
1 parent 2d222f4 commit 54403a0

File tree

3 files changed

+10
-7
lines changed

3 files changed

+10
-7
lines changed

paddle/fluid/operators/fused/CMakeLists.txt

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,10 @@ if (WITH_GPU OR WITH_ROCM)
3232
file(APPEND ${pybind_file} "USE_CUDA_ONLY_OP(fused_batch_norm_act);\n")
3333
endif()
3434
# conv_fusion_op needs cudnn 7 above
35-
op_library(conv_fusion_op)
36-
file(APPEND ${pybind_file} "USE_CUDA_ONLY_OP(conv2d_fusion);\n")
35+
if (NOT ${CUDNN_VERSION} VERSION_LESS 7100)
36+
op_library(conv_fusion_op)
37+
file(APPEND ${pybind_file} "USE_CUDA_ONLY_OP(conv2d_fusion);\n")
38+
endif()
3739
# fusion_transpose_flatten_concat_op
3840
# HIP not support cudnnTransformTensor
3941
if(NOT WITH_ROCM)

paddle/fluid/operators/fused/conv_fusion_op.cu

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -208,7 +208,6 @@ class CUDNNConvFusionOpKernel : public framework::OpKernel<T> {
208208
kNUM_CUDNN_FWD_ALGS, &find_count, &find_result,
209209
cudnn_workspace_ptr, workspace_size, false));
210210
};
211-
// if (!exhaustive_search && !deterministic) {
212211
workspace_handle.RunFuncSync(cudnn_find_func, workspace_size);
213212
algo = find_result.fwd_algo;
214213
VLOG(3) << "cuDNN forward algo " << algo;
@@ -244,15 +243,16 @@ class CUDNNConvFusionOpKernel : public framework::OpKernel<T> {
244243
PADDLE_ENFORCE_CUDA_SUCCESS(
245244
platform::dynload::cudnnSetConvolutionGroupCount(cudnn_conv_desc,
246245
groups));
247-
// Now only support NCHW
248-
std::vector<int> bias_dim = {
249-
1, static_cast<int>(transformed_output.dims()[1]), 1, 1};
246+
250247
cudnnTensorDescriptor_t cudnn_input_desc = input_desc.descriptor<T>(
251248
layout, framework::vectorize<int>(transformed_input.dims()));
252249
cudnnTensorDescriptor_t cudnn_output_desc = output_desc.descriptor<T>(
253250
layout, framework::vectorize<int>(transformed_output.dims()));
254251
cudnnFilterDescriptor_t cudnn_filter_desc = filter_desc.descriptor<T>(
255252
layout, framework::vectorize<int>(filter->dims()));
253+
// Now only support NCHW
254+
std::vector<int> bias_dim = {
255+
1, static_cast<int>(transformed_output.dims()[1]), 1, 1};
256256
cudnnTensorDescriptor_t cudnn_bias_desc =
257257
bias_desc.descriptor<T>(layout, bias_dim);
258258
cudnnActivationDescriptor_t cudnn_act_desc =
@@ -430,6 +430,7 @@ class CUDNNConvFusionOpKernel : public framework::OpKernel<T> {
430430
}
431431
};
432432
#endif
433+
433434
} // namespace operators
434435
} // namespace paddle
435436

paddle/fluid/operators/memcpy_op.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -139,7 +139,7 @@ REGISTER_OP_CPU_KERNEL_FUNCTOR(memcpy, float, ops::MemcpyKernel, double,
139139
ops::MemcpyKernel, plat::float16,
140140
ops::MemcpyKernel);
141141

142-
#ifdef PADDLE_WITH_CUDA
142+
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_ROCM)
143143
REGISTER_OP_CUDA_KERNEL_FUNCTOR(memcpy, float, ops::MemcpyKernel, double,
144144
ops::MemcpyKernel, int, ops::MemcpyKernel,
145145
int64_t, ops::MemcpyKernel, bool,

0 commit comments

Comments (0)