Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions paddle/phi/kernels/sparse/gpu/conv_kernel_igemm.cu
Original file line number Diff line number Diff line change
Expand Up @@ -152,13 +152,18 @@ void Conv3dImplicitGemmGPUKernel(const GPUContext& dev_ctx,
phi::funcs::TransposeGPUKernelDriver<T>(
dev_ctx, kernel, perm, &kernel_transpose);

#ifdef PADDLE_WITH_CUDA
conv_forward_implicit_gemm_cuda(dev_ctx,
x.values(),
kernel_transpose,
*(out_kmap_cache_ptr->out_in_map),
out->nnz(),
out_channels,
*(out->mutable_values()));
#else
PADDLE_THROW(phi::errors::Unimplemented(
"conv_forward_implicit_gemm_cuda is only supported on CUDA."));
#endif
}

/**
Expand All @@ -179,6 +184,7 @@ void Conv3dImplicitGemmKernel(const Context& dev_ctx,
const bool subm,
const std::string& key,
SparseCooTensor* out) {
#ifdef PADDLE_WITH_CUDA
PD_VISIT_BASE_INTEGRAL_TYPES(
x.indices().dtype(), "Conv3dImplicitGemmGPUKernel", ([&] {
// Conv3dImplicitGemmGPUKernel<T, data_t>(dev_ctx,
Expand All @@ -193,6 +199,10 @@ void Conv3dImplicitGemmKernel(const Context& dev_ctx,
key,
out);
}));
#else
PADDLE_THROW(phi::errors::Unimplemented(
"Conv3dImplicitGemmKernel is only supported on CUDA."));
#endif
}
} // namespace sparse
} // namespace phi
Expand Down
6 changes: 6 additions & 0 deletions paddle/phi/kernels/sparse/gpu/conv_kernel_impl.cuh
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
#pragma once

#ifdef PADDLE_WITH_CUDA

#include <cuda_fp16.h>
#include "paddle/phi/common/float16.h"
#include "paddle/phi/kernels/sparse/gpu/conv_memory_utils.cuh"
Expand Down Expand Up @@ -1271,3 +1275,5 @@ void conv_forward_implicit_gemm_cuda(
}
}
}

#endif // PADDLE_WITH_CUDA
4 changes: 4 additions & 0 deletions paddle/phi/kernels/sparse/gpu/conv_memory_utils.cuh
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
#pragma once

#ifdef PADDLE_WITH_CUDA

// Primary template for global_load, parameterized on the integer `bytes`.
// It is declared here without a body, so it is used through explicit
// specializations (e.g. global_load<2>, which appears further down).
// NOTE(review): presumably `bytes` is the width of a single memory access —
// confirm against the specializations.
template <int bytes>
struct global_load;

Expand Down Expand Up @@ -93,3 +95,5 @@ struct global_load<2>
}
}
};

#endif // PADDLE_WITH_CUDA