Skip to content

Commit 61ec0b9

Browse files
authored
Refine device context (#6433)
This change mainly includes the following fixes: take `DeviceContext` as the template parameter of math functors and OpKernel instead of `Place`; remove the `eigen_device` interface from the base class `DeviceContext`; remove the `GetEigenDevice` interface from `ExecutionContext` and the base class `DeviceContext`; remove the unused `platform::EigenDeviceConverter`; rename `REGISTER_OP_GPU_KERNEL` to `REGISTER_OP_CUDA_KERNEL`; rename `USE_GPU_ONLY_OP` to `USE_CUDA_ONLY_OP`.
1 parent 7902ad6 commit 61ec0b9

319 files changed

Lines changed: 2624 additions & 2546 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

paddle/framework/op_registry.h

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -181,8 +181,8 @@ class OpKernelRegistrar : public Registrar {
181181
return 0; \
182182
}
183183

184-
#define REGISTER_OP_GPU_KERNEL(op_type, ...) \
185-
REGISTER_OP_KERNEL(op_type, GPU, ::paddle::platform::GPUPlace, __VA_ARGS__)
184+
#define REGISTER_OP_CUDA_KERNEL(op_type, ...) \
185+
REGISTER_OP_KERNEL(op_type, CUDA, ::paddle::platform::GPUPlace, __VA_ARGS__)
186186

187187
#define REGISTER_OP_CPU_KERNEL(op_type, ...) \
188188
REGISTER_OP_KERNEL(op_type, CPU, ::paddle::platform::CPUPlace, __VA_ARGS__)
@@ -217,7 +217,7 @@ class OpKernelRegistrar : public Registrar {
217217
#else
218218
#define USE_OP_KERNEL(op_type) \
219219
USE_OP_DEVICE_KERNEL(op_type, CPU); \
220-
USE_OP_DEVICE_KERNEL(op_type, GPU)
220+
USE_OP_DEVICE_KERNEL(op_type, CUDA)
221221
#endif
222222

223223
#define USE_NO_KERNEL_OP(op_type) USE_OP_ITSELF(op_type);
@@ -226,9 +226,9 @@ class OpKernelRegistrar : public Registrar {
226226
USE_OP_ITSELF(op_type); \
227227
USE_OP_DEVICE_KERNEL(op_type, CPU);
228228

229-
#define USE_GPU_ONLY_OP(op_type) \
230-
USE_OP_ITSELF(op_type); \
231-
USE_OP_DEVICE_KERNEL(op_type, GPU)
229+
#define USE_CUDA_ONLY_OP(op_type) \
230+
USE_OP_ITSELF(op_type); \
231+
USE_OP_DEVICE_KERNEL(op_type, CUDA)
232232

233233
#define USE_OP(op_type) \
234234
USE_OP_ITSELF(op_type); \

paddle/framework/operator.cc

Lines changed: 1 addition & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -22,20 +22,6 @@ limitations under the License. */
2222
namespace paddle {
2323
namespace framework {
2424

25-
template <>
26-
Eigen::DefaultDevice& ExecutionContext::GetEigenDevice<
27-
platform::CPUPlace, Eigen::DefaultDevice>() const {
28-
return *device_context_.GetEigenDevice<platform::CPUPlace>();
29-
}
30-
31-
#ifdef PADDLE_WITH_CUDA
32-
template <>
33-
Eigen::GpuDevice&
34-
ExecutionContext::GetEigenDevice<platform::GPUPlace, Eigen::GpuDevice>() const {
35-
return *device_context_.GetEigenDevice<platform::GPUPlace>();
36-
}
37-
#endif
38-
3925
std::string OperatorBase::Input(const std::string& name) const {
4026
auto& ins = Inputs(name);
4127
PADDLE_ENFORCE_LE(ins.size(), 1UL,
@@ -429,7 +415,7 @@ void OperatorWithKernel::Run(const Scope& scope,
429415
}
430416
OpKernelType OperatorWithKernel::GetKernelType(
431417
const ExecutionContext& ctx) const {
432-
return OpKernelType(IndicateDataType(ctx), ctx.device_context());
418+
return OpKernelType(IndicateDataType(ctx), ctx.GetPlace());
433419
}
434420
DataType OperatorWithKernel::IndicateDataType(
435421
const ExecutionContext& ctx) const {

paddle/framework/operator.h

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -276,17 +276,25 @@ class ExecutionContext {
276276
out_tensor->set_lod(in_tensor.lod());
277277
}
278278

279-
template <typename PlaceType,
280-
typename DeviceType = typename platform::EigenDeviceConverter<
281-
PlaceType>::EigenDeviceType>
282-
DeviceType& GetEigenDevice() const;
283-
284279
platform::Place GetPlace() const { return device_context_.GetPlace(); }
285280

281+
template <typename DeviceContextType>
282+
const DeviceContextType& device_context() const {
283+
return *reinterpret_cast<const DeviceContextType*>(&device_context_);
284+
}
285+
286286
const platform::DeviceContext& device_context() const {
287287
return device_context_;
288288
}
289289

290+
#ifdef PADDLE_WITH_CUDA
291+
const inline platform::CUDADeviceContext& cuda_device_context() const {
292+
PADDLE_ENFORCE(platform::is_gpu_place(device_context_.GetPlace()));
293+
return *reinterpret_cast<const platform::CUDADeviceContext*>(
294+
&device_context_);
295+
}
296+
#endif
297+
290298
//! Get actual name vector for this input.
291299
const std::vector<std::string>& Inputs(const std::string& name) const {
292300
return op_.Inputs(name);
@@ -297,14 +305,6 @@ class ExecutionContext {
297305
return op_.Outputs(name);
298306
}
299307

300-
#ifdef PADDLE_WITH_CUDA
301-
const inline platform::CUDADeviceContext& cuda_device_context() const {
302-
PADDLE_ENFORCE(platform::is_gpu_place(device_context_.GetPlace()));
303-
return *reinterpret_cast<const platform::CUDADeviceContext*>(
304-
&device_context_);
305-
}
306-
#endif
307-
308308
private:
309309
const OperatorBase& op_;
310310
const Scope& scope_;

paddle/framework/operator_test.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,7 @@ class OpWithKernelTest : public OperatorWithKernel {
115115
protected:
116116
void InferShape(framework::InferShapeContext* ctx) const override {}
117117
OpKernelType GetKernelType(const ExecutionContext& ctx) const override {
118-
return OpKernelType(DataType::FP32, ctx.device_context());
118+
return OpKernelType(DataType::FP32, ctx.GetPlace());
119119
}
120120
};
121121

paddle/operators/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -138,7 +138,7 @@ function(op_library TARGET)
138138
if ("${TARGET}" STREQUAL "nccl_op")
139139
set(pybind_flag 1)
140140
# It's enough to just adding one operator to pybind
141-
file(APPEND ${pybind_file} "USE_GPU_ONLY_OP(ncclAllReduce);\n")
141+
file(APPEND ${pybind_file} "USE_CUDA_ONLY_OP(ncclAllReduce);\n")
142142
endif()
143143

144144
# reduce_op contains several operators

paddle/operators/accuracy_op.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ class AccuracyOp : public framework::OperatorWithKernel {
5757
const framework::ExecutionContext &ctx) const override {
5858
return framework::OpKernelType(
5959
framework::ToDataType(ctx.Input<Tensor>("Out")->type()),
60-
ctx.device_context());
60+
ctx.GetPlace());
6161
}
6262
};
6363

paddle/operators/accuracy_op.cu

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -104,5 +104,6 @@ class AccuracyOpCUDAKernel : public framework::OpKernel<T> {
104104

105105
// FIXME(typhoonzero): types of T is for inference data.
106106
// label data is always int64
107-
REGISTER_OP_GPU_KERNEL(accuracy, paddle::operators::AccuracyOpCUDAKernel<float>,
108-
paddle::operators::AccuracyOpCUDAKernel<double>);
107+
REGISTER_OP_CUDA_KERNEL(accuracy,
108+
paddle::operators::AccuracyOpCUDAKernel<float>,
109+
paddle::operators::AccuracyOpCUDAKernel<double>);

paddle/operators/accuracy_op.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ namespace operators {
2121

2222
using Tensor = framework::Tensor;
2323

24-
template <typename Place, typename T>
24+
template <typename DeviceContext, typename T>
2525
class AccuracyKernel : public framework::OpKernel<T> {
2626
public:
2727
void Compute(const framework::ExecutionContext& ctx) const override {

paddle/operators/activation_op.cc

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -611,16 +611,17 @@ REGISTER_OP(hard_sigmoid, ops::ActivationOp, ops::HardSigmoidOpMaker,
611611
REGISTER_OP(swish, ops::ActivationOp, ops::SwishOpMaker, swish_grad,
612612
ops::ActivationOpGrad);
613613

614-
#define REGISTER_ACTIVATION_CPU_KERNEL(act_type, functor, grad_functor) \
615-
REGISTER_OP_CPU_KERNEL( \
616-
act_type, \
617-
ops::ActivationKernel<paddle::platform::CPUPlace, ops::functor<float>>, \
618-
ops::ActivationKernel<paddle::platform::CPUPlace, \
619-
ops::functor<double>>); \
620-
REGISTER_OP_CPU_KERNEL( \
621-
act_type##_grad, ops::ActivationGradKernel<paddle::platform::CPUPlace, \
622-
ops::grad_functor<float>>, \
623-
ops::ActivationGradKernel<paddle::platform::CPUPlace, \
614+
#define REGISTER_ACTIVATION_CPU_KERNEL(act_type, functor, grad_functor) \
615+
REGISTER_OP_CPU_KERNEL( \
616+
act_type, ops::ActivationKernel<paddle::platform::CPUDeviceContext, \
617+
ops::functor<float>>, \
618+
ops::ActivationKernel<paddle::platform::CPUDeviceContext, \
619+
ops::functor<double>>); \
620+
REGISTER_OP_CPU_KERNEL( \
621+
act_type##_grad, \
622+
ops::ActivationGradKernel<paddle::platform::CPUDeviceContext, \
623+
ops::grad_functor<float>>, \
624+
ops::ActivationGradKernel<paddle::platform::CPUDeviceContext, \
624625
ops::grad_functor<double>>);
625626

626627
FOR_EACH_KERNEL_FUNCTOR(REGISTER_ACTIVATION_CPU_KERNEL);

paddle/operators/activation_op.cu

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -17,16 +17,17 @@
1717

1818
namespace ops = paddle::operators;
1919

20-
#define REGISTER_ACTIVATION_GPU_KERNEL(act_type, functor, grad_functor) \
21-
REGISTER_OP_GPU_KERNEL( \
22-
act_type, \
23-
ops::ActivationKernel<paddle::platform::GPUPlace, ops::functor<float>>, \
24-
ops::ActivationKernel<paddle::platform::GPUPlace, \
25-
ops::functor<double>>); \
26-
REGISTER_OP_GPU_KERNEL( \
27-
act_type##_grad, ops::ActivationGradKernel<paddle::platform::GPUPlace, \
28-
ops::grad_functor<float>>, \
29-
ops::ActivationGradKernel<paddle::platform::GPUPlace, \
20+
#define REGISTER_ACTIVATION_CUDA_KERNEL(act_type, functor, grad_functor) \
21+
REGISTER_OP_CUDA_KERNEL( \
22+
act_type, ops::ActivationKernel<paddle::platform::CUDADeviceContext, \
23+
ops::functor<float>>, \
24+
ops::ActivationKernel<paddle::platform::CUDADeviceContext, \
25+
ops::functor<double>>); \
26+
REGISTER_OP_CUDA_KERNEL( \
27+
act_type##_grad, \
28+
ops::ActivationGradKernel<paddle::platform::CUDADeviceContext, \
29+
ops::grad_functor<float>>, \
30+
ops::ActivationGradKernel<paddle::platform::CUDADeviceContext, \
3031
ops::grad_functor<double>>);
3132

32-
FOR_EACH_KERNEL_FUNCTOR(REGISTER_ACTIVATION_GPU_KERNEL);
33+
FOR_EACH_KERNEL_FUNCTOR(REGISTER_ACTIVATION_CUDA_KERNEL);

0 commit comments

Comments
 (0)