65 changes: 65 additions & 0 deletions paddle/fluid/operators/activation_op.cc
@@ -619,6 +619,28 @@ class ActivationOpDoubleGrad : public framework::OperatorWithKernel {
}
};

class LeakyReluDoubleGrad : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;

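  // Both outputs of the double-grad op are elementwise over X, so they
  // inherit X's shape and LoD.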
void InferShape(framework::InferShapeContext* ctx) const override {
if (ctx->HasOutput("DX")) {
ctx->ShareDim("X", "DX");
ctx->ShareLoD("X", "DX");
}
if (ctx->HasOutput("DDOut")) {
ctx->ShareDim("X", "DDOut");
ctx->ShareLoD("X", "DDOut");
}
}

protected:
framework::OpKernelType GetExpectedKernelType(
const framework::ExecutionContext& ctx) const override {
return GetKernelType(ctx, *this, "DDX");
}
};
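For reference, a sketch of the math the new double-grad op implements, derived from the leaky_relu forward definition rather than taken from this patch:

\[
y = \begin{cases} x, & x \ge 0 \\ \alpha x, & x < 0 \end{cases}
\qquad
\frac{\partial y}{\partial x} = \begin{cases} 1, & x \ge 0 \\ \alpha, & x < 0 \end{cases}
\qquad
\frac{\partial^2 y}{\partial x^2} = 0 \ \text{a.e.}
\]

So DDOut = DDX * dy/dx elementwise, and DX, the extra gradient flowing back to X through the double-grad graph, is identically zero. This is exactly what LeakyReluGradGradFunctor below computes.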

//
// ReluGrad: dx = dy if y >= 0 else 0
// ReluGradGrad: ddy = ddx if y >= 0 else 0
@@ -643,6 +665,29 @@ class ReluDoubleGradMaker : public ::paddle::framework::SingleGradOpDescMaker {
}
};

// leaky_relu Grad: dx = dy if x >= 0 else alpha * dy
// leaky_relu GradGrad: ddy = ddx if x >= 0 else alpha * ddx
class LeakyReluDoubleGradMaker
: public ::paddle::framework::SingleGradOpDescMaker {
public:
using ::paddle::framework::SingleGradOpDescMaker::SingleGradOpDescMaker;

protected:
std::unique_ptr<::paddle::framework::OpDesc> Apply() const override {
auto* op = new ::paddle::framework::OpDesc();
op->SetType("leaky_relu_grad_grad");
// input1: X
op->SetInput("X", Input("X"));
// X@GRAD@GRAD: ddx
op->SetInput("DDX", OutputGrad(framework::GradVarName("X")));
op->SetAttrMap(Attrs());
// X@GRAD: dx
op->SetOutput("DX", InputGrad("X"));
// Out@GRAD@GRAD: ddy
op->SetOutput("DDOut", InputGrad(framework::GradVarName("Out")));
return std::unique_ptr<::paddle::framework::OpDesc>(op);
}
};
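Summarizing the SetInput/SetOutput wiring above, the generated op has this interface (a reading of the maker, not framework output):

leaky_relu_grad_grad
    inputs:  X (forward input), DDX (X@GRAD@GRAD)
    attrs:   alpha (forwarded from the forward op)
    outputs: DX (X@GRAD, zero-filled by the functor), DDOut (Out@GRAD@GRAD)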

} // namespace operators
} // namespace paddle

@@ -699,3 +744,23 @@ REGISTER_OP_CPU_KERNEL(
ops::ReluGradGradFunctor<double>>,
ops::ActivationDoubleGradKernel<plat::CPUDeviceContext,
ops::ReluGradGradFunctor<plat::float16>>);

REGISTER_OPERATOR(
leaky_relu, ops::ActivationOp, ops::LeakyReluOpMaker,
ops::ActivationOpInferVarType,
ops::ActivationGradOpDescMaker<ops::LeakyReluGradFunctor<float>::FwdDeps()>,
paddle::framework::SingleOpInplaceInToOut);
REGISTER_OPERATOR(leaky_relu_grad, ops::ActivationOpGrad,
paddle::framework::SingleOpInplaceInToOut,
ops::LeakyReluDoubleGradMaker);
REGISTER_OPERATOR(leaky_relu_grad_grad, ops::LeakyReluDoubleGrad);
REGISTER_ACTIVATION_CPU_KERNEL(leaky_relu, LeakyRelu, LeakyReluFunctor,
LeakyReluGradFunctor);
REGISTER_OP_CPU_KERNEL(
leaky_relu_grad_grad,
ops::ActivationDoubleGradKernel<plat::CPUDeviceContext,
ops::LeakyReluGradGradFunctor<float>>,
ops::ActivationDoubleGradKernel<plat::CPUDeviceContext,
ops::LeakyReluGradGradFunctor<double>>,
ops::ActivationDoubleGradKernel<
plat::CPUDeviceContext, ops::LeakyReluGradGradFunctor<plat::float16>>);
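Note that leaky_relu also drops out of the generic FOR_EACH_ACTIVATION_OP registration (see the macro list change in activation_op.h below), so these explicit REGISTER_OPERATOR calls, now including the double-grad maker and op, take over its registration.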
12 changes: 12 additions & 0 deletions paddle/fluid/operators/activation_op.cu
@@ -33,6 +33,18 @@ namespace plat = paddle::platform;

FOR_EACH_ACTIVATION_OP(REGISTER_ACTIVATION_CUDA_KERNEL);

REGISTER_ACTIVATION_CUDA_KERNEL(leaky_relu, LeakyRelu, LeakyReluFunctor,
LeakyReluGradFunctor);

REGISTER_OP_CUDA_KERNEL(
leaky_relu_grad_grad,
ops::ActivationDoubleGradKernel<plat::CUDADeviceContext,
ops::LeakyReluGradGradFunctor<float>>,
ops::ActivationDoubleGradKernel<plat::CUDADeviceContext,
ops::LeakyReluGradGradFunctor<double>>,
ops::ActivationDoubleGradKernel<
plat::CUDADeviceContext, ops::LeakyReluGradGradFunctor<plat::float16>>);

REGISTER_ACTIVATION_CUDA_KERNEL(relu, Relu, ReluFunctor, ReluGradFunctor);

REGISTER_OP_CUDA_KERNEL(
77 changes: 58 additions & 19 deletions paddle/fluid/operators/activation_op.h
@@ -1208,45 +1208,31 @@ inline void ExtractActivationDoubleGradTensor(
const framework::Tensor** Out, const framework::Tensor** ddX,
framework::Tensor** dX, framework::Tensor** dOut,
framework::Tensor** ddOut) {
auto out_var = ctx.InputVar("Out");
auto ddx_var = ctx.InputVar("DDX");
auto ddo_var = ctx.OutputVar("DDOut");
auto do_var = ctx.OutputVar("DOut");
PADDLE_ENFORCE(out_var != nullptr,
"Cannot get input Variable Out, variable name = %s",
ctx.op().Input("Out"));
PADDLE_ENFORCE(ddx_var != nullptr,
               "Cannot get input Variable DDX, variable name = %s",
               ctx.op().Input("DDX"));
if (CanBeUsedBySelectedRows.count(ctx.op().Type())) {
*Out = paddle::framework::GetLoDTensorOrSelectedRowsValueFromVar(*out_var);
*ddX = paddle::framework::GetLoDTensorOrSelectedRowsValueFromVar(*ddx_var);
if (ddo_var) {
*ddOut = paddle::framework::GetMutableLoDTensorOrSelectedRowsValueFromVar(
ddo_var);
}
if (do_var) {
*dOut = paddle::framework::GetMutableLoDTensorOrSelectedRowsValueFromVar(
do_var);
}
} else {
*Out = ctx.Input<framework::Tensor>("Out");
*ddX = ctx.Input<framework::Tensor>("DDX");
if (ddo_var) {
*ddOut = ctx.Output<framework::Tensor>("DDOut");
}
if (do_var) {
*dOut = ctx.Output<framework::Tensor>("DOut");
}
}
PADDLE_ENFORCE(*ddX != nullptr,
               "Cannot get output tensor DDX, variable name = %s",
               ctx.op().Output("DDX"));

if (static_cast<int>(kDepValue) & static_cast<int>(kDepX)) {
auto x_var = ctx.InputVar("X");
PADDLE_ENFORCE(x_var != nullptr,
               "Cannot get input tensor X, variable name = %s",
               ctx.op().Input("X"));
auto dx_var = ctx.OutputVar("DX");
if (CanBeUsedBySelectedRows.count(ctx.op().Type())) {
@@ -1262,9 +1248,33 @@ inline void ExtractActivationDoubleGradTensor(
}
}
} else {
VLOG(10) << " Inplace activation of Op : " << ctx.op().Type();
VLOG(10) << "Inplace activation of Op: " << ctx.op().Type();
*X = *ddX;
}
if (static_cast<int>(kDepValue) & static_cast<int>(kDepOut)) {
auto out_var = ctx.InputVar("Out");
PADDLE_ENFORCE(out_var != nullptr,
"Cannot get input tensor Out, variable name = %s",
ctx.op().Input("Out"));
auto dout_var = ctx.OutputVar("DOut");
if (CanBeUsedBySelectedRows.count(ctx.op().Type())) {
*Out =
paddle::framework::GetLoDTensorOrSelectedRowsValueFromVar(*out_var);
if (dout_var) {
*dOut =
paddle::framework::GetMutableLoDTensorOrSelectedRowsValueFromVar(
dout_var);
}
} else {
*Out = ctx.Input<framework::Tensor>("Out");
if (dout_var) {
*dOut = ctx.Output<framework::Tensor>("DOut");
}
}
} else {
VLOG(10) << "Inplace activation of Op: " << ctx.op().Type();
*Out = *ddX;
}
}
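The kDepValue checks above are plain bit tests against the functor's declared forward dependencies. A minimal standalone sketch of that pattern, with enum values assumed for illustration (the real ActBwdOpFwdDeps lives elsewhere in activation_op.h):

#include <cstdio>

// Assumed values for this sketch only.
enum ActBwdOpFwdDeps { kDepNone = 0x0, kDepX = 0x1, kDepOut = 0x2 };

int main() {
  // LeakyReluGradGradFunctor::FwdDeps() returns kDepX, so the double-grad
  // kernel needs the forward input X but not the forward output Out.
  int dep = kDepX;
  std::printf("needs X:   %s\n", (dep & kDepX) ? "yes" : "no");
  std::printf("needs Out: %s\n", (dep & kDepOut) ? "yes" : "no");
  return 0;
}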

template <typename DeviceContext, typename Functor>
@@ -1318,6 +1328,36 @@ struct ReluGradGradFunctor : public BaseActivationFunctor<T> {
static constexpr ActBwdOpFwdDeps FwdDeps() { return kDepOut; }
};

template <typename T>
struct LeakyReluGradGradFunctor : public BaseActivationFunctor<T> {
float alpha;
typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
return {{"alpha", &alpha}};
}
template <typename Device>
void operator()(const Device& dev, const framework::Tensor* X,
const framework::Tensor* Out, const framework::Tensor* ddX,
framework::Tensor* ddOut, framework::Tensor* dOut,
framework::Tensor* dX) const {
auto* d = dev.eigen_device();
auto ddx = framework::EigenVector<T>::Flatten(detail::Ref(ddX));
auto x = framework::EigenVector<T>::Flatten(detail::Ref(X));
if (ddOut) {
auto ddout = framework::EigenVector<T>::Flatten(detail::Ref(ddOut));
ddout.device(*d) = ddx *
((x >= static_cast<T>(0)).template cast<T>().eval() +
static_cast<T>(alpha) *
(x < static_cast<T>(0)).template cast<T>().eval())
.template cast<T>();
}
if (dX) {
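// leaky_relu is piecewise linear, so d2y/dx2 == 0 almost everywhere
// and the gradient sent back to X by the double-grad graph is zero.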
auto dx = framework::EigenVector<T>::Flatten(detail::Ref(dX));
dx.device(*d) = dx.constant(static_cast<T>(0));
}
}
static constexpr ActBwdOpFwdDeps FwdDeps() { return kDepX; }
};
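A self-contained numeric check of the same rule on plain arrays instead of Eigen expressions (values and names here are illustrative only):

#include <cstdio>

int main() {
  const float alpha = 0.2f;
  const float x[4] = {-2.0f, -0.5f, 0.5f, 2.0f};  // forward input
  const float ddx[4] = {1.0f, 1.0f, 1.0f, 1.0f};  // incoming grad-of-grad
  for (int i = 0; i < 4; ++i) {
    // Same rule as LeakyReluGradGradFunctor: pass ddx through where x >= 0,
    // scale it by alpha where x < 0; dX stays zero in both regions.
    float ddout = ddx[i] * (x[i] >= 0.0f ? 1.0f : alpha);
    std::printf("x = % .1f -> ddout = % .2f, dx = 0\n", x[i], ddout);
  }
  return 0;
}

With alpha = 0.2 this prints ddout = 0.20 for the two negative inputs and 1.00 for the two non-negative ones.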

} // namespace operators
} // namespace paddle

@@ -1349,7 +1389,6 @@ struct ReluGradGradFunctor : public BaseActivationFunctor<T> {
__macro(softplus, Softplus, SoftplusFunctor, SoftplusGradFunctor); \
__macro(softsign, Softsign, SoftsignFunctor, SoftsignGradFunctor); \
__macro(relu6, Relu6, Relu6Functor, Relu6GradFunctor); \
__macro(leaky_relu, LeakyRelu, LeakyReluFunctor, LeakyReluGradFunctor); \
__macro(tanh_shrink, TanhShrink, TanhShrinkFunctor, TanhShrinkGradFunctor); \
__macro(elu, ELU, ELUFunctor, ELUGradFunctor); \
__macro(hard_shrink, HardShrink, HardShrinkFunctor, HardShrinkGradFunctor); \
26 changes: 26 additions & 0 deletions python/paddle/fluid/tests/unittests/test_nn_grad.py
@@ -68,5 +68,31 @@ def test_grad(self):
self.func(p)


class TestLeakyReluDoubleGradCheck(unittest.TestCase):
@prog_scope()
def func(self, place):
# the shape of the input variable should be clearly specified, and must not include -1
shape = [3, 7]
eps = 0.005
alpha = 0.2
dtype = np.float64

x = layers.data('x', shape, False, dtype)
x.persistable = True
y = layers.leaky_relu(x, alpha=alpha)
x_arr = np.random.uniform(-1, 1, shape).astype(dtype)
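# nudge entries near zero away from the kink at x == 0, where leaky_relu
# is not differentiable and the finite-difference check is unreliable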
x_arr[np.abs(x_arr) < 0.005] = 0.02

gradient_checker.double_grad_check(
[x], y, x_init=x_arr, place=place, eps=eps)

def test_grad(self):
places = [fluid.CPUPlace()]
if core.is_compiled_with_cuda():
places.append(fluid.CUDAPlace(0))
for p in places:
self.func(p)


if __name__ == "__main__":
unittest.main()