Merged
9 changes: 9 additions & 0 deletions paddle/fluid/operators/activation_op.cc
@@ -363,6 +363,13 @@ class LeakyReluOpMaker : public framework::OpProtoAndCheckerMaker {
AddInput("X", "Input of LeakyRelu operator");
AddOutput("Out", "Output of LeakyRelu operator");
AddAttr<float>("alpha", "The small negative slope").SetDefault(0.02f);
AddAttr<bool>("use_mkldnn",
"(bool, default false) Only used in mkldnn kernel")
.SetDefault(false);
AddAttr<bool>("is_test",
"(bool, default false) Set to true for inference only, false "
"for training. Some layers may run faster when this is true.")
.SetDefault(false);
AddComment(R"DOC(
LeakyRelu Activation Operator.

@@ -695,6 +702,8 @@ class LeakyReluDoubleGradMaker
op->SetType("leaky_relu_grad_grad");
// input1: X
op->SetInput("X", Input("X"));
// input2: Out
op->SetInput("Out", Input("Out"));
Contributor: Why add an Input here?

Contributor Author (@grygielski), Jul 18, 2019: It's because the MKLDNN activation grad functor expects an input called "Out" to create the hash key for caching. Without this change, the activation grad test crashed.

Contributor: > create hash key for caching

Why should we add this input only for creating a hash key, since this op doesn't actually need this input?

Contributor Author: Well, I did it because it felt like the fastest way to get around this error, but you are right: this input is unnecessary, so I will change the way the MKLDNN key is generated.

Contributor: Thanks very much! If we change the input, then many previously trained models will fail after updating the develop branch.

Contributor: @luotao1 Could you please explain why trained models will fail when we add an additional input to the grad op? This is just the leaky_relu grad op, so it holds no parameters; why would extending the inputs of a grad op cause failures? I'm asking because, as described in #17960, I would like to extend all MKL-DNN grad ops to take X as an input, since the MKL-DNN backward implementations require X to compute backward/grad.

Contributor: @phlrain Could you help look into this problem?

Contributor: @jczaja @grygielski Discussed with @phlrain: it's OK to add an additional input to the grad op as this PR does, but we should not call AddInput() in the OpMaker().

Contributor Author (@grygielski), Jul 18, 2019: OK, so my solution is acceptable? Because during key generation we would otherwise have to add extra cases to check whether a specific operator's grad has only the X input, which causes additional problems when adding MKLDNN support for further activations.

Contributor: Yes, it is.
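The resolution the thread converges on (keep the key-based caching, but make the key unambiguous) can be sketched in Python. The helper below is hypothetical, mirroring only the shape of the platform::MKLDNNHandler::GetHash call in the diff: every attribute that changes the primitive, including alpha and beta, must land in the key so that two leaky_relu ops with different slopes never share a cached MKL-DNN primitive.

```python
def activation_cache_key(src_dims, algorithm, alpha, beta, out_name):
    # Hypothetical mirror of the GetHash call: dims, algorithm kind,
    # alpha, beta, and the output variable name are all concatenated,
    # so any attribute that changes the primitive changes the key.
    dims = "x".join(str(d) for d in src_dims)
    return dims + "-" + str(algorithm) + str(alpha) + str(beta) + out_name

k_a = activation_cache_key([2, 4, 3, 5], 5, 0.02, 0.0, "leaky_relu_0.tmp_0")
k_b = activation_cache_key([2, 4, 3, 5], 5, 0.10, 0.0, "leaky_relu_0.tmp_0")
assert k_a != k_b  # different alpha, different cached primitive
```

Before this PR the key carried only the algorithm and the output variable name, so the two calls above would have collided.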

// X@GRAD@GRAD: ddx
op->SetInput("DDX", OutputGrad(framework::GradVarName("X")));
op->SetAttrMap(Attrs());
2 changes: 1 addition & 1 deletion paddle/fluid/operators/activation_op.h
@@ -1001,7 +1001,7 @@ struct LeakyReluGradFunctor : public BaseActivationFunctor<T> {
dx.device(d) = dout * (temp1 + temp2).template cast<T>();
}

static constexpr ActBwdOpFwdDeps FwdDeps() { return kDepX; }
static constexpr ActBwdOpFwdDeps FwdDeps() { return kDepXOut; }
};

template <typename T>
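The kDepX to kDepXOut switch above records that the backward pass may also read Out. For alpha > 0, leaky_relu is strictly monotonic, so out >= 0 exactly when x >= 0 and the slope can be recovered from either tensor; a numpy sketch (the helper name is illustrative, not Paddle API):

```python
import numpy as np

def leaky_relu_grad_from_out(dout, out, alpha=0.02):
    # For alpha > 0, out >= 0 exactly when x >= 0, so the slope
    # (1 on the positive side, alpha on the negative side) can be
    # chosen from Out without keeping X around.
    return dout * np.where(out >= 0, 1.0, alpha)

x = np.array([-2.0, -0.5, 0.0, 0.5, 2.0])
out = np.maximum(x, 0.02 * x)
dx = leaky_relu_grad_from_out(np.ones_like(x), out)
assert np.allclose(dx, np.where(x >= 0, 1.0, 0.02))
```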
29 changes: 17 additions & 12 deletions paddle/fluid/operators/mkldnn/activation_mkldnn_op.cc
@@ -77,8 +77,7 @@ class MKLDNNActivationGradKernel

template <typename T>
void eltwise_forward(const framework::ExecutionContext &ctx,
mkldnn::algorithm algorithm, const T alpha = 0,
const T beta = 0) {
mkldnn::algorithm algorithm) {
PADDLE_ENFORCE(paddle::platform::is_cpu_place(ctx.GetPlace()),
"It must use CPUPlace.");
auto &dev_ctx = ctx.template device_context<MKLDNNDeviceContext>();
@@ -90,6 +89,9 @@ void eltwise_forward(const framework::ExecutionContext &ctx,
const T *x_data = x->data<T>();
T *y_data = y->mutable_data<T>(ctx.GetPlace());

const T alpha = ctx.op().HasAttr("alpha") ? ctx.Attr<T>("alpha") : 0;
const T beta = ctx.op().HasAttr("beta") ? ctx.Attr<T>("beta") : 0;

PADDLE_ENFORCE(
x->dims().size() == 2 || x->dims().size() == 3 || x->dims().size() == 4,
"Input dim must be with 2, 3 or 4");
@@ -101,10 +103,9 @@ void eltwise_forward(const framework::ExecutionContext &ctx,

bool is_test = ctx.Attr<bool>("is_test");

// TODO(jczaja): When adding leaky-relu , swish , elu make sure to extend key
// with alpha, beta
std::string key = platform::MKLDNNHandler::GetHash(
src_tz, std::to_string(algorithm) + ctx.op().Output("Out"));
src_tz, std::to_string(algorithm) + std::to_string(alpha) +
std::to_string(beta) + ctx.op().Output("Out"));

// TODO(jczaja): Make it Thread safe
// save input data and layout to be referred in backward path
@@ -153,8 +154,7 @@ void eltwise_forward(const framework::ExecutionContext &ctx,

template <typename T>
void eltwise_grad(const framework::ExecutionContext &ctx,
mkldnn::algorithm algorithm, const T alpha = 0,
const T beta = 0) {
mkldnn::algorithm algorithm) {
auto &dev_ctx = ctx.template device_context<MKLDNNDeviceContext>();
const auto &mkldnn_engine = dev_ctx.GetEngine();

@@ -164,6 +164,9 @@ void eltwise_grad(const framework::ExecutionContext &ctx,
const T *diff_y_data = diff_y->data<T>();
T *diff_x_data = diff_x->mutable_data<T>(ctx.GetPlace());

const T alpha = ctx.op().HasAttr("alpha") ? ctx.Attr<T>("alpha") : 0;
const T beta = ctx.op().HasAttr("beta") ? ctx.Attr<T>("beta") : 0;

std::vector<int> diff_dst_tz = framework::vectorize2int(diff_y->dims());

auto diff_y_format =
@@ -173,7 +176,8 @@ void eltwise_grad(const framework::ExecutionContext &ctx,
diff_dst_tz, platform::MKLDNNGetDataType<T>(), diff_y_format);

std::string key = platform::MKLDNNHandler::GetHash(
diff_dst_tz, std::to_string(algorithm) + ctx.op().Input("Out"));
diff_dst_tz, std::to_string(algorithm) + std::to_string(alpha) +
std::to_string(beta) + ctx.op().Input("Out"));

const std::string key_src_data = key + "@eltwise_fwd_src_data";
const std::string key_src_layout = key + "@eltwise_fwd_src_layout";
@@ -273,10 +277,11 @@ namespace ops = paddle::operators;
act_type##_grad, MKLDNN, ::paddle::platform::CPUPlace, \
ops::MKLDNNActivationGradKernel<ops::grad_functor<float>>);

#define FOR_EACH_MKLDNN_KERNEL_FUNCTOR(__macro) \
__macro(relu, ReluMKLDNNFunctor, ReluMKLDNNGradFunctor); \
__macro(tanh, TanhMKLDNNFunctor, TanhMKLDNNGradFunctor); \
__macro(sqrt, SqrtMKLDNNFunctor, SqrtMKLDNNGradFunctor); \
#define FOR_EACH_MKLDNN_KERNEL_FUNCTOR(__macro) \
__macro(relu, ReluMKLDNNFunctor, ReluMKLDNNGradFunctor); \
__macro(leaky_relu, ReluMKLDNNFunctor, ReluMKLDNNGradFunctor); \
__macro(tanh, TanhMKLDNNFunctor, TanhMKLDNNGradFunctor); \
__macro(sqrt, SqrtMKLDNNFunctor, SqrtMKLDNNGradFunctor); \
__macro(abs, AbsMKLDNNFunctor, AbsMKLDNNGradFunctor);

FOR_EACH_MKLDNN_KERNEL_FUNCTOR(REGISTER_ACTIVATION_MKLDNN_KERNEL);
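The new leaky_relu row in the macro list reuses ReluMKLDNNFunctor instead of introducing a new functor: MKL-DNN's eltwise relu already takes a negative-slope alpha, and with a nonzero alpha it computes exactly leaky_relu. A numpy sketch of the shared formula (illustrative, not the MKL-DNN API):

```python
import numpy as np

def eltwise_relu(x, alpha=0.0):
    # MKL-DNN-style relu: identity on the positive side,
    # slope alpha on the negative side (alpha = 0 is plain relu).
    return np.where(x >= 0, x, alpha * x)

x = np.array([-1.5, -0.25, 0.0, 0.75])
assert np.allclose(eltwise_relu(x), np.maximum(x, 0.0))            # relu
assert np.allclose(eltwise_relu(x, 0.02), np.maximum(x, 0.02 * x))  # leaky_relu
```

This is also why the forward and grad kernels read alpha via HasAttr: relu has no alpha attribute, so the shared functor falls back to 0 there.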
@@ -18,7 +18,7 @@
import numpy as np
import paddle.fluid.core as core
from paddle.fluid.tests.unittests.op_test import OpTest
from paddle.fluid.tests.unittests.test_activation_op import TestRelu, TestTanh, TestSqrt, TestAbs
from paddle.fluid.tests.unittests.test_activation_op import TestRelu, TestTanh, TestSqrt, TestAbs, TestLeakyRelu
from mkldnn_op_test import check_if_mkldnn_primitives_exist_in_bwd


@@ -29,6 +29,13 @@ def setUp(self):
self.attrs = {"use_mkldnn": True}


class TestMKLDNNLeakyReluDim2(TestLeakyRelu):
def setUp(self):
super(TestMKLDNNLeakyReluDim2, self).setUp()

self.attrs = {"use_mkldnn": True}


class TestMKLDNNTanhDim2(TestTanh):
def setUp(self):
super(TestMKLDNNTanhDim2, self).setUp()
@@ -63,6 +70,20 @@ def setUp(self):
self.attrs = {"use_mkldnn": True}


class TestMKLDNNLeakyReluDim4(TestLeakyRelu):
def setUp(self):
super(TestMKLDNNLeakyReluDim4, self).setUp()

x = np.random.uniform(-1, 1, [2, 4, 3, 5]).astype("float32")
# The same reason with TestAbs
x[np.abs(x) < 0.005] = 0.02
out = np.maximum(x, 0.02 * x)

self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(x)}
self.outputs = {'Out': out}
self.attrs = {"use_mkldnn": True}


class TestMKLDNNTanhDim4(TestTanh):
def setUp(self):
super(TestMKLDNNTanhDim4, self).setUp()
19 changes: 19 additions & 0 deletions python/paddle/fluid/tests/unittests/test_activation_op.py
@@ -367,6 +367,25 @@ def test_check_grad(self):
self.check_grad(['X'], 'Out', max_relative_error=0.007)


class TestLeakyRelu(TestActivation):
def setUp(self):
self.op_type = "leaky_relu"
self.init_dtype()

x = np.random.uniform(-1, 1, [11, 17]).astype(self.dtype)
# The same reason with TestAbs
x[np.abs(x) < 0.005] = 0.02
out = np.maximum(x, 0.02 * x)

self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(x)}
self.outputs = {'Out': out}

def test_check_grad(self):
if self.dtype == np.float16:
return
self.check_grad(['X'], 'Out', max_relative_error=0.007)


class TestGelu(TestActivation):
def setUp(self):
self.op_type = "gelu"
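A side note on the x[np.abs(x) < 0.005] = 0.02 line that both new tests copy from TestAbs: leaky_relu has a kink at x = 0, and a central finite difference that straddles the kink averages the two one-sided slopes, so the numeric gradient check would fail for samples near zero. A numpy sketch of the effect:

```python
import numpy as np

def leaky_relu(x, alpha=0.02):
    return np.maximum(x, alpha * x)

def numeric_grad(f, x, eps=0.005):
    # Central finite difference, the scheme gradient checkers use.
    return (f(x + eps) - f(x - eps)) / (2 * eps)

# Away from the kink the numeric slope matches the analytic one ...
assert np.isclose(numeric_grad(leaky_relu, 1.0), 1.0)
assert np.isclose(numeric_grad(leaky_relu, -1.0), 0.02)
# ... but straddling x = 0 it averages the two slopes, matching neither.
assert np.isclose(numeric_grad(leaky_relu, 0.0), (1.0 + 0.02) / 2)
```

Pushing every near-zero sample to 0.02 keeps all finite-difference stencils on one side of the kink.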