From d7407c90aa3ee847fda052fdca9f10b788249875 Mon Sep 17 00:00:00 2001 From: sneaxiy Date: Fri, 8 Mar 2019 13:01:30 +0000 Subject: [PATCH 1/5] refine cross_entropy mem test=develop --- paddle/fluid/operators/cross_entropy2_op.cc | 218 ++++++++++++++++++++ paddle/fluid/operators/cross_entropy2_op.cu | 29 +++ paddle/fluid/operators/cross_entropy2_op.h | 188 +++++++++++++++++ python/paddle/fluid/layers/nn.py | 16 ++ 4 files changed, 451 insertions(+) create mode 100644 paddle/fluid/operators/cross_entropy2_op.cc create mode 100644 paddle/fluid/operators/cross_entropy2_op.cu create mode 100644 paddle/fluid/operators/cross_entropy2_op.h diff --git a/paddle/fluid/operators/cross_entropy2_op.cc b/paddle/fluid/operators/cross_entropy2_op.cc new file mode 100644 index 00000000000000..03b217a974c7bb --- /dev/null +++ b/paddle/fluid/operators/cross_entropy2_op.cc @@ -0,0 +1,218 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/fluid/operators/cross_entropy2_op.h" +#include +#include +#include + +namespace paddle { +namespace operators { + +class CrossEntropyOp2 : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + void InferShape(framework::InferShapeContext* ctx) const override { + PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should be not null."); + PADDLE_ENFORCE(ctx->HasInput("Label"), "Input(Label) should be not null."); + + PADDLE_ENFORCE(ctx->HasOutput("Y"), "Output(Y) should be not null."); + PADDLE_ENFORCE(ctx->HasOutput("XShape"), + "Output(XShape) should be not null."); + + auto x_dims = ctx->GetInputDim("X"); + auto label_dims = ctx->GetInputDim("Label"); + int rank = x_dims.size(); + PADDLE_ENFORCE_EQ(rank, label_dims.size(), + "Input(X) and Input(Label) shall have the same rank."); + bool check = true; + if ((!ctx->IsRuntime()) && (framework::product(x_dims) <= 0 || + framework::product(label_dims) <= 0)) { + check = false; + } + if (check) { + PADDLE_ENFORCE_EQ(framework::slice_ddim(x_dims, 0, rank - 1), + framework::slice_ddim(label_dims, 0, rank - 1), + "Input(X) and Input(Label) shall have the same shape " + "except the last dimension."); + } + + PADDLE_ENFORCE_EQ(label_dims[rank - 1], 1UL, + "Last dimension of Input(Label) should be 1."); + auto y_dims = x_dims; + y_dims[rank - 1] = 1; + ctx->SetOutputDim("Y", y_dims); + ctx->ShareLoD("X", /*->*/ "Y"); + + auto x_dims_vec = framework::vectorize(x_dims); + x_dims_vec.push_back(0); + ctx->SetOutputDim("XShape", framework::make_ddim(x_dims_vec)); + ctx->ShareLoD("X", /*->*/ "XShape"); + } + + protected: + // Explicitly set that the data type of computation kernel of cross_entropy + // is determined by its input "X". 
+  framework::OpKernelType GetExpectedKernelType(
+      const framework::ExecutionContext& ctx) const override {
+    return framework::OpKernelType(ctx.Input<Tensor>("X")->type(),
+                                   ctx.device_context());
+  }
+};
+
+class CrossEntropyGradientOp2 : public framework::OperatorWithKernel {
+ public:
+  using framework::OperatorWithKernel::OperatorWithKernel;
+
+  void InferShape(framework::InferShapeContext* ctx) const override {
+    PADDLE_ENFORCE(ctx->HasInput("Label"), "Input(Label) should be not null.");
+    PADDLE_ENFORCE(ctx->HasInput("XShape"),
+                   "Input(XShape) should be not null.");
+    PADDLE_ENFORCE(ctx->HasInput("Y"), "Input(Y) should be not null.");
+
+    PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Y")),
+                   "Input(Y@GRAD) should be not null.");
+
+    PADDLE_ENFORCE(ctx->HasOutput(framework::GradVarName("X")),
+                   "Output(X@GRAD) should be not null.");
+
+    auto x_shapes = ctx->GetInputDim("XShape");
+    framework::DDim x_dims(x_shapes.Get(), x_shapes.size() - 1);
+    auto label_dims = ctx->GetInputDim("Label");
+    auto dy_dims = ctx->GetInputDim(framework::GradVarName("Y"));
+    int rank = x_dims.size();
+    PADDLE_ENFORCE_EQ(dy_dims.size(), rank,
+                      "Input(Y@Grad) and Input(X) should have the same rank.");
+    PADDLE_ENFORCE_EQ(label_dims.size(), rank,
+                      "Input(Label) and Input(X) should have the same rank.");
+
+    bool check = true;
+    if ((!ctx->IsRuntime()) && (framework::product(x_dims) <= 0 ||
+                                framework::product(label_dims) <= 0)) {
+      check = false;
+    }
+
+    if (check) {
+      PADDLE_ENFORCE_EQ(framework::slice_ddim(x_dims, 0, rank - 1),
+                        framework::slice_ddim(label_dims, 0, rank - 1),
+                        "The Input(X) and Input(Label) should have the same "
+                        "shape except the last dimension.");
+      PADDLE_ENFORCE_EQ(framework::slice_ddim(x_dims, 0, rank - 1),
+                        framework::slice_ddim(dy_dims, 0, rank - 1),
+                        "The Input(X) and Input(Y@Grad) should have the same "
+                        "shape except the last dimension.");
+    }
+    PADDLE_ENFORCE_EQ(dy_dims[rank - 1], 1,
+                      "The last dimension of Input(Y@Grad) should be 1.");
+    PADDLE_ENFORCE_EQ(label_dims[rank - 1], 1,
+                      "Last dimension of Input(Label) should be 1.");
+    ctx->SetOutputDim(framework::GradVarName("X"), x_dims);
+    ctx->ShareLoD("XShape", framework::GradVarName("X"));
+  }
+
+ protected:
+  // Explicitly set that the data type of computation kernel of cross_entropy
+  // is determined by its input "X".
+  framework::OpKernelType GetExpectedKernelType(
+      const framework::ExecutionContext& ctx) const override {
+    return framework::OpKernelType(
+        ctx.Input<Tensor>(framework::GradVarName("Y"))->type(),
+        ctx.device_context());
+  }
+};
+
+class CrossEntropyOpMaker2 : public framework::OpProtoAndCheckerMaker {
+ public:
+  void Make() override {
+    AddInput("X",
+             "(Tensor, default Tensor<float>), a tensor whose last dimension "
+             "size is equal to the number of classes. This input is a "
+             "probability computed by the previous operator, which is almost "
+             "always the result of a softmax operator.");
+    AddInput(
+        "Label",
+        "(Tensor), the tensor which represents the ground truth. It has the "
+        "same shape with 'X' except the last dimension, and holds hard "
+        "labels (class indices) rather than one-hot vectors.");
+    AddOutput("Y",
+              "(Tensor, default Tensor<float>), a tensor whose shape is same "
+              "with 'X' except that the last dimension size is 1. It "
+              "represents the cross entropy loss.");
+    AddOutput("XShape", "Temporary variable to save the shape and LoD of X.");
+    AddAttr<int>("ignore_index",
+                 "(int, default -100), Specifies a target value that is "
+                 "ignored and does not contribute to the input gradient. "
+ "Only valid if soft_label is set to False") + .SetDefault(-100); + AddComment(R"DOC( +CrossEntropy Operator. + +The input 'X' and 'Label' will first be logically flattened to 2-D matrixs. +The matrix's second dimension(row length) is as same as the original last +dimension, and the first dimension(column length) is the product of all other +original dimensions. Then the softmax computation will take palce on each raw +of flattened matrixs. + +Only support hard label. + +Both the input X and Label can carry the LoD (Level of Details) information, +or not. But the output only shares the LoD information with input X. + +)DOC"); + } +}; + +class CrossEntropyOpInferVarType2 + : public framework::PassInDtypeAndVarTypeToOutput { + protected: + std::unordered_map GetInputOutputWithSameType() + const override { + return std::unordered_map{{"X", /*->*/ "Y"}}; + } +}; + +class CrossEntropyGradOpMaker2 : public framework::SingleGradOpDescMaker { + public: + using framework::SingleGradOpDescMaker::SingleGradOpDescMaker; + + protected: + std::unique_ptr Apply() const override { + std::unique_ptr op(new framework::OpDesc()); + op->SetType("cross_entropy_grad2"); + op->SetInput("Label", Input("Label")); + op->SetInput("Y", Output("Y")); + op->SetInput("XShape", Output("XShape")); + op->SetInput(framework::GradVarName("Y"), OutputGrad("Y")); + op->SetOutput(framework::GradVarName("X"), InputGrad("X")); + op->SetAttrMap(Attrs()); + return op; + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +using CPUCtx = paddle::platform::CPUDeviceContext; + +REGISTER_OPERATOR(cross_entropy2, ops::CrossEntropyOp2, + ops::CrossEntropyOpMaker2, ops::CrossEntropyOpInferVarType2, + ops::CrossEntropyGradOpMaker2); +REGISTER_OPERATOR(cross_entropy_grad2, ops::CrossEntropyGradientOp2); +REGISTER_OP_CPU_KERNEL(cross_entropy2, + ops::CrossEntropyOpKernel2, + ops::CrossEntropyOpKernel2); +REGISTER_OP_CPU_KERNEL(cross_entropy_grad2, + ops::CrossEntropyGradientOpKernel2, + ops::CrossEntropyGradientOpKernel2); diff --git a/paddle/fluid/operators/cross_entropy2_op.cu b/paddle/fluid/operators/cross_entropy2_op.cu new file mode 100644 index 00000000000000..1868c1b866016d --- /dev/null +++ b/paddle/fluid/operators/cross_entropy2_op.cu @@ -0,0 +1,29 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#include "paddle/fluid/operators/cross_entropy2_op.h" +#include "paddle/fluid/platform/float16.h" + +namespace plat = paddle::platform; +namespace ops = paddle::operators; +using CUDACtx = paddle::platform::CUDADeviceContext; +REGISTER_OP_CUDA_KERNEL(cross_entropy2, + ops::CrossEntropyOpKernel2, + ops::CrossEntropyOpKernel2, + ops::CrossEntropyOpKernel2); + +REGISTER_OP_CUDA_KERNEL( + cross_entropy_grad2, ops::CrossEntropyGradientOpKernel2, + ops::CrossEntropyGradientOpKernel2, + ops::CrossEntropyGradientOpKernel2); diff --git a/paddle/fluid/operators/cross_entropy2_op.h b/paddle/fluid/operators/cross_entropy2_op.h new file mode 100644 index 00000000000000..3d209f7c5c95c2 --- /dev/null +++ b/paddle/fluid/operators/cross_entropy2_op.h @@ -0,0 +1,188 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include +#include "paddle/fluid/framework/eigen.h" +#include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/operators/math/cross_entropy.h" +#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/fluid/platform/for_range.h" + +namespace paddle { +namespace operators { + +using Tensor = framework::Tensor; + +HOSTDEVICE inline platform::float16 RealLog(platform::float16 x) { +#ifdef __NVCC__ + return static_cast(logf(static_cast(x))); +#else + return static_cast(std::log(static_cast(x))); +#endif +} + +HOSTDEVICE inline float RealLog(float x) { +#ifdef __NVCC__ + return logf(x); +#else + return std::log(x); +#endif +} + +HOSTDEVICE inline double RealLog(double x) { +#ifdef __NVCC__ + return log(x); +#else + return std::log(x); +#endif +} + +HOSTDEVICE inline platform::float16 RealExp(platform::float16 x) { +#ifdef __NVCC__ + return static_cast(expf(static_cast(x))); +#else + return static_cast(std::exp(static_cast(x))); +#endif +} + +HOSTDEVICE inline float RealExp(float x) { +#ifdef __NVCC__ + return expf(x); +#else + return std::exp(x); +#endif +} + +HOSTDEVICE inline double RealExp(double x) { +#ifdef __NVCC__ + return exp(x); +#else + return std::exp(x); +#endif +} + +template +struct CrossEntropyForwardFunctor { + CrossEntropyForwardFunctor(const T *x, T *y, const int64_t *label, + int64_t ignore_index, int64_t feature_size) + : x_(x), + y_(y), + label_(label), + ignore_index_(ignore_index), + feature_size_(feature_size) {} + + HOSTDEVICE void operator()(int64_t row_idx) const { + auto col_idx = label_[row_idx]; + if (col_idx != ignore_index_) { + y_[row_idx] = -math::TolerableValue()( + RealLog(x_[row_idx * feature_size_ + col_idx])); + } else { + y_[row_idx] = 0; + } + } + + const T *x_; + T *y_; + const int64_t *label_; + int64_t ignore_index_; + int64_t feature_size_; +}; + +template +struct CrossEntropyBackwardFunctor { + CrossEntropyBackwardFunctor(T *dx, const T *y, const T *dy, + const int64_t *label, int64_t ignore_index, + int64_t feature_size) + : dx_(dx), + y_(y), + dy_(dy), + label_(label), + ignore_index_(ignore_index), + feature_size_(feature_size) {} + + HOSTDEVICE void 
operator()(int64_t idx) const { + auto row_idx = idx / feature_size_; + auto col_idx = idx % feature_size_; + auto label = label_[row_idx]; + if (label == col_idx && label != ignore_index_) { + dx_[idx] = -dy_[row_idx] * RealExp(y_[row_idx]); + } else { + dx_[idx] = 0; + } + } + + T *dx_; + const T *y_; + const T *dy_; + const int64_t *label_; + int64_t ignore_index_; + int64_t feature_size_; +}; + +template +class CrossEntropyOpKernel2 : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext &ctx) const override { + auto *x = ctx.Input("X"); + auto *label = ctx.Input("Label"); + auto *y = ctx.Output("Y"); + + auto *p_y = y->mutable_data(ctx.GetPlace()); + auto *p_x = x->data(); + auto *p_label = label->data(); + + int rank = x->dims().size(); + int64_t feature_size = x->dims()[rank - 1]; + int64_t batch_size = framework::product(x->dims()) / feature_size; + + int64_t ignore_index = ctx.Attr("ignore_index"); + + platform::ForRange for_range( + ctx.template device_context(), batch_size); + for_range(CrossEntropyForwardFunctor(p_x, p_y, p_label, ignore_index, + feature_size)); + } +}; + +template +class CrossEntropyGradientOpKernel2 : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext &ctx) const override { + auto *dx = ctx.Output(framework::GradVarName("X")); + auto *y = ctx.Input("Y"); + auto *dy = ctx.Input(framework::GradVarName("Y")); + auto *label = ctx.Input("Label"); + + auto *p_dx = dx->mutable_data(ctx.GetPlace()); + auto *p_y = y->data(); + auto *p_dy = dy->data(); + auto *p_label = label->data(); + + int64_t ignore_index = ctx.Attr("ignore_index"); + int rank = dx->dims().size(); + int64_t feature_size = dx->dims()[rank - 1]; + int64_t batch_size = framework::product(dx->dims()) / feature_size; + + platform::ForRange for_range( + ctx.template device_context(), + batch_size * feature_size); + for_range(CrossEntropyBackwardFunctor(p_dx, p_y, p_dy, p_label, + ignore_index, feature_size)); + } +}; + +} // namespace operators +} // namespace paddle diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index 9d1d5fe0932ea8..4f384ce37d7c88 100644 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -1432,6 +1432,8 @@ def cross_entropy(input, label, soft_label=False, ignore_index=kIgnoreIndex): predict = fluid.layers.fc(input=net, size=classdim, act='softmax') cost = fluid.layers.cross_entropy(input=predict, label=label) """ + if not soft_label: + return cross_entropy2(input, label, ignore_index) helper = LayerHelper('cross_entropy', **locals()) out = helper.create_variable_for_type_inference(dtype=input.dtype) helper.append_op( @@ -1444,6 +1446,20 @@ def cross_entropy(input, label, soft_label=False, ignore_index=kIgnoreIndex): return out +def cross_entropy2(input, label, ignore_index=kIgnoreIndex): + helper = LayerHelper('cross_entropy2', **locals()) + out = helper.create_variable_for_type_inference(dtype=input.dtype) + xshape = helper.create_variable_for_type_inference(dtype=input.dtype) + helper.append_op( + type='cross_entropy2', + inputs={'X': [input], + 'Label': [label]}, + outputs={'Y': [out], + 'XShape': [xshape]}, + attrs={'ignore_index': ignore_index}) + return out + + def bpr_loss(input, label, name=None): """ Bayesian Personalized Ranking Loss Operator. 
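
A note on where the memory saving comes from: unlike cross_entropy_grad, the
new cross_entropy_grad2 op takes Y, Y@GRAD, Label and the shape-only XShape
as inputs rather than the full activation X. With hard labels,
Y = -log(X[label]), so the 1/X[label] factor needed by the backward pass
equals exp(Y), and X itself can be released early. A minimal NumPy sketch of
the math implemented by CrossEntropyForwardFunctor and
CrossEntropyBackwardFunctor above (the helper names below are illustrative,
not part of the PR):

import numpy as np

def cross_entropy2_forward(x, label, ignore_index=-100):
    # x: (N, D) probabilities (e.g. softmax output); label: (N, 1) int64.
    n = x.shape[0]
    y = np.zeros((n, 1), dtype=x.dtype)
    for i in range(n):
        lbl = int(label[i, 0])
        if lbl != ignore_index:
            y[i, 0] = -np.log(x[i, lbl])
    return y

def cross_entropy2_backward(y, dy, label, num_classes, ignore_index=-100):
    # X is not needed here: 1 / X[i, label[i]] == exp(Y[i]).
    n = y.shape[0]
    dx = np.zeros((n, num_classes), dtype=y.dtype)
    for i in range(n):
        lbl = int(label[i, 0])
        if lbl != ignore_index:
            dx[i, lbl] = -dy[i, 0] * np.exp(y[i, 0])
    return dx
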
From cfd012e2cb82dc0a2f4ddcc0d23eeefbb28aff0a Mon Sep 17 00:00:00 2001 From: sneaxiy Date: Mon, 11 Mar 2019 04:05:00 +0000 Subject: [PATCH 2/5] add unittest test=develop --- paddle/fluid/operators/expand_op.cc | 18 ++++- .../tests/unittests/test_cross_entropy2_op.py | 79 +++++++++++++++++++ .../tests/unittests/test_dist_transpiler.py | 20 ++--- 3 files changed, 106 insertions(+), 11 deletions(-) create mode 100644 python/paddle/fluid/tests/unittests/test_cross_entropy2_op.py diff --git a/paddle/fluid/operators/expand_op.cc b/paddle/fluid/operators/expand_op.cc index 44a2f37b667724..ce3d9a7aacb72a 100644 --- a/paddle/fluid/operators/expand_op.cc +++ b/paddle/fluid/operators/expand_op.cc @@ -138,12 +138,28 @@ class ExpandGradOp : public framework::OperatorWithKernel { } }; +class ExpandGradOpDescMaker : public framework::SingleGradOpDescMaker { + public: + using framework::SingleGradOpDescMaker::SingleGradOpDescMaker; + + protected: + std::unique_ptr Apply() const override { + std::unique_ptr op(new framework::OpDesc()); + op->SetType("expand_grad"); + op->SetInput("X", Input("X")); + op->SetInput(framework::GradVarName("Out"), OutputGrad("Out")); + op->SetOutput(framework::GradVarName("X"), InputGrad("X")); + op->SetAttrMap(Attrs()); + return op; + } +}; + } // namespace operators } // namespace paddle namespace ops = paddle::operators; REGISTER_OPERATOR(expand, ops::ExpandOp, ops::ExpandOpMaker, - paddle::framework::DefaultGradOpDescMaker); + ops::ExpandGradOpDescMaker); REGISTER_OPERATOR(expand_grad, ops::ExpandGradOp); REGISTER_OP_CPU_KERNEL( expand, ops::ExpandKernel, diff --git a/python/paddle/fluid/tests/unittests/test_cross_entropy2_op.py b/python/paddle/fluid/tests/unittests/test_cross_entropy2_op.py new file mode 100644 index 00000000000000..c29d422361b2fc --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_cross_entropy2_op.py @@ -0,0 +1,79 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
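+
+# The tests below draw logits uniformly from (1, 2) so that -log(x) stays
+# finite, pick random integer labels, and build the expected loss row by row,
+# skipping rows whose label equals ignore_index (mirroring
+# CrossEntropyForwardFunctor). XShape is excluded from the output check
+# because it only carries shape/LoD metadata.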
+ +from op_test import OpTest +import unittest +import numpy as np +import six + + +class CrossEntropy2OpTestBase(OpTest): + def initParameters(self): + return [32, 64], 'float32', -100 + + def calc_output(self, logits, label, ignore_index): + ret = np.zeros(shape=label.shape, dtype=logits.dtype) + for idx in six.moves.range(label.shape[0]): + if label[idx] == ignore_index: + continue + ret[idx] = -np.log(logits[idx][label[idx]]) + return ret + + def setUp(self): + self.shape, self.dtype, self.ignore_index = self.initParameters() + self.op_type = 'cross_entropy2' + feature_size = int(self.shape[-1]) + batch_size = int(np.prod(self.shape) / feature_size) + logits = (np.random.random(size=self.shape) + 1).astype(self.dtype) + label = np.random.random_integers( + low=0, high=feature_size - 1, + size=self.shape[0:-1] + [1]).astype('int64') + outputs = self.calc_output( + np.reshape(logits, [batch_size, feature_size]), + np.reshape(label, [batch_size, 1]), self.ignore_index) + self.inputs = {'X': logits, 'Label': label} + self.outputs = { + 'Y': np.reshape(outputs, label.shape), + 'XShape': np.zeros( + shape=logits.shape, dtype=logits.dtype) + } + self.attrs = {'ignore_index': self.ignore_index} + + def test_check_output(self): + self.check_output(no_check_set=['XShape']) + + def test_check_grad(self): + self.check_grad( + inputs_to_check=['X'], + output_names=['Y'], + no_grad_set=['XShape', 'Label']) + + +class CrossEntropy2OpTest2(CrossEntropy2OpTestBase): + def initParameters(self): + return [32, 64], 'float64', 3 + + +class CrossEntropy2OpTest3(CrossEntropy2OpTestBase): + def initParameters(self): + return [4, 8, 16, 32], 'float32', -100 + + +class CrossEntropy2OpTest4(CrossEntropy2OpTestBase): + def initParameters(self): + return [4, 8, 16, 32], 'float32', 3 + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_dist_transpiler.py b/python/paddle/fluid/tests/unittests/test_dist_transpiler.py index 12132477d28c74..f81d4fda50be19 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_transpiler.py +++ b/python/paddle/fluid/tests/unittests/test_dist_transpiler.py @@ -524,8 +524,8 @@ def transpiler_test_impl(self): ops = [ 'lookup_table', 'sequence_pool', 'lookup_table', 'sequence_pool', 'lookup_table', 'sequence_pool', 'concat', 'mul', 'elementwise_add', - 'cross_entropy', 'mean', 'fill_constant', 'mean_grad', - 'cross_entropy_grad', 'elementwise_add_grad', 'send', 'mul_grad', + 'cross_entropy2', 'mean', 'fill_constant', 'mean_grad', + 'cross_entropy_grad2', 'elementwise_add_grad', 'send', 'mul_grad', 'send', 'concat_grad', 'sequence_pool_grad', 'lookup_table_grad', 'split_selected_rows', 'send', 'sequence_pool_grad', 'lookup_table_grad', 'sequence_pool_grad', 'lookup_table_grad', @@ -564,8 +564,8 @@ def transpiler_test_impl(self): ops = [ 'split_ids', 'prefetch', 'merge_ids', 'sequence_pool', 'sequence_pool', 'lookup_table', 'sequence_pool', 'concat', 'mul', - 'elementwise_add', 'cross_entropy', 'mean', 'fill_constant', - 'mean_grad', 'cross_entropy_grad', 'elementwise_add_grad', 'send', + 'elementwise_add', 'cross_entropy2', 'mean', 'fill_constant', + 'mean_grad', 'cross_entropy_grad2', 'elementwise_add_grad', 'send', 'mul_grad', 'send', 'concat_grad', 'sequence_pool_grad', 'lookup_table_grad', 'split_selected_rows', 'send', 'sequence_pool_grad', 'lookup_table_grad', 'sequence_pool_grad', @@ -612,8 +612,8 @@ def transpiler_test_impl(self): ops = [ 'lookup_table', 'sequence_pool', 'lookup_table', 'sequence_pool', 'lookup_table', 
'sequence_pool', 'concat', 'mul', 'elementwise_add', - 'cross_entropy', 'mean', 'fill_constant', 'mean_grad', - 'cross_entropy_grad', 'elementwise_add_grad', 'send', 'mul_grad', + 'cross_entropy2', 'mean', 'fill_constant', 'mean_grad', + 'cross_entropy_grad2', 'elementwise_add_grad', 'send', 'mul_grad', 'send', 'concat_grad', 'sequence_pool_grad', 'lookup_table_grad', 'split_selected_rows', 'send', 'sequence_pool_grad', 'lookup_table_grad', 'sequence_pool_grad', 'lookup_table_grad', @@ -652,8 +652,8 @@ def transpiler_test_impl(self): ops = [ 'split_ids', 'prefetch', 'merge_ids', 'sequence_pool', 'sequence_pool', 'lookup_table', 'sequence_pool', 'concat', 'mul', - 'elementwise_add', 'cross_entropy', 'mean', 'fill_constant', - 'mean_grad', 'cross_entropy_grad', 'elementwise_add_grad', 'send', + 'elementwise_add', 'cross_entropy2', 'mean', 'fill_constant', + 'mean_grad', 'cross_entropy_grad2', 'elementwise_add_grad', 'send', 'mul_grad', 'send', 'concat_grad', 'sequence_pool_grad', 'lookup_table_grad', 'split_selected_rows', 'send', 'sequence_pool_grad', 'lookup_table_grad', 'sequence_pool_grad', @@ -841,8 +841,8 @@ def transpiler_test_impl(self): ops = [ 'lookup_table', 'sequence_pool', 'lookup_table', 'sequence_pool', 'lookup_table', 'sequence_pool', 'concat', 'mul', 'elementwise_add', - 'cross_entropy', 'mean', 'fill_constant', 'mean_grad', - 'cross_entropy_grad', 'elementwise_add_grad', 'send', 'mul_grad', + 'cross_entropy2', 'mean', 'fill_constant', 'mean_grad', + 'cross_entropy_grad2', 'elementwise_add_grad', 'send', 'mul_grad', 'send', 'concat_grad', 'sequence_pool_grad', 'lookup_table_grad', 'split_selected_rows', 'send', 'sequence_pool_grad', 'lookup_table_grad', 'sequence_pool_grad', 'lookup_table_grad', From b26e9bd2326029de54901031ba93458f32a0db5b Mon Sep 17 00:00:00 2001 From: sneaxiy Date: Tue, 12 Mar 2019 03:48:33 +0000 Subject: [PATCH 3/5] refine code test=develop --- paddle/fluid/operators/cross_entropy2_op.cc | 117 ++---------- paddle/fluid/operators/cross_entropy2_op.h | 104 ++--------- paddle/fluid/operators/cross_entropy_op.cc | 137 ++------------ .../fluid/operators/cross_entropy_op_base.h | 169 ++++++++++++++++++ paddle/fluid/operators/expand_op.cc | 1 + paddle/fluid/operators/math.h | 42 +++++ paddle/fluid/operators/math/cross_entropy.cu | 13 +- paddle/fluid/operators/selu_op.h | 5 +- .../sequence_ops/sequence_softmax_op.cu | 4 +- .../sigmoid_cross_entropy_with_logits_op.cu | 6 +- 10 files changed, 259 insertions(+), 339 deletions(-) create mode 100644 paddle/fluid/operators/cross_entropy_op_base.h create mode 100644 paddle/fluid/operators/math.h diff --git a/paddle/fluid/operators/cross_entropy2_op.cc b/paddle/fluid/operators/cross_entropy2_op.cc index 03b217a974c7bb..181d373cfc3d28 100644 --- a/paddle/fluid/operators/cross_entropy2_op.cc +++ b/paddle/fluid/operators/cross_entropy2_op.cc @@ -16,46 +16,22 @@ limitations under the License. 
*/ #include #include #include +#include "paddle/fluid/operators/cross_entropy_op_base.h" namespace paddle { namespace operators { -class CrossEntropyOp2 : public framework::OperatorWithKernel { +class CrossEntropyOp2 : public CrossEntropyOpBase { public: - using framework::OperatorWithKernel::OperatorWithKernel; + using CrossEntropyOpBase::CrossEntropyOpBase; void InferShape(framework::InferShapeContext* ctx) const override { - PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should be not null."); - PADDLE_ENFORCE(ctx->HasInput("Label"), "Input(Label) should be not null."); + CrossEntropyOpBase::InferShape(ctx); - PADDLE_ENFORCE(ctx->HasOutput("Y"), "Output(Y) should be not null."); PADDLE_ENFORCE(ctx->HasOutput("XShape"), "Output(XShape) should be not null."); auto x_dims = ctx->GetInputDim("X"); - auto label_dims = ctx->GetInputDim("Label"); - int rank = x_dims.size(); - PADDLE_ENFORCE_EQ(rank, label_dims.size(), - "Input(X) and Input(Label) shall have the same rank."); - bool check = true; - if ((!ctx->IsRuntime()) && (framework::product(x_dims) <= 0 || - framework::product(label_dims) <= 0)) { - check = false; - } - if (check) { - PADDLE_ENFORCE_EQ(framework::slice_ddim(x_dims, 0, rank - 1), - framework::slice_ddim(label_dims, 0, rank - 1), - "Input(X) and Input(Label) shall have the same shape " - "except the last dimension."); - } - - PADDLE_ENFORCE_EQ(label_dims[rank - 1], 1UL, - "Last dimension of Input(Label) should be 1."); - auto y_dims = x_dims; - y_dims[rank - 1] = 1; - ctx->SetOutputDim("Y", y_dims); - ctx->ShareLoD("X", /*->*/ "Y"); - auto x_dims_vec = framework::vectorize(x_dims); x_dims_vec.push_back(0); ctx->SetOutputDim("XShape", framework::make_ddim(x_dims_vec)); @@ -63,73 +39,25 @@ class CrossEntropyOp2 : public framework::OperatorWithKernel { } protected: - // Explicitly set that the data type of computation kernel of cross_entropy - // is determined by its input "X". 
- framework::OpKernelType GetExpectedKernelType( - const framework::ExecutionContext& ctx) const override { - return framework::OpKernelType(ctx.Input("X")->type(), - ctx.device_context()); + bool IsSoftLabel(framework::InferShapeContext* ctx) const override { + return false; } }; -class CrossEntropyGradientOp2 : public framework::OperatorWithKernel { +class CrossEntropyGradientOp2 : public CrossEntropyGradientOpBase { public: - using framework::OperatorWithKernel::OperatorWithKernel; + using CrossEntropyGradientOpBase::CrossEntropyGradientOpBase; - void InferShape(framework::InferShapeContext* ctx) const override { - PADDLE_ENFORCE(ctx->HasInput("Label"), "Input(Label) should be not null."); - PADDLE_ENFORCE(ctx->HasInput("XShape"), - "Input(XShape) should be not null."); - PADDLE_ENFORCE(ctx->HasInput("Y"), "Input(Y) should be not null."); - - PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Y")), - "Input(Y@GRAD) shoudl be not null."); - - PADDLE_ENFORCE(ctx->HasOutput(framework::GradVarName("X")), - "Output(X@GRAD) should be not null."); - - auto x_shapes = ctx->GetInputDim("XShape"); - framework::DDim x_dims(x_shapes.Get(), x_shapes.size() - 1); - auto label_dims = ctx->GetInputDim("Label"); - auto dy_dims = ctx->GetInputDim(framework::GradVarName("Y")); - int rank = x_dims.size(); - PADDLE_ENFORCE_EQ(dy_dims.size(), rank, - "Input(Y@Grad) and Input(X) should have the same rank."); - PADDLE_ENFORCE_EQ(label_dims.size(), rank, - "Input(Label) and Input(X) should have the same rank."); - - bool check = true; - if ((!ctx->IsRuntime()) && (framework::product(x_dims) <= 0 || - framework::product(label_dims) <= 0)) { - check = false; - } - - if (check) { - PADDLE_ENFORCE_EQ(framework::slice_ddim(x_dims, 0, rank - 1), - framework::slice_ddim(label_dims, 0, rank - 1), - "The Input(X) and Input(Label) should have the same " - "shape except the last dimension."); - PADDLE_ENFORCE_EQ(framework::slice_ddim(x_dims, 0, rank - 1), - framework::slice_ddim(dy_dims, 0, rank - 1), - "The Input(X) and Input(Y@Grad) should have the same " - "shape except the last dimension."); - } - PADDLE_ENFORCE_EQ(dy_dims[rank - 1], 1, - "The last dimension of Input(Y@Grad) should be 1."); - PADDLE_ENFORCE_EQ(label_dims[rank - 1], 1, - "Last dimension of Input(Label) should be 1."); - ctx->SetOutputDim(framework::GradVarName("X"), x_dims); - ctx->ShareLoD("XShape", framework::GradVarName("X")); + protected: + virtual framework::DDim GetXDim(framework::InferShapeContext* ctx) const { + auto x_shape = ctx->GetInputDim("XShape"); + return framework::DDim(x_shape.Get(), x_shape.size() - 1); } - protected: - // Explicitly set that the data type of computation kernel of cross_entropy - // is determined by its input "X". - framework::OpKernelType GetExpectedKernelType( - const framework::ExecutionContext& ctx) const override { - return framework::OpKernelType( - ctx.Input(framework::GradVarName("Y"))->type(), - ctx.device_context()); + virtual const char* VarNameWithXLoD() const { return "XShape"; } + + virtual bool IsSoftLabel(framework::InferShapeContext* ctx) const { + return false; } }; @@ -156,7 +84,7 @@ class CrossEntropyOpMaker2 : public framework::OpProtoAndCheckerMaker { "Only valid if soft_label is set to False") .SetDefault(-100); AddComment(R"DOC( -CrossEntropy Operator. +Hard-label CrossEntropy Operator. The input 'X' and 'Label' will first be logically flattened to 2-D matrixs. The matrix's second dimension(row length) is as same as the original last @@ -173,15 +101,6 @@ or not. 
But the output only shares the LoD information with input X. } }; -class CrossEntropyOpInferVarType2 - : public framework::PassInDtypeAndVarTypeToOutput { - protected: - std::unordered_map GetInputOutputWithSameType() - const override { - return std::unordered_map{{"X", /*->*/ "Y"}}; - } -}; - class CrossEntropyGradOpMaker2 : public framework::SingleGradOpDescMaker { public: using framework::SingleGradOpDescMaker::SingleGradOpDescMaker; @@ -207,7 +126,7 @@ namespace ops = paddle::operators; using CPUCtx = paddle::platform::CPUDeviceContext; REGISTER_OPERATOR(cross_entropy2, ops::CrossEntropyOp2, - ops::CrossEntropyOpMaker2, ops::CrossEntropyOpInferVarType2, + ops::CrossEntropyOpMaker2, ops::CrossEntropyOpInferVarType, ops::CrossEntropyGradOpMaker2); REGISTER_OPERATOR(cross_entropy_grad2, ops::CrossEntropyGradientOp2); REGISTER_OP_CPU_KERNEL(cross_entropy2, diff --git a/paddle/fluid/operators/cross_entropy2_op.h b/paddle/fluid/operators/cross_entropy2_op.h index 3d209f7c5c95c2..3e9dc7ebce263d 100644 --- a/paddle/fluid/operators/cross_entropy2_op.h +++ b/paddle/fluid/operators/cross_entropy2_op.h @@ -17,6 +17,7 @@ limitations under the License. */ #include #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/operators/math.h" #include "paddle/fluid/operators/math/cross_entropy.h" #include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/platform/for_range.h" @@ -26,81 +27,6 @@ namespace operators { using Tensor = framework::Tensor; -HOSTDEVICE inline platform::float16 RealLog(platform::float16 x) { -#ifdef __NVCC__ - return static_cast(logf(static_cast(x))); -#else - return static_cast(std::log(static_cast(x))); -#endif -} - -HOSTDEVICE inline float RealLog(float x) { -#ifdef __NVCC__ - return logf(x); -#else - return std::log(x); -#endif -} - -HOSTDEVICE inline double RealLog(double x) { -#ifdef __NVCC__ - return log(x); -#else - return std::log(x); -#endif -} - -HOSTDEVICE inline platform::float16 RealExp(platform::float16 x) { -#ifdef __NVCC__ - return static_cast(expf(static_cast(x))); -#else - return static_cast(std::exp(static_cast(x))); -#endif -} - -HOSTDEVICE inline float RealExp(float x) { -#ifdef __NVCC__ - return expf(x); -#else - return std::exp(x); -#endif -} - -HOSTDEVICE inline double RealExp(double x) { -#ifdef __NVCC__ - return exp(x); -#else - return std::exp(x); -#endif -} - -template -struct CrossEntropyForwardFunctor { - CrossEntropyForwardFunctor(const T *x, T *y, const int64_t *label, - int64_t ignore_index, int64_t feature_size) - : x_(x), - y_(y), - label_(label), - ignore_index_(ignore_index), - feature_size_(feature_size) {} - - HOSTDEVICE void operator()(int64_t row_idx) const { - auto col_idx = label_[row_idx]; - if (col_idx != ignore_index_) { - y_[row_idx] = -math::TolerableValue()( - RealLog(x_[row_idx * feature_size_ + col_idx])); - } else { - y_[row_idx] = 0; - } - } - - const T *x_; - T *y_; - const int64_t *label_; - int64_t ignore_index_; - int64_t feature_size_; -}; - template struct CrossEntropyBackwardFunctor { CrossEntropyBackwardFunctor(T *dx, const T *y, const T *dy, @@ -118,7 +44,7 @@ struct CrossEntropyBackwardFunctor { auto col_idx = idx % feature_size_; auto label = label_[row_idx]; if (label == col_idx && label != ignore_index_) { - dx_[idx] = -dy_[row_idx] * RealExp(y_[row_idx]); + dx_[idx] = -dy_[row_idx] * real_exp(y_[row_idx]); } else { dx_[idx] = 0; } @@ -136,24 +62,20 @@ template class CrossEntropyOpKernel2 : public framework::OpKernel { public: void 
Compute(const framework::ExecutionContext &ctx) const override { - auto *x = ctx.Input("X"); - auto *label = ctx.Input("Label"); - auto *y = ctx.Output("Y"); + auto *x_original = ctx.Input("X"); + int rank = x_original->dims().size(); - auto *p_y = y->mutable_data(ctx.GetPlace()); - auto *p_x = x->data(); - auto *p_label = label->data(); + auto x = framework::ReshapeToMatrix(*x_original, rank - 1); + auto label = + framework::ReshapeToMatrix(*ctx.Input("Label"), rank - 1); + auto *y = ctx.Output("Y"); + y->mutable_data(ctx.GetPlace()); - int rank = x->dims().size(); - int64_t feature_size = x->dims()[rank - 1]; - int64_t batch_size = framework::product(x->dims()) / feature_size; + auto ignore_index = ctx.Attr("ignore_index"); - int64_t ignore_index = ctx.Attr("ignore_index"); - - platform::ForRange for_range( - ctx.template device_context(), batch_size); - for_range(CrossEntropyForwardFunctor(p_x, p_y, p_label, ignore_index, - feature_size)); + math::CrossEntropyFunctor()( + ctx.template device_context(), y, &x, &label, false, + ignore_index); } }; diff --git a/paddle/fluid/operators/cross_entropy_op.cc b/paddle/fluid/operators/cross_entropy_op.cc index 3adc7baebddd06..1707f7078cad82 100644 --- a/paddle/fluid/operators/cross_entropy_op.cc +++ b/paddle/fluid/operators/cross_entropy_op.cc @@ -14,128 +14,11 @@ limitations under the License. */ #include "paddle/fluid/operators/cross_entropy_op.h" #include +#include "paddle/fluid/operators/cross_entropy_op_base.h" namespace paddle { namespace operators { -class CrossEntropyOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - void InferShape(framework::InferShapeContext* ctx) const override { - PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should be not null."); - PADDLE_ENFORCE(ctx->HasInput("Label"), "Input(Label) should be not null."); - PADDLE_ENFORCE(ctx->HasOutput("Y"), "Output(Y) should be not null."); - - auto x_dims = ctx->GetInputDim("X"); - auto label_dims = ctx->GetInputDim("Label"); - int rank = x_dims.size(); - PADDLE_ENFORCE_EQ(rank, label_dims.size(), - "Input(X) and Input(Label) shall have the same rank."); - bool check = true; - if ((!ctx->IsRuntime()) && (framework::product(x_dims) <= 0 || - framework::product(label_dims) <= 0)) { - check = false; - } - if (check) { - PADDLE_ENFORCE_EQ(framework::slice_ddim(x_dims, 0, rank - 1), - framework::slice_ddim(label_dims, 0, rank - 1), - "Input(X) and Input(Label) shall have the same shape " - "except the last dimension."); - } - if (ctx->Attrs().Get("soft_label")) { - if (check) { - PADDLE_ENFORCE_EQ(x_dims[rank - 1], label_dims[rank - 1], - "If Attr(soft_label) == true, the last dimension of " - "Input(X) and Input(Label) should be equal."); - } - } else { - PADDLE_ENFORCE_EQ(label_dims[rank - 1], 1UL, - "If Attr(softLabel) == false, the last dimension of " - "Input(Label) should be 1."); - } - - auto y_dims = x_dims; - y_dims[rank - 1] = 1; - ctx->SetOutputDim("Y", y_dims); - ctx->ShareLoD("X", /*->*/ "Y"); - } - - protected: - // Explicitly set that the data type of computation kernel of cross_entropy - // is determined by its input "X". 
- framework::OpKernelType GetExpectedKernelType( - const framework::ExecutionContext& ctx) const override { - return framework::OpKernelType(ctx.Input("X")->type(), - ctx.device_context()); - } -}; - -class CrossEntropyGradientOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - void InferShape(framework::InferShapeContext* ctx) const override { - PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should be not null."); - PADDLE_ENFORCE(ctx->HasInput("Label"), "Input(Label) should be not null."); - PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Y")), - "Input(Y@GRAD) shoudl be not null."); - PADDLE_ENFORCE(ctx->HasOutput(framework::GradVarName("X")), - "Output(X@GRAD) should be not null."); - - auto x_dims = ctx->GetInputDim("X"); - auto label_dims = ctx->GetInputDim("Label"); - auto dy_dims = ctx->GetInputDim(framework::GradVarName("Y")); - int rank = x_dims.size(); - PADDLE_ENFORCE_EQ(dy_dims.size(), rank, - "Input(Y@Grad) and Input(X) should have the same rank."); - PADDLE_ENFORCE_EQ(label_dims.size(), rank, - "Input(Label) and Input(X) should have the same rank."); - - bool check = true; - if ((!ctx->IsRuntime()) && (framework::product(x_dims) <= 0 || - framework::product(label_dims) <= 0)) { - check = false; - } - - if (check) { - PADDLE_ENFORCE_EQ(framework::slice_ddim(x_dims, 0, rank - 1), - framework::slice_ddim(label_dims, 0, rank - 1), - "The Input(X) and Input(Label) should have the same " - "shape except the last dimension."); - PADDLE_ENFORCE_EQ(framework::slice_ddim(x_dims, 0, rank - 1), - framework::slice_ddim(dy_dims, 0, rank - 1), - "The Input(X) and Input(Y@Grad) should have the same " - "shape except the last dimension."); - } - PADDLE_ENFORCE_EQ(dy_dims[rank - 1], 1, - "The last dimension of Input(Y@Grad) should be 1."); - if (ctx->Attrs().Get("soft_label")) { - if (check) { - PADDLE_ENFORCE_EQ( - x_dims[rank - 1], label_dims[rank - 1], - "When Attr(soft_label) == true, the last dimension of " - "Input(X) and Input(Label) should be equal."); - } - } else { - PADDLE_ENFORCE_EQ(label_dims[rank - 1], 1, - "When Attr(soft_label) == false, the last dimension of " - "Input(Label) should be 1."); - } - ctx->SetOutputDim(framework::GradVarName("X"), x_dims); - ctx->ShareLoD("X", framework::GradVarName("X")); - } - - protected: - // Explicitly set that the data type of computation kernel of cross_entropy - // is determined by its input "X". - framework::OpKernelType GetExpectedKernelType( - const framework::ExecutionContext& ctx) const override { - return framework::OpKernelType(ctx.Input("X")->type(), - ctx.device_context()); - } -}; - class CrossEntropyOpMaker : public framework::OpProtoAndCheckerMaker { public: void Make() override { @@ -200,22 +83,24 @@ or not. But the output only shares the LoD information with input X. 
} }; -class CrossEntropyOpInferVarType - : public framework::PassInDtypeAndVarTypeToOutput { - protected: - std::unordered_map GetInputOutputWithSameType() - const override { - return std::unordered_map{{"X", /*->*/ "Y"}}; +class CrossEntropyGradientOp : public CrossEntropyGradientOpBase { + public: + using CrossEntropyGradientOpBase::CrossEntropyGradientOpBase; + + void InferShape(framework::InferShapeContext *ctx) const override { + PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should be not null."); + CrossEntropyGradientOpBase::InferShape(ctx); } }; + } // namespace operators } // namespace paddle namespace ops = paddle::operators; using CPUCtx = paddle::platform::CPUDeviceContext; -REGISTER_OPERATOR(cross_entropy, ops::CrossEntropyOp, ops::CrossEntropyOpMaker, - ops::CrossEntropyOpInferVarType, +REGISTER_OPERATOR(cross_entropy, ops::CrossEntropyOpBase, + ops::CrossEntropyOpMaker, ops::CrossEntropyOpInferVarType, paddle::framework::DefaultGradOpDescMaker); REGISTER_OPERATOR(cross_entropy_grad, ops::CrossEntropyGradientOp); REGISTER_OP_CPU_KERNEL(cross_entropy, ops::CrossEntropyOpKernel, diff --git a/paddle/fluid/operators/cross_entropy_op_base.h b/paddle/fluid/operators/cross_entropy_op_base.h new file mode 100644 index 00000000000000..c3e5254c37e029 --- /dev/null +++ b/paddle/fluid/operators/cross_entropy_op_base.h @@ -0,0 +1,169 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+
+#pragma once
+
+#include <string>
+#include <unordered_map>
+#include "paddle/fluid/framework/op_registry.h"
+
+namespace paddle {
+namespace operators {
+
+class CrossEntropyOpBase : public framework::OperatorWithKernel {
+ public:
+  using framework::OperatorWithKernel::OperatorWithKernel;
+
+  void InferShape(framework::InferShapeContext* ctx) const override {
+    PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should be not null.");
+    PADDLE_ENFORCE(ctx->HasInput("Label"), "Input(Label) should be not null.");
+
+    PADDLE_ENFORCE(ctx->HasOutput("Y"), "Output(Y) should be not null.");
+
+    auto x_dims = ctx->GetInputDim("X");
+    auto label_dims = ctx->GetInputDim("Label");
+    int rank = x_dims.size();
+    PADDLE_ENFORCE_EQ(rank, label_dims.size(),
+                      "Input(X) and Input(Label) shall have the same rank.");
+    bool check = true;
+    if ((!ctx->IsRuntime()) && (framework::product(x_dims) <= 0 ||
+                                framework::product(label_dims) <= 0)) {
+      check = false;
+    }
+    if (check) {
+      PADDLE_ENFORCE_EQ(framework::slice_ddim(x_dims, 0, rank - 1),
+                        framework::slice_ddim(label_dims, 0, rank - 1),
+                        "Input(X) and Input(Label) shall have the same shape "
+                        "except the last dimension.");
+    }
+
+    if (IsSoftLabel(ctx)) {
+      if (check) {
+        PADDLE_ENFORCE_EQ(x_dims[rank - 1], label_dims[rank - 1],
+                          "If Attr(soft_label) == true, the last dimension of "
+                          "Input(X) and Input(Label) should be equal.");
+      }
+    } else {
+      PADDLE_ENFORCE_EQ(label_dims[rank - 1], 1UL,
+                        "If Attr(soft_label) == false, the last dimension of "
+                        "Input(Label) should be 1.");
+    }
+
+    auto y_dims = x_dims;
+    y_dims[rank - 1] = 1;
+    ctx->SetOutputDim("Y", y_dims);
+    ctx->ShareLoD("X", /*->*/ "Y");
+  }
+
+ protected:
+  // Explicitly set that the data type of computation kernel of cross_entropy
+  // is determined by its input "X".
+  framework::OpKernelType GetExpectedKernelType(
+      const framework::ExecutionContext& ctx) const override {
+    return framework::OpKernelType(ctx.Input<framework::Tensor>("X")->type(),
+                                   ctx.device_context());
+  }
+
+  virtual bool IsSoftLabel(framework::InferShapeContext* ctx) const {
+    return ctx->Attrs().Get<bool>("soft_label");
+  }
+};
+
+class CrossEntropyOpInferVarType
+    : public framework::PassInDtypeAndVarTypeToOutput {
+ protected:
+  std::unordered_map<std::string, std::string> GetInputOutputWithSameType()
+      const override {
+    return std::unordered_map<std::string, std::string>{{"X", /*->*/ "Y"}};
+  }
+};
+
+class CrossEntropyGradientOpBase : public framework::OperatorWithKernel {
+ public:
+  using framework::OperatorWithKernel::OperatorWithKernel;
+
+  void InferShape(framework::InferShapeContext* ctx) const {
+    PADDLE_ENFORCE(ctx->HasInput("Label"), "Input(Label) should be not null.");
+    PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Y")),
+                   "Input(Y@GRAD) should be not null.");
+    PADDLE_ENFORCE(ctx->HasOutput(framework::GradVarName("X")),
+                   "Output(X@GRAD) should be not null.");
+
+    auto x_dims = GetXDim(ctx);
+    auto label_dims = ctx->GetInputDim("Label");
+    auto dy_dims = ctx->GetInputDim(framework::GradVarName("Y"));
+    int rank = x_dims.size();
+    PADDLE_ENFORCE_EQ(dy_dims.size(), rank,
+                      "Input(Y@Grad) and Input(X) should have the same rank.");
+    PADDLE_ENFORCE_EQ(label_dims.size(), rank,
+                      "Input(Label) and Input(X) should have the same rank.");
+
+    bool check = true;
+    if ((!ctx->IsRuntime()) && (framework::product(x_dims) <= 0 ||
+                                framework::product(label_dims) <= 0)) {
+      check = false;
+    }
+
+    if (check) {
+      PADDLE_ENFORCE_EQ(framework::slice_ddim(x_dims, 0, rank - 1),
+                        framework::slice_ddim(label_dims, 0, rank - 1),
+                        "The Input(X) and Input(Label) should have the same "
+                        "shape except the last dimension.");
+      PADDLE_ENFORCE_EQ(framework::slice_ddim(x_dims, 0, rank - 1),
+                        framework::slice_ddim(dy_dims, 0, rank - 1),
+                        "The Input(X) and Input(Y@Grad) should have the same "
+                        "shape except the last dimension.");
+    }
+    if (IsSoftLabel(ctx)) {
+      if (check) {
+        PADDLE_ENFORCE_EQ(
+            x_dims[rank - 1], label_dims[rank - 1],
+            "When Attr(soft_label) == true, the last dimension of "
+            "Input(X) and Input(Label) should be equal.");
+      }
+    } else {
+      PADDLE_ENFORCE_EQ(label_dims[rank - 1], 1,
+                        "When Attr(soft_label) == false, the last dimension of "
+                        "Input(Label) should be 1.");
+    }
+    PADDLE_ENFORCE_EQ(dy_dims[rank - 1], 1,
+                      "The last dimension of Input(Y@Grad) should be 1.");
+    ctx->SetOutputDim(framework::GradVarName("X"), x_dims);
+    ctx->ShareLoD(VarNameWithXLoD(), framework::GradVarName("X"));
+  }
+
+ protected:
+  // Explicitly set that the data type of computation kernel of cross_entropy
+  // is determined by its input "X".
+  framework::OpKernelType GetExpectedKernelType(
+      const framework::ExecutionContext& ctx) const override {
+    return framework::OpKernelType(
+        ctx.Input<framework::Tensor>(framework::GradVarName("Y"))->type(),
+        ctx.device_context());
+  }
+
+  virtual framework::DDim GetXDim(framework::InferShapeContext* ctx) const {
+    return ctx->GetInputDim("X");
+  }
+
+  virtual const char* VarNameWithXLoD() const { return "X"; }
+
+  virtual bool IsSoftLabel(framework::InferShapeContext* ctx) const {
+    return ctx->Attrs().Get<bool>("soft_label");
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
diff --git a/paddle/fluid/operators/expand_op.cc b/paddle/fluid/operators/expand_op.cc
index ce3d9a7aacb72a..fcb2be93635eea 100644
--- a/paddle/fluid/operators/expand_op.cc
+++ b/paddle/fluid/operators/expand_op.cc
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
 #include "paddle/fluid/operators/expand_op.h"
+#include <memory>
 #include <vector>
 
 namespace paddle {
diff --git a/paddle/fluid/operators/math.h b/paddle/fluid/operators/math.h
new file mode 100644
index 00000000000000..8cc24200d37dff
--- /dev/null
+++ b/paddle/fluid/operators/math.h
@@ -0,0 +1,42 @@
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+ +#pragma once + +#include "paddle/fluid/platform/float16.h" +#include "paddle/fluid/platform/hostdevice.h" + +#include "math.h" // NOLINT + +namespace paddle { +namespace operators { + +inline HOSTDEVICE platform::float16 real_exp(platform::float16 x) { + return static_cast(::expf(static_cast(x))); +} + +inline HOSTDEVICE float real_exp(float x) { return ::expf(x); } + +inline HOSTDEVICE double real_exp(double x) { return ::exp(x); } + +inline HOSTDEVICE platform::float16 real_log(platform::float16 x) { + return static_cast(::logf(static_cast(x))); +} + +inline HOSTDEVICE float real_log(float x) { return ::logf(x); } + +inline HOSTDEVICE double real_log(double x) { return ::log(x); } + +} // namespace operators +} // namespace paddle diff --git a/paddle/fluid/operators/math/cross_entropy.cu b/paddle/fluid/operators/math/cross_entropy.cu index cb200ec8d6ea53..44cbdf2e988219 100644 --- a/paddle/fluid/operators/math/cross_entropy.cu +++ b/paddle/fluid/operators/math/cross_entropy.cu @@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ +#include "paddle/fluid/operators/math.h" #include "paddle/fluid/operators/math/cross_entropy.h" #include "paddle/fluid/platform/cuda_device_function.h" #include "paddle/fluid/platform/cuda_primitives.h" @@ -20,17 +21,6 @@ namespace paddle { namespace operators { namespace math { -namespace { - -__device__ __forceinline__ float real_log(float x) { return logf(x); } - -__device__ __forceinline__ double real_log(double x) { return log(x); } - -__device__ __forceinline__ platform::float16 real_log( - const platform::float16& val) { - return static_cast(logf(static_cast(val))); -} - template __global__ void CrossEntropyKernel(T* Y, const T* X, const int64_t* label, const int N, const int D, @@ -61,7 +51,6 @@ __global__ void SoftCrossEntropyKernel(T* Y, const T* X, const T* label, Y[blockIdx.x] = -val; } } -} // namespace template class CrossEntropyFunctor { diff --git a/paddle/fluid/operators/selu_op.h b/paddle/fluid/operators/selu_op.h index bdb506885c9327..b2fc834c42f65f 100644 --- a/paddle/fluid/operators/selu_op.h +++ b/paddle/fluid/operators/selu_op.h @@ -15,13 +15,12 @@ limitations under the License. */ #pragma once #include #include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/operators/math.h" #include "paddle/fluid/platform/for_range.h" + namespace paddle { namespace operators { -static HOSTDEVICE float real_exp(float x) { return expf(x); } -static HOSTDEVICE float real_exp(double x) { return exp(x); } - template struct SeluFunctor { SeluFunctor(const T* x_data_ptr, float alpha, float scale, T* y_data_ptr) diff --git a/paddle/fluid/operators/sequence_ops/sequence_softmax_op.cu b/paddle/fluid/operators/sequence_ops/sequence_softmax_op.cu index cc5e9821903fb7..a9dc0a4fda253d 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_softmax_op.cu +++ b/paddle/fluid/operators/sequence_ops/sequence_softmax_op.cu @@ -14,6 +14,7 @@ limitations under the License. 
*/ #include #include // NOLINT +#include "paddle/fluid/operators/math.h" #include "paddle/fluid/operators/sequence_ops/sequence_softmax_op.h" namespace paddle { @@ -21,9 +22,6 @@ namespace operators { using LoDTensor = framework::LoDTensor; -__device__ __forceinline__ float real_exp(float x) { return expf(x); } -__device__ __forceinline__ double real_exp(double x) { return exp(x); } - template using BlockReduce = cub::BlockReduce; diff --git a/paddle/fluid/operators/sigmoid_cross_entropy_with_logits_op.cu b/paddle/fluid/operators/sigmoid_cross_entropy_with_logits_op.cu index 2a4570ef5cec0b..aea69de6434a38 100644 --- a/paddle/fluid/operators/sigmoid_cross_entropy_with_logits_op.cu +++ b/paddle/fluid/operators/sigmoid_cross_entropy_with_logits_op.cu @@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #include "cub/cub.cuh" +#include "paddle/fluid/operators/math.h" #include "paddle/fluid/operators/sigmoid_cross_entropy_with_logits_op.h" #include "paddle/fluid/platform/cuda_primitives.h" #include "paddle/fluid/platform/hostdevice.h" @@ -21,11 +22,6 @@ namespace operators { using Tensor = framework::Tensor; -static HOSTDEVICE float real_exp(float x) { return expf(x); } -static HOSTDEVICE float real_exp(double x) { return exp(x); } -static HOSTDEVICE float real_log(float x) { return logf(x); } -static HOSTDEVICE float real_log(double x) { return log(x); } - static constexpr int kNumCUDAThreads = 512; static constexpr int kNumMaxinumNumBlocks = 4096; From 1e9fd40777c6055845b65f592bebd2cbab4728a9 Mon Sep 17 00:00:00 2001 From: sneaxiy Date: Wed, 13 Mar 2019 11:03:26 +0000 Subject: [PATCH 4/5] combine op files test=develop --- paddle/fluid/operators/cross_entropy2_op.cc | 137 ---------- paddle/fluid/operators/cross_entropy2_op.cu | 29 -- paddle/fluid/operators/cross_entropy2_op.h | 110 -------- paddle/fluid/operators/cross_entropy_op.cc | 256 +++++++++++++++++- paddle/fluid/operators/cross_entropy_op.cu | 10 + paddle/fluid/operators/cross_entropy_op.h | 81 ++++++ .../fluid/operators/cross_entropy_op_base.h | 169 ------------ 7 files changed, 345 insertions(+), 447 deletions(-) delete mode 100644 paddle/fluid/operators/cross_entropy2_op.cc delete mode 100644 paddle/fluid/operators/cross_entropy2_op.cu delete mode 100644 paddle/fluid/operators/cross_entropy2_op.h delete mode 100644 paddle/fluid/operators/cross_entropy_op_base.h diff --git a/paddle/fluid/operators/cross_entropy2_op.cc b/paddle/fluid/operators/cross_entropy2_op.cc deleted file mode 100644 index 181d373cfc3d28..00000000000000 --- a/paddle/fluid/operators/cross_entropy2_op.cc +++ /dev/null @@ -1,137 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "paddle/fluid/operators/cross_entropy2_op.h" -#include -#include -#include -#include "paddle/fluid/operators/cross_entropy_op_base.h" - -namespace paddle { -namespace operators { - -class CrossEntropyOp2 : public CrossEntropyOpBase { - public: - using CrossEntropyOpBase::CrossEntropyOpBase; - - void InferShape(framework::InferShapeContext* ctx) const override { - CrossEntropyOpBase::InferShape(ctx); - - PADDLE_ENFORCE(ctx->HasOutput("XShape"), - "Output(XShape) should be not null."); - - auto x_dims = ctx->GetInputDim("X"); - auto x_dims_vec = framework::vectorize(x_dims); - x_dims_vec.push_back(0); - ctx->SetOutputDim("XShape", framework::make_ddim(x_dims_vec)); - ctx->ShareLoD("X", /*->*/ "XShape"); - } - - protected: - bool IsSoftLabel(framework::InferShapeContext* ctx) const override { - return false; - } -}; - -class CrossEntropyGradientOp2 : public CrossEntropyGradientOpBase { - public: - using CrossEntropyGradientOpBase::CrossEntropyGradientOpBase; - - protected: - virtual framework::DDim GetXDim(framework::InferShapeContext* ctx) const { - auto x_shape = ctx->GetInputDim("XShape"); - return framework::DDim(x_shape.Get(), x_shape.size() - 1); - } - - virtual const char* VarNameWithXLoD() const { return "XShape"; } - - virtual bool IsSoftLabel(framework::InferShapeContext* ctx) const { - return false; - } -}; - -class CrossEntropyOpMaker2 : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput("X", - "(Tensor, default Tensor), a tensor whose last dimension " - "size is equal to the number of classes. This input is a " - "probability computed by the previous operator, which is almost " - "always the result of a softmax operator."); - AddInput( - "Label", - "(Tensor), the tensor which represents the ground truth. It has the " - "same shape with 'X' except the last dimension. One hot Tensor."); - AddOutput("Y", - "(Tensor, default Tensor), a tensor whose shape is same " - "with 'X' except that the last dimension size is 1. It " - "represents the cross entropy loss."); - AddOutput("XShape", "Temporaily variable to save shape and LoD of X."); - AddAttr("ignore_index", - "(int, default -100), Specifies a target value that is" - "ignored and does not contribute to the input gradient." - "Only valid if soft_label is set to False") - .SetDefault(-100); - AddComment(R"DOC( -Hard-label CrossEntropy Operator. - -The input 'X' and 'Label' will first be logically flattened to 2-D matrixs. -The matrix's second dimension(row length) is as same as the original last -dimension, and the first dimension(column length) is the product of all other -original dimensions. Then the softmax computation will take palce on each raw -of flattened matrixs. - -Only support hard label. - -Both the input X and Label can carry the LoD (Level of Details) information, -or not. But the output only shares the LoD information with input X. 
- -)DOC"); - } -}; - -class CrossEntropyGradOpMaker2 : public framework::SingleGradOpDescMaker { - public: - using framework::SingleGradOpDescMaker::SingleGradOpDescMaker; - - protected: - std::unique_ptr Apply() const override { - std::unique_ptr op(new framework::OpDesc()); - op->SetType("cross_entropy_grad2"); - op->SetInput("Label", Input("Label")); - op->SetInput("Y", Output("Y")); - op->SetInput("XShape", Output("XShape")); - op->SetInput(framework::GradVarName("Y"), OutputGrad("Y")); - op->SetOutput(framework::GradVarName("X"), InputGrad("X")); - op->SetAttrMap(Attrs()); - return op; - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -using CPUCtx = paddle::platform::CPUDeviceContext; - -REGISTER_OPERATOR(cross_entropy2, ops::CrossEntropyOp2, - ops::CrossEntropyOpMaker2, ops::CrossEntropyOpInferVarType, - ops::CrossEntropyGradOpMaker2); -REGISTER_OPERATOR(cross_entropy_grad2, ops::CrossEntropyGradientOp2); -REGISTER_OP_CPU_KERNEL(cross_entropy2, - ops::CrossEntropyOpKernel2, - ops::CrossEntropyOpKernel2); -REGISTER_OP_CPU_KERNEL(cross_entropy_grad2, - ops::CrossEntropyGradientOpKernel2, - ops::CrossEntropyGradientOpKernel2); diff --git a/paddle/fluid/operators/cross_entropy2_op.cu b/paddle/fluid/operators/cross_entropy2_op.cu deleted file mode 100644 index 1868c1b866016d..00000000000000 --- a/paddle/fluid/operators/cross_entropy2_op.cu +++ /dev/null @@ -1,29 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/fluid/operators/cross_entropy2_op.h" -#include "paddle/fluid/platform/float16.h" - -namespace plat = paddle::platform; -namespace ops = paddle::operators; -using CUDACtx = paddle::platform::CUDADeviceContext; -REGISTER_OP_CUDA_KERNEL(cross_entropy2, - ops::CrossEntropyOpKernel2, - ops::CrossEntropyOpKernel2, - ops::CrossEntropyOpKernel2); - -REGISTER_OP_CUDA_KERNEL( - cross_entropy_grad2, ops::CrossEntropyGradientOpKernel2, - ops::CrossEntropyGradientOpKernel2, - ops::CrossEntropyGradientOpKernel2); diff --git a/paddle/fluid/operators/cross_entropy2_op.h b/paddle/fluid/operators/cross_entropy2_op.h deleted file mode 100644 index 3e9dc7ebce263d..00000000000000 --- a/paddle/fluid/operators/cross_entropy2_op.h +++ /dev/null @@ -1,110 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-
-#pragma once
-
-#include
-#include "paddle/fluid/framework/eigen.h"
-#include "paddle/fluid/framework/op_registry.h"
-#include "paddle/fluid/operators/math.h"
-#include "paddle/fluid/operators/math/cross_entropy.h"
-#include "paddle/fluid/operators/math/math_function.h"
-#include "paddle/fluid/platform/for_range.h"
-
-namespace paddle {
-namespace operators {
-
-using Tensor = framework::Tensor;
-
-template <typename T>
-struct CrossEntropyBackwardFunctor {
-  CrossEntropyBackwardFunctor(T *dx, const T *y, const T *dy,
-                              const int64_t *label, int64_t ignore_index,
-                              int64_t feature_size)
-      : dx_(dx),
-        y_(y),
-        dy_(dy),
-        label_(label),
-        ignore_index_(ignore_index),
-        feature_size_(feature_size) {}
-
-  HOSTDEVICE void operator()(int64_t idx) const {
-    auto row_idx = idx / feature_size_;
-    auto col_idx = idx % feature_size_;
-    auto label = label_[row_idx];
-    if (label == col_idx && label != ignore_index_) {
-      dx_[idx] = -dy_[row_idx] * real_exp(y_[row_idx]);
-    } else {
-      dx_[idx] = 0;
-    }
-  }
-
-  T *dx_;
-  const T *y_;
-  const T *dy_;
-  const int64_t *label_;
-  int64_t ignore_index_;
-  int64_t feature_size_;
-};
-
-template <typename DeviceContext, typename T>
-class CrossEntropyOpKernel2 : public framework::OpKernel<T> {
- public:
-  void Compute(const framework::ExecutionContext &ctx) const override {
-    auto *x_original = ctx.Input<Tensor>("X");
-    int rank = x_original->dims().size();
-
-    auto x = framework::ReshapeToMatrix(*x_original, rank - 1);
-    auto label =
-        framework::ReshapeToMatrix(*ctx.Input<Tensor>("Label"), rank - 1);
-    auto *y = ctx.Output<Tensor>("Y");
-    y->mutable_data<T>(ctx.GetPlace());
-
-    auto ignore_index = ctx.Attr<int>("ignore_index");
-
-    math::CrossEntropyFunctor<DeviceContext, T>()(
-        ctx.template device_context<DeviceContext>(), y, &x, &label, false,
-        ignore_index);
-  }
-};
-
-template <typename DeviceContext, typename T>
-class CrossEntropyGradientOpKernel2 : public framework::OpKernel<T> {
- public:
-  void Compute(const framework::ExecutionContext &ctx) const override {
-    auto *dx = ctx.Output<Tensor>(framework::GradVarName("X"));
-    auto *y = ctx.Input<Tensor>("Y");
-    auto *dy = ctx.Input<Tensor>(framework::GradVarName("Y"));
-    auto *label = ctx.Input<Tensor>("Label");
-
-    auto *p_dx = dx->mutable_data<T>(ctx.GetPlace());
-    auto *p_y = y->data<T>();
-    auto *p_dy = dy->data<T>();
-    auto *p_label = label->data<int64_t>();
-
-    int64_t ignore_index = ctx.Attr<int>("ignore_index");
-    int rank = dx->dims().size();
-    int64_t feature_size = dx->dims()[rank - 1];
-    int64_t batch_size = framework::product(dx->dims()) / feature_size;
-
-    platform::ForRange<DeviceContext> for_range(
-        ctx.template device_context<DeviceContext>(),
-        batch_size * feature_size);
-    for_range(CrossEntropyBackwardFunctor<T>(p_dx, p_y, p_dy, p_label,
-                                             ignore_index, feature_size));
-  }
-};
-
-}  // namespace operators
-}  // namespace paddle
diff --git a/paddle/fluid/operators/cross_entropy_op.cc b/paddle/fluid/operators/cross_entropy_op.cc
index 1707f7078cad82..dd1b48cecfdc5b 100644
--- a/paddle/fluid/operators/cross_entropy_op.cc
+++ b/paddle/fluid/operators/cross_entropy_op.cc
@@ -14,11 +14,154 @@ limitations under the License.
 */
 #include "paddle/fluid/operators/cross_entropy_op.h"
 #include <string>
-#include "paddle/fluid/operators/cross_entropy_op_base.h"
 
 namespace paddle {
 namespace operators {
 
+class CrossEntropyOpBase : public framework::OperatorWithKernel {
+ public:
+  using framework::OperatorWithKernel::OperatorWithKernel;
+
+  void InferShape(framework::InferShapeContext* ctx) const override {
+    PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should be not null.");
+    PADDLE_ENFORCE(ctx->HasInput("Label"), "Input(Label) should be not null.");
+
+    PADDLE_ENFORCE(ctx->HasOutput("Y"), "Output(Y) should be not null.");
+
+    auto x_dims = ctx->GetInputDim("X");
+    auto label_dims = ctx->GetInputDim("Label");
+    int rank = x_dims.size();
+    PADDLE_ENFORCE_EQ(rank, label_dims.size(),
+                      "Input(X) and Input(Label) shall have the same rank.");
+    bool check = true;
+    if ((!ctx->IsRuntime()) && (framework::product(x_dims) <= 0 ||
+                                framework::product(label_dims) <= 0)) {
+      check = false;
+    }
+    if (check) {
+      PADDLE_ENFORCE_EQ(framework::slice_ddim(x_dims, 0, rank - 1),
+                        framework::slice_ddim(label_dims, 0, rank - 1),
+                        "Input(X) and Input(Label) shall have the same shape "
+                        "except the last dimension.");
+    }
+
+    if (IsSoftLabel(ctx)) {
+      if (check) {
+        PADDLE_ENFORCE_EQ(x_dims[rank - 1], label_dims[rank - 1],
+                          "If Attr(soft_label) == true, the last dimension of "
+                          "Input(X) and Input(Label) should be equal.");
+      }
+    } else {
+      PADDLE_ENFORCE_EQ(label_dims[rank - 1], 1UL,
+                        "If Attr(soft_label) == false, the last dimension of "
+                        "Input(Label) should be 1.");
+    }
+
+    auto y_dims = x_dims;
+    y_dims[rank - 1] = 1;
+    ctx->SetOutputDim("Y", y_dims);
+    ctx->ShareLoD("X", /*->*/ "Y");
+  }
+
+ protected:
+  // Explicitly set that the data type of the computation kernel of
+  // cross_entropy is determined by its input "X".
+  framework::OpKernelType GetExpectedKernelType(
+      const framework::ExecutionContext& ctx) const override {
+    return framework::OpKernelType(ctx.Input<Tensor>("X")->type(),
+                                   ctx.device_context());
+  }
+
+  virtual bool IsSoftLabel(framework::InferShapeContext* ctx) const {
+    return ctx->Attrs().Get<bool>("soft_label");
+  }
+};
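
For reference, the shape contract enforced above: with hard labels the last dimension of Label must be 1 (one class index per row), with soft labels Label must match X exactly, and Y always keeps X's leading dimensions with a trailing 1. A standalone sketch of the rule, using plain vectors rather than framework::DDim (the helper names here are illustrative, not Paddle APIs):

// Illustration of CrossEntropyOpBase::InferShape's shape rules.
#include <cassert>
#include <cstdint>
#include <vector>

// Y keeps all leading dims of X; the class dimension collapses to 1.
std::vector<int64_t> InferYDims(std::vector<int64_t> x_dims) {
  x_dims.back() = 1;
  return x_dims;
}

// Hard label: one class index per row; soft label: same shape as X.
std::vector<int64_t> RequiredLabelDims(std::vector<int64_t> x_dims,
                                       bool soft_label) {
  if (!soft_label) x_dims.back() = 1;
  return x_dims;
}

int main() {
  std::vector<int64_t> x{32, 10, 100};  // e.g. batch 32, seq len 10, 100 classes
  assert((RequiredLabelDims(x, false) == std::vector<int64_t>{32, 10, 1}));
  assert((RequiredLabelDims(x, true) == x));
  assert((InferYDims(x) == std::vector<int64_t>{32, 10, 1}));
  return 0;
}

At graph-building time (when ctx->IsRuntime() is false), dims whose product is non-positive stand for still-unknown sizes, which is why the equality checks above are skipped then and re-run at runtime.
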
+
+class CrossEntropyGradientOpBase : public framework::OperatorWithKernel {
+ public:
+  using framework::OperatorWithKernel::OperatorWithKernel;
+
+  void InferShape(framework::InferShapeContext* ctx) const {
+    PADDLE_ENFORCE(ctx->HasInput("Label"), "Input(Label) should be not null.");
+    PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Y")),
+                   "Input(Y@GRAD) should be not null.");
+    PADDLE_ENFORCE(ctx->HasOutput(framework::GradVarName("X")),
+                   "Output(X@GRAD) should be not null.");
+
+    auto x_dims = GetXDim(ctx);
+    auto label_dims = ctx->GetInputDim("Label");
+    auto dy_dims = ctx->GetInputDim(framework::GradVarName("Y"));
+    int rank = x_dims.size();
+    PADDLE_ENFORCE_EQ(dy_dims.size(), rank,
+                      "Input(Y@Grad) and Input(X) should have the same rank.");
+    PADDLE_ENFORCE_EQ(label_dims.size(), rank,
+                      "Input(Label) and Input(X) should have the same rank.");
+
+    bool check = true;
+    if ((!ctx->IsRuntime()) && (framework::product(x_dims) <= 0 ||
+                                framework::product(label_dims) <= 0)) {
+      check = false;
+    }
+
+    if (check) {
+      PADDLE_ENFORCE_EQ(framework::slice_ddim(x_dims, 0, rank - 1),
+                        framework::slice_ddim(label_dims, 0, rank - 1),
+                        "The Input(X) and Input(Label) should have the same "
+                        "shape except the last dimension.");
+      PADDLE_ENFORCE_EQ(framework::slice_ddim(x_dims, 0, rank - 1),
+                        framework::slice_ddim(dy_dims, 0, rank - 1),
+                        "The Input(X) and Input(Y@Grad) should have the same "
+                        "shape except the last dimension.");
+    }
+    if (IsSoftLabel(ctx)) {
+      if (check) {
+        PADDLE_ENFORCE_EQ(
+            x_dims[rank - 1], label_dims[rank - 1],
+            "When Attr(soft_label) == true, the last dimension of "
+            "Input(X) and Input(Label) should be equal.");
+      }
+    } else {
+      PADDLE_ENFORCE_EQ(label_dims[rank - 1], 1,
+                        "When Attr(soft_label) == false, the last dimension of "
+                        "Input(Label) should be 1.");
+    }
+    PADDLE_ENFORCE_EQ(dy_dims[rank - 1], 1,
+                      "The last dimension of Input(Y@Grad) should be 1.");
+    ctx->SetOutputDim(framework::GradVarName("X"), x_dims);
+    ctx->ShareLoD(VarNameWithXLoD(), framework::GradVarName("X"));
+  }
+
+ protected:
+  // Explicitly set that the data type of the computation kernel of
+  // cross_entropy is determined by its input "X".
+  framework::OpKernelType GetExpectedKernelType(
+      const framework::ExecutionContext& ctx) const override {
+    return framework::OpKernelType(
+        ctx.Input<Tensor>(framework::GradVarName("Y"))->type(),
+        ctx.device_context());
+  }
+
+  virtual framework::DDim GetXDim(framework::InferShapeContext* ctx) const {
+    return ctx->GetInputDim("X");
+  }
+
+  virtual const char* VarNameWithXLoD() const { return "X"; }
+
+  virtual bool IsSoftLabel(framework::InferShapeContext* ctx) const {
+    return ctx->Attrs().Get<bool>("soft_label");
+  }
+};
+
+class CrossEntropyOpInferVarType
+    : public framework::PassInDtypeAndVarTypeToOutput {
+ protected:
+  std::unordered_map<std::string, std::string> GetInputOutputWithSameType()
+      const override {
+    return std::unordered_map<std::string, std::string>{{"X", /*->*/ "Y"}};
+  }
+};
+
 class CrossEntropyOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
   void Make() override {
@@ -87,12 +230,110 @@ class CrossEntropyGradientOp : public CrossEntropyGradientOpBase {
  public:
   using CrossEntropyGradientOpBase::CrossEntropyGradientOpBase;
 
-  void InferShape(framework::InferShapeContext *ctx) const override {
+  void InferShape(framework::InferShapeContext* ctx) const override {
     PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should be not null.");
     CrossEntropyGradientOpBase::InferShape(ctx);
   }
 };
 
+class CrossEntropyOp2 : public CrossEntropyOpBase {
+ public:
+  using CrossEntropyOpBase::CrossEntropyOpBase;
+
+  void InferShape(framework::InferShapeContext* ctx) const override {
+    CrossEntropyOpBase::InferShape(ctx);
+
+    PADDLE_ENFORCE(ctx->HasOutput("XShape"),
+                   "Output(XShape) should be not null.");
+
+    auto x_dims = ctx->GetInputDim("X");
+    auto x_dims_vec = framework::vectorize(x_dims);
+    x_dims_vec.push_back(0);
+    ctx->SetOutputDim("XShape", framework::make_ddim(x_dims_vec));
+    ctx->ShareLoD("X", /*->*/ "XShape");
+  }
+
+ protected:
+  bool IsSoftLabel(framework::InferShapeContext* ctx) const override {
+    return false;
+  }
+};
+
+class CrossEntropyGradientOp2 : public CrossEntropyGradientOpBase {
+ public:
+  using CrossEntropyGradientOpBase::CrossEntropyGradientOpBase;
+
+ protected:
+  framework::DDim GetXDim(framework::InferShapeContext* ctx) const override {
+    auto x_shape = ctx->GetInputDim("XShape");
+    return framework::DDim(x_shape.Get(), x_shape.size() - 1);
+  }
+
+  const char* VarNameWithXLoD() const override { return "XShape"; }
+
+  bool IsSoftLabel(framework::InferShapeContext* ctx) const override {
+    return false;
+  }
+};
+
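The XShape output above is the memory trick behind this series: appending a 0 to X's dims yields a tensor with zero elements that still carries X's shape and LoD, so the backward op can depend on XShape instead of keeping the full probability tensor X alive. GetXDim then strips the trailing 0 to recover the original dims. A standalone sketch of that round trip, with plain vectors standing in for framework::DDim:

// Illustration of the XShape encode/decode used by CrossEntropyOp2 and
// CrossEntropyGradientOp2 (not Paddle code).
#include <cassert>
#include <cstdint>
#include <vector>

// Encode: dims of X -> dims of XShape. The trailing 0 makes the element
// count 0, so no real buffer is allocated, yet the metadata survives.
std::vector<int64_t> EncodeXShape(const std::vector<int64_t>& x_dims) {
  std::vector<int64_t> shape(x_dims);
  shape.push_back(0);
  return shape;
}

// Decode: what GetXDim does via framework::DDim(x_shape.Get(), size - 1).
std::vector<int64_t> DecodeXShape(const std::vector<int64_t>& xshape_dims) {
  return std::vector<int64_t>(xshape_dims.begin(), xshape_dims.end() - 1);
}

int main() {
  std::vector<int64_t> x_dims{64, 1000};
  auto xshape = EncodeXShape(x_dims);      // {64, 1000, 0}: zero elements
  assert(DecodeXShape(xshape) == x_dims);  // original dims recovered
  return 0;
}
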
+class CrossEntropyOpMaker2 : public framework::OpProtoAndCheckerMaker {
+ public:
+  void Make() override {
+    AddInput("X",
+             "(Tensor, default Tensor<float>), a tensor whose last dimension "
+             "size is equal to the number of classes. This input is a "
+             "probability computed by the previous operator, which is almost "
+             "always the result of a softmax operator.");
+    AddInput(
+        "Label",
+        "(Tensor), the tensor which represents the ground truth. It has the "
+        "same shape as 'X' except the last dimension, which is 1 and stores "
+        "the hard label (class index) of each sample.");
+    AddOutput("Y",
+              "(Tensor, default Tensor<float>), a tensor whose shape is the "
+              "same as 'X' except that the last dimension size is 1. It "
+              "represents the cross entropy loss.");
+    AddOutput("XShape", "Temporary variable to save the shape and LoD of X.");
+    AddAttr<int>("ignore_index",
+                 "(int, default -100), Specifies a target value that is "
+                 "ignored and does not contribute to the input gradient. "
+                 "Only valid if soft_label is set to False.")
+        .SetDefault(-100);
+    AddComment(R"DOC(
+Hard-label CrossEntropy Operator.
+
+The input 'X' and 'Label' will first be logically flattened to 2-D matrices.
+The matrices' second dimension (row length) equals the original last
+dimension, and the first dimension (column length) is the product of all
+other original dimensions. The cross entropy computation then takes place on
+each row of the flattened matrices.
+
+Only hard labels are supported.
+
+Both the input X and Label can carry the LoD (Level of Details) information,
+or not. But the output only shares the LoD information with input X.
+
+)DOC");
+  }
+};
+
+class CrossEntropyGradOpDescMaker2 : public framework::SingleGradOpDescMaker {
+ public:
+  using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
+
+ protected:
+  std::unique_ptr<framework::OpDesc> Apply() const override {
+    std::unique_ptr<framework::OpDesc> op(new framework::OpDesc());
+    op->SetType("cross_entropy_grad2");
+    op->SetInput("Label", Input("Label"));
+    op->SetInput("Y", Output("Y"));
+    op->SetInput("XShape", Output("XShape"));
+    op->SetInput(framework::GradVarName("Y"), OutputGrad("Y"));
+    op->SetOutput(framework::GradVarName("X"), InputGrad("X"));
+    op->SetAttrMap(Attrs());
+    return op;
+  }
+};
+
 }  // namespace operators
 }  // namespace paddle
 
@@ -108,3 +349,14 @@ REGISTER_OP_CPU_KERNEL(cross_entropy, ops::CrossEntropyOpKernel<CPUCtx, float>,
 REGISTER_OP_CPU_KERNEL(cross_entropy_grad,
                        ops::CrossEntropyGradientOpKernel<CPUCtx, float>,
                        ops::CrossEntropyGradientOpKernel<CPUCtx, double>);
+
+REGISTER_OPERATOR(cross_entropy2, ops::CrossEntropyOp2,
+                  ops::CrossEntropyOpMaker2, ops::CrossEntropyOpInferVarType,
+                  ops::CrossEntropyGradOpDescMaker2);
+REGISTER_OPERATOR(cross_entropy_grad2, ops::CrossEntropyGradientOp2);
+REGISTER_OP_CPU_KERNEL(cross_entropy2,
+                       ops::CrossEntropyOpKernel2<CPUCtx, float>,
+                       ops::CrossEntropyOpKernel2<CPUCtx, double>);
+REGISTER_OP_CPU_KERNEL(cross_entropy_grad2,
+                       ops::CrossEntropyGradientOpKernel2<CPUCtx, float>,
+                       ops::CrossEntropyGradientOpKernel2<CPUCtx, double>);
diff --git a/paddle/fluid/operators/cross_entropy_op.cu b/paddle/fluid/operators/cross_entropy_op.cu
index fcd34383a85f69..243e7f52c1e3c4 100644
--- a/paddle/fluid/operators/cross_entropy_op.cu
+++ b/paddle/fluid/operators/cross_entropy_op.cu
@@ -27,3 +27,13 @@ REGISTER_OP_CUDA_KERNEL(
     cross_entropy_grad, ops::CrossEntropyGradientOpKernel<CUDACtx, float>,
     ops::CrossEntropyGradientOpKernel<CUDACtx, double>,
     ops::CrossEntropyGradientOpKernel<CUDACtx, plat::float16>);
+
+REGISTER_OP_CUDA_KERNEL(cross_entropy2,
+                        ops::CrossEntropyOpKernel2<CUDACtx, float>,
+                        ops::CrossEntropyOpKernel2<CUDACtx, double>,
+                        ops::CrossEntropyOpKernel2<CUDACtx, plat::float16>);
+
+REGISTER_OP_CUDA_KERNEL(
+    cross_entropy_grad2, ops::CrossEntropyGradientOpKernel2<CUDACtx, float>,
+    ops::CrossEntropyGradientOpKernel2<CUDACtx, double>,
+    ops::CrossEntropyGradientOpKernel2<CUDACtx, plat::float16>);
diff --git a/paddle/fluid/operators/cross_entropy_op.h b/paddle/fluid/operators/cross_entropy_op.h
index f123e11542d85c..05609e4bc20b1c 100644
--- a/paddle/fluid/operators/cross_entropy_op.h
+++ b/paddle/fluid/operators/cross_entropy_op.h
@@ -15,6 +15,7 @@ limitations under the License.
 */
 #pragma once
 #include "paddle/fluid/framework/eigen.h"
 #include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/operators/math.h"
 #include "paddle/fluid/operators/math/cross_entropy.h"
 #include "paddle/fluid/operators/math/math_function.h"
 #include "paddle/fluid/platform/for_range.h"
@@ -137,5 +138,85 @@ class CrossEntropyGradientOpKernel : public framework::OpKernel<T> {
   }
 };
 
+template <typename T>
+struct HardLabelCrossEntropyBackwardFunctor {
+  HardLabelCrossEntropyBackwardFunctor(T* dx, const T* y, const T* dy,
+                                       const int64_t* label,
+                                       int64_t ignore_index,
+                                       int64_t feature_size)
+      : dx_(dx),
+        y_(y),
+        dy_(dy),
+        label_(label),
+        ignore_index_(ignore_index),
+        feature_size_(feature_size) {}
+
+  HOSTDEVICE void operator()(int64_t idx) const {
+    auto row_idx = idx / feature_size_;
+    auto col_idx = idx % feature_size_;
+    auto label = label_[row_idx];
+    if (label == col_idx && label != ignore_index_) {
+      dx_[idx] = -dy_[row_idx] * real_exp(y_[row_idx]);
+    } else {
+      dx_[idx] = 0;
+    }
+  }
+
+  T* dx_;
+  const T* y_;
+  const T* dy_;
+  const int64_t* label_;
+  int64_t ignore_index_;
+  int64_t feature_size_;
+};
+
+template <typename DeviceContext, typename T>
+class CrossEntropyOpKernel2 : public framework::OpKernel<T> {
+ public:
+  void Compute(const framework::ExecutionContext& ctx) const override {
+    auto* x_original = ctx.Input<Tensor>("X");
+    int rank = x_original->dims().size();
+
+    auto x = framework::ReshapeToMatrix(*x_original, rank - 1);
+    auto label =
+        framework::ReshapeToMatrix(*ctx.Input<Tensor>("Label"), rank - 1);
+    auto* y = ctx.Output<Tensor>("Y");
+    y->mutable_data<T>(ctx.GetPlace());
+
+    auto ignore_index = ctx.Attr<int>("ignore_index");
+
+    math::CrossEntropyFunctor<DeviceContext, T>()(
+        ctx.template device_context<DeviceContext>(), y, &x, &label, false,
+        ignore_index);
+  }
+};
+
+template <typename DeviceContext, typename T>
+class CrossEntropyGradientOpKernel2 : public framework::OpKernel<T> {
+ public:
+  void Compute(const framework::ExecutionContext& ctx) const override {
+    auto* dx = ctx.Output<Tensor>(framework::GradVarName("X"));
+    auto* y = ctx.Input<Tensor>("Y");
+    auto* dy = ctx.Input<Tensor>(framework::GradVarName("Y"));
+    auto* label = ctx.Input<Tensor>("Label");
+
+    auto* p_dx = dx->mutable_data<T>(ctx.GetPlace());
+    auto* p_y = y->data<T>();
+    auto* p_dy = dy->data<T>();
+    auto* p_label = label->data<int64_t>();
+
+    int64_t ignore_index = ctx.Attr<int>("ignore_index");
+    int rank = dx->dims().size();
+    int64_t feature_size = dx->dims()[rank - 1];
+    int64_t batch_size = framework::product(dx->dims()) / feature_size;
+
+    platform::ForRange<DeviceContext> for_range(
+        ctx.template device_context<DeviceContext>(),
+        batch_size * feature_size);
+    for_range(HardLabelCrossEntropyBackwardFunctor<T>(
+        p_dx, p_y, p_dy, p_label, ignore_index, feature_size));
+  }
+};
+
 }  // namespace operators
 }  // namespace paddle
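
The functor added above never reads X: since the forward pass stores y = -log(x[label]), it holds that exp(y) = 1/x[label], so the hard-label gradient -dy/x[label] can be rewritten as -dy * exp(y). That identity is what lets cross_entropy_grad2 take only Label, Y, Y@GRAD and XShape as inputs. A self-contained numeric check of the identity:

// Verifies that -dy * exp(y) equals the direct gradient -dy / p for
// y = -log(p), the form HardLabelCrossEntropyBackwardFunctor relies on.
#include <cassert>
#include <cmath>

int main() {
  const double p = 0.25;          // predicted probability of the true class
  const double dy = 1.0;          // upstream gradient for this row
  const double y = -std::log(p);  // forward: hard-label cross entropy loss

  const double dx_direct = -dy / p;             // d(-log p)/dp, chain rule
  const double dx_functor = -dy * std::exp(y);  // form used by the functor

  assert(std::fabs(dx_direct - dx_functor) < 1e-12);
  return 0;
}
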
diff --git a/paddle/fluid/operators/cross_entropy_op_base.h b/paddle/fluid/operators/cross_entropy_op_base.h
deleted file mode 100644
index c3e5254c37e029..00000000000000
--- a/paddle/fluid/operators/cross_entropy_op_base.h
+++ /dev/null
@@ -1,169 +0,0 @@
-// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#pragma once
-
-#include <string>
-#include <unordered_map>
-#include "paddle/fluid/framework/op_registry.h"
-
-namespace paddle {
-namespace operators {
-
-class CrossEntropyOpBase : public framework::OperatorWithKernel {
- public:
-  using framework::OperatorWithKernel::OperatorWithKernel;
-
-  void InferShape(framework::InferShapeContext* ctx) const override {
-    PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should be not null.");
-    PADDLE_ENFORCE(ctx->HasInput("Label"), "Input(Label) should be not null.");
-
-    PADDLE_ENFORCE(ctx->HasOutput("Y"), "Output(Y) should be not null.");
-
-    auto x_dims = ctx->GetInputDim("X");
-    auto label_dims = ctx->GetInputDim("Label");
-    int rank = x_dims.size();
-    PADDLE_ENFORCE_EQ(rank, label_dims.size(),
-                      "Input(X) and Input(Label) shall have the same rank.");
-    bool check = true;
-    if ((!ctx->IsRuntime()) && (framework::product(x_dims) <= 0 ||
-                                framework::product(label_dims) <= 0)) {
-      check = false;
-    }
-    if (check) {
-      PADDLE_ENFORCE_EQ(framework::slice_ddim(x_dims, 0, rank - 1),
-                        framework::slice_ddim(label_dims, 0, rank - 1),
-                        "Input(X) and Input(Label) shall have the same shape "
-                        "except the last dimension.");
-    }
-
-    if (IsSoftLabel(ctx)) {
-      if (check) {
-        PADDLE_ENFORCE_EQ(x_dims[rank - 1], label_dims[rank - 1],
-                          "If Attr(soft_label) == true, the last dimension of "
-                          "Input(X) and Input(Label) should be equal.");
-      }
-    } else {
-      PADDLE_ENFORCE_EQ(label_dims[rank - 1], 1UL,
-                        "If Attr(softLabel) == false, the last dimension of "
-                        "Input(Label) should be 1.");
-    }
-
-    auto y_dims = x_dims;
-    y_dims[rank - 1] = 1;
-    ctx->SetOutputDim("Y", y_dims);
-    ctx->ShareLoD("X", /*->*/ "Y");
-  }
-
- protected:
-  // Explicitly set that the data type of computation kernel of cross_entropy
-  // is determined by its input "X".
-  framework::OpKernelType GetExpectedKernelType(
-      const framework::ExecutionContext& ctx) const override {
-    return framework::OpKernelType(ctx.Input<Tensor>("X")->type(),
-                                   ctx.device_context());
-  }
-
-  virtual bool IsSoftLabel(framework::InferShapeContext* ctx) const {
-    return ctx->Attrs().Get<bool>("soft_label");
-  }
-};
-
-class CrossEntropyOpInferVarType
-    : public framework::PassInDtypeAndVarTypeToOutput {
- protected:
-  std::unordered_map<std::string, std::string> GetInputOutputWithSameType()
-      const override {
-    return std::unordered_map<std::string, std::string>{{"X", /*->*/ "Y"}};
-  }
-};
-
-class CrossEntropyGradientOpBase : public framework::OperatorWithKernel {
- public:
-  using framework::OperatorWithKernel::OperatorWithKernel;
-
-  void InferShape(framework::InferShapeContext* ctx) const {
-    PADDLE_ENFORCE(ctx->HasInput("Label"), "Input(Label) should be not null.");
-    PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Y")),
-                   "Input(Y@GRAD) shoudl be not null.");
-    PADDLE_ENFORCE(ctx->HasOutput(framework::GradVarName("X")),
-                   "Output(X@GRAD) should be not null.");
-
-    auto x_dims = GetXDim(ctx);
-    auto label_dims = ctx->GetInputDim("Label");
-    auto dy_dims = ctx->GetInputDim(framework::GradVarName("Y"));
-    int rank = x_dims.size();
-    PADDLE_ENFORCE_EQ(dy_dims.size(), rank,
-                      "Input(Y@Grad) and Input(X) should have the same rank.");
-    PADDLE_ENFORCE_EQ(label_dims.size(), rank,
-                      "Input(Label) and Input(X) should have the same rank.");
-
-    bool check = true;
-    if ((!ctx->IsRuntime()) && (framework::product(x_dims) <= 0 ||
-                                framework::product(label_dims) <= 0)) {
-      check = false;
-    }
-
-    if (check) {
-      PADDLE_ENFORCE_EQ(framework::slice_ddim(x_dims, 0, rank - 1),
-                        framework::slice_ddim(label_dims, 0, rank - 1),
-                        "The Input(X) and Input(Label) should have the same "
dimension."); - PADDLE_ENFORCE_EQ(framework::slice_ddim(x_dims, 0, rank - 1), - framework::slice_ddim(dy_dims, 0, rank - 1), - "The Input(X) and Input(Y@Grad) should have the same " - "shape except the last dimension."); - } - if (IsSoftLabel(ctx)) { - if (check) { - PADDLE_ENFORCE_EQ( - x_dims[rank - 1], label_dims[rank - 1], - "When Attr(soft_label) == true, the last dimension of " - "Input(X) and Input(Label) should be equal."); - } - } else { - PADDLE_ENFORCE_EQ(label_dims[rank - 1], 1, - "When Attr(soft_label) == false, the last dimension of " - "Input(Label) should be 1."); - } - ctx->SetOutputDim(framework::GradVarName("X"), x_dims); - PADDLE_ENFORCE_EQ(dy_dims[rank - 1], 1, - "The last dimension of Input(Y@Grad) should be 1."); - ctx->SetOutputDim(framework::GradVarName("X"), x_dims); - ctx->ShareLoD(VarNameWithXLoD(), framework::GradVarName("X")); - } - - protected: - // Explicitly set that the data type of computation kernel of cross_entropy - // is determined by its input "X". - framework::OpKernelType GetExpectedKernelType( - const framework::ExecutionContext& ctx) const override { - return framework::OpKernelType( - ctx.Input(framework::GradVarName("Y"))->type(), - ctx.device_context()); - } - - virtual framework::DDim GetXDim(framework::InferShapeContext* ctx) const { - return ctx->GetInputDim("X"); - } - - virtual const char* VarNameWithXLoD() const { return "X"; } - - virtual bool IsSoftLabel(framework::InferShapeContext* ctx) const { - return ctx->Attrs().Get("soft_label"); - } -}; - -} // namespace operators -} // namespace paddle From 487624e15def7ee99ee8b8def64dff905061b118 Mon Sep 17 00:00:00 2001 From: sneaxiy Date: Wed, 13 Mar 2019 13:14:58 +0000 Subject: [PATCH 5/5] fix travis-ci test=develop --- paddle/fluid/operators/cross_entropy_op.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/paddle/fluid/operators/cross_entropy_op.cc b/paddle/fluid/operators/cross_entropy_op.cc index dd1b48cecfdc5b..7e744e68e9737f 100644 --- a/paddle/fluid/operators/cross_entropy_op.cc +++ b/paddle/fluid/operators/cross_entropy_op.cc @@ -13,7 +13,9 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/cross_entropy_op.h" +#include #include +#include namespace paddle { namespace operators {