From d7407c90aa3ee847fda052fdca9f10b788249875 Mon Sep 17 00:00:00 2001 From: sneaxiy Date: Fri, 8 Mar 2019 13:01:30 +0000 Subject: [PATCH 1/5] refine cross_entropy mem test=develop --- paddle/fluid/operators/cross_entropy2_op.cc | 218 ++++++++++++++++++++ paddle/fluid/operators/cross_entropy2_op.cu | 29 +++ paddle/fluid/operators/cross_entropy2_op.h | 188 +++++++++++++++++ python/paddle/fluid/layers/nn.py | 16 ++ 4 files changed, 451 insertions(+) create mode 100644 paddle/fluid/operators/cross_entropy2_op.cc create mode 100644 paddle/fluid/operators/cross_entropy2_op.cu create mode 100644 paddle/fluid/operators/cross_entropy2_op.h diff --git a/paddle/fluid/operators/cross_entropy2_op.cc b/paddle/fluid/operators/cross_entropy2_op.cc new file mode 100644 index 00000000000000..03b217a974c7bb --- /dev/null +++ b/paddle/fluid/operators/cross_entropy2_op.cc @@ -0,0 +1,218 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/fluid/operators/cross_entropy2_op.h" +#include +#include +#include + +namespace paddle { +namespace operators { + +class CrossEntropyOp2 : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + void InferShape(framework::InferShapeContext* ctx) const override { + PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should be not null."); + PADDLE_ENFORCE(ctx->HasInput("Label"), "Input(Label) should be not null."); + + PADDLE_ENFORCE(ctx->HasOutput("Y"), "Output(Y) should be not null."); + PADDLE_ENFORCE(ctx->HasOutput("XShape"), + "Output(XShape) should be not null."); + + auto x_dims = ctx->GetInputDim("X"); + auto label_dims = ctx->GetInputDim("Label"); + int rank = x_dims.size(); + PADDLE_ENFORCE_EQ(rank, label_dims.size(), + "Input(X) and Input(Label) shall have the same rank."); + bool check = true; + if ((!ctx->IsRuntime()) && (framework::product(x_dims) <= 0 || + framework::product(label_dims) <= 0)) { + check = false; + } + if (check) { + PADDLE_ENFORCE_EQ(framework::slice_ddim(x_dims, 0, rank - 1), + framework::slice_ddim(label_dims, 0, rank - 1), + "Input(X) and Input(Label) shall have the same shape " + "except the last dimension."); + } + + PADDLE_ENFORCE_EQ(label_dims[rank - 1], 1UL, + "Last dimension of Input(Label) should be 1."); + auto y_dims = x_dims; + y_dims[rank - 1] = 1; + ctx->SetOutputDim("Y", y_dims); + ctx->ShareLoD("X", /*->*/ "Y"); + + auto x_dims_vec = framework::vectorize(x_dims); + x_dims_vec.push_back(0); + ctx->SetOutputDim("XShape", framework::make_ddim(x_dims_vec)); + ctx->ShareLoD("X", /*->*/ "XShape"); + } + + protected: + // Explicitly set that the data type of computation kernel of cross_entropy + // is determined by its input "X". 
+  framework::OpKernelType GetExpectedKernelType(
+      const framework::ExecutionContext& ctx) const override {
+    return framework::OpKernelType(ctx.Input<Tensor>("X")->type(),
+                                   ctx.device_context());
+  }
+};
+
+class CrossEntropyGradientOp2 : public framework::OperatorWithKernel {
+ public:
+  using framework::OperatorWithKernel::OperatorWithKernel;
+
+  void InferShape(framework::InferShapeContext* ctx) const override {
+    PADDLE_ENFORCE(ctx->HasInput("Label"), "Input(Label) should be not null.");
+    PADDLE_ENFORCE(ctx->HasInput("XShape"),
+                   "Input(XShape) should be not null.");
+    PADDLE_ENFORCE(ctx->HasInput("Y"), "Input(Y) should be not null.");
+
+    PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Y")),
+                   "Input(Y@GRAD) should be not null.");
+
+    PADDLE_ENFORCE(ctx->HasOutput(framework::GradVarName("X")),
+                   "Output(X@GRAD) should be not null.");
+
+    auto x_shapes = ctx->GetInputDim("XShape");
+    framework::DDim x_dims(x_shapes.Get(), x_shapes.size() - 1);
+    auto label_dims = ctx->GetInputDim("Label");
+    auto dy_dims = ctx->GetInputDim(framework::GradVarName("Y"));
+    int rank = x_dims.size();
+    PADDLE_ENFORCE_EQ(dy_dims.size(), rank,
+                      "Input(Y@Grad) and Input(X) should have the same rank.");
+    PADDLE_ENFORCE_EQ(label_dims.size(), rank,
+                      "Input(Label) and Input(X) should have the same rank.");
+
+    bool check = true;
+    if ((!ctx->IsRuntime()) && (framework::product(x_dims) <= 0 ||
+                                framework::product(label_dims) <= 0)) {
+      check = false;
+    }
+
+    if (check) {
+      PADDLE_ENFORCE_EQ(framework::slice_ddim(x_dims, 0, rank - 1),
+                        framework::slice_ddim(label_dims, 0, rank - 1),
+                        "The Input(X) and Input(Label) should have the same "
+                        "shape except the last dimension.");
+      PADDLE_ENFORCE_EQ(framework::slice_ddim(x_dims, 0, rank - 1),
+                        framework::slice_ddim(dy_dims, 0, rank - 1),
+                        "The Input(X) and Input(Y@Grad) should have the same "
+                        "shape except the last dimension.");
+    }
+    PADDLE_ENFORCE_EQ(dy_dims[rank - 1], 1,
+                      "The last dimension of Input(Y@Grad) should be 1.");
+    PADDLE_ENFORCE_EQ(label_dims[rank - 1], 1,
+                      "Last dimension of Input(Label) should be 1.");
+    ctx->SetOutputDim(framework::GradVarName("X"), x_dims);
+    ctx->ShareLoD("XShape", framework::GradVarName("X"));
+  }
+
+ protected:
+  // Explicitly set that the data type of computation kernel of cross_entropy
+  // is determined by its input "X".
+  framework::OpKernelType GetExpectedKernelType(
+      const framework::ExecutionContext& ctx) const override {
+    return framework::OpKernelType(
+        ctx.Input<Tensor>(framework::GradVarName("Y"))->type(),
+        ctx.device_context());
+  }
+};
+
+class CrossEntropyOpMaker2 : public framework::OpProtoAndCheckerMaker {
+ public:
+  void Make() override {
+    AddInput("X",
+             "(Tensor, default Tensor<float>), a tensor whose last dimension "
+             "size is equal to the number of classes. This input is a "
+             "probability computed by the previous operator, which is almost "
+             "always the result of a softmax operator.");
+    AddInput(
+        "Label",
+        "(Tensor), the tensor which represents the ground truth. It has the "
+        "same shape with 'X' except the last dimension, and holds hard "
+        "labels (class indices) rather than one-hot vectors.");
+    AddOutput("Y",
+              "(Tensor, default Tensor<float>), a tensor whose shape is same "
+              "with 'X' except that the last dimension size is 1. It "
+              "represents the cross entropy loss.");
+    AddOutput("XShape", "Temporary variable to save the shape and LoD of X.");
+    AddAttr<int>("ignore_index",
+                 "(int, default -100), Specifies a target value that is "
+                 "ignored and does not contribute to the input gradient. "
+ "Only valid if soft_label is set to False") + .SetDefault(-100); + AddComment(R"DOC( +CrossEntropy Operator. + +The input 'X' and 'Label' will first be logically flattened to 2-D matrixs. +The matrix's second dimension(row length) is as same as the original last +dimension, and the first dimension(column length) is the product of all other +original dimensions. Then the softmax computation will take palce on each raw +of flattened matrixs. + +Only support hard label. + +Both the input X and Label can carry the LoD (Level of Details) information, +or not. But the output only shares the LoD information with input X. + +)DOC"); + } +}; + +class CrossEntropyOpInferVarType2 + : public framework::PassInDtypeAndVarTypeToOutput { + protected: + std::unordered_map GetInputOutputWithSameType() + const override { + return std::unordered_map{{"X", /*->*/ "Y"}}; + } +}; + +class CrossEntropyGradOpMaker2 : public framework::SingleGradOpDescMaker { + public: + using framework::SingleGradOpDescMaker::SingleGradOpDescMaker; + + protected: + std::unique_ptr Apply() const override { + std::unique_ptr op(new framework::OpDesc()); + op->SetType("cross_entropy_grad2"); + op->SetInput("Label", Input("Label")); + op->SetInput("Y", Output("Y")); + op->SetInput("XShape", Output("XShape")); + op->SetInput(framework::GradVarName("Y"), OutputGrad("Y")); + op->SetOutput(framework::GradVarName("X"), InputGrad("X")); + op->SetAttrMap(Attrs()); + return op; + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +using CPUCtx = paddle::platform::CPUDeviceContext; + +REGISTER_OPERATOR(cross_entropy2, ops::CrossEntropyOp2, + ops::CrossEntropyOpMaker2, ops::CrossEntropyOpInferVarType2, + ops::CrossEntropyGradOpMaker2); +REGISTER_OPERATOR(cross_entropy_grad2, ops::CrossEntropyGradientOp2); +REGISTER_OP_CPU_KERNEL(cross_entropy2, + ops::CrossEntropyOpKernel2, + ops::CrossEntropyOpKernel2); +REGISTER_OP_CPU_KERNEL(cross_entropy_grad2, + ops::CrossEntropyGradientOpKernel2, + ops::CrossEntropyGradientOpKernel2); diff --git a/paddle/fluid/operators/cross_entropy2_op.cu b/paddle/fluid/operators/cross_entropy2_op.cu new file mode 100644 index 00000000000000..1868c1b866016d --- /dev/null +++ b/paddle/fluid/operators/cross_entropy2_op.cu @@ -0,0 +1,29 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#include "paddle/fluid/operators/cross_entropy2_op.h" +#include "paddle/fluid/platform/float16.h" + +namespace plat = paddle::platform; +namespace ops = paddle::operators; +using CUDACtx = paddle::platform::CUDADeviceContext; +REGISTER_OP_CUDA_KERNEL(cross_entropy2, + ops::CrossEntropyOpKernel2, + ops::CrossEntropyOpKernel2, + ops::CrossEntropyOpKernel2); + +REGISTER_OP_CUDA_KERNEL( + cross_entropy_grad2, ops::CrossEntropyGradientOpKernel2, + ops::CrossEntropyGradientOpKernel2, + ops::CrossEntropyGradientOpKernel2); diff --git a/paddle/fluid/operators/cross_entropy2_op.h b/paddle/fluid/operators/cross_entropy2_op.h new file mode 100644 index 00000000000000..3d209f7c5c95c2 --- /dev/null +++ b/paddle/fluid/operators/cross_entropy2_op.h @@ -0,0 +1,188 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include +#include "paddle/fluid/framework/eigen.h" +#include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/operators/math/cross_entropy.h" +#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/fluid/platform/for_range.h" + +namespace paddle { +namespace operators { + +using Tensor = framework::Tensor; + +HOSTDEVICE inline platform::float16 RealLog(platform::float16 x) { +#ifdef __NVCC__ + return static_cast(logf(static_cast(x))); +#else + return static_cast(std::log(static_cast(x))); +#endif +} + +HOSTDEVICE inline float RealLog(float x) { +#ifdef __NVCC__ + return logf(x); +#else + return std::log(x); +#endif +} + +HOSTDEVICE inline double RealLog(double x) { +#ifdef __NVCC__ + return log(x); +#else + return std::log(x); +#endif +} + +HOSTDEVICE inline platform::float16 RealExp(platform::float16 x) { +#ifdef __NVCC__ + return static_cast(expf(static_cast(x))); +#else + return static_cast(std::exp(static_cast(x))); +#endif +} + +HOSTDEVICE inline float RealExp(float x) { +#ifdef __NVCC__ + return expf(x); +#else + return std::exp(x); +#endif +} + +HOSTDEVICE inline double RealExp(double x) { +#ifdef __NVCC__ + return exp(x); +#else + return std::exp(x); +#endif +} + +template +struct CrossEntropyForwardFunctor { + CrossEntropyForwardFunctor(const T *x, T *y, const int64_t *label, + int64_t ignore_index, int64_t feature_size) + : x_(x), + y_(y), + label_(label), + ignore_index_(ignore_index), + feature_size_(feature_size) {} + + HOSTDEVICE void operator()(int64_t row_idx) const { + auto col_idx = label_[row_idx]; + if (col_idx != ignore_index_) { + y_[row_idx] = -math::TolerableValue()( + RealLog(x_[row_idx * feature_size_ + col_idx])); + } else { + y_[row_idx] = 0; + } + } + + const T *x_; + T *y_; + const int64_t *label_; + int64_t ignore_index_; + int64_t feature_size_; +}; + +template +struct CrossEntropyBackwardFunctor { + CrossEntropyBackwardFunctor(T *dx, const T *y, const T *dy, + const int64_t *label, int64_t ignore_index, + int64_t feature_size) + : dx_(dx), + y_(y), + dy_(dy), + label_(label), + ignore_index_(ignore_index), + feature_size_(feature_size) {} + + HOSTDEVICE void 
operator()(int64_t idx) const { + auto row_idx = idx / feature_size_; + auto col_idx = idx % feature_size_; + auto label = label_[row_idx]; + if (label == col_idx && label != ignore_index_) { + dx_[idx] = -dy_[row_idx] * RealExp(y_[row_idx]); + } else { + dx_[idx] = 0; + } + } + + T *dx_; + const T *y_; + const T *dy_; + const int64_t *label_; + int64_t ignore_index_; + int64_t feature_size_; +}; + +template +class CrossEntropyOpKernel2 : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext &ctx) const override { + auto *x = ctx.Input("X"); + auto *label = ctx.Input("Label"); + auto *y = ctx.Output("Y"); + + auto *p_y = y->mutable_data(ctx.GetPlace()); + auto *p_x = x->data(); + auto *p_label = label->data(); + + int rank = x->dims().size(); + int64_t feature_size = x->dims()[rank - 1]; + int64_t batch_size = framework::product(x->dims()) / feature_size; + + int64_t ignore_index = ctx.Attr("ignore_index"); + + platform::ForRange for_range( + ctx.template device_context(), batch_size); + for_range(CrossEntropyForwardFunctor(p_x, p_y, p_label, ignore_index, + feature_size)); + } +}; + +template +class CrossEntropyGradientOpKernel2 : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext &ctx) const override { + auto *dx = ctx.Output(framework::GradVarName("X")); + auto *y = ctx.Input("Y"); + auto *dy = ctx.Input(framework::GradVarName("Y")); + auto *label = ctx.Input("Label"); + + auto *p_dx = dx->mutable_data(ctx.GetPlace()); + auto *p_y = y->data(); + auto *p_dy = dy->data(); + auto *p_label = label->data(); + + int64_t ignore_index = ctx.Attr("ignore_index"); + int rank = dx->dims().size(); + int64_t feature_size = dx->dims()[rank - 1]; + int64_t batch_size = framework::product(dx->dims()) / feature_size; + + platform::ForRange for_range( + ctx.template device_context(), + batch_size * feature_size); + for_range(CrossEntropyBackwardFunctor(p_dx, p_y, p_dy, p_label, + ignore_index, feature_size)); + } +}; + +} // namespace operators +} // namespace paddle diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index 9d1d5fe0932ea8..4f384ce37d7c88 100644 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -1432,6 +1432,8 @@ def cross_entropy(input, label, soft_label=False, ignore_index=kIgnoreIndex): predict = fluid.layers.fc(input=net, size=classdim, act='softmax') cost = fluid.layers.cross_entropy(input=predict, label=label) """ + if not soft_label: + return cross_entropy2(input, label, ignore_index) helper = LayerHelper('cross_entropy', **locals()) out = helper.create_variable_for_type_inference(dtype=input.dtype) helper.append_op( @@ -1444,6 +1446,20 @@ def cross_entropy(input, label, soft_label=False, ignore_index=kIgnoreIndex): return out +def cross_entropy2(input, label, ignore_index=kIgnoreIndex): + helper = LayerHelper('cross_entropy2', **locals()) + out = helper.create_variable_for_type_inference(dtype=input.dtype) + xshape = helper.create_variable_for_type_inference(dtype=input.dtype) + helper.append_op( + type='cross_entropy2', + inputs={'X': [input], + 'Label': [label]}, + outputs={'Y': [out], + 'XShape': [xshape]}, + attrs={'ignore_index': ignore_index}) + return out + + def bpr_loss(input, label, name=None): """ Bayesian Personalized Ranking Loss Operator. 
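
A note on where the memory saving comes from: unlike cross_entropy_grad, the
new cross_entropy_grad2 op takes Y, Y@GRAD, Label and the shape-only XShape
as inputs rather than the full activation X. With hard labels,
Y = -log(X[label]), so the 1/X[label] factor needed by the backward pass
equals exp(Y), and X itself can be released early. A minimal NumPy sketch of
the math implemented by CrossEntropyForwardFunctor and
CrossEntropyBackwardFunctor above (the helper names below are illustrative,
not part of the PR):

import numpy as np

def cross_entropy2_forward(x, label, ignore_index=-100):
    # x: (N, D) probabilities (e.g. softmax output); label: (N, 1) int64.
    n = x.shape[0]
    y = np.zeros((n, 1), dtype=x.dtype)
    for i in range(n):
        lbl = int(label[i, 0])
        if lbl != ignore_index:
            y[i, 0] = -np.log(x[i, lbl])
    return y

def cross_entropy2_backward(y, dy, label, num_classes, ignore_index=-100):
    # X is not needed here: 1 / X[i, label[i]] == exp(Y[i]).
    n = y.shape[0]
    dx = np.zeros((n, num_classes), dtype=y.dtype)
    for i in range(n):
        lbl = int(label[i, 0])
        if lbl != ignore_index:
            dx[i, lbl] = -dy[i, 0] * np.exp(y[i, 0])
    return dx
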
From cfd012e2cb82dc0a2f4ddcc0d23eeefbb28aff0a Mon Sep 17 00:00:00 2001 From: sneaxiy Date: Mon, 11 Mar 2019 04:05:00 +0000 Subject: [PATCH 2/5] add unittest test=develop --- paddle/fluid/operators/expand_op.cc | 18 ++++- .../tests/unittests/test_cross_entropy2_op.py | 79 +++++++++++++++++++ .../tests/unittests/test_dist_transpiler.py | 20 ++--- 3 files changed, 106 insertions(+), 11 deletions(-) create mode 100644 python/paddle/fluid/tests/unittests/test_cross_entropy2_op.py diff --git a/paddle/fluid/operators/expand_op.cc b/paddle/fluid/operators/expand_op.cc index 44a2f37b667724..ce3d9a7aacb72a 100644 --- a/paddle/fluid/operators/expand_op.cc +++ b/paddle/fluid/operators/expand_op.cc @@ -138,12 +138,28 @@ class ExpandGradOp : public framework::OperatorWithKernel { } }; +class ExpandGradOpDescMaker : public framework::SingleGradOpDescMaker { + public: + using framework::SingleGradOpDescMaker::SingleGradOpDescMaker; + + protected: + std::unique_ptr Apply() const override { + std::unique_ptr op(new framework::OpDesc()); + op->SetType("expand_grad"); + op->SetInput("X", Input("X")); + op->SetInput(framework::GradVarName("Out"), OutputGrad("Out")); + op->SetOutput(framework::GradVarName("X"), InputGrad("X")); + op->SetAttrMap(Attrs()); + return op; + } +}; + } // namespace operators } // namespace paddle namespace ops = paddle::operators; REGISTER_OPERATOR(expand, ops::ExpandOp, ops::ExpandOpMaker, - paddle::framework::DefaultGradOpDescMaker); + ops::ExpandGradOpDescMaker); REGISTER_OPERATOR(expand_grad, ops::ExpandGradOp); REGISTER_OP_CPU_KERNEL( expand, ops::ExpandKernel, diff --git a/python/paddle/fluid/tests/unittests/test_cross_entropy2_op.py b/python/paddle/fluid/tests/unittests/test_cross_entropy2_op.py new file mode 100644 index 00000000000000..c29d422361b2fc --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_cross_entropy2_op.py @@ -0,0 +1,79 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
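+
+# The tests below draw logits uniformly from (1, 2) so that -log(x) stays
+# finite, pick random integer labels, and build the expected loss row by row,
+# skipping rows whose label equals ignore_index (mirroring
+# CrossEntropyForwardFunctor). XShape is excluded from the output check
+# because it only carries shape/LoD metadata.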
+ +from op_test import OpTest +import unittest +import numpy as np +import six + + +class CrossEntropy2OpTestBase(OpTest): + def initParameters(self): + return [32, 64], 'float32', -100 + + def calc_output(self, logits, label, ignore_index): + ret = np.zeros(shape=label.shape, dtype=logits.dtype) + for idx in six.moves.range(label.shape[0]): + if label[idx] == ignore_index: + continue + ret[idx] = -np.log(logits[idx][label[idx]]) + return ret + + def setUp(self): + self.shape, self.dtype, self.ignore_index = self.initParameters() + self.op_type = 'cross_entropy2' + feature_size = int(self.shape[-1]) + batch_size = int(np.prod(self.shape) / feature_size) + logits = (np.random.random(size=self.shape) + 1).astype(self.dtype) + label = np.random.random_integers( + low=0, high=feature_size - 1, + size=self.shape[0:-1] + [1]).astype('int64') + outputs = self.calc_output( + np.reshape(logits, [batch_size, feature_size]), + np.reshape(label, [batch_size, 1]), self.ignore_index) + self.inputs = {'X': logits, 'Label': label} + self.outputs = { + 'Y': np.reshape(outputs, label.shape), + 'XShape': np.zeros( + shape=logits.shape, dtype=logits.dtype) + } + self.attrs = {'ignore_index': self.ignore_index} + + def test_check_output(self): + self.check_output(no_check_set=['XShape']) + + def test_check_grad(self): + self.check_grad( + inputs_to_check=['X'], + output_names=['Y'], + no_grad_set=['XShape', 'Label']) + + +class CrossEntropy2OpTest2(CrossEntropy2OpTestBase): + def initParameters(self): + return [32, 64], 'float64', 3 + + +class CrossEntropy2OpTest3(CrossEntropy2OpTestBase): + def initParameters(self): + return [4, 8, 16, 32], 'float32', -100 + + +class CrossEntropy2OpTest4(CrossEntropy2OpTestBase): + def initParameters(self): + return [4, 8, 16, 32], 'float32', 3 + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_dist_transpiler.py b/python/paddle/fluid/tests/unittests/test_dist_transpiler.py index 12132477d28c74..f81d4fda50be19 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_transpiler.py +++ b/python/paddle/fluid/tests/unittests/test_dist_transpiler.py @@ -524,8 +524,8 @@ def transpiler_test_impl(self): ops = [ 'lookup_table', 'sequence_pool', 'lookup_table', 'sequence_pool', 'lookup_table', 'sequence_pool', 'concat', 'mul', 'elementwise_add', - 'cross_entropy', 'mean', 'fill_constant', 'mean_grad', - 'cross_entropy_grad', 'elementwise_add_grad', 'send', 'mul_grad', + 'cross_entropy2', 'mean', 'fill_constant', 'mean_grad', + 'cross_entropy_grad2', 'elementwise_add_grad', 'send', 'mul_grad', 'send', 'concat_grad', 'sequence_pool_grad', 'lookup_table_grad', 'split_selected_rows', 'send', 'sequence_pool_grad', 'lookup_table_grad', 'sequence_pool_grad', 'lookup_table_grad', @@ -564,8 +564,8 @@ def transpiler_test_impl(self): ops = [ 'split_ids', 'prefetch', 'merge_ids', 'sequence_pool', 'sequence_pool', 'lookup_table', 'sequence_pool', 'concat', 'mul', - 'elementwise_add', 'cross_entropy', 'mean', 'fill_constant', - 'mean_grad', 'cross_entropy_grad', 'elementwise_add_grad', 'send', + 'elementwise_add', 'cross_entropy2', 'mean', 'fill_constant', + 'mean_grad', 'cross_entropy_grad2', 'elementwise_add_grad', 'send', 'mul_grad', 'send', 'concat_grad', 'sequence_pool_grad', 'lookup_table_grad', 'split_selected_rows', 'send', 'sequence_pool_grad', 'lookup_table_grad', 'sequence_pool_grad', @@ -612,8 +612,8 @@ def transpiler_test_impl(self): ops = [ 'lookup_table', 'sequence_pool', 'lookup_table', 'sequence_pool', 'lookup_table', 
'sequence_pool', 'concat', 'mul', 'elementwise_add', - 'cross_entropy', 'mean', 'fill_constant', 'mean_grad', - 'cross_entropy_grad', 'elementwise_add_grad', 'send', 'mul_grad', + 'cross_entropy2', 'mean', 'fill_constant', 'mean_grad', + 'cross_entropy_grad2', 'elementwise_add_grad', 'send', 'mul_grad', 'send', 'concat_grad', 'sequence_pool_grad', 'lookup_table_grad', 'split_selected_rows', 'send', 'sequence_pool_grad', 'lookup_table_grad', 'sequence_pool_grad', 'lookup_table_grad', @@ -652,8 +652,8 @@ def transpiler_test_impl(self): ops = [ 'split_ids', 'prefetch', 'merge_ids', 'sequence_pool', 'sequence_pool', 'lookup_table', 'sequence_pool', 'concat', 'mul', - 'elementwise_add', 'cross_entropy', 'mean', 'fill_constant', - 'mean_grad', 'cross_entropy_grad', 'elementwise_add_grad', 'send', + 'elementwise_add', 'cross_entropy2', 'mean', 'fill_constant', + 'mean_grad', 'cross_entropy_grad2', 'elementwise_add_grad', 'send', 'mul_grad', 'send', 'concat_grad', 'sequence_pool_grad', 'lookup_table_grad', 'split_selected_rows', 'send', 'sequence_pool_grad', 'lookup_table_grad', 'sequence_pool_grad', @@ -841,8 +841,8 @@ def transpiler_test_impl(self): ops = [ 'lookup_table', 'sequence_pool', 'lookup_table', 'sequence_pool', 'lookup_table', 'sequence_pool', 'concat', 'mul', 'elementwise_add', - 'cross_entropy', 'mean', 'fill_constant', 'mean_grad', - 'cross_entropy_grad', 'elementwise_add_grad', 'send', 'mul_grad', + 'cross_entropy2', 'mean', 'fill_constant', 'mean_grad', + 'cross_entropy_grad2', 'elementwise_add_grad', 'send', 'mul_grad', 'send', 'concat_grad', 'sequence_pool_grad', 'lookup_table_grad', 'split_selected_rows', 'send', 'sequence_pool_grad', 'lookup_table_grad', 'sequence_pool_grad', 'lookup_table_grad', From b26e9bd2326029de54901031ba93458f32a0db5b Mon Sep 17 00:00:00 2001 From: sneaxiy Date: Tue, 12 Mar 2019 03:48:33 +0000 Subject: [PATCH 3/5] refine code test=develop --- paddle/fluid/operators/cross_entropy2_op.cc | 117 ++---------- paddle/fluid/operators/cross_entropy2_op.h | 104 ++--------- paddle/fluid/operators/cross_entropy_op.cc | 137 ++------------ .../fluid/operators/cross_entropy_op_base.h | 169 ++++++++++++++++++ paddle/fluid/operators/expand_op.cc | 1 + paddle/fluid/operators/math.h | 42 +++++ paddle/fluid/operators/math/cross_entropy.cu | 13 +- paddle/fluid/operators/selu_op.h | 5 +- .../sequence_ops/sequence_softmax_op.cu | 4 +- .../sigmoid_cross_entropy_with_logits_op.cu | 6 +- 10 files changed, 259 insertions(+), 339 deletions(-) create mode 100644 paddle/fluid/operators/cross_entropy_op_base.h create mode 100644 paddle/fluid/operators/math.h diff --git a/paddle/fluid/operators/cross_entropy2_op.cc b/paddle/fluid/operators/cross_entropy2_op.cc index 03b217a974c7bb..181d373cfc3d28 100644 --- a/paddle/fluid/operators/cross_entropy2_op.cc +++ b/paddle/fluid/operators/cross_entropy2_op.cc @@ -16,46 +16,22 @@ limitations under the License. 
*/ #include #include #include +#include "paddle/fluid/operators/cross_entropy_op_base.h" namespace paddle { namespace operators { -class CrossEntropyOp2 : public framework::OperatorWithKernel { +class CrossEntropyOp2 : public CrossEntropyOpBase { public: - using framework::OperatorWithKernel::OperatorWithKernel; + using CrossEntropyOpBase::CrossEntropyOpBase; void InferShape(framework::InferShapeContext* ctx) const override { - PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should be not null."); - PADDLE_ENFORCE(ctx->HasInput("Label"), "Input(Label) should be not null."); + CrossEntropyOpBase::InferShape(ctx); - PADDLE_ENFORCE(ctx->HasOutput("Y"), "Output(Y) should be not null."); PADDLE_ENFORCE(ctx->HasOutput("XShape"), "Output(XShape) should be not null."); auto x_dims = ctx->GetInputDim("X"); - auto label_dims = ctx->GetInputDim("Label"); - int rank = x_dims.size(); - PADDLE_ENFORCE_EQ(rank, label_dims.size(), - "Input(X) and Input(Label) shall have the same rank."); - bool check = true; - if ((!ctx->IsRuntime()) && (framework::product(x_dims) <= 0 || - framework::product(label_dims) <= 0)) { - check = false; - } - if (check) { - PADDLE_ENFORCE_EQ(framework::slice_ddim(x_dims, 0, rank - 1), - framework::slice_ddim(label_dims, 0, rank - 1), - "Input(X) and Input(Label) shall have the same shape " - "except the last dimension."); - } - - PADDLE_ENFORCE_EQ(label_dims[rank - 1], 1UL, - "Last dimension of Input(Label) should be 1."); - auto y_dims = x_dims; - y_dims[rank - 1] = 1; - ctx->SetOutputDim("Y", y_dims); - ctx->ShareLoD("X", /*->*/ "Y"); - auto x_dims_vec = framework::vectorize(x_dims); x_dims_vec.push_back(0); ctx->SetOutputDim("XShape", framework::make_ddim(x_dims_vec)); @@ -63,73 +39,25 @@ class CrossEntropyOp2 : public framework::OperatorWithKernel { } protected: - // Explicitly set that the data type of computation kernel of cross_entropy - // is determined by its input "X". 
- framework::OpKernelType GetExpectedKernelType( - const framework::ExecutionContext& ctx) const override { - return framework::OpKernelType(ctx.Input("X")->type(), - ctx.device_context()); + bool IsSoftLabel(framework::InferShapeContext* ctx) const override { + return false; } }; -class CrossEntropyGradientOp2 : public framework::OperatorWithKernel { +class CrossEntropyGradientOp2 : public CrossEntropyGradientOpBase { public: - using framework::OperatorWithKernel::OperatorWithKernel; + using CrossEntropyGradientOpBase::CrossEntropyGradientOpBase; - void InferShape(framework::InferShapeContext* ctx) const override { - PADDLE_ENFORCE(ctx->HasInput("Label"), "Input(Label) should be not null."); - PADDLE_ENFORCE(ctx->HasInput("XShape"), - "Input(XShape) should be not null."); - PADDLE_ENFORCE(ctx->HasInput("Y"), "Input(Y) should be not null."); - - PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Y")), - "Input(Y@GRAD) shoudl be not null."); - - PADDLE_ENFORCE(ctx->HasOutput(framework::GradVarName("X")), - "Output(X@GRAD) should be not null."); - - auto x_shapes = ctx->GetInputDim("XShape"); - framework::DDim x_dims(x_shapes.Get(), x_shapes.size() - 1); - auto label_dims = ctx->GetInputDim("Label"); - auto dy_dims = ctx->GetInputDim(framework::GradVarName("Y")); - int rank = x_dims.size(); - PADDLE_ENFORCE_EQ(dy_dims.size(), rank, - "Input(Y@Grad) and Input(X) should have the same rank."); - PADDLE_ENFORCE_EQ(label_dims.size(), rank, - "Input(Label) and Input(X) should have the same rank."); - - bool check = true; - if ((!ctx->IsRuntime()) && (framework::product(x_dims) <= 0 || - framework::product(label_dims) <= 0)) { - check = false; - } - - if (check) { - PADDLE_ENFORCE_EQ(framework::slice_ddim(x_dims, 0, rank - 1), - framework::slice_ddim(label_dims, 0, rank - 1), - "The Input(X) and Input(Label) should have the same " - "shape except the last dimension."); - PADDLE_ENFORCE_EQ(framework::slice_ddim(x_dims, 0, rank - 1), - framework::slice_ddim(dy_dims, 0, rank - 1), - "The Input(X) and Input(Y@Grad) should have the same " - "shape except the last dimension."); - } - PADDLE_ENFORCE_EQ(dy_dims[rank - 1], 1, - "The last dimension of Input(Y@Grad) should be 1."); - PADDLE_ENFORCE_EQ(label_dims[rank - 1], 1, - "Last dimension of Input(Label) should be 1."); - ctx->SetOutputDim(framework::GradVarName("X"), x_dims); - ctx->ShareLoD("XShape", framework::GradVarName("X")); + protected: + virtual framework::DDim GetXDim(framework::InferShapeContext* ctx) const { + auto x_shape = ctx->GetInputDim("XShape"); + return framework::DDim(x_shape.Get(), x_shape.size() - 1); } - protected: - // Explicitly set that the data type of computation kernel of cross_entropy - // is determined by its input "X". - framework::OpKernelType GetExpectedKernelType( - const framework::ExecutionContext& ctx) const override { - return framework::OpKernelType( - ctx.Input(framework::GradVarName("Y"))->type(), - ctx.device_context()); + virtual const char* VarNameWithXLoD() const { return "XShape"; } + + virtual bool IsSoftLabel(framework::InferShapeContext* ctx) const { + return false; } }; @@ -156,7 +84,7 @@ class CrossEntropyOpMaker2 : public framework::OpProtoAndCheckerMaker { "Only valid if soft_label is set to False") .SetDefault(-100); AddComment(R"DOC( -CrossEntropy Operator. +Hard-label CrossEntropy Operator. The input 'X' and 'Label' will first be logically flattened to 2-D matrixs. The matrix's second dimension(row length) is as same as the original last @@ -173,15 +101,6 @@ or not. 
But the output only shares the LoD information with input X. } }; -class CrossEntropyOpInferVarType2 - : public framework::PassInDtypeAndVarTypeToOutput { - protected: - std::unordered_map GetInputOutputWithSameType() - const override { - return std::unordered_map{{"X", /*->*/ "Y"}}; - } -}; - class CrossEntropyGradOpMaker2 : public framework::SingleGradOpDescMaker { public: using framework::SingleGradOpDescMaker::SingleGradOpDescMaker; @@ -207,7 +126,7 @@ namespace ops = paddle::operators; using CPUCtx = paddle::platform::CPUDeviceContext; REGISTER_OPERATOR(cross_entropy2, ops::CrossEntropyOp2, - ops::CrossEntropyOpMaker2, ops::CrossEntropyOpInferVarType2, + ops::CrossEntropyOpMaker2, ops::CrossEntropyOpInferVarType, ops::CrossEntropyGradOpMaker2); REGISTER_OPERATOR(cross_entropy_grad2, ops::CrossEntropyGradientOp2); REGISTER_OP_CPU_KERNEL(cross_entropy2, diff --git a/paddle/fluid/operators/cross_entropy2_op.h b/paddle/fluid/operators/cross_entropy2_op.h index 3d209f7c5c95c2..3e9dc7ebce263d 100644 --- a/paddle/fluid/operators/cross_entropy2_op.h +++ b/paddle/fluid/operators/cross_entropy2_op.h @@ -17,6 +17,7 @@ limitations under the License. */ #include #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/operators/math.h" #include "paddle/fluid/operators/math/cross_entropy.h" #include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/platform/for_range.h" @@ -26,81 +27,6 @@ namespace operators { using Tensor = framework::Tensor; -HOSTDEVICE inline platform::float16 RealLog(platform::float16 x) { -#ifdef __NVCC__ - return static_cast(logf(static_cast(x))); -#else - return static_cast(std::log(static_cast(x))); -#endif -} - -HOSTDEVICE inline float RealLog(float x) { -#ifdef __NVCC__ - return logf(x); -#else - return std::log(x); -#endif -} - -HOSTDEVICE inline double RealLog(double x) { -#ifdef __NVCC__ - return log(x); -#else - return std::log(x); -#endif -} - -HOSTDEVICE inline platform::float16 RealExp(platform::float16 x) { -#ifdef __NVCC__ - return static_cast(expf(static_cast(x))); -#else - return static_cast(std::exp(static_cast(x))); -#endif -} - -HOSTDEVICE inline float RealExp(float x) { -#ifdef __NVCC__ - return expf(x); -#else - return std::exp(x); -#endif -} - -HOSTDEVICE inline double RealExp(double x) { -#ifdef __NVCC__ - return exp(x); -#else - return std::exp(x); -#endif -} - -template -struct CrossEntropyForwardFunctor { - CrossEntropyForwardFunctor(const T *x, T *y, const int64_t *label, - int64_t ignore_index, int64_t feature_size) - : x_(x), - y_(y), - label_(label), - ignore_index_(ignore_index), - feature_size_(feature_size) {} - - HOSTDEVICE void operator()(int64_t row_idx) const { - auto col_idx = label_[row_idx]; - if (col_idx != ignore_index_) { - y_[row_idx] = -math::TolerableValue()( - RealLog(x_[row_idx * feature_size_ + col_idx])); - } else { - y_[row_idx] = 0; - } - } - - const T *x_; - T *y_; - const int64_t *label_; - int64_t ignore_index_; - int64_t feature_size_; -}; - template struct CrossEntropyBackwardFunctor { CrossEntropyBackwardFunctor(T *dx, const T *y, const T *dy, @@ -118,7 +44,7 @@ struct CrossEntropyBackwardFunctor { auto col_idx = idx % feature_size_; auto label = label_[row_idx]; if (label == col_idx && label != ignore_index_) { - dx_[idx] = -dy_[row_idx] * RealExp(y_[row_idx]); + dx_[idx] = -dy_[row_idx] * real_exp(y_[row_idx]); } else { dx_[idx] = 0; } @@ -136,24 +62,20 @@ template class CrossEntropyOpKernel2 : public framework::OpKernel { public: void 
Compute(const framework::ExecutionContext &ctx) const override { - auto *x = ctx.Input("X"); - auto *label = ctx.Input("Label"); - auto *y = ctx.Output("Y"); + auto *x_original = ctx.Input("X"); + int rank = x_original->dims().size(); - auto *p_y = y->mutable_data(ctx.GetPlace()); - auto *p_x = x->data(); - auto *p_label = label->data(); + auto x = framework::ReshapeToMatrix(*x_original, rank - 1); + auto label = + framework::ReshapeToMatrix(*ctx.Input("Label"), rank - 1); + auto *y = ctx.Output("Y"); + y->mutable_data(ctx.GetPlace()); - int rank = x->dims().size(); - int64_t feature_size = x->dims()[rank - 1]; - int64_t batch_size = framework::product(x->dims()) / feature_size; + auto ignore_index = ctx.Attr("ignore_index"); - int64_t ignore_index = ctx.Attr("ignore_index"); - - platform::ForRange for_range( - ctx.template device_context(), batch_size); - for_range(CrossEntropyForwardFunctor(p_x, p_y, p_label, ignore_index, - feature_size)); + math::CrossEntropyFunctor()( + ctx.template device_context(), y, &x, &label, false, + ignore_index); } }; diff --git a/paddle/fluid/operators/cross_entropy_op.cc b/paddle/fluid/operators/cross_entropy_op.cc index 3adc7baebddd06..1707f7078cad82 100644 --- a/paddle/fluid/operators/cross_entropy_op.cc +++ b/paddle/fluid/operators/cross_entropy_op.cc @@ -14,128 +14,11 @@ limitations under the License. */ #include "paddle/fluid/operators/cross_entropy_op.h" #include +#include "paddle/fluid/operators/cross_entropy_op_base.h" namespace paddle { namespace operators { -class CrossEntropyOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - void InferShape(framework::InferShapeContext* ctx) const override { - PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should be not null."); - PADDLE_ENFORCE(ctx->HasInput("Label"), "Input(Label) should be not null."); - PADDLE_ENFORCE(ctx->HasOutput("Y"), "Output(Y) should be not null."); - - auto x_dims = ctx->GetInputDim("X"); - auto label_dims = ctx->GetInputDim("Label"); - int rank = x_dims.size(); - PADDLE_ENFORCE_EQ(rank, label_dims.size(), - "Input(X) and Input(Label) shall have the same rank."); - bool check = true; - if ((!ctx->IsRuntime()) && (framework::product(x_dims) <= 0 || - framework::product(label_dims) <= 0)) { - check = false; - } - if (check) { - PADDLE_ENFORCE_EQ(framework::slice_ddim(x_dims, 0, rank - 1), - framework::slice_ddim(label_dims, 0, rank - 1), - "Input(X) and Input(Label) shall have the same shape " - "except the last dimension."); - } - if (ctx->Attrs().Get("soft_label")) { - if (check) { - PADDLE_ENFORCE_EQ(x_dims[rank - 1], label_dims[rank - 1], - "If Attr(soft_label) == true, the last dimension of " - "Input(X) and Input(Label) should be equal."); - } - } else { - PADDLE_ENFORCE_EQ(label_dims[rank - 1], 1UL, - "If Attr(softLabel) == false, the last dimension of " - "Input(Label) should be 1."); - } - - auto y_dims = x_dims; - y_dims[rank - 1] = 1; - ctx->SetOutputDim("Y", y_dims); - ctx->ShareLoD("X", /*->*/ "Y"); - } - - protected: - // Explicitly set that the data type of computation kernel of cross_entropy - // is determined by its input "X". 
- framework::OpKernelType GetExpectedKernelType( - const framework::ExecutionContext& ctx) const override { - return framework::OpKernelType(ctx.Input("X")->type(), - ctx.device_context()); - } -}; - -class CrossEntropyGradientOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - void InferShape(framework::InferShapeContext* ctx) const override { - PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should be not null."); - PADDLE_ENFORCE(ctx->HasInput("Label"), "Input(Label) should be not null."); - PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Y")), - "Input(Y@GRAD) shoudl be not null."); - PADDLE_ENFORCE(ctx->HasOutput(framework::GradVarName("X")), - "Output(X@GRAD) should be not null."); - - auto x_dims = ctx->GetInputDim("X"); - auto label_dims = ctx->GetInputDim("Label"); - auto dy_dims = ctx->GetInputDim(framework::GradVarName("Y")); - int rank = x_dims.size(); - PADDLE_ENFORCE_EQ(dy_dims.size(), rank, - "Input(Y@Grad) and Input(X) should have the same rank."); - PADDLE_ENFORCE_EQ(label_dims.size(), rank, - "Input(Label) and Input(X) should have the same rank."); - - bool check = true; - if ((!ctx->IsRuntime()) && (framework::product(x_dims) <= 0 || - framework::product(label_dims) <= 0)) { - check = false; - } - - if (check) { - PADDLE_ENFORCE_EQ(framework::slice_ddim(x_dims, 0, rank - 1), - framework::slice_ddim(label_dims, 0, rank - 1), - "The Input(X) and Input(Label) should have the same " - "shape except the last dimension."); - PADDLE_ENFORCE_EQ(framework::slice_ddim(x_dims, 0, rank - 1), - framework::slice_ddim(dy_dims, 0, rank - 1), - "The Input(X) and Input(Y@Grad) should have the same " - "shape except the last dimension."); - } - PADDLE_ENFORCE_EQ(dy_dims[rank - 1], 1, - "The last dimension of Input(Y@Grad) should be 1."); - if (ctx->Attrs().Get("soft_label")) { - if (check) { - PADDLE_ENFORCE_EQ( - x_dims[rank - 1], label_dims[rank - 1], - "When Attr(soft_label) == true, the last dimension of " - "Input(X) and Input(Label) should be equal."); - } - } else { - PADDLE_ENFORCE_EQ(label_dims[rank - 1], 1, - "When Attr(soft_label) == false, the last dimension of " - "Input(Label) should be 1."); - } - ctx->SetOutputDim(framework::GradVarName("X"), x_dims); - ctx->ShareLoD("X", framework::GradVarName("X")); - } - - protected: - // Explicitly set that the data type of computation kernel of cross_entropy - // is determined by its input "X". - framework::OpKernelType GetExpectedKernelType( - const framework::ExecutionContext& ctx) const override { - return framework::OpKernelType(ctx.Input("X")->type(), - ctx.device_context()); - } -}; - class CrossEntropyOpMaker : public framework::OpProtoAndCheckerMaker { public: void Make() override { @@ -200,22 +83,24 @@ or not. But the output only shares the LoD information with input X. 
} }; -class CrossEntropyOpInferVarType - : public framework::PassInDtypeAndVarTypeToOutput { - protected: - std::unordered_map GetInputOutputWithSameType() - const override { - return std::unordered_map{{"X", /*->*/ "Y"}}; +class CrossEntropyGradientOp : public CrossEntropyGradientOpBase { + public: + using CrossEntropyGradientOpBase::CrossEntropyGradientOpBase; + + void InferShape(framework::InferShapeContext *ctx) const override { + PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should be not null."); + CrossEntropyGradientOpBase::InferShape(ctx); } }; + } // namespace operators } // namespace paddle namespace ops = paddle::operators; using CPUCtx = paddle::platform::CPUDeviceContext; -REGISTER_OPERATOR(cross_entropy, ops::CrossEntropyOp, ops::CrossEntropyOpMaker, - ops::CrossEntropyOpInferVarType, +REGISTER_OPERATOR(cross_entropy, ops::CrossEntropyOpBase, + ops::CrossEntropyOpMaker, ops::CrossEntropyOpInferVarType, paddle::framework::DefaultGradOpDescMaker); REGISTER_OPERATOR(cross_entropy_grad, ops::CrossEntropyGradientOp); REGISTER_OP_CPU_KERNEL(cross_entropy, ops::CrossEntropyOpKernel, diff --git a/paddle/fluid/operators/cross_entropy_op_base.h b/paddle/fluid/operators/cross_entropy_op_base.h new file mode 100644 index 00000000000000..c3e5254c37e029 --- /dev/null +++ b/paddle/fluid/operators/cross_entropy_op_base.h @@ -0,0 +1,169 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+
+#pragma once
+
+#include <string>
+#include <unordered_map>
+#include "paddle/fluid/framework/op_registry.h"
+
+namespace paddle {
+namespace operators {
+
+class CrossEntropyOpBase : public framework::OperatorWithKernel {
+ public:
+  using framework::OperatorWithKernel::OperatorWithKernel;
+
+  void InferShape(framework::InferShapeContext* ctx) const override {
+    PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should be not null.");
+    PADDLE_ENFORCE(ctx->HasInput("Label"), "Input(Label) should be not null.");
+
+    PADDLE_ENFORCE(ctx->HasOutput("Y"), "Output(Y) should be not null.");
+
+    auto x_dims = ctx->GetInputDim("X");
+    auto label_dims = ctx->GetInputDim("Label");
+    int rank = x_dims.size();
+    PADDLE_ENFORCE_EQ(rank, label_dims.size(),
+                      "Input(X) and Input(Label) shall have the same rank.");
+    bool check = true;
+    if ((!ctx->IsRuntime()) && (framework::product(x_dims) <= 0 ||
+                                framework::product(label_dims) <= 0)) {
+      check = false;
+    }
+    if (check) {
+      PADDLE_ENFORCE_EQ(framework::slice_ddim(x_dims, 0, rank - 1),
+                        framework::slice_ddim(label_dims, 0, rank - 1),
+                        "Input(X) and Input(Label) shall have the same shape "
+                        "except the last dimension.");
+    }
+
+    if (IsSoftLabel(ctx)) {
+      if (check) {
+        PADDLE_ENFORCE_EQ(x_dims[rank - 1], label_dims[rank - 1],
+                          "If Attr(soft_label) == true, the last dimension of "
+                          "Input(X) and Input(Label) should be equal.");
+      }
+    } else {
+      PADDLE_ENFORCE_EQ(label_dims[rank - 1], 1UL,
+                        "If Attr(soft_label) == false, the last dimension of "
+                        "Input(Label) should be 1.");
+    }
+
+    auto y_dims = x_dims;
+    y_dims[rank - 1] = 1;
+    ctx->SetOutputDim("Y", y_dims);
+    ctx->ShareLoD("X", /*->*/ "Y");
+  }
+
+ protected:
+  // Explicitly set that the data type of computation kernel of cross_entropy
+  // is determined by its input "X".
+  framework::OpKernelType GetExpectedKernelType(
+      const framework::ExecutionContext& ctx) const override {
+    return framework::OpKernelType(ctx.Input<framework::Tensor>("X")->type(),
+                                   ctx.device_context());
+  }
+
+  virtual bool IsSoftLabel(framework::InferShapeContext* ctx) const {
+    return ctx->Attrs().Get<bool>("soft_label");
+  }
+};
+
+class CrossEntropyOpInferVarType
+    : public framework::PassInDtypeAndVarTypeToOutput {
+ protected:
+  std::unordered_map<std::string, std::string> GetInputOutputWithSameType()
+      const override {
+    return std::unordered_map<std::string, std::string>{{"X", /*->*/ "Y"}};
+  }
+};
+
+class CrossEntropyGradientOpBase : public framework::OperatorWithKernel {
+ public:
+  using framework::OperatorWithKernel::OperatorWithKernel;
+
+  void InferShape(framework::InferShapeContext* ctx) const {
+    PADDLE_ENFORCE(ctx->HasInput("Label"), "Input(Label) should be not null.");
+    PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Y")),
+                   "Input(Y@GRAD) should be not null.");
+    PADDLE_ENFORCE(ctx->HasOutput(framework::GradVarName("X")),
+                   "Output(X@GRAD) should be not null.");
+
+    auto x_dims = GetXDim(ctx);
+    auto label_dims = ctx->GetInputDim("Label");
+    auto dy_dims = ctx->GetInputDim(framework::GradVarName("Y"));
+    int rank = x_dims.size();
+    PADDLE_ENFORCE_EQ(dy_dims.size(), rank,
+                      "Input(Y@Grad) and Input(X) should have the same rank.");
+    PADDLE_ENFORCE_EQ(label_dims.size(), rank,
+                      "Input(Label) and Input(X) should have the same rank.");
+
+    bool check = true;
+    if ((!ctx->IsRuntime()) && (framework::product(x_dims) <= 0 ||
+                                framework::product(label_dims) <= 0)) {
+      check = false;
+    }
+
+    if (check) {
+      PADDLE_ENFORCE_EQ(framework::slice_ddim(x_dims, 0, rank - 1),
+                        framework::slice_ddim(label_dims, 0, rank - 1),
+                        "The Input(X) and Input(Label) should have the same "
+                        "shape except the last dimension.");
+      PADDLE_ENFORCE_EQ(framework::slice_ddim(x_dims, 0, rank - 1),
+                        framework::slice_ddim(dy_dims, 0, rank - 1),
+                        "The Input(X) and Input(Y@Grad) should have the same "
+                        "shape except the last dimension.");
+    }
+    if (IsSoftLabel(ctx)) {
+      if (check) {
+        PADDLE_ENFORCE_EQ(
+            x_dims[rank - 1], label_dims[rank - 1],
+            "When Attr(soft_label) == true, the last dimension of "
+            "Input(X) and Input(Label) should be equal.");
+      }
+    } else {
+      PADDLE_ENFORCE_EQ(label_dims[rank - 1], 1,
+                        "When Attr(soft_label) == false, the last dimension of "
+                        "Input(Label) should be 1.");
+    }
+    PADDLE_ENFORCE_EQ(dy_dims[rank - 1], 1,
+                      "The last dimension of Input(Y@Grad) should be 1.");
+    ctx->SetOutputDim(framework::GradVarName("X"), x_dims);
+    ctx->ShareLoD(VarNameWithXLoD(), framework::GradVarName("X"));
+  }
+
+ protected:
+  // Explicitly set that the data type of computation kernel of cross_entropy
+  // is determined by its input "X".
+  framework::OpKernelType GetExpectedKernelType(
+      const framework::ExecutionContext& ctx) const override {
+    return framework::OpKernelType(
+        ctx.Input<framework::Tensor>(framework::GradVarName("Y"))->type(),
+        ctx.device_context());
+  }
+
+  virtual framework::DDim GetXDim(framework::InferShapeContext* ctx) const {
+    return ctx->GetInputDim("X");
+  }
+
+  virtual const char* VarNameWithXLoD() const { return "X"; }
+
+  virtual bool IsSoftLabel(framework::InferShapeContext* ctx) const {
+    return ctx->Attrs().Get<bool>("soft_label");
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
diff --git a/paddle/fluid/operators/expand_op.cc b/paddle/fluid/operators/expand_op.cc
index ce3d9a7aacb72a..fcb2be93635eea 100644
--- a/paddle/fluid/operators/expand_op.cc
+++ b/paddle/fluid/operators/expand_op.cc
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
 #include "paddle/fluid/operators/expand_op.h"
+#include <memory>
 #include <vector>
 
 namespace paddle {
diff --git a/paddle/fluid/operators/math.h b/paddle/fluid/operators/math.h
new file mode 100644
index 00000000000000..8cc24200d37dff
--- /dev/null
+++ b/paddle/fluid/operators/math.h
@@ -0,0 +1,42 @@
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+ +#pragma once + +#include "paddle/fluid/platform/float16.h" +#include "paddle/fluid/platform/hostdevice.h" + +#include "math.h" // NOLINT + +namespace paddle { +namespace operators { + +inline HOSTDEVICE platform::float16 real_exp(platform::float16 x) { + return static_cast(::expf(static_cast(x))); +} + +inline HOSTDEVICE float real_exp(float x) { return ::expf(x); } + +inline HOSTDEVICE double real_exp(double x) { return ::exp(x); } + +inline HOSTDEVICE platform::float16 real_log(platform::float16 x) { + return static_cast(::logf(static_cast(x))); +} + +inline HOSTDEVICE float real_log(float x) { return ::logf(x); } + +inline HOSTDEVICE double real_log(double x) { return ::log(x); } + +} // namespace operators +} // namespace paddle diff --git a/paddle/fluid/operators/math/cross_entropy.cu b/paddle/fluid/operators/math/cross_entropy.cu index cb200ec8d6ea53..44cbdf2e988219 100644 --- a/paddle/fluid/operators/math/cross_entropy.cu +++ b/paddle/fluid/operators/math/cross_entropy.cu @@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ +#include "paddle/fluid/operators/math.h" #include "paddle/fluid/operators/math/cross_entropy.h" #include "paddle/fluid/platform/cuda_device_function.h" #include "paddle/fluid/platform/cuda_primitives.h" @@ -20,17 +21,6 @@ namespace paddle { namespace operators { namespace math { -namespace { - -__device__ __forceinline__ float real_log(float x) { return logf(x); } - -__device__ __forceinline__ double real_log(double x) { return log(x); } - -__device__ __forceinline__ platform::float16 real_log( - const platform::float16& val) { - return static_cast(logf(static_cast(val))); -} - template __global__ void CrossEntropyKernel(T* Y, const T* X, const int64_t* label, const int N, const int D, @@ -61,7 +51,6 @@ __global__ void SoftCrossEntropyKernel(T* Y, const T* X, const T* label, Y[blockIdx.x] = -val; } } -} // namespace template class CrossEntropyFunctor { diff --git a/paddle/fluid/operators/selu_op.h b/paddle/fluid/operators/selu_op.h index bdb506885c9327..b2fc834c42f65f 100644 --- a/paddle/fluid/operators/selu_op.h +++ b/paddle/fluid/operators/selu_op.h @@ -15,13 +15,12 @@ limitations under the License. */ #pragma once #include #include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/operators/math.h" #include "paddle/fluid/platform/for_range.h" + namespace paddle { namespace operators { -static HOSTDEVICE float real_exp(float x) { return expf(x); } -static HOSTDEVICE float real_exp(double x) { return exp(x); } - template struct SeluFunctor { SeluFunctor(const T* x_data_ptr, float alpha, float scale, T* y_data_ptr) diff --git a/paddle/fluid/operators/sequence_ops/sequence_softmax_op.cu b/paddle/fluid/operators/sequence_ops/sequence_softmax_op.cu index cc5e9821903fb7..a9dc0a4fda253d 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_softmax_op.cu +++ b/paddle/fluid/operators/sequence_ops/sequence_softmax_op.cu @@ -14,6 +14,7 @@ limitations under the License. 
*/ #include #include // NOLINT +#include "paddle/fluid/operators/math.h" #include "paddle/fluid/operators/sequence_ops/sequence_softmax_op.h" namespace paddle { @@ -21,9 +22,6 @@ namespace operators { using LoDTensor = framework::LoDTensor; -__device__ __forceinline__ float real_exp(float x) { return expf(x); } -__device__ __forceinline__ double real_exp(double x) { return exp(x); } - template using BlockReduce = cub::BlockReduce; diff --git a/paddle/fluid/operators/sigmoid_cross_entropy_with_logits_op.cu b/paddle/fluid/operators/sigmoid_cross_entropy_with_logits_op.cu index 2a4570ef5cec0b..aea69de6434a38 100644 --- a/paddle/fluid/operators/sigmoid_cross_entropy_with_logits_op.cu +++ b/paddle/fluid/operators/sigmoid_cross_entropy_with_logits_op.cu @@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #include "cub/cub.cuh" +#include "paddle/fluid/operators/math.h" #include "paddle/fluid/operators/sigmoid_cross_entropy_with_logits_op.h" #include "paddle/fluid/platform/cuda_primitives.h" #include "paddle/fluid/platform/hostdevice.h" @@ -21,11 +22,6 @@ namespace operators { using Tensor = framework::Tensor; -static HOSTDEVICE float real_exp(float x) { return expf(x); } -static HOSTDEVICE float real_exp(double x) { return exp(x); } -static HOSTDEVICE float real_log(float x) { return logf(x); } -static HOSTDEVICE float real_log(double x) { return log(x); } - static constexpr int kNumCUDAThreads = 512; static constexpr int kNumMaxinumNumBlocks = 4096; From 1e9fd40777c6055845b65f592bebd2cbab4728a9 Mon Sep 17 00:00:00 2001 From: sneaxiy Date: Wed, 13 Mar 2019 11:03:26 +0000 Subject: [PATCH 4/5] combine op files test=develop --- paddle/fluid/operators/cross_entropy2_op.cc | 137 ---------- paddle/fluid/operators/cross_entropy2_op.cu | 29 -- paddle/fluid/operators/cross_entropy2_op.h | 110 -------- paddle/fluid/operators/cross_entropy_op.cc | 256 +++++++++++++++++- paddle/fluid/operators/cross_entropy_op.cu | 10 + paddle/fluid/operators/cross_entropy_op.h | 81 ++++++ .../fluid/operators/cross_entropy_op_base.h | 169 ------------ 7 files changed, 345 insertions(+), 447 deletions(-) delete mode 100644 paddle/fluid/operators/cross_entropy2_op.cc delete mode 100644 paddle/fluid/operators/cross_entropy2_op.cu delete mode 100644 paddle/fluid/operators/cross_entropy2_op.h delete mode 100644 paddle/fluid/operators/cross_entropy_op_base.h diff --git a/paddle/fluid/operators/cross_entropy2_op.cc b/paddle/fluid/operators/cross_entropy2_op.cc deleted file mode 100644 index 181d373cfc3d28..00000000000000 --- a/paddle/fluid/operators/cross_entropy2_op.cc +++ /dev/null @@ -1,137 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "paddle/fluid/operators/cross_entropy2_op.h" -#include -#include -#include -#include "paddle/fluid/operators/cross_entropy_op_base.h" - -namespace paddle { -namespace operators { - -class CrossEntropyOp2 : public CrossEntropyOpBase { - public: - using CrossEntropyOpBase::CrossEntropyOpBase; - - void InferShape(framework::InferShapeContext* ctx) const override { - CrossEntropyOpBase::InferShape(ctx); - - PADDLE_ENFORCE(ctx->HasOutput("XShape"), - "Output(XShape) should be not null."); - - auto x_dims = ctx->GetInputDim("X"); - auto x_dims_vec = framework::vectorize(x_dims); - x_dims_vec.push_back(0); - ctx->SetOutputDim("XShape", framework::make_ddim(x_dims_vec)); - ctx->ShareLoD("X", /*->*/ "XShape"); - } - - protected: - bool IsSoftLabel(framework::InferShapeContext* ctx) const override { - return false; - } -}; - -class CrossEntropyGradientOp2 : public CrossEntropyGradientOpBase { - public: - using CrossEntropyGradientOpBase::CrossEntropyGradientOpBase; - - protected: - virtual framework::DDim GetXDim(framework::InferShapeContext* ctx) const { - auto x_shape = ctx->GetInputDim("XShape"); - return framework::DDim(x_shape.Get(), x_shape.size() - 1); - } - - virtual const char* VarNameWithXLoD() const { return "XShape"; } - - virtual bool IsSoftLabel(framework::InferShapeContext* ctx) const { - return false; - } -}; - -class CrossEntropyOpMaker2 : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput("X", - "(Tensor, default Tensor), a tensor whose last dimension " - "size is equal to the number of classes. This input is a " - "probability computed by the previous operator, which is almost " - "always the result of a softmax operator."); - AddInput( - "Label", - "(Tensor), the tensor which represents the ground truth. It has the " - "same shape with 'X' except the last dimension. One hot Tensor."); - AddOutput("Y", - "(Tensor, default Tensor), a tensor whose shape is same " - "with 'X' except that the last dimension size is 1. It " - "represents the cross entropy loss."); - AddOutput("XShape", "Temporaily variable to save shape and LoD of X."); - AddAttr("ignore_index", - "(int, default -100), Specifies a target value that is" - "ignored and does not contribute to the input gradient." - "Only valid if soft_label is set to False") - .SetDefault(-100); - AddComment(R"DOC( -Hard-label CrossEntropy Operator. - -The input 'X' and 'Label' will first be logically flattened to 2-D matrixs. -The matrix's second dimension(row length) is as same as the original last -dimension, and the first dimension(column length) is the product of all other -original dimensions. Then the softmax computation will take palce on each raw -of flattened matrixs. - -Only support hard label. - -Both the input X and Label can carry the LoD (Level of Details) information, -or not. But the output only shares the LoD information with input X. 
- -)DOC"); - } -}; - -class CrossEntropyGradOpMaker2 : public framework::SingleGradOpDescMaker { - public: - using framework::SingleGradOpDescMaker::SingleGradOpDescMaker; - - protected: - std::unique_ptr Apply() const override { - std::unique_ptr op(new framework::OpDesc()); - op->SetType("cross_entropy_grad2"); - op->SetInput("Label", Input("Label")); - op->SetInput("Y", Output("Y")); - op->SetInput("XShape", Output("XShape")); - op->SetInput(framework::GradVarName("Y"), OutputGrad("Y")); - op->SetOutput(framework::GradVarName("X"), InputGrad("X")); - op->SetAttrMap(Attrs()); - return op; - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -using CPUCtx = paddle::platform::CPUDeviceContext; - -REGISTER_OPERATOR(cross_entropy2, ops::CrossEntropyOp2, - ops::CrossEntropyOpMaker2, ops::CrossEntropyOpInferVarType, - ops::CrossEntropyGradOpMaker2); -REGISTER_OPERATOR(cross_entropy_grad2, ops::CrossEntropyGradientOp2); -REGISTER_OP_CPU_KERNEL(cross_entropy2, - ops::CrossEntropyOpKernel2, - ops::CrossEntropyOpKernel2); -REGISTER_OP_CPU_KERNEL(cross_entropy_grad2, - ops::CrossEntropyGradientOpKernel2, - ops::CrossEntropyGradientOpKernel2); diff --git a/paddle/fluid/operators/cross_entropy2_op.cu b/paddle/fluid/operators/cross_entropy2_op.cu deleted file mode 100644 index 1868c1b866016d..00000000000000 --- a/paddle/fluid/operators/cross_entropy2_op.cu +++ /dev/null @@ -1,29 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/fluid/operators/cross_entropy2_op.h" -#include "paddle/fluid/platform/float16.h" - -namespace plat = paddle::platform; -namespace ops = paddle::operators; -using CUDACtx = paddle::platform::CUDADeviceContext; -REGISTER_OP_CUDA_KERNEL(cross_entropy2, - ops::CrossEntropyOpKernel2, - ops::CrossEntropyOpKernel2, - ops::CrossEntropyOpKernel2); - -REGISTER_OP_CUDA_KERNEL( - cross_entropy_grad2, ops::CrossEntropyGradientOpKernel2, - ops::CrossEntropyGradientOpKernel2, - ops::CrossEntropyGradientOpKernel2); diff --git a/paddle/fluid/operators/cross_entropy2_op.h b/paddle/fluid/operators/cross_entropy2_op.h deleted file mode 100644 index 3e9dc7ebce263d..00000000000000 --- a/paddle/fluid/operators/cross_entropy2_op.h +++ /dev/null @@ -1,110 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-
-#pragma once
-
-#include
-#include "paddle/fluid/framework/eigen.h"
-#include "paddle/fluid/framework/op_registry.h"
-#include "paddle/fluid/operators/math.h"
-#include "paddle/fluid/operators/math/cross_entropy.h"
-#include "paddle/fluid/operators/math/math_function.h"
-#include "paddle/fluid/platform/for_range.h"
-
-namespace paddle {
-namespace operators {
-
-using Tensor = framework::Tensor;
-
-template <typename T>
-struct CrossEntropyBackwardFunctor {
-  CrossEntropyBackwardFunctor(T *dx, const T *y, const T *dy,
-                              const int64_t *label, int64_t ignore_index,
-                              int64_t feature_size)
-      : dx_(dx),
-        y_(y),
-        dy_(dy),
-        label_(label),
-        ignore_index_(ignore_index),
-        feature_size_(feature_size) {}
-
-  HOSTDEVICE void operator()(int64_t idx) const {
-    auto row_idx = idx / feature_size_;
-    auto col_idx = idx % feature_size_;
-    auto label = label_[row_idx];
-    if (label == col_idx && label != ignore_index_) {
-      dx_[idx] = -dy_[row_idx] * real_exp(y_[row_idx]);
-    } else {
-      dx_[idx] = 0;
-    }
-  }
-
-  T *dx_;
-  const T *y_;
-  const T *dy_;
-  const int64_t *label_;
-  int64_t ignore_index_;
-  int64_t feature_size_;
-};
-
-template <typename DeviceContext, typename T>
-class CrossEntropyOpKernel2 : public framework::OpKernel<T> {
- public:
-  void Compute(const framework::ExecutionContext &ctx) const override {
-    auto *x_original = ctx.Input<Tensor>("X");
-    int rank = x_original->dims().size();
-
-    auto x = framework::ReshapeToMatrix(*x_original, rank - 1);
-    auto label =
-        framework::ReshapeToMatrix(*ctx.Input<Tensor>("Label"), rank - 1);
-    auto *y = ctx.Output<Tensor>("Y");
-    y->mutable_data<T>(ctx.GetPlace());
-
-    auto ignore_index = ctx.Attr<int>("ignore_index");
-
-    math::CrossEntropyFunctor<DeviceContext, T>()(
-        ctx.template device_context<DeviceContext>(), y, &x, &label, false,
-        ignore_index);
-  }
-};
-
-template <typename DeviceContext, typename T>
-class CrossEntropyGradientOpKernel2 : public framework::OpKernel<T> {
- public:
-  void Compute(const framework::ExecutionContext &ctx) const override {
-    auto *dx = ctx.Output<Tensor>(framework::GradVarName("X"));
-    auto *y = ctx.Input<Tensor>("Y");
-    auto *dy = ctx.Input<Tensor>(framework::GradVarName("Y"));
-    auto *label = ctx.Input<Tensor>("Label");
-
-    auto *p_dx = dx->mutable_data<T>(ctx.GetPlace());
-    auto *p_y = y->data<T>();
-    auto *p_dy = dy->data<T>();
-    auto *p_label = label->data<int64_t>();
-
-    int64_t ignore_index = ctx.Attr<int>("ignore_index");
-    int rank = dx->dims().size();
-    int64_t feature_size = dx->dims()[rank - 1];
-    int64_t batch_size = framework::product(dx->dims()) / feature_size;
-
-    platform::ForRange<DeviceContext> for_range(
-        ctx.template device_context<DeviceContext>(),
-        batch_size * feature_size);
-    for_range(CrossEntropyBackwardFunctor<T>(p_dx, p_y, p_dy, p_label,
-                                             ignore_index, feature_size));
-  }
-};
-
-}  // namespace operators
-}  // namespace paddle
diff --git a/paddle/fluid/operators/cross_entropy_op.cc b/paddle/fluid/operators/cross_entropy_op.cc
index 1707f7078cad82..dd1b48cecfdc5b 100644
--- a/paddle/fluid/operators/cross_entropy_op.cc
+++ b/paddle/fluid/operators/cross_entropy_op.cc
@@ -14,11 +14,154 @@ limitations under the License.
 */
 #include "paddle/fluid/operators/cross_entropy_op.h"
 #include <string>
-#include "paddle/fluid/operators/cross_entropy_op_base.h"
 
 namespace paddle {
 namespace operators {
 
+class CrossEntropyOpBase : public framework::OperatorWithKernel {
+ public:
+  using framework::OperatorWithKernel::OperatorWithKernel;
+
+  void InferShape(framework::InferShapeContext* ctx) const override {
+    PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should be not null.");
+    PADDLE_ENFORCE(ctx->HasInput("Label"), "Input(Label) should be not null.");
+
+    PADDLE_ENFORCE(ctx->HasOutput("Y"), "Output(Y) should be not null.");
+
+    auto x_dims = ctx->GetInputDim("X");
+    auto label_dims = ctx->GetInputDim("Label");
+    int rank = x_dims.size();
+    PADDLE_ENFORCE_EQ(rank, label_dims.size(),
+                      "Input(X) and Input(Label) shall have the same rank.");
+    bool check = true;
+    if ((!ctx->IsRuntime()) && (framework::product(x_dims) <= 0 ||
+                                framework::product(label_dims) <= 0)) {
+      check = false;
+    }
+    if (check) {
+      PADDLE_ENFORCE_EQ(framework::slice_ddim(x_dims, 0, rank - 1),
+                        framework::slice_ddim(label_dims, 0, rank - 1),
+                        "Input(X) and Input(Label) shall have the same shape "
+                        "except the last dimension.");
+    }
+
+    if (IsSoftLabel(ctx)) {
+      if (check) {
+        PADDLE_ENFORCE_EQ(x_dims[rank - 1], label_dims[rank - 1],
+                          "If Attr(soft_label) == true, the last dimension of "
+                          "Input(X) and Input(Label) should be equal.");
+      }
+    } else {
+      PADDLE_ENFORCE_EQ(label_dims[rank - 1], 1UL,
+                        "If Attr(soft_label) == false, the last dimension of "
+                        "Input(Label) should be 1.");
+    }
+
+    auto y_dims = x_dims;
+    y_dims[rank - 1] = 1;
+    ctx->SetOutputDim("Y", y_dims);
+    ctx->ShareLoD("X", /*->*/ "Y");
+  }
+
+ protected:
+  // Explicitly set that the data type of the computation kernel of
+  // cross_entropy is determined by its input "X".
+  framework::OpKernelType GetExpectedKernelType(
+      const framework::ExecutionContext& ctx) const override {
+    return framework::OpKernelType(ctx.Input<Tensor>("X")->type(),
+                                   ctx.device_context());
+  }
+
+  virtual bool IsSoftLabel(framework::InferShapeContext* ctx) const {
+    return ctx->Attrs().Get<bool>("soft_label");
+  }
+};
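
For reference, the shape contract enforced above: with hard labels the last dimension of Label must be 1 (one class index per row), with soft labels Label must match X exactly, and Y always keeps X's leading dimensions with a trailing 1. A standalone sketch of the rule, using plain vectors rather than framework::DDim (the helper names here are illustrative, not Paddle APIs):

// Illustration of CrossEntropyOpBase::InferShape's shape rules.
#include <cassert>
#include <cstdint>
#include <vector>

// Y keeps all leading dims of X; the class dimension collapses to 1.
std::vector<int64_t> InferYDims(std::vector<int64_t> x_dims) {
  x_dims.back() = 1;
  return x_dims;
}

// Hard label: one class index per row; soft label: same shape as X.
std::vector<int64_t> RequiredLabelDims(std::vector<int64_t> x_dims,
                                       bool soft_label) {
  if (!soft_label) x_dims.back() = 1;
  return x_dims;
}

int main() {
  std::vector<int64_t> x{32, 10, 100};  // e.g. batch 32, seq len 10, 100 classes
  assert((RequiredLabelDims(x, false) == std::vector<int64_t>{32, 10, 1}));
  assert((RequiredLabelDims(x, true) == x));
  assert((InferYDims(x) == std::vector<int64_t>{32, 10, 1}));
  return 0;
}

At graph-building time (when ctx->IsRuntime() is false), dims whose product is non-positive stand for still-unknown sizes, which is why the equality checks above are skipped then and re-run at runtime.
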
+
+class CrossEntropyGradientOpBase : public framework::OperatorWithKernel {
+ public:
+  using framework::OperatorWithKernel::OperatorWithKernel;
+
+  void InferShape(framework::InferShapeContext* ctx) const {
+    PADDLE_ENFORCE(ctx->HasInput("Label"), "Input(Label) should be not null.");
+    PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Y")),
+                   "Input(Y@GRAD) should be not null.");
+    PADDLE_ENFORCE(ctx->HasOutput(framework::GradVarName("X")),
+                   "Output(X@GRAD) should be not null.");
+
+    auto x_dims = GetXDim(ctx);
+    auto label_dims = ctx->GetInputDim("Label");
+    auto dy_dims = ctx->GetInputDim(framework::GradVarName("Y"));
+    int rank = x_dims.size();
+    PADDLE_ENFORCE_EQ(dy_dims.size(), rank,
+                      "Input(Y@Grad) and Input(X) should have the same rank.");
+    PADDLE_ENFORCE_EQ(label_dims.size(), rank,
+                      "Input(Label) and Input(X) should have the same rank.");
+
+    bool check = true;
+    if ((!ctx->IsRuntime()) && (framework::product(x_dims) <= 0 ||
+                                framework::product(label_dims) <= 0)) {
+      check = false;
+    }
+
+    if (check) {
+      PADDLE_ENFORCE_EQ(framework::slice_ddim(x_dims, 0, rank - 1),
+                        framework::slice_ddim(label_dims, 0, rank - 1),
+                        "The Input(X) and Input(Label) should have the same "
+                        "shape except the last dimension.");
+      PADDLE_ENFORCE_EQ(framework::slice_ddim(x_dims, 0, rank - 1),
+                        framework::slice_ddim(dy_dims, 0, rank - 1),
+                        "The Input(X) and Input(Y@Grad) should have the same "
+                        "shape except the last dimension.");
+    }
+    if (IsSoftLabel(ctx)) {
+      if (check) {
+        PADDLE_ENFORCE_EQ(
+            x_dims[rank - 1], label_dims[rank - 1],
+            "When Attr(soft_label) == true, the last dimension of "
+            "Input(X) and Input(Label) should be equal.");
+      }
+    } else {
+      PADDLE_ENFORCE_EQ(label_dims[rank - 1], 1,
+                        "When Attr(soft_label) == false, the last dimension of "
+                        "Input(Label) should be 1.");
+    }
+    PADDLE_ENFORCE_EQ(dy_dims[rank - 1], 1,
+                      "The last dimension of Input(Y@Grad) should be 1.");
+    ctx->SetOutputDim(framework::GradVarName("X"), x_dims);
+    ctx->ShareLoD(VarNameWithXLoD(), framework::GradVarName("X"));
+  }
+
+ protected:
+  // Explicitly set that the data type of the computation kernel of
+  // cross_entropy is determined by its input "X".
+  framework::OpKernelType GetExpectedKernelType(
+      const framework::ExecutionContext& ctx) const override {
+    return framework::OpKernelType(
+        ctx.Input<Tensor>(framework::GradVarName("Y"))->type(),
+        ctx.device_context());
+  }
+
+  virtual framework::DDim GetXDim(framework::InferShapeContext* ctx) const {
+    return ctx->GetInputDim("X");
+  }
+
+  virtual const char* VarNameWithXLoD() const { return "X"; }
+
+  virtual bool IsSoftLabel(framework::InferShapeContext* ctx) const {
+    return ctx->Attrs().Get<bool>("soft_label");
+  }
+};
+
+class CrossEntropyOpInferVarType
+    : public framework::PassInDtypeAndVarTypeToOutput {
+ protected:
+  std::unordered_map<std::string, std::string> GetInputOutputWithSameType()
+      const override {
+    return std::unordered_map<std::string, std::string>{{"X", /*->*/ "Y"}};
+  }
+};
+
 class CrossEntropyOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
   void Make() override {
@@ -87,12 +230,110 @@ class CrossEntropyGradientOp : public CrossEntropyGradientOpBase {
  public:
   using CrossEntropyGradientOpBase::CrossEntropyGradientOpBase;
 
-  void InferShape(framework::InferShapeContext *ctx) const override {
+  void InferShape(framework::InferShapeContext* ctx) const override {
     PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should be not null.");
     CrossEntropyGradientOpBase::InferShape(ctx);
   }
 };
 
+class CrossEntropyOp2 : public CrossEntropyOpBase {
+ public:
+  using CrossEntropyOpBase::CrossEntropyOpBase;
+
+  void InferShape(framework::InferShapeContext* ctx) const override {
+    CrossEntropyOpBase::InferShape(ctx);
+
+    PADDLE_ENFORCE(ctx->HasOutput("XShape"),
+                   "Output(XShape) should be not null.");
+
+    auto x_dims = ctx->GetInputDim("X");
+    auto x_dims_vec = framework::vectorize(x_dims);
+    x_dims_vec.push_back(0);
+    ctx->SetOutputDim("XShape", framework::make_ddim(x_dims_vec));
+    ctx->ShareLoD("X", /*->*/ "XShape");
+  }
+
+ protected:
+  bool IsSoftLabel(framework::InferShapeContext* ctx) const override {
+    return false;
+  }
+};
+
+class CrossEntropyGradientOp2 : public CrossEntropyGradientOpBase {
+ public:
+  using CrossEntropyGradientOpBase::CrossEntropyGradientOpBase;
+
+ protected:
+  framework::DDim GetXDim(framework::InferShapeContext* ctx) const override {
+    auto x_shape = ctx->GetInputDim("XShape");
+    return framework::DDim(x_shape.Get(), x_shape.size() - 1);
+  }
+
+  const char* VarNameWithXLoD() const override { return "XShape"; }
+
+  bool IsSoftLabel(framework::InferShapeContext* ctx) const override {
+    return false;
+  }
+};
+
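The XShape output above is the memory trick behind this series: appending a 0 to X's dims yields a tensor with zero elements that still carries X's shape and LoD, so the backward op can depend on XShape instead of keeping the full probability tensor X alive. GetXDim then strips the trailing 0 to recover the original dims. A standalone sketch of that round trip, with plain vectors standing in for framework::DDim:

// Illustration of the XShape encode/decode used by CrossEntropyOp2 and
// CrossEntropyGradientOp2 (not Paddle code).
#include <cassert>
#include <cstdint>
#include <vector>

// Encode: dims of X -> dims of XShape. The trailing 0 makes the element
// count 0, so no real buffer is allocated, yet the metadata survives.
std::vector<int64_t> EncodeXShape(const std::vector<int64_t>& x_dims) {
  std::vector<int64_t> shape(x_dims);
  shape.push_back(0);
  return shape;
}

// Decode: what GetXDim does via framework::DDim(x_shape.Get(), size - 1).
std::vector<int64_t> DecodeXShape(const std::vector<int64_t>& xshape_dims) {
  return std::vector<int64_t>(xshape_dims.begin(), xshape_dims.end() - 1);
}

int main() {
  std::vector<int64_t> x_dims{64, 1000};
  auto xshape = EncodeXShape(x_dims);      // {64, 1000, 0}: zero elements
  assert(DecodeXShape(xshape) == x_dims);  // original dims recovered
  return 0;
}
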
+class CrossEntropyOpMaker2 : public framework::OpProtoAndCheckerMaker {
+ public:
+  void Make() override {
+    AddInput("X",
+             "(Tensor, default Tensor<float>), a tensor whose last dimension "
+             "size is equal to the number of classes. This input is a "
+             "probability computed by the previous operator, which is almost "
+             "always the result of a softmax operator.");
+    AddInput(
+        "Label",
+        "(Tensor), the tensor which represents the ground truth. It has the "
+        "same shape as 'X' except the last dimension, which is 1 and stores "
+        "the hard label (class index) of each sample.");
+    AddOutput("Y",
+              "(Tensor, default Tensor<float>), a tensor whose shape is the "
+              "same as 'X' except that the last dimension size is 1. It "
+              "represents the cross entropy loss.");
+    AddOutput("XShape", "Temporary variable to save the shape and LoD of X.");
+    AddAttr<int>("ignore_index",
+                 "(int, default -100), Specifies a target value that is "
+                 "ignored and does not contribute to the input gradient. "
+                 "Only valid if soft_label is set to False.")
+        .SetDefault(-100);
+    AddComment(R"DOC(
+Hard-label CrossEntropy Operator.
+
+The input 'X' and 'Label' will first be logically flattened to 2-D matrices.
+The matrices' second dimension (row length) equals the original last
+dimension, and the first dimension (column length) is the product of all
+other original dimensions. The cross entropy computation then takes place on
+each row of the flattened matrices.
+
+Only hard labels are supported.
+
+Both the input X and Label can carry the LoD (Level of Details) information,
+or not. But the output only shares the LoD information with input X.
+
+)DOC");
+  }
+};
+
+class CrossEntropyGradOpDescMaker2 : public framework::SingleGradOpDescMaker {
+ public:
+  using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
+
+ protected:
+  std::unique_ptr<framework::OpDesc> Apply() const override {
+    std::unique_ptr<framework::OpDesc> op(new framework::OpDesc());
+    op->SetType("cross_entropy_grad2");
+    op->SetInput("Label", Input("Label"));
+    op->SetInput("Y", Output("Y"));
+    op->SetInput("XShape", Output("XShape"));
+    op->SetInput(framework::GradVarName("Y"), OutputGrad("Y"));
+    op->SetOutput(framework::GradVarName("X"), InputGrad("X"));
+    op->SetAttrMap(Attrs());
+    return op;
+  }
+};
+
 }  // namespace operators
 }  // namespace paddle
 
@@ -108,3 +349,14 @@ REGISTER_OP_CPU_KERNEL(cross_entropy, ops::CrossEntropyOpKernel<CPUCtx, float>,
 REGISTER_OP_CPU_KERNEL(cross_entropy_grad,
                        ops::CrossEntropyGradientOpKernel<CPUCtx, float>,
                        ops::CrossEntropyGradientOpKernel<CPUCtx, double>);
+
+REGISTER_OPERATOR(cross_entropy2, ops::CrossEntropyOp2,
+                  ops::CrossEntropyOpMaker2, ops::CrossEntropyOpInferVarType,
+                  ops::CrossEntropyGradOpDescMaker2);
+REGISTER_OPERATOR(cross_entropy_grad2, ops::CrossEntropyGradientOp2);
+REGISTER_OP_CPU_KERNEL(cross_entropy2,
+                       ops::CrossEntropyOpKernel2<CPUCtx, float>,
+                       ops::CrossEntropyOpKernel2<CPUCtx, double>);
+REGISTER_OP_CPU_KERNEL(cross_entropy_grad2,
+                       ops::CrossEntropyGradientOpKernel2<CPUCtx, float>,
+                       ops::CrossEntropyGradientOpKernel2<CPUCtx, double>);
diff --git a/paddle/fluid/operators/cross_entropy_op.cu b/paddle/fluid/operators/cross_entropy_op.cu
index fcd34383a85f69..243e7f52c1e3c4 100644
--- a/paddle/fluid/operators/cross_entropy_op.cu
+++ b/paddle/fluid/operators/cross_entropy_op.cu
@@ -27,3 +27,13 @@ REGISTER_OP_CUDA_KERNEL(
     cross_entropy_grad, ops::CrossEntropyGradientOpKernel<CUDACtx, float>,
     ops::CrossEntropyGradientOpKernel<CUDACtx, double>,
     ops::CrossEntropyGradientOpKernel<CUDACtx, plat::float16>);
+
+REGISTER_OP_CUDA_KERNEL(cross_entropy2,
+                        ops::CrossEntropyOpKernel2<CUDACtx, float>,
+                        ops::CrossEntropyOpKernel2<CUDACtx, double>,
+                        ops::CrossEntropyOpKernel2<CUDACtx, plat::float16>);
+
+REGISTER_OP_CUDA_KERNEL(
+    cross_entropy_grad2, ops::CrossEntropyGradientOpKernel2<CUDACtx, float>,
+    ops::CrossEntropyGradientOpKernel2<CUDACtx, double>,
+    ops::CrossEntropyGradientOpKernel2<CUDACtx, plat::float16>);
diff --git a/paddle/fluid/operators/cross_entropy_op.h b/paddle/fluid/operators/cross_entropy_op.h
index f123e11542d85c..05609e4bc20b1c 100644
--- a/paddle/fluid/operators/cross_entropy_op.h
+++ b/paddle/fluid/operators/cross_entropy_op.h
@@ -15,6 +15,7 @@ limitations under the License.
 */
 #pragma once
 #include "paddle/fluid/framework/eigen.h"
 #include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/operators/math.h"
 #include "paddle/fluid/operators/math/cross_entropy.h"
 #include "paddle/fluid/operators/math/math_function.h"
 #include "paddle/fluid/platform/for_range.h"
@@ -137,5 +138,85 @@ class CrossEntropyGradientOpKernel : public framework::OpKernel<T> {
   }
 };
 
+template <typename T>
+struct HardLabelCrossEntropyBackwardFunctor {
+  HardLabelCrossEntropyBackwardFunctor(T* dx, const T* y, const T* dy,
+                                       const int64_t* label,
+                                       int64_t ignore_index,
+                                       int64_t feature_size)
+      : dx_(dx),
+        y_(y),
+        dy_(dy),
+        label_(label),
+        ignore_index_(ignore_index),
+        feature_size_(feature_size) {}
+
+  HOSTDEVICE void operator()(int64_t idx) const {
+    auto row_idx = idx / feature_size_;
+    auto col_idx = idx % feature_size_;
+    auto label = label_[row_idx];
+    if (label == col_idx && label != ignore_index_) {
+      dx_[idx] = -dy_[row_idx] * real_exp(y_[row_idx]);
+    } else {
+      dx_[idx] = 0;
+    }
+  }
+
+  T* dx_;
+  const T* y_;
+  const T* dy_;
+  const int64_t* label_;
+  int64_t ignore_index_;
+  int64_t feature_size_;
+};
+
+template <typename DeviceContext, typename T>
+class CrossEntropyOpKernel2 : public framework::OpKernel<T> {
+ public:
+  void Compute(const framework::ExecutionContext& ctx) const override {
+    auto* x_original = ctx.Input<Tensor>("X");
+    int rank = x_original->dims().size();
+
+    auto x = framework::ReshapeToMatrix(*x_original, rank - 1);
+    auto label =
+        framework::ReshapeToMatrix(*ctx.Input<Tensor>("Label"), rank - 1);
+    auto* y = ctx.Output<Tensor>("Y");
+    y->mutable_data<T>(ctx.GetPlace());
+
+    auto ignore_index = ctx.Attr<int>("ignore_index");
+
+    math::CrossEntropyFunctor<DeviceContext, T>()(
+        ctx.template device_context<DeviceContext>(), y, &x, &label, false,
+        ignore_index);
+  }
+};
+
+template <typename DeviceContext, typename T>
+class CrossEntropyGradientOpKernel2 : public framework::OpKernel<T> {
+ public:
+  void Compute(const framework::ExecutionContext& ctx) const override {
+    auto* dx = ctx.Output<Tensor>(framework::GradVarName("X"));
+    auto* y = ctx.Input<Tensor>("Y");
+    auto* dy = ctx.Input<Tensor>(framework::GradVarName("Y"));
+    auto* label = ctx.Input<Tensor>("Label");
+
+    auto* p_dx = dx->mutable_data<T>(ctx.GetPlace());
+    auto* p_y = y->data<T>();
+    auto* p_dy = dy->data<T>();
+    auto* p_label = label->data<int64_t>();
+
+    int64_t ignore_index = ctx.Attr<int>("ignore_index");
+    int rank = dx->dims().size();
+    int64_t feature_size = dx->dims()[rank - 1];
+    int64_t batch_size = framework::product(dx->dims()) / feature_size;
+
+    platform::ForRange<DeviceContext> for_range(
+        ctx.template device_context<DeviceContext>(),
+        batch_size * feature_size);
+    for_range(HardLabelCrossEntropyBackwardFunctor<T>(
+        p_dx, p_y, p_dy, p_label, ignore_index, feature_size));
+  }
+};
+
 }  // namespace operators
 }  // namespace paddle
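
The functor added above never reads X: since the forward pass stores y = -log(x[label]), it holds that exp(y) = 1/x[label], so the hard-label gradient -dy/x[label] can be rewritten as -dy * exp(y). That identity is what lets cross_entropy_grad2 take only Label, Y, Y@GRAD and XShape as inputs. A self-contained numeric check of the identity:

// Verifies that -dy * exp(y) equals the direct gradient -dy / p for
// y = -log(p), the form HardLabelCrossEntropyBackwardFunctor relies on.
#include <cassert>
#include <cmath>

int main() {
  const double p = 0.25;          // predicted probability of the true class
  const double dy = 1.0;          // upstream gradient for this row
  const double y = -std::log(p);  // forward: hard-label cross entropy loss

  const double dx_direct = -dy / p;             // d(-log p)/dp, chain rule
  const double dx_functor = -dy * std::exp(y);  // form used by the functor

  assert(std::fabs(dx_direct - dx_functor) < 1e-12);
  return 0;
}
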
diff --git a/paddle/fluid/operators/cross_entropy_op_base.h b/paddle/fluid/operators/cross_entropy_op_base.h
deleted file mode 100644
index c3e5254c37e029..00000000000000
--- a/paddle/fluid/operators/cross_entropy_op_base.h
+++ /dev/null
@@ -1,169 +0,0 @@
-// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#pragma once
-
-#include <string>
-#include <unordered_map>
-#include "paddle/fluid/framework/op_registry.h"
-
-namespace paddle {
-namespace operators {
-
-class CrossEntropyOpBase : public framework::OperatorWithKernel {
- public:
-  using framework::OperatorWithKernel::OperatorWithKernel;
-
-  void InferShape(framework::InferShapeContext* ctx) const override {
-    PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should be not null.");
-    PADDLE_ENFORCE(ctx->HasInput("Label"), "Input(Label) should be not null.");
-
-    PADDLE_ENFORCE(ctx->HasOutput("Y"), "Output(Y) should be not null.");
-
-    auto x_dims = ctx->GetInputDim("X");
-    auto label_dims = ctx->GetInputDim("Label");
-    int rank = x_dims.size();
-    PADDLE_ENFORCE_EQ(rank, label_dims.size(),
-                      "Input(X) and Input(Label) shall have the same rank.");
-    bool check = true;
-    if ((!ctx->IsRuntime()) && (framework::product(x_dims) <= 0 ||
-                                framework::product(label_dims) <= 0)) {
-      check = false;
-    }
-    if (check) {
-      PADDLE_ENFORCE_EQ(framework::slice_ddim(x_dims, 0, rank - 1),
-                        framework::slice_ddim(label_dims, 0, rank - 1),
-                        "Input(X) and Input(Label) shall have the same shape "
-                        "except the last dimension.");
-    }
-
-    if (IsSoftLabel(ctx)) {
-      if (check) {
-        PADDLE_ENFORCE_EQ(x_dims[rank - 1], label_dims[rank - 1],
-                          "If Attr(soft_label) == true, the last dimension of "
-                          "Input(X) and Input(Label) should be equal.");
-      }
-    } else {
-      PADDLE_ENFORCE_EQ(label_dims[rank - 1], 1UL,
-                        "If Attr(softLabel) == false, the last dimension of "
-                        "Input(Label) should be 1.");
-    }
-
-    auto y_dims = x_dims;
-    y_dims[rank - 1] = 1;
-    ctx->SetOutputDim("Y", y_dims);
-    ctx->ShareLoD("X", /*->*/ "Y");
-  }
-
- protected:
-  // Explicitly set that the data type of computation kernel of cross_entropy
-  // is determined by its input "X".
-  framework::OpKernelType GetExpectedKernelType(
-      const framework::ExecutionContext& ctx) const override {
-    return framework::OpKernelType(ctx.Input<Tensor>("X")->type(),
-                                   ctx.device_context());
-  }
-
-  virtual bool IsSoftLabel(framework::InferShapeContext* ctx) const {
-    return ctx->Attrs().Get<bool>("soft_label");
-  }
-};
-
-class CrossEntropyOpInferVarType
-    : public framework::PassInDtypeAndVarTypeToOutput {
- protected:
-  std::unordered_map<std::string, std::string> GetInputOutputWithSameType()
-      const override {
-    return std::unordered_map<std::string, std::string>{{"X", /*->*/ "Y"}};
-  }
-};
-
-class CrossEntropyGradientOpBase : public framework::OperatorWithKernel {
- public:
-  using framework::OperatorWithKernel::OperatorWithKernel;
-
-  void InferShape(framework::InferShapeContext* ctx) const {
-    PADDLE_ENFORCE(ctx->HasInput("Label"), "Input(Label) should be not null.");
-    PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Y")),
-                   "Input(Y@GRAD) shoudl be not null.");
-    PADDLE_ENFORCE(ctx->HasOutput(framework::GradVarName("X")),
-                   "Output(X@GRAD) should be not null.");
-
-    auto x_dims = GetXDim(ctx);
-    auto label_dims = ctx->GetInputDim("Label");
-    auto dy_dims = ctx->GetInputDim(framework::GradVarName("Y"));
-    int rank = x_dims.size();
-    PADDLE_ENFORCE_EQ(dy_dims.size(), rank,
-                      "Input(Y@Grad) and Input(X) should have the same rank.");
-    PADDLE_ENFORCE_EQ(label_dims.size(), rank,
-                      "Input(Label) and Input(X) should have the same rank.");
-
-    bool check = true;
-    if ((!ctx->IsRuntime()) && (framework::product(x_dims) <= 0 ||
-                                framework::product(label_dims) <= 0)) {
-      check = false;
-    }
-
-    if (check) {
-      PADDLE_ENFORCE_EQ(framework::slice_ddim(x_dims, 0, rank - 1),
-                        framework::slice_ddim(label_dims, 0, rank - 1),
-                        "The Input(X) and Input(Label) should have the same "
dimension."); - PADDLE_ENFORCE_EQ(framework::slice_ddim(x_dims, 0, rank - 1), - framework::slice_ddim(dy_dims, 0, rank - 1), - "The Input(X) and Input(Y@Grad) should have the same " - "shape except the last dimension."); - } - if (IsSoftLabel(ctx)) { - if (check) { - PADDLE_ENFORCE_EQ( - x_dims[rank - 1], label_dims[rank - 1], - "When Attr(soft_label) == true, the last dimension of " - "Input(X) and Input(Label) should be equal."); - } - } else { - PADDLE_ENFORCE_EQ(label_dims[rank - 1], 1, - "When Attr(soft_label) == false, the last dimension of " - "Input(Label) should be 1."); - } - ctx->SetOutputDim(framework::GradVarName("X"), x_dims); - PADDLE_ENFORCE_EQ(dy_dims[rank - 1], 1, - "The last dimension of Input(Y@Grad) should be 1."); - ctx->SetOutputDim(framework::GradVarName("X"), x_dims); - ctx->ShareLoD(VarNameWithXLoD(), framework::GradVarName("X")); - } - - protected: - // Explicitly set that the data type of computation kernel of cross_entropy - // is determined by its input "X". - framework::OpKernelType GetExpectedKernelType( - const framework::ExecutionContext& ctx) const override { - return framework::OpKernelType( - ctx.Input(framework::GradVarName("Y"))->type(), - ctx.device_context()); - } - - virtual framework::DDim GetXDim(framework::InferShapeContext* ctx) const { - return ctx->GetInputDim("X"); - } - - virtual const char* VarNameWithXLoD() const { return "X"; } - - virtual bool IsSoftLabel(framework::InferShapeContext* ctx) const { - return ctx->Attrs().Get("soft_label"); - } -}; - -} // namespace operators -} // namespace paddle From 487624e15def7ee99ee8b8def64dff905061b118 Mon Sep 17 00:00:00 2001 From: sneaxiy Date: Wed, 13 Mar 2019 13:14:58 +0000 Subject: [PATCH 5/5] fix travis-ci test=develop --- paddle/fluid/operators/cross_entropy_op.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/paddle/fluid/operators/cross_entropy_op.cc b/paddle/fluid/operators/cross_entropy_op.cc index dd1b48cecfdc5b..7e744e68e9737f 100644 --- a/paddle/fluid/operators/cross_entropy_op.cc +++ b/paddle/fluid/operators/cross_entropy_op.cc @@ -13,7 +13,9 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/cross_entropy_op.h" +#include #include +#include namespace paddle { namespace operators {