Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
177 changes: 158 additions & 19 deletions paddle/fluid/operators/cross_entropy_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -13,18 +13,21 @@ See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/operators/cross_entropy_op.h"
#include <memory>
#include <string>
#include <unordered_map>

namespace paddle {
namespace operators {

class CrossEntropyOp : public framework::OperatorWithKernel {
class CrossEntropyOpBase : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;

void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should be not null.");
PADDLE_ENFORCE(ctx->HasInput("Label"), "Input(Label) should be not null.");

PADDLE_ENFORCE(ctx->HasOutput("Y"), "Output(Y) should be not null.");

auto x_dims = ctx->GetInputDim("X");
Expand All @@ -43,7 +46,8 @@ class CrossEntropyOp : public framework::OperatorWithKernel {
"Input(X) and Input(Label) shall have the same shape "
"except the last dimension.");
}
if (ctx->Attrs().Get<bool>("soft_label")) {

if (IsSoftLabel(ctx)) {
if (check) {
PADDLE_ENFORCE_EQ(x_dims[rank - 1], label_dims[rank - 1],
"If Attr(soft_label) == true, the last dimension of "
Expand All @@ -69,21 +73,24 @@ class CrossEntropyOp : public framework::OperatorWithKernel {
return framework::OpKernelType(ctx.Input<Tensor>("X")->type(),
ctx.device_context());
}

virtual bool IsSoftLabel(framework::InferShapeContext* ctx) const {
return ctx->Attrs().Get<bool>("soft_label");
}
};

class CrossEntropyGradientOp : public framework::OperatorWithKernel {
class CrossEntropyGradientOpBase : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;

void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should be not null.");
void InferShape(framework::InferShapeContext* ctx) const {
PADDLE_ENFORCE(ctx->HasInput("Label"), "Input(Label) should be not null.");
PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Y")),
"Input(Y@GRAD) shoudl be not null.");
PADDLE_ENFORCE(ctx->HasOutput(framework::GradVarName("X")),
"Output(X@GRAD) should be not null.");

auto x_dims = ctx->GetInputDim("X");
auto x_dims = GetXDim(ctx);
auto label_dims = ctx->GetInputDim("Label");
auto dy_dims = ctx->GetInputDim(framework::GradVarName("Y"));
int rank = x_dims.size();
Expand All @@ -108,9 +115,7 @@ class CrossEntropyGradientOp : public framework::OperatorWithKernel {
"The Input(X) and Input(Y@Grad) should have the same "
"shape except the last dimension.");
}
PADDLE_ENFORCE_EQ(dy_dims[rank - 1], 1,
"The last dimension of Input(Y@Grad) should be 1.");
if (ctx->Attrs().Get<bool>("soft_label")) {
if (IsSoftLabel(ctx)) {
if (check) {
PADDLE_ENFORCE_EQ(
x_dims[rank - 1], label_dims[rank - 1],
Expand All @@ -123,16 +128,39 @@ class CrossEntropyGradientOp : public framework::OperatorWithKernel {
"Input(Label) should be 1.");
}
ctx->SetOutputDim(framework::GradVarName("X"), x_dims);
ctx->ShareLoD("X", framework::GradVarName("X"));
PADDLE_ENFORCE_EQ(dy_dims[rank - 1], 1,
"The last dimension of Input(Y@Grad) should be 1.");
ctx->SetOutputDim(framework::GradVarName("X"), x_dims);
ctx->ShareLoD(VarNameWithXLoD(), framework::GradVarName("X"));
}

protected:
// Explicitly set that the data type of computation kernel of cross_entropy
// is determined by its input "X".
framework::OpKernelType GetExpectedKernelType(
const framework::ExecutionContext& ctx) const override {
return framework::OpKernelType(ctx.Input<Tensor>("X")->type(),
ctx.device_context());
return framework::OpKernelType(
ctx.Input<Tensor>(framework::GradVarName("Y"))->type(),
ctx.device_context());
}

virtual framework::DDim GetXDim(framework::InferShapeContext* ctx) const {
return ctx->GetInputDim("X");
}

virtual const char* VarNameWithXLoD() const { return "X"; }

virtual bool IsSoftLabel(framework::InferShapeContext* ctx) const {
return ctx->Attrs().Get<bool>("soft_label");
}
};

// Var-type inference helper shared by the cross entropy operators.
// It reports a single input/output pair, "X" -> "Y", through
// GetInputOutputWithSameType(); how the pair is consumed is up to the
// PassInDtypeAndVarTypeToOutput base class.
class CrossEntropyOpInferVarType
    : public framework::PassInDtypeAndVarTypeToOutput {
 protected:
  // Returns the one-entry map {"X" -> "Y"}.
  std::unordered_map<std::string, std::string> GetInputOutputWithSameType()
      const override {
    std::unordered_map<std::string, std::string> same_type_pairs;
    same_type_pairs.emplace("X", /*->*/ "Y");
    return same_type_pairs;
  }
};

Expand Down Expand Up @@ -200,26 +228,137 @@ or not. But the output only shares the LoD information with input X.
}
};

class CrossEntropyOpInferVarType
: public framework::PassInDtypeAndVarTypeToOutput {
// Gradient operator registered as "cross_entropy_grad".
// It requires Input(X) to be present and then runs the shape inference
// implemented in CrossEntropyGradientOpBase.
class CrossEntropyGradientOp : public CrossEntropyGradientOpBase {
public:
using CrossEntropyGradientOpBase::CrossEntropyGradientOpBase;

// Checks that "X" is an input, then delegates to the base-class InferShape.
void InferShape(framework::InferShapeContext* ctx) const override {
// Enforce the presence of X before the shared checks run.
PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should be not null.");
CrossEntropyGradientOpBase::InferShape(ctx);
}
};

// Forward operator registered as "cross_entropy2".
// On top of the base-class shape inference it fills in an extra output,
// "XShape", and it always reports hard labels (IsSoftLabel returns false).
class CrossEntropyOp2 : public CrossEntropyOpBase {
public:
using CrossEntropyOpBase::CrossEntropyOpBase;

// Runs the base shape inference, then sets the dims and LoD of "XShape".
void InferShape(framework::InferShapeContext* ctx) const override {
CrossEntropyOpBase::InferShape(ctx);

PADDLE_ENFORCE(ctx->HasOutput("XShape"),
"Output(XShape) should be not null.");

// XShape's dims are X's dims with one extra trailing entry of 0.
auto x_dims = ctx->GetInputDim("X");
auto x_dims_vec = framework::vectorize(x_dims);
x_dims_vec.push_back(0);
ctx->SetOutputDim("XShape", framework::make_ddim(x_dims_vec));
// XShape shares the LoD of X.
ctx->ShareLoD("X", /*->*/ "XShape");
}

protected:
std::unordered_map<std::string, std::string> GetInputOutputWithSameType()
const override {
return std::unordered_map<std::string, std::string>{{"X", /*->*/ "Y"}};
// This operator never uses soft labels, regardless of ctx.
bool IsSoftLabel(framework::InferShapeContext* ctx) const override {
return false;
}
};

class CrossEntropyGradientOp2 : public CrossEntropyGradientOpBase {
public:
using CrossEntropyGradientOpBase::CrossEntropyGradientOpBase;

protected:
virtual framework::DDim GetXDim(framework::InferShapeContext* ctx) const {
auto x_shape = ctx->GetInputDim("XShape");
return framework::DDim(x_shape.Get(), x_shape.size() - 1);
}

virtual const char* VarNameWithXLoD() const { return "XShape"; }

virtual bool IsSoftLabel(framework::InferShapeContext* ctx) const {
return false;
}
};

// Proto/checker maker for the "cross_entropy2" operator.
// Declares inputs "X" and "Label", outputs "Y" and "XShape", and the integer
// attribute "ignore_index" (default -100), plus the operator documentation.
class CrossEntropyOpMaker2 : public framework::OpProtoAndCheckerMaker {
 public:
  void Make() override {
    AddInput("X",
             "(Tensor, default Tensor<float>), a tensor whose last dimension "
             "size is equal to the number of classes. This input is a "
             "probability computed by the previous operator, which is almost "
             "always the result of a softmax operator.");
    AddInput(
        "Label",
        "(Tensor), the tensor which represents the ground truth. It has the "
        "same shape with 'X' except the last dimension. One hot Tensor.");
    AddOutput("Y",
              "(Tensor, default Tensor<float>), a tensor whose shape is same "
              "with 'X' except that the last dimension size is 1. It "
              "represents the cross entropy loss.");
    AddOutput("XShape", "Temporary variable to save shape and LoD of X.");
    // Adjacent string literals are concatenated verbatim, so each fragment
    // must carry its own trailing space.
    AddAttr<int>("ignore_index",
                 "(int, default -100), Specifies a target value that is "
                 "ignored and does not contribute to the input gradient. "
                 "Only valid if soft_label is set to False")
        .SetDefault(-100);
    AddComment(R"DOC(
Hard-label CrossEntropy Operator.

The input 'X' and 'Label' will first be logically flattened to 2-D matrices.
The matrix's second dimension(row length) is as same as the original last
dimension, and the first dimension(column length) is the product of all other
original dimensions. Then the cross entropy computation will take place on each
row of flattened matrices.

Only support hard label.

Both the input X and Label can carry the LoD (Level of Details) information,
or not. But the output only shares the LoD information with input X.

)DOC");
  }
};

// Builds the description of the "cross_entropy_grad2" operator that is paired
// with "cross_entropy2".
class CrossEntropyGradOpDescMaker2 : public framework::SingleGradOpDescMaker {
 public:
  using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;

 protected:
  // Returns a freshly allocated OpDesc for the gradient operator.
  // Note that the forward input X is not wired in; the gradient op receives
  // the forward outputs Y and XShape instead.
  std::unique_ptr<framework::OpDesc> Apply() const override {
    std::unique_ptr<framework::OpDesc> grad_desc(new framework::OpDesc());
    grad_desc->SetType("cross_entropy_grad2");

    // Values taken from the forward operator.
    grad_desc->SetInput("Label", Input("Label"));
    grad_desc->SetInput("Y", Output("Y"));
    grad_desc->SetInput("XShape", Output("XShape"));

    // Incoming gradient of Y and outgoing gradient of X.
    grad_desc->SetInput(framework::GradVarName("Y"), OutputGrad("Y"));
    grad_desc->SetOutput(framework::GradVarName("X"), InputGrad("X"));

    // The gradient operator carries the same attributes as the forward one.
    grad_desc->SetAttrMap(Attrs());
    return grad_desc;
  }
};

} // namespace operators
} // namespace paddle

// Short aliases used by the registration macros below.
namespace ops = paddle::operators;
using CPUCtx = paddle::platform::CPUDeviceContext;

// "cross_entropy" and "cross_entropy_grad": operator registration followed by
// their CPU kernels for float and double.
REGISTER_OPERATOR(cross_entropy, ops::CrossEntropyOp, ops::CrossEntropyOpMaker,
ops::CrossEntropyOpInferVarType,
REGISTER_OPERATOR(cross_entropy, ops::CrossEntropyOpBase,
ops::CrossEntropyOpMaker, ops::CrossEntropyOpInferVarType,
paddle::framework::DefaultGradOpDescMaker<true>);
REGISTER_OPERATOR(cross_entropy_grad, ops::CrossEntropyGradientOp);
REGISTER_OP_CPU_KERNEL(cross_entropy, ops::CrossEntropyOpKernel<CPUCtx, float>,
ops::CrossEntropyOpKernel<CPUCtx, double>);
REGISTER_OP_CPU_KERNEL(cross_entropy_grad,
ops::CrossEntropyGradientOpKernel<CPUCtx, float>,
ops::CrossEntropyGradientOpKernel<CPUCtx, double>);

// "cross_entropy2" and "cross_entropy_grad2": operator registration (with a
// dedicated grad-op desc maker) followed by their CPU kernels for float and
// double.
REGISTER_OPERATOR(cross_entropy2, ops::CrossEntropyOp2,
ops::CrossEntropyOpMaker2, ops::CrossEntropyOpInferVarType,
ops::CrossEntropyGradOpDescMaker2);
REGISTER_OPERATOR(cross_entropy_grad2, ops::CrossEntropyGradientOp2);
REGISTER_OP_CPU_KERNEL(cross_entropy2,
ops::CrossEntropyOpKernel2<CPUCtx, float>,
ops::CrossEntropyOpKernel2<CPUCtx, double>);
REGISTER_OP_CPU_KERNEL(cross_entropy_grad2,
ops::CrossEntropyGradientOpKernel2<CPUCtx, float>,
ops::CrossEntropyGradientOpKernel2<CPUCtx, double>);
10 changes: 10 additions & 0 deletions paddle/fluid/operators/cross_entropy_op.cu
Original file line number Diff line number Diff line change
Expand Up @@ -27,3 +27,13 @@ REGISTER_OP_CUDA_KERNEL(
cross_entropy_grad, ops::CrossEntropyGradientOpKernel<CUDACtx, float>,
ops::CrossEntropyGradientOpKernel<CUDACtx, double>,
ops::CrossEntropyGradientOpKernel<CUDACtx, plat::float16>);

// CUDA kernels for "cross_entropy2", instantiated for float, double and
// plat::float16.
REGISTER_OP_CUDA_KERNEL(cross_entropy2,
ops::CrossEntropyOpKernel2<CUDACtx, float>,
ops::CrossEntropyOpKernel2<CUDACtx, double>,
ops::CrossEntropyOpKernel2<CUDACtx, plat::float16>);

// CUDA kernels for "cross_entropy_grad2", instantiated for the same three
// element types.
REGISTER_OP_CUDA_KERNEL(
cross_entropy_grad2, ops::CrossEntropyGradientOpKernel2<CUDACtx, float>,
ops::CrossEntropyGradientOpKernel2<CUDACtx, double>,
ops::CrossEntropyGradientOpKernel2<CUDACtx, plat::float16>);
81 changes: 81 additions & 0 deletions paddle/fluid/operators/cross_entropy_op.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ limitations under the License. */
#pragma once
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/math.h"
#include "paddle/fluid/operators/math/cross_entropy.h"
#include "paddle/fluid/operators/math/math_function.h"
#include "paddle/fluid/platform/for_range.h"
Expand Down Expand Up @@ -137,5 +138,85 @@ class CrossEntropyGradientOpKernel : public framework::OpKernel<T> {
}
};

// Element-wise functor that writes one entry of dx per call.
//
// The flat index `idx` is split into a row (idx / feature_size) and a column
// (idx % feature_size). An entry receives -dy[row] * real_exp(y[row]) when its
// column equals label[row] and that label differs from ignore_index; every
// other entry is set to 0.
template <typename T>
struct HardLabelCrossEntropyBackwardFunctor {
  HardLabelCrossEntropyBackwardFunctor(T* dx, const T* y, const T* dy,
                                       const int64_t* label,
                                       int64_t ignore_index,
                                       int64_t feature_size)
      : dx_(dx),
        y_(y),
        dy_(dy),
        label_(label),
        ignore_index_(ignore_index),
        feature_size_(feature_size) {}

  HOSTDEVICE void operator()(int64_t idx) const {
    auto row = idx / feature_size_;
    auto col = idx % feature_size_;
    auto target = label_[row];

    // Columns other than the labelled one, and rows whose label is ignored,
    // get a zero gradient.
    if (target != col || target == ignore_index_) {
      dx_[idx] = 0;
      return;
    }
    dx_[idx] = -dy_[row] * real_exp(y_[row]);
  }

  T* dx_;                 // output buffer, indexed by the flat index
  const T* y_;            // indexed by row
  const T* dy_;           // indexed by row
  const int64_t* label_;  // indexed by row
  int64_t ignore_index_;
  int64_t feature_size_;
};

// Forward kernel of "cross_entropy2".
// Reshapes X and Label to matrices with ReshapeToMatrix (splitting at
// rank - 1), allocates Y, and calls math::CrossEntropyFunctor with the
// soft-label flag fixed to false and the "ignore_index" attribute.
template <typename DeviceContext, typename T>
class CrossEntropyOpKernel2 : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    auto* x_in = ctx.Input<Tensor>("X");
    const int num_col_dims = x_in->dims().size() - 1;

    auto x_mat = framework::ReshapeToMatrix(*x_in, num_col_dims);
    auto label_mat =
        framework::ReshapeToMatrix(*ctx.Input<Tensor>("Label"), num_col_dims);

    auto* loss = ctx.Output<Tensor>("Y");
    loss->mutable_data<T>(ctx.GetPlace());

    const int ignored_label = ctx.Attr<int>("ignore_index");
    auto& dev_ctx = ctx.template device_context<DeviceContext>();

    math::CrossEntropyFunctor<DeviceContext, T> cross_entropy;
    cross_entropy(dev_ctx, loss, &x_mat, &label_mat, false, ignored_label);
  }
};

// Backward kernel of "cross_entropy2".
// Allocates X@GRAD and applies HardLabelCrossEntropyBackwardFunctor to every
// element of it, using Y, Y@GRAD, Label and the "ignore_index" attribute.
template <typename DeviceContext, typename T>
class CrossEntropyGradientOpKernel2 : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    auto* dx = ctx.Output<Tensor>(framework::GradVarName("X"));
    auto* y = ctx.Input<Tensor>("Y");
    auto* dy = ctx.Input<Tensor>(framework::GradVarName("Y"));
    auto* label = ctx.Input<Tensor>("Label");

    auto* p_dx = dx->mutable_data<T>(ctx.GetPlace());
    auto* p_y = y->data<T>();
    auto* p_dy = dy->data<T>();
    auto* p_label = label->data<int64_t>();

    int64_t ignore_index = ctx.Attr<int>("ignore_index");
    int rank = dx->dims().size();
    int64_t feature_size = dx->dims()[rank - 1];

    // Total number of elements of dx. Using the product directly avoids the
    // former "product / feature_size * feature_size" round trip, which
    // divided by zero whenever the last dimension was 0.
    int64_t numel = framework::product(dx->dims());
    if (numel == 0) {
      // Nothing to write; the functor would never be invoked.
      return;
    }

    platform::ForRange<DeviceContext> for_range(
        ctx.template device_context<DeviceContext>(), numel);
    for_range(HardLabelCrossEntropyBackwardFunctor<T>(
        p_dx, p_y, p_dy, p_label, ignore_index, feature_size));
  }
};

} // namespace operators
} // namespace paddle
Loading