
Commit dc62a22

Revert "[oneDNN] Fix to issue #34554 (#34623)" (#34838)
This reverts commit 0a5c99e.
1 parent dffb0b2 commit dc62a22

8 files changed: +279 -436 lines changed

paddle/fluid/operators/elementwise/mkldnn/elementwise_mkldnn_op.h

Lines changed: 4 additions & 15 deletions
@@ -47,24 +47,13 @@ class EltwiseMKLDNNKernel : public framework::OpKernel<T> {
     float scale_o = ctx.Attr<float>("Scale_out");
     int axis = ctx.Attr<int>("axis");
 
-    platform::BinaryMKLDNNHandler<T> handler(BINARY_OP, axis, mkldnn_engine,
-                                             ctx.GetPlace(), x, y, z, scale_x,
-                                             scale_y, scale_o);
+    platform::BinaryMKLDNNHandler<T> handler(
+        BINARY_OP, axis, dev_ctx, mkldnn_engine, ctx.GetPlace(), x, y, z,
+        scale_x, scale_y, scale_o, ctx.OutputName("Out"));
 
     const auto src_x_memory = handler.AcquireSrcMemory(x);
     const auto src_y_memory = handler.AcquireSecondSrcMemory(y);
-    // (jczaja) For Inplace src and dst should be the same memory object.
-    // So x should share buffer with z. But UT mechanics is testing inplace
-    // execution for this op not checking that x can be bradcasted to match in
-    // shape y tensor.
-    // This is wrong as when x is to be broadcasted then z(out) will match the
-    // shape of y which is bigger than x. Hence if x is smaller in shape than z
-    // and they share a buffer (of
-    // shape x) then this buffer is not big enough to hold result of elementwise
-    // operation.
-    auto dst_memory = (x->numel() == z->numel() && x->IsSharedBufferWith(*z))
-                          ? src_x_memory
-                          : handler.AcquireDstMemory(z);
+    const auto dst_memory = handler.AcquireDstMemory(z);
 
     const auto binary_prim = handler.AcquireForwardPrimitive();

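The deleted comment records why the reverted code guarded buffer reuse: when x must be broadcast, z takes y's larger shape, so an x-sized shared buffer cannot hold the result. A minimal sketch of that guard, using a hypothetical FakeTensor stand-in rather than Paddle's real Tensor class:

#include <cassert>
#include <cstddef>
#include <vector>

// Hypothetical stand-in for a tensor: a view over (possibly shared) storage.
struct FakeTensor {
  std::vector<float>* buf;  // underlying storage, possibly shared
  std::size_t numel;        // logical number of elements
  bool IsSharedBufferWith(const FakeTensor& other) const {
    return buf == other.buf;
  }
};

int main() {
  // x holds 4 elements; after broadcasting against a larger y,
  // z = x op y logically holds 8 elements.
  std::vector<float> storage(4, 1.0f);
  FakeTensor x{&storage, 4};
  FakeTensor z{&storage, 8};  // "inplace": z aliases x's buffer

  // The reverted guard: only reuse x's memory as the destination when the
  // element counts match; otherwise the shared x-sized buffer is too small.
  bool reuse_src_as_dst = (x.numel == z.numel) && x.IsSharedBufferWith(z);
  assert(!reuse_src_as_dst);  // broadcast case: must acquire a separate dst
  return 0;
}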
paddle/fluid/operators/elementwise/mkldnn/elementwise_mul_mkldnn_op.cc

Lines changed: 6 additions & 4 deletions
@@ -48,8 +48,9 @@ class EltwiseMulMKLDNNGradKernel : public ElemwiseGradKernel<T> {
     if (dx) {
       // dx = dout*y
       platform::BinaryMKLDNNHandler<T> handler(
-          dnnl::algorithm::binary_mul, axis, mkldnn_engine, ctx.GetPlace(),
-          dout, y, dx, 1.0f, 1.0f, 1.0f);
+          dnnl::algorithm::binary_mul, axis, dev_ctx, mkldnn_engine,
+          ctx.GetPlace(), dout, y, dx, 1.0f, 1.0f, 1.0f,
+          ctx.InputName(framework::GradVarName("Out")));
 
       const auto src_dout_memory = handler.AcquireSrcMemory(dout);
       const auto src_y_memory = handler.AcquireSecondSrcMemory(y);
@@ -74,8 +75,9 @@ class EltwiseMulMKLDNNGradKernel : public ElemwiseGradKernel<T> {
       // Handler is having nullptr passed instead of output tensor as
       // we want Dst buffer to be allocated by oneDNN not to use Tensor
       platform::BinaryMKLDNNHandler<T> handler(
-          dnnl::algorithm::binary_mul, axis, mkldnn_engine, ctx.GetPlace(),
-          dout, x, nullptr, 1.0f, 1.0f, 1.0f);
+          dnnl::algorithm::binary_mul, axis, dev_ctx, mkldnn_engine,
+          ctx.GetPlace(), dout, x, nullptr, 1.0f, 1.0f, 1.0f,
+          ctx.InputName(framework::GradVarName("Out")));
 
       const auto src_dout_memory = handler.AcquireSrcMemory(dout);
       const auto src_x_memory = handler.AcquireSecondSrcMemory(x);

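Both branches compute the product-rule gradients noted in the kernel's comments: dx = dout*y, and dout*x for the dy path. A reference sketch of that math for equal-shaped operands, with plain loops in place of oneDNN binary primitives:

#include <cstddef>
#include <vector>

// Reference semantics of elementwise_mul backward for equal shapes:
// the product rule gives dx = dout * y and dy = dout * x, elementwise.
void EltwiseMulGradRef(const std::vector<float>& x, const std::vector<float>& y,
                       const std::vector<float>& dout,
                       std::vector<float>* dx, std::vector<float>* dy) {
  dx->assign(dout.size(), 0.0f);
  dy->assign(dout.size(), 0.0f);
  for (std::size_t i = 0; i < dout.size(); ++i) {
    (*dx)[i] = dout[i] * y[i];  // d(x*y)/dx = y
    (*dy)[i] = dout[i] * x[i];  // d(x*y)/dy = x
  }
}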
paddle/fluid/operators/mkldnn/activation_mkldnn_op.cc

Lines changed: 5 additions & 6 deletions
@@ -79,15 +79,15 @@ void eltwise_forward(const framework::ExecutionContext &ctx,
                     paddle::platform::errors::PreconditionNotMet(
                         "Operator DNNL eletwise_forward must use CPUPlace"));
   auto &dev_ctx = ctx.template device_context<MKLDNNDeviceContext>();
-  const auto &mkldnn_engine = dev_ctx.GetEngine();
 
   const auto *x = ctx.Input<Tensor>("X");
   auto *y = ctx.Output<Tensor>("Out");
 
   bool is_inplaced = x->IsSharedBufferWith(*y);
 
-  platform::ActivationMKLDNNHandler<T> handler(algorithm, ctx, mkldnn_engine,
-                                               ctx.GetPlace(), x);
+  platform::ActivationMKLDNNHandler<T> handler(algorithm, ctx, dev_ctx,
+                                               ctx.GetPlace(), x,
+                                               ctx.InputName("X"), is_inplaced);
 
   auto src_memory_p = handler.AcquireSrcMemory(x);
   auto dst_memory_p = is_inplaced ? src_memory_p : handler.AcquireDstMemory(y);
@@ -106,14 +106,13 @@ template <typename T>
 void eltwise_grad(const framework::ExecutionContext &ctx,
                   mkldnn::algorithm algorithm) {
   auto &dev_ctx = ctx.template device_context<MKLDNNDeviceContext>();
-  const auto &mkldnn_engine = dev_ctx.GetEngine();
 
   const auto *x = ctx.Input<Tensor>("X");
   const auto *diff_y = ctx.Input<Tensor>(framework::GradVarName("Out"));
   auto *diff_x = ctx.Output<Tensor>(framework::GradVarName("X"));
 
-  platform::ActivationMKLDNNHandler<T> handler(algorithm, ctx, mkldnn_engine,
-                                               ctx.GetPlace(), x, diff_y);
+  platform::ActivationMKLDNNHandler<T> handler(
+      algorithm, ctx, dev_ctx, ctx.GetPlace(), x, diff_y, ctx.InputName("X"));
 
   auto src_memory_p = handler.AcquireBackwardSrcMemory(x);
   auto diff_dst_memory_p = handler.AcquireDiffDstMemory(diff_y);
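The is_inplaced ? src_memory_p : ... line above reuses the source memory object as the destination when X and Out alias one buffer. That is safe for activations because each output element depends only on the input element at the same index; a minimal illustration with plain buffers rather than oneDNN memory objects:

#include <algorithm>
#include <vector>

// In-place ReLU: each output element depends only on the input element at
// the same index, so reading and writing the same buffer is safe (unlike
// the broadcasted elementwise case sketched earlier).
void ReluInplace(std::vector<float>* buf) {
  for (float& v : *buf) v = std::max(v, 0.0f);
}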
Lines changed: 1 addition & 1 deletion
@@ -1 +1 @@
-cc_test(test_mkldnn_caching SRCS mkldnn/test_mkldnn_caching.cc DEPS op_registry elementwise_mul_op elementwise_add_op activation_op softmax_op conv_op im2col vol2col softmax scope device_context enforce)
+cc_test(test_mkldnn_caching SRCS mkldnn/test_mkldnn_caching.cc DEPS op_registry elementwise_mul_op elementwise_add_op activation_op softmax_op softmax scope device_context enforce)

paddle/fluid/operators/mkldnn/scale_mkldnn_op.cc

Lines changed: 3 additions & 5 deletions
@@ -29,20 +29,18 @@ class ScaleMKLDNNKernel : public framework::OpKernel<T> {
   void RunKernel(const framework::ExecutionContext& ctx) const {
     const auto& dev_ctx =
         ctx.template device_context<platform::MKLDNNDeviceContext>();
-    const auto& mkldnn_engine = dev_ctx.GetEngine();
 
     auto* x = ctx.Input<Tensor>("X");
     auto* out = ctx.Output<Tensor>("Out");
 
     bool is_inplaced = x->IsSharedBufferWith(*out);
 
     platform::ActivationMKLDNNHandler<T> handler(
-        mkldnn::algorithm::eltwise_linear, ctx, mkldnn_engine, ctx.GetPlace(),
-        x);
+        mkldnn::algorithm::eltwise_linear, ctx, dev_ctx, ctx.GetPlace(), x,
+        ctx.InputName("X"), is_inplaced);
 
     auto src_memory_p = handler.AcquireSrcMemory(x);
-    auto dst_memory_p =
-        is_inplaced ? src_memory_p : handler.AcquireDstMemory(out);
+    auto dst_memory_p = handler.AcquireDstMemory(out);
 
     auto activation_p = handler.AcquireForwardPrimitive();
 
     auto& astream = paddle::platform::MKLDNNDeviceContext::tls().get_stream();

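The scale op is expressed here as an eltwise_linear activation. A reference sketch of that mapping, out = alpha * x + beta, assuming the bias is applied after scaling (the kernel's actual attribute handling is not shown in this hunk):

#include <cstddef>
#include <vector>

// Reference semantics of scale expressed as eltwise_linear:
// out[i] = alpha * x[i] + beta, alpha playing the role of "scale"
// and beta the role of "bias".
std::vector<float> ScaleRef(const std::vector<float>& x, float alpha,
                            float beta) {
  std::vector<float> out(x.size());
  for (std::size_t i = 0; i < x.size(); ++i) out[i] = alpha * x[i] + beta;
  return out;
}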
paddle/fluid/operators/mkldnn/softmax_mkldnn_op.cc

Lines changed: 59 additions & 46 deletions
@@ -32,56 +32,69 @@ using platform::to_void_cast;
 
 template <typename T>
 class SoftmaxMKLDNNHandler
-    : public platform::MKLDNNHandlerNoCachingT<T, mkldnn::softmax_forward,
-                                               mkldnn::softmax_backward> {
+    : public platform::MKLDNNHandlerT<T, mkldnn::softmax_forward,
+                                      mkldnn::softmax_backward> {
  public:
-  SoftmaxMKLDNNHandler(const mkldnn::engine mkldnn_engine,
+  SoftmaxMKLDNNHandler(const MKLDNNDeviceContext& dev_ctx,
+                       const mkldnn::engine mkldnn_engine,
                        platform::Place cpu_place, const Tensor* input,
-                       Tensor* output, const int axis)
-      : platform::MKLDNNHandlerNoCachingT<T, mkldnn::softmax_forward,
-                                          mkldnn::softmax_backward>(
-            mkldnn_engine, cpu_place) {
-    PADDLE_ENFORCE_EQ(
-        input->dims(), output->dims(),
-        platform::errors::InvalidArgument(
-            "The shape of input and output tensor must be identical."));
-
-    auto softmax_tz = framework::vectorize(input->dims());
-    auto md = memory::desc(softmax_tz, platform::MKLDNNGetDataType<T>(),
-                           input->format());
-
-    this->AcquireForwardPrimitiveDescriptor(prop_kind::forward_scoring, md,
-                                            axis);
+                       Tensor* output, const int axis,
+                       const std::string uniq_name, bool is_inplaced)
+      : platform::MKLDNNHandlerT<T, mkldnn::softmax_forward,
+                                 mkldnn::softmax_backward>(
+            dev_ctx, mkldnn_engine, cpu_place,
+            // Softmax may be inplace then uniq_name is no longer unique
+            is_inplaced ? platform::CreateKey(
+                              dev_ctx, framework::vectorize(input->dims()),
+                              axis, uniq_name)
+                        : platform::CreateKey(
+                              dev_ctx, framework::vectorize(input->dims()),
+                              uniq_name)) {
+    if (!this->isCached()) {
+      PADDLE_ENFORCE_EQ(
+          input->dims(), output->dims(),
+          platform::errors::InvalidArgument(
+              "The shape of input and output tensor must be identical."));
+
+      auto softmax_tz = framework::vectorize(input->dims());
+      auto md = memory::desc(softmax_tz, platform::MKLDNNGetDataType<T>(),
+                             input->format());
+
+      this->AcquireForwardPrimitiveDescriptor(prop_kind::forward_scoring, md,
+                                              axis);
+    }
   }
 
   SoftmaxMKLDNNHandler(const framework::ExecutionContext& ctx,
-                       const mkldnn::engine mkldnn_engine,
+                       const MKLDNNDeviceContext& dev_ctx,
                        platform::Place cpu_place, const Tensor* out,
                        const Tensor* out_grad, Tensor* in_x_grad,
                        const std::string& unique_name)
-      : platform::MKLDNNHandlerNoCachingT<T, mkldnn::softmax_forward,
-                                          mkldnn::softmax_backward>(
-            mkldnn_engine, cpu_place) {
-    PADDLE_ENFORCE_EQ(out_grad->dims(), in_x_grad->dims(),
-                      platform::errors::InvalidArgument(
-                          "The shape of softmax_grad's input "
-                          "and output must be identical, but shapes differ, "
-                          "out_grad: %s in_grad: %s",
-                          out_grad->dims(), in_x_grad->dims()));
-
-    auto dims = out_grad->dims();  // input and output share the same shape
-    const int axis = CanonicalAxis(ctx.Attr<int>("axis"), dims.size());
-    auto softmax_tz = framework::vectorize<int64_t>(dims);
-
-    auto data_softmax_md = MKLDNNMemDesc(
-        softmax_tz, platform::MKLDNNGetDataType<T>(), out->format());
-    auto diff_softmax_md = MKLDNNMemDesc(
-        softmax_tz, platform::MKLDNNGetDataType<T>(), out_grad->format());
-
-    this->AcquireForwardPrimitiveDescriptor(prop_kind::forward_scoring,
-                                            data_softmax_md, axis);
-    this->AcquireBackwardPrimitiveDescriptor(diff_softmax_md, data_softmax_md,
-                                             axis);
+      : platform::MKLDNNHandlerT<T, mkldnn::softmax_forward,
+                                 mkldnn::softmax_backward>(
+            dev_ctx, dev_ctx.GetEngine(), cpu_place,
+            platform::CreateKey(dev_ctx, framework::vectorize(out->dims()),
+                                unique_name)) {
+    if (!this->isBwdCached()) {
+      PADDLE_ENFORCE_EQ(
+          out_grad->dims(), in_x_grad->dims(),
+          platform::errors::InvalidArgument("The shape of softmax_grad's input "
+                                            "and output must be identical."));
+
+      auto dims = out_grad->dims();  // input and output share the same shape
+      const int axis = CanonicalAxis(ctx.Attr<int>("axis"), dims.size());
+      auto softmax_tz = framework::vectorize<int64_t>(dims);
+
+      auto data_softmax_md = MKLDNNMemDesc(
+          softmax_tz, platform::MKLDNNGetDataType<T>(), out->format());
+      auto diff_softmax_md = MKLDNNMemDesc(
+          softmax_tz, platform::MKLDNNGetDataType<T>(), out_grad->format());
+
+      this->AcquireForwardPrimitiveDescriptor(prop_kind::forward_scoring,
+                                              data_softmax_md, axis);
+      this->AcquireBackwardPrimitiveDescriptor(diff_softmax_md, data_softmax_md,
+                                               axis);
+    }
   }
 };
 
@@ -98,8 +111,9 @@ class SoftmaxMKLDNNKernel : public paddle::framework::OpKernel<T> {
 
     const int axis = CanonicalAxis(ctx.Attr<int>("axis"), input->dims().size());
 
-    SoftmaxMKLDNNHandler<T> handler(mkldnn_engine, ctx.GetPlace(), input,
-                                    output, axis);
+    SoftmaxMKLDNNHandler<T> handler(dev_ctx, mkldnn_engine, ctx.GetPlace(),
+                                    input, output, axis, ctx.OutputName("Out"),
+                                    is_inplaced);
 
     auto softmax_src_memory_p = handler.AcquireSrcMemory(input);
     // For Inplace src and and dst are the same memory object
@@ -135,12 +149,11 @@ class SoftmaxMKLDNNGradKernel : public paddle::framework::OpKernel<T> {
                       paddle::platform::errors::PreconditionNotMet(
                           "Operator DNNL SoftmaxGrad must use CPUPlace"));
     auto& dev_ctx = ctx.template device_context<MKLDNNDeviceContext>();
-    const auto& mkldnn_engine = dev_ctx.GetEngine();
     const Tensor* output = ctx.Input<Tensor>("Out");
    auto* out_grad = ctx.template Input<Tensor>(framework::GradVarName("Out"));
    auto* in_x_grad = ctx.template Output<Tensor>(framework::GradVarName("X"));
 
-    SoftmaxMKLDNNHandler<T> handler(ctx, mkldnn_engine, ctx.GetPlace(), output,
+    SoftmaxMKLDNNHandler<T> handler(ctx, dev_ctx, ctx.GetPlace(), output,
                                     out_grad, in_x_grad, ctx.InputName("Out"));
 
     auto dst_memory_p = handler.AcquireDstMemory(output);

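The comment restored in the forward constructor is the crux of this file: for inplace softmax, the output variable name alone no longer uniquely identifies a primitive, so the axis is folded into the cache key. A rough illustration of that keying scheme, with a hypothetical MakeKey standing in for Paddle's platform::CreateKey:

#include <sstream>
#include <string>
#include <vector>

// Hypothetical stand-in for platform::CreateKey: concatenates the
// distinguishing parts (dims, optional axis, variable name) into one key.
std::string MakeKey(const std::vector<long>& dims, const std::string& name,
                    int axis = -1) {
  std::ostringstream key;
  for (long d : dims) key << d << '-';
  if (axis >= 0) key << "axis" << axis << '@';
  key << name;
  return key.str();
}

int main() {
  const std::vector<long> dims{32, 64};
  // Inplace softmax: two ops writing the same variable both see
  // "softmax_out", so folding the axis into the key keeps their cached
  // primitives distinct.
  const auto key_a = MakeKey(dims, "softmax_out", /*axis=*/0);
  const auto key_b = MakeKey(dims, "softmax_out", /*axis=*/1);
  return key_a == key_b ? 1 : 0;  // returns 0: the keys differ, as intended
}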
paddle/fluid/operators/mkldnn/test_mkldnn_caching.cc

Lines changed: 55 additions & 29 deletions
@@ -33,8 +33,6 @@ USE_OP(relu);
 USE_OP_DEVICE_KERNEL(relu, MKLDNN);
 USE_OP(softmax);
 USE_OP_DEVICE_KERNEL(softmax, MKLDNN);
-USE_OP(conv2d);
-USE_OP_DEVICE_KERNEL_WITH_CUSTOM_TYPE(conv2d, MKLDNN, FP32);
 
 namespace paddle {
 namespace operators {
@@ -66,19 +64,16 @@ class CacheTester {
 
 template <typename T>
 void RunOperator(const platform::Place &place, const std::string &op_type,
-                 const framework::DDim &dims, const std::string &first_input) {
+                 const framework::DDim &dims, const std::string &output_name,
+                 bool inplace = false) {
   framework::Scope scope;
 
   std::map<const std::string, int> num_inputs = {{"softmax", 1},
                                                  {"relu", 1},
-                                                 {"conv2d", 2},
                                                  {"elementwise_add", 2},
                                                  {"elementwise_mul", 2}};
 
-  std::string first_input_var_name = (op_type == "conv2d") ? "Input" : "X";
-  std::string second_input_var_name = (op_type == "conv2d") ? "Filter" : "Y";
-  std::string output_var_name = (op_type == "conv2d") ? "Output" : "Out";
-  std::string output_name = "output";
+  std::string first_input = inplace == true ? output_name : "x";
 
   std::vector<InputVars> input_names = {
       {first_input, scope.Var(first_input)->GetMutable<framework::LoDTensor>()},
@@ -118,40 +113,71 @@ void RunOperator(const platform::Place &place, const std::string &op_type,
 
   auto &pool = platform::DeviceContextPool::Instance();
 
-  auto op =
-      num_inputs[op_type] > 1
-          ? framework::OpRegistry::CreateOp(
-                op_type, {{first_input_var_name, {first_input}},
-                          {second_input_var_name, {"x1"}}},
-                {{output_var_name, {output_name}}}, {{"use_mkldnn", {true}}})
-          : framework::OpRegistry::CreateOp(
-                op_type, {{first_input_var_name, {first_input}}},
-                {{output_var_name, {output_name}}}, {{"use_mkldnn", {true}}});
+  auto op = num_inputs[op_type] > 1
+                ? framework::OpRegistry::CreateOp(
+                      op_type, {{"X", {first_input}}, {"Y", {"x1"}}},
+                      {{"Out", {output_name}}}, {{"use_mkldnn", {true}}})
+                : framework::OpRegistry::CreateOp(
+                      op_type, {{"X", {first_input}}}, {{"Out", {output_name}}},
+                      {{"use_mkldnn", {true}}});
 
   op->Run(scope, place);
   pool.Get(place)->Wait();
 }
 
-TEST(test_conv2d_reuse_cache, cpu_place) {
-  framework::DDim dims({1, 16, 32, 64});
+TEST(test_softmax_reuse_cache, cpu_place) {
+  framework::DDim dims({32, 64});
   platform::CPUPlace p;
   CacheTester ct;
-  RunOperator<float>(p, "conv2d", dims, "input_signal");
-  RunOperator<float>(p, "conv2d", dims, "input_signal");
-  PADDLE_ENFORCE_EQ(ct.Analyze(9), true,
+  RunOperator<float>(p, "softmax", dims, "softmax_out");
+  RunOperator<float>(p, "softmax", dims, "softmax_out");
+  PADDLE_ENFORCE_EQ(ct.Analyze(4), true,
                     platform::errors::InvalidArgument(
-                        "Invalid number of cached oneDNN objects"));
+                        "Wrong number of cached oneDNN objects"));
 }
 
-TEST(test_conv2d_noreuse_cache, cpu_place) {
-  framework::DDim dims({1, 16, 32, 64});
+TEST(test_softmax_noreuse_cache, cpu_place) {
+  framework::DDim dims({32, 64});
   platform::CPUPlace p;
   CacheTester ct;
-  RunOperator<float>(p, "conv2d", dims, "input_signal");
-  RunOperator<float>(p, "conv2d", dims, "input_signal2");
-  PADDLE_ENFORCE_EQ(ct.Analyze(18), true,
+  RunOperator<float>(p, "softmax", dims, "softmax_out");
+  RunOperator<float>(p, "softmax", dims, "softmax_out2");
+  PADDLE_ENFORCE_EQ(ct.Analyze(8), true,
                     platform::errors::InvalidArgument(
-                        "Invalid number of cached oneDNN objects"));
+                        "Wrong number of cached oneDNN objects"));
+}
+
+TEST(test_softmax_inplace_cache, cpu_place) {
+  framework::DDim dims({32, 64});
+  platform::CPUPlace p;
+  CacheTester ct;
+  RunOperator<float>(p, "softmax", dims, "softmax_out");
+  RunOperator<float>(p, "softmax", dims, "softmax_out", true);
+  PADDLE_ENFORCE_EQ(ct.Analyze(7), true,
+                    platform::errors::InvalidArgument(
+                        "Wrong number of cached oneDNN objects"));
+}
+
+TEST(test_relu_inplace_cache, cpu_place) {
+  framework::DDim dims({32, 64});
+  platform::CPUPlace p;
+  CacheTester ct;
+  RunOperator<float>(p, "relu", dims, "relu_out");
+  RunOperator<float>(p, "relu", dims, "relu_out", true);
+  PADDLE_ENFORCE_EQ(ct.Analyze(7), true,
+                    platform::errors::InvalidArgument(
+                        "Wrong number of cached oneDNN objects"));
+}
+
+TEST(test_elementwise_add_reuse_cache, cpu_place) {
+  framework::DDim dims({32, 64});
+  platform::CPUPlace p;
+  CacheTester ct;
+  RunOperator<float>(p, "elementwise_add", dims, "elementwise_add_out");
+  RunOperator<float>(p, "relu", dims, "elementwise_add_out", true);
+  PADDLE_ENFORCE_EQ(ct.Analyze(8), true,
+                    platform::errors::InvalidArgument(
+                        "Wrong number of cached oneDNN objects"));
 }
 
 }  // namespace operators

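In miniature, the restored tests assert that acquiring with a previously seen key must not grow the cache, while a fresh key must. A toy sketch of that property with a hypothetical string-keyed cache (not Paddle's actual device-context blob map, whose entry counts the Analyze() calls above check):

#include <cassert>
#include <map>
#include <memory>
#include <string>

// Toy string-keyed object cache mimicking the reuse-vs-noreuse tests.
std::map<std::string, std::shared_ptr<int>> cache;

std::shared_ptr<int> Acquire(const std::string& key) {
  auto it = cache.find(key);
  if (it != cache.end()) return it->second;  // hit: cache size unchanged
  return cache[key] = std::make_shared<int>(0);
}

int main() {
  Acquire("softmax_out");
  Acquire("softmax_out");   // reuse-cache case: same key, no growth
  assert(cache.size() == 1);
  Acquire("softmax_out2");  // noreuse case: a new key adds an entry
  assert(cache.size() == 2);
  return 0;
}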