
Commit f1c1d9e

[oneDNN ] disabling more ops caching (#34830)
* disabled caching of layer norm, transpose, and sum (with follow-up compilation fixes)
* LRN with disabled cache
* lint fixes
1 parent 7b3295a commit f1c1d9e

File tree: 5 files changed, +197 additions, -271 deletions
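The pattern this commit applies (an editorial summary with an illustrative sketch, not part of the diff): handlers derived from platform::MKLDNNHandlerT cache their primitives in the device context, so each constructor builds a string key via platform::CreateKey(...) plus a unique name and skips setup when isCached() or isBwdCached() reports a hit. The no-caching base class platform::MKLDNNHandlerNoCachingT takes only the engine and the placement, and its constructor body runs on every call. A condensed before/after of the layer_norm handler constructor, trimmed from the hunks below (the "..." elisions are mine):

// Before: cached handler; a key-based lookup guards the setup.
LayerNormMKLDNNHandler(..., const platform::MKLDNNDeviceContext& dev_ctx,
                       platform::Place cpu_place, const std::string& uniq_name)
    : platform::MKLDNNHandlerT<T, dnnl::layer_normalization_forward>(
          dev_ctx, dev_ctx.GetEngine(), cpu_place,
          platform::CreateKey(dev_ctx, dims, uniq_name)) {
  if (!this->isCached()) {
    // build memory descriptors and the forward primitive descriptor
  }
}

// After: no caching; only engine + place, setup runs unconditionally.
LayerNormMKLDNNHandler(..., const mkldnn::engine engine, platform::Place cpu_place)
    : platform::MKLDNNHandlerNoCachingT<T, dnnl::layer_normalization_forward>(
          engine, cpu_place) {
  // build memory descriptors and the forward primitive descriptor
}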

paddle/fluid/operators/mkldnn/layer_norm_mkldnn_op.cc

Lines changed: 39 additions & 49 deletions
@@ -19,45 +19,36 @@ namespace paddle {
 namespace operators {
 
 template <typename T>
-class LayerNormMKLDNNHandler
-    : public platform::MKLDNNHandlerT<T, dnnl::layer_normalization_forward> {
+class LayerNormMKLDNNHandler : public platform::MKLDNNHandlerNoCachingT<
+                                   T, dnnl::layer_normalization_forward> {
  public:
   LayerNormMKLDNNHandler(const std::vector<int64_t>& dims, const float& epsilon,
                          const dnnl::normalization_flags& flags,
                          const bool& is_test, const MKLDNNMemoryFormat fmt,
-                         const platform::MKLDNNDeviceContext& dev_ctx,
-                         platform::Place cpu_place,
-                         const std::string& uniq_name)
-      : platform::MKLDNNHandlerT<T, dnnl::layer_normalization_forward>(
-            dev_ctx, dev_ctx.GetEngine(), cpu_place,
-            platform::CreateKey(dev_ctx, dims, uniq_name)) {
-    if (!this->isCached()) {
-      auto md = dnnl::memory::desc(dims, platform::MKLDNNGetDataType<T>(), fmt);
-      if (!is_test) {
-        // TODO(grygielski) Delete forcing stats_md after DNNL 1.2 is introduced
-        auto stats_md = dnnl::memory::desc(
-            {begin(dims), end(dims) - 1}, platform::MKLDNNGetDataType<float>(),
-            platform::MKLDNNFormatForSize(dims.size() - 1,
-                                          MKLDNNMemoryFormat::nchw));
-        this->AcquireForwardPrimitiveDescriptor(
-            dnnl::prop_kind::forward_training, md, stats_md, epsilon, flags);
-      } else {
-        this->AcquireForwardPrimitiveDescriptor(
-            dnnl::prop_kind::forward_inference, md, epsilon, flags);
-      }
+                         const mkldnn::engine engine, platform::Place cpu_place)
+      : platform::MKLDNNHandlerNoCachingT<T, dnnl::layer_normalization_forward>(
+            engine, cpu_place) {
+    auto md = dnnl::memory::desc(dims, platform::MKLDNNGetDataType<T>(), fmt);
+    if (!is_test) {
+      // TODO(grygielski) Delete forcing stats_md after DNNL 1.2 is introduced
+      auto stats_md = dnnl::memory::desc(
+          {begin(dims), end(dims) - 1}, platform::MKLDNNGetDataType<float>(),
+          platform::MKLDNNFormatForSize(dims.size() - 1,
+                                        MKLDNNMemoryFormat::nchw));
+      this->AcquireForwardPrimitiveDescriptor(dnnl::prop_kind::forward_training,
+                                              md, stats_md, epsilon, flags);
+    } else {
+      this->AcquireForwardPrimitiveDescriptor(
+          dnnl::prop_kind::forward_inference, md, epsilon, flags);
     }
   }
 
-  std::shared_ptr<dnnl::memory> AcquireScaleShiftMemory() {
-    return this->AcquireMemoryFromPrimitive("@scaleshift_mem_p");
-  }
-
   std::shared_ptr<dnnl::memory> AcquireScaleShiftMemory(
       std::vector<float>& scaleshift_data) {
     // scaleshift_data comes from temporary buffer so we need to copy it into
     // created memory primitivie
-    auto scaleshift_mem = this->AcquireMemoryFromPrimitive(
-        this->fwd_pd_->weights_desc(), "@scaleshift_mem_p");
+    auto scaleshift_mem =
+        this->AcquireMemoryFromPrimitive(this->fwd_pd_->weights_desc());
     auto data_ptr = scaleshift_mem->get_data_handle();
     std::size_t num_bytes = scaleshift_data.size() * sizeof(float);
     std::memcpy(data_ptr, scaleshift_data.data(), num_bytes);
@@ -68,15 +59,15 @@ class LayerNormMKLDNNHandler
     T* mean_data = mean->mutable_data<T>(this->place_,
                                          this->fwd_pd_->mean_desc().get_size());
     return this->AcquireMemoryFromPrimitive(this->fwd_pd_->mean_desc(),
-                                            mean_data, "@mean_mem_p");
+                                            mean_data);
   }
 
   std::shared_ptr<dnnl::memory> AcquireVarianceMemory(
       framework::Tensor* variance) {
     T* variance_data = variance->mutable_data<T>(
         this->place_, this->fwd_pd_->variance_desc().get_size());
     return this->AcquireMemoryFromPrimitive(this->fwd_pd_->variance_desc(),
-                                            variance_data, "@variance_mem_p");
+                                            variance_data);
   }
 };
 
@@ -95,6 +86,7 @@ class LayerNormMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
 
     auto& dev_ctx =
         ctx.template device_context<platform::MKLDNNDeviceContext>();
+    const auto& mkldnn_engine = dev_ctx.GetEngine();
 
     auto src_tz = paddle::framework::vectorize(x->dims());
     PADDLE_ENFORCE_EQ(begin_norm_axis, (src_tz.size() - 1),
@@ -112,8 +104,8 @@ class LayerNormMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
     }
 
     LayerNormMKLDNNHandler<T> handler(src_tz, epsilon, flags, is_test,
-                                      x->format(), dev_ctx, ctx.GetPlace(),
-                                      ctx.OutputName("Y"));
+                                      x->format(), mkldnn_engine,
+                                      ctx.GetPlace());
 
     auto src_memory = handler.AcquireSrcMemory(x);
     auto dst_memory = handler.AcquireDstMemory(y);
@@ -139,24 +131,22 @@ class LayerNormMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
       args.insert({DNNL_ARG_VARIANCE, *variance_memory});
     }
 
-    auto scaleshift_memory = handler.AcquireScaleShiftMemory();
+    std::shared_ptr<mkldnn::memory> scaleshift_memory;
     if (with_scaleshift) {
-      if (scaleshift_memory == nullptr || !is_test) {
-        auto scale_tz = paddle::framework::vectorize(scale->dims());
-        const unsigned int C = scale_tz[0];
-
-        // MKLDNN requires a single piece of memory for scale and shift/bias
-        // data
-        std::vector<float> scaleshift_data;
-        scaleshift_data.reserve(2 * C);
-        scaleshift_data.insert(scaleshift_data.begin(), scale->data<float>(),
-                               scale->data<float>() + C);
-
-        scaleshift_data.insert(scaleshift_data.end(), bias->data<float>(),
-                               bias->data<float>() + C);
-
-        scaleshift_memory = handler.AcquireScaleShiftMemory(scaleshift_data);
-      }
+      auto scale_tz = paddle::framework::vectorize(scale->dims());
+      const unsigned int C = scale_tz[0];
+
+      // MKLDNN requires a single piece of memory for scale and shift/bias
+      // data
+      std::vector<float> scaleshift_data;
+      scaleshift_data.reserve(2 * C);
+      scaleshift_data.insert(scaleshift_data.begin(), scale->data<float>(),
+                             scale->data<float>() + C);
+
+      scaleshift_data.insert(scaleshift_data.end(), bias->data<float>(),
+                             bias->data<float>() + C);
+
+      scaleshift_memory = handler.AcquireScaleShiftMemory(scaleshift_data);
       args.insert({DNNL_ARG_SCALE_SHIFT, *scaleshift_memory});
     }
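Editorial note on the memory helpers (not part of the diff): under the cached base class, every acquired memory object was registered under a string suffix such as "@scaleshift_mem_p" so that later calls could retrieve it, and the parameterless AcquireScaleShiftMemory() existed purely for that lookup; it is deleted here. With the no-caching base, AcquireMemoryFromPrimitive takes just the memory descriptor (plus an optional data pointer), and the kernel holds the result in a local shared_ptr that is rebuilt on every invocation, roughly as in this condensed sketch from the hunk above:

std::shared_ptr<mkldnn::memory> scaleshift_memory;
if (with_scaleshift) {
  // scale and bias are packed into one temporary buffer and copied into a
  // freshly created memory object each time the kernel runs
  scaleshift_memory = handler.AcquireScaleShiftMemory(scaleshift_data);
  args.insert({DNNL_ARG_SCALE_SHIFT, *scaleshift_memory});
}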

paddle/fluid/operators/mkldnn/lrn_mkldnn_op.cc

Lines changed: 64 additions & 72 deletions
@@ -21,94 +21,86 @@ using paddle::framework::Tensor;
 using paddle::platform::MKLDNNDeviceContext;
 
 template <typename T>
-class LRNMKLDNNHandler : public platform::MKLDNNHandlerT<T, mkldnn::lrn_forward,
-                                                          mkldnn::lrn_backward> {
+class LRNMKLDNNHandler
+    : public platform::MKLDNNHandlerNoCachingT<T, mkldnn::lrn_forward,
+                                               mkldnn::lrn_backward> {
  public:
   LRNMKLDNNHandler(const framework::ExecutionContext& ctx,
-                   const MKLDNNDeviceContext& dev_ctx,
                    const mkldnn::engine mkldnn_engine,
-                   platform::Place cpu_place, const Tensor* input,
-                   const std::string& unique_name)
-
-      : platform::MKLDNNHandlerT<T, mkldnn::lrn_forward, mkldnn::lrn_backward>(
-            dev_ctx, mkldnn_engine, cpu_place,
-            platform::CreateKey(dev_ctx, framework::vectorize(input->dims()),
-                                unique_name)) {
-    if (!this->isCached()) {
-      const int n = ctx.Attr<int>("n");
-      // MKL-DNN implements LRN in a caffe way:
-      // http://caffe.berkeleyvision.org/tutorial/layers/lrn.html
-      // Where sum of squares is divided by size of normalization window
-      // this is not the case for PaddlePaddle LRN.
-      // Hence we need to compensate for this diffrence by
-      // multipliing alpha by size of window(n)
-      const float alpha = ctx.Attr<float>("alpha") * static_cast<float>(n);
-      const float beta = ctx.Attr<float>("beta");
-      const float k = ctx.Attr<float>("k");
-      bool is_test = ctx.Attr<bool>("is_test");
-
-      auto dims = framework::vectorize(input->dims());
-
-      auto src_md = mkldnn::memory::desc(dims, platform::MKLDNNGetDataType<T>(),
-                                         input->format());
-
-      this->AcquireForwardPrimitiveDescriptor(
-          is_test ? mkldnn::prop_kind::forward_inference
-                  : mkldnn::prop_kind::forward_training,
-          mkldnn::algorithm::lrn_across_channels, src_md, n, alpha, beta, k);
-    }
+                   platform::Place cpu_place, const Tensor* input)
+
+      : platform::MKLDNNHandlerNoCachingT<T, mkldnn::lrn_forward,
+                                          mkldnn::lrn_backward>(mkldnn_engine,
+                                                                cpu_place) {
+    const int n = ctx.Attr<int>("n");
+    // MKL-DNN implements LRN in a caffe way:
+    // http://caffe.berkeleyvision.org/tutorial/layers/lrn.html
+    // Where sum of squares is divided by size of normalization window
+    // this is not the case for PaddlePaddle LRN.
+    // Hence we need to compensate for this diffrence by
+    // multipliing alpha by size of window(n)
+    const float alpha = ctx.Attr<float>("alpha") * static_cast<float>(n);
+    const float beta = ctx.Attr<float>("beta");
+    const float k = ctx.Attr<float>("k");
+    bool is_test = ctx.Attr<bool>("is_test");
+
+    auto dims = framework::vectorize(input->dims());
+
+    auto src_md = mkldnn::memory::desc(dims, platform::MKLDNNGetDataType<T>(),
+                                       input->format());
+
+    this->AcquireForwardPrimitiveDescriptor(
+        is_test ? mkldnn::prop_kind::forward_inference
+                : mkldnn::prop_kind::forward_training,
+        mkldnn::algorithm::lrn_across_channels, src_md, n, alpha, beta, k);
   }
 
   LRNMKLDNNHandler(const framework::ExecutionContext& ctx,
-                   const MKLDNNDeviceContext& dev_ctx,
+                   const mkldnn::engine mkldnn_engine,
                    platform::Place cpu_place, const Tensor* in_x,
-                   const Tensor* out_grad, Tensor* in_x_grad,
-                   const std::string& unique_name)
-      : platform::MKLDNNHandlerT<T, mkldnn::lrn_forward, mkldnn::lrn_backward>(
-            dev_ctx, dev_ctx.GetEngine(), cpu_place,
-            platform::CreateKey(dev_ctx, framework::vectorize(in_x->dims()),
-                                unique_name)) {
-    if (!this->isBwdCached()) {
-      PADDLE_ENFORCE_EQ(
-          ctx.Attr<bool>("is_test"), false,
-          platform::errors::PreconditionNotMet(
-              "is_test attribute should be set to False in training phase."));
-
-      const int n = ctx.Attr<int>("n");
-      const float alpha = ctx.Attr<float>("alpha") * static_cast<float>(n);
-      const float beta = ctx.Attr<float>("beta");
-      const float k = ctx.Attr<float>("k");
-
-      auto dims = framework::vectorize<int64_t>(in_x->dims());
-
-      auto src_md = mkldnn::memory::desc(dims, platform::MKLDNNGetDataType<T>(),
-                                         in_x->format());
-      auto diff_md = mkldnn::memory::desc(
-          dims, platform::MKLDNNGetDataType<T>(), out_grad->format());
-
-      this->AcquireForwardPrimitiveDescriptor(
-          mkldnn::prop_kind::forward_training,
-          mkldnn::algorithm::lrn_across_channels, src_md, n, alpha, beta, k);
-
-      this->AcquireBackwardPrimitiveDescriptor(
-          mkldnn::algorithm::lrn_across_channels, src_md, diff_md, n, alpha,
-          beta, k);
-    }
+                   const Tensor* out_grad, Tensor* in_x_grad)
+      : platform::MKLDNNHandlerNoCachingT<T, mkldnn::lrn_forward,
+                                          mkldnn::lrn_backward>(mkldnn_engine,
+                                                                cpu_place) {
+    PADDLE_ENFORCE_EQ(
+        ctx.Attr<bool>("is_test"), false,
+        platform::errors::PreconditionNotMet(
+            "is_test attribute should be set to False in training phase."));
+
+    const int n = ctx.Attr<int>("n");
+    const float alpha = ctx.Attr<float>("alpha") * static_cast<float>(n);
+    const float beta = ctx.Attr<float>("beta");
+    const float k = ctx.Attr<float>("k");
+
+    auto dims = framework::vectorize<int64_t>(in_x->dims());
+
+    auto src_md = mkldnn::memory::desc(dims, platform::MKLDNNGetDataType<T>(),
+                                       in_x->format());
+    auto diff_md = mkldnn::memory::desc(dims, platform::MKLDNNGetDataType<T>(),
+                                        out_grad->format());
+
+    this->AcquireForwardPrimitiveDescriptor(
+        mkldnn::prop_kind::forward_training,
+        mkldnn::algorithm::lrn_across_channels, src_md, n, alpha, beta, k);
+
+    this->AcquireBackwardPrimitiveDescriptor(
+        mkldnn::algorithm::lrn_across_channels, src_md, diff_md, n, alpha, beta,
+        k);
   }
 
   std::shared_ptr<mkldnn::memory> AcquireWorkspaceMemory(Tensor* workspace) {
     T* ptr = workspace->mutable_data<T>(
         this->place_, this->fwd_pd_->workspace_desc().get_size());
     return this->AcquireMemoryFromPrimitive(this->fwd_pd_->workspace_desc(),
-                                            ptr, "@wrk_mem_p");
+                                            ptr);
   }
 
   std::shared_ptr<mkldnn::memory> AcquireBackwardWorkspaceMemory(
       const Tensor* workspace) {
     const T* workspace_data = workspace->data<T>();
     return this->AcquireMemoryFromPrimitive(
         this->fwd_pd_->workspace_desc(),
-        platform::to_void_cast<T>(workspace_data), "@bwd-wrk_mem_p");
+        platform::to_void_cast<T>(workspace_data));
   }
 };
 
@@ -131,8 +123,7 @@ class LRNMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
     auto out = ctx.Output<Tensor>("Out");
     auto mid = ctx.Output<Tensor>("MidOut");
 
-    LRNMKLDNNHandler<T> handler(ctx, dev_ctx, mkldnn_engine, ctx.GetPlace(), x,
-                                ctx.OutputName("Out"));
+    LRNMKLDNNHandler<T> handler(ctx, mkldnn_engine, ctx.GetPlace(), x);
 
     auto src_memory = handler.AcquireSrcMemory(x);
     auto dst_memory = handler.AcquireDstMemory(out);
@@ -178,9 +169,10 @@ class LRNMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
     auto in_x_grad = ctx.Output<Tensor>(framework::GradVarName("X"));
 
     auto& dev_ctx = ctx.template device_context<MKLDNNDeviceContext>();
+    const auto& mkldnn_engine = dev_ctx.GetEngine();
 
-    LRNMKLDNNHandler<T> handler(ctx, dev_ctx, ctx.GetPlace(), in_x, out_grad,
-                                in_x_grad, ctx.InputName("Out"));
+    LRNMKLDNNHandler<T> handler(ctx, mkldnn_engine, ctx.GetPlace(), in_x,
+                                out_grad, in_x_grad);
 
     auto src_memory = handler.AcquireSrcMemory(in_x);
     auto workspace = handler.AcquireBackwardWorkspaceMemory(mid);
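The LRN kernels follow the same recipe (editorial note, not part of the diff): the grad kernel now fetches the engine from the device context and passes it straight to the handler, and the backward constructor builds the forward and backward primitive descriptors unconditionally instead of checking isBwdCached() against a key derived from ctx.InputName("Out"). Condensed from the hunks above:

auto& dev_ctx = ctx.template device_context<MKLDNNDeviceContext>();
const auto& mkldnn_engine = dev_ctx.GetEngine();

// No dev_ctx and no name-based cache key in the handler arguments anymore.
LRNMKLDNNHandler<T> handler(ctx, mkldnn_engine, ctx.GetPlace(), in_x,
                            out_grad, in_x_grad);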
