From b9c4eb9fa81f4bb3801c9b118d146979aacdc070 Mon Sep 17 00:00:00 2001 From: Jacek Czaja Date: Mon, 9 Aug 2021 16:32:34 +0200 Subject: [PATCH 1/3] - disabled caching of layer norm - fix in compilation - compilation fix - transpose caching disabled - compilation fix - more compilation fixes - sum caching disabled - compilation fix --- .../operators/mkldnn/layer_norm_mkldnn_op.cc | 88 ++++++++---------- .../fluid/operators/mkldnn/sum_mkldnn_op.cc | 75 ++++++--------- .../operators/mkldnn/transpose_mkldnn_op.cc | 76 +++++++++++++-- paddle/fluid/platform/mkldnn_reuse.h | 92 ------------------- 4 files changed, 133 insertions(+), 198 deletions(-) diff --git a/paddle/fluid/operators/mkldnn/layer_norm_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/layer_norm_mkldnn_op.cc index cc4bfbae2665fe..e84266caa227c9 100644 --- a/paddle/fluid/operators/mkldnn/layer_norm_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/layer_norm_mkldnn_op.cc @@ -19,45 +19,36 @@ namespace paddle { namespace operators { template -class LayerNormMKLDNNHandler - : public platform::MKLDNNHandlerT { +class LayerNormMKLDNNHandler : public platform::MKLDNNHandlerNoCachingT< + T, dnnl::layer_normalization_forward> { public: LayerNormMKLDNNHandler(const std::vector& dims, const float& epsilon, const dnnl::normalization_flags& flags, const bool& is_test, const MKLDNNMemoryFormat fmt, - const platform::MKLDNNDeviceContext& dev_ctx, - platform::Place cpu_place, - const std::string& uniq_name) - : platform::MKLDNNHandlerT( - dev_ctx, dev_ctx.GetEngine(), cpu_place, - platform::CreateKey(dev_ctx, dims, uniq_name)) { - if (!this->isCached()) { - auto md = dnnl::memory::desc(dims, platform::MKLDNNGetDataType(), fmt); - if (!is_test) { - // TODO(grygielski) Delete forcing stats_md after DNNL 1.2 is introduced - auto stats_md = dnnl::memory::desc( - {begin(dims), end(dims) - 1}, platform::MKLDNNGetDataType(), - platform::MKLDNNFormatForSize(dims.size() - 1, - MKLDNNMemoryFormat::nchw)); - this->AcquireForwardPrimitiveDescriptor( - dnnl::prop_kind::forward_training, md, stats_md, epsilon, flags); - } else { - this->AcquireForwardPrimitiveDescriptor( - dnnl::prop_kind::forward_inference, md, epsilon, flags); - } + const mkldnn::engine engine, platform::Place cpu_place) + : platform::MKLDNNHandlerNoCachingT( + engine, cpu_place) { + auto md = dnnl::memory::desc(dims, platform::MKLDNNGetDataType(), fmt); + if (!is_test) { + // TODO(grygielski) Delete forcing stats_md after DNNL 1.2 is introduced + auto stats_md = dnnl::memory::desc( + {begin(dims), end(dims) - 1}, platform::MKLDNNGetDataType(), + platform::MKLDNNFormatForSize(dims.size() - 1, + MKLDNNMemoryFormat::nchw)); + this->AcquireForwardPrimitiveDescriptor(dnnl::prop_kind::forward_training, + md, stats_md, epsilon, flags); + } else { + this->AcquireForwardPrimitiveDescriptor( + dnnl::prop_kind::forward_inference, md, epsilon, flags); } } - std::shared_ptr AcquireScaleShiftMemory() { - return this->AcquireMemoryFromPrimitive("@scaleshift_mem_p"); - } - std::shared_ptr AcquireScaleShiftMemory( std::vector& scaleshift_data) { // scaleshift_data comes from temporary buffer so we need to copy it into // created memory primitivie - auto scaleshift_mem = this->AcquireMemoryFromPrimitive( - this->fwd_pd_->weights_desc(), "@scaleshift_mem_p"); + auto scaleshift_mem = + this->AcquireMemoryFromPrimitive(this->fwd_pd_->weights_desc()); auto data_ptr = scaleshift_mem->get_data_handle(); std::size_t num_bytes = scaleshift_data.size() * sizeof(float); std::memcpy(data_ptr, scaleshift_data.data(), num_bytes); @@ -68,7 +59,7 @@ class LayerNormMKLDNNHandler T* mean_data = mean->mutable_data(this->place_, this->fwd_pd_->mean_desc().get_size()); return this->AcquireMemoryFromPrimitive(this->fwd_pd_->mean_desc(), - mean_data, "@mean_mem_p"); + mean_data); } std::shared_ptr AcquireVarianceMemory( @@ -76,7 +67,7 @@ class LayerNormMKLDNNHandler T* variance_data = variance->mutable_data( this->place_, this->fwd_pd_->variance_desc().get_size()); return this->AcquireMemoryFromPrimitive(this->fwd_pd_->variance_desc(), - variance_data, "@variance_mem_p"); + variance_data); } }; @@ -95,6 +86,7 @@ class LayerNormMKLDNNOpKernel : public paddle::framework::OpKernel { auto& dev_ctx = ctx.template device_context(); + const auto& mkldnn_engine = dev_ctx.GetEngine(); auto src_tz = paddle::framework::vectorize(x->dims()); PADDLE_ENFORCE_EQ(begin_norm_axis, (src_tz.size() - 1), @@ -112,8 +104,8 @@ class LayerNormMKLDNNOpKernel : public paddle::framework::OpKernel { } LayerNormMKLDNNHandler handler(src_tz, epsilon, flags, is_test, - x->format(), dev_ctx, ctx.GetPlace(), - ctx.OutputName("Y")); + x->format(), mkldnn_engine, + ctx.GetPlace()); auto src_memory = handler.AcquireSrcMemory(x); auto dst_memory = handler.AcquireDstMemory(y); @@ -139,24 +131,22 @@ class LayerNormMKLDNNOpKernel : public paddle::framework::OpKernel { args.insert({DNNL_ARG_VARIANCE, *variance_memory}); } - auto scaleshift_memory = handler.AcquireScaleShiftMemory(); + std::shared_ptr scaleshift_memory; if (with_scaleshift) { - if (scaleshift_memory == nullptr || !is_test) { - auto scale_tz = paddle::framework::vectorize(scale->dims()); - const unsigned int C = scale_tz[0]; - - // MKLDNN requires a single piece of memory for scale and shift/bias - // data - std::vector scaleshift_data; - scaleshift_data.reserve(2 * C); - scaleshift_data.insert(scaleshift_data.begin(), scale->data(), - scale->data() + C); - - scaleshift_data.insert(scaleshift_data.end(), bias->data(), - bias->data() + C); - - scaleshift_memory = handler.AcquireScaleShiftMemory(scaleshift_data); - } + auto scale_tz = paddle::framework::vectorize(scale->dims()); + const unsigned int C = scale_tz[0]; + + // MKLDNN requires a single piece of memory for scale and shift/bias + // data + std::vector scaleshift_data; + scaleshift_data.reserve(2 * C); + scaleshift_data.insert(scaleshift_data.begin(), scale->data(), + scale->data() + C); + + scaleshift_data.insert(scaleshift_data.end(), bias->data(), + bias->data() + C); + + scaleshift_memory = handler.AcquireScaleShiftMemory(scaleshift_data); args.insert({DNNL_ARG_SCALE_SHIFT, *scaleshift_memory}); } diff --git a/paddle/fluid/operators/mkldnn/sum_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/sum_mkldnn_op.cc index 1813aabf1d8548..4cc9f53b9b6b22 100644 --- a/paddle/fluid/operators/mkldnn/sum_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/sum_mkldnn_op.cc @@ -45,44 +45,35 @@ using paddle::platform::MKLDNNDeviceContext; using platform::to_void_cast; template -class SumMKLDNNHandler : public platform::MKLDNNHandlerT { +class SumMKLDNNHandler + : public platform::MKLDNNHandlerNoCachingT { public: - SumMKLDNNHandler(const MKLDNNDeviceContext& dev_ctx, - platform::Place cpu_place, + SumMKLDNNHandler(mkldnn::engine engine, platform::Place cpu_place, const std::vector& in_vars, - framework::LoDTensor* z, const std::string& uniq_name) + framework::LoDTensor* z) - : platform::MKLDNNHandlerT( - dev_ctx, dev_ctx.GetEngine(), cpu_place, - platform::CreateKey(dev_ctx, framework::vectorize(z->dims()), - uniq_name)), + : platform::MKLDNNHandlerNoCachingT(engine, cpu_place), num_inputs_(0) { - for (size_t i = 0; i < in_vars.size(); i++) { - srcs_suffix_.push_back(std::string("-") + std::to_string(i)); - } + auto dst_tz = framework::vectorize(z->dims()); + auto src_tz = dst_tz; - if (!this->isCached()) { - auto dst_tz = framework::vectorize(z->dims()); - auto src_tz = dst_tz; - - std::vector srcs_md; - for (size_t i = 0; i < in_vars.size(); i++) { - auto& input_it = in_vars[i]->Get(); - if (input_it.numel() == 0) { - continue; - } - MKLDNNMemoryFormat input_format = input_it.format(); - srcs_md.push_back(mkldnn::memory::desc( - src_tz, platform::MKLDNNGetDataType(), input_format)); - ++num_inputs_; + std::vector srcs_md; + for (size_t i = 0; i < in_vars.size(); i++) { + auto& input_it = in_vars[i]->Get(); + if (input_it.numel() == 0) { + continue; } - std::vector scales(num_inputs_, 1.0); + MKLDNNMemoryFormat input_format = input_it.format(); + srcs_md.push_back(mkldnn::memory::desc( + src_tz, platform::MKLDNNGetDataType(), input_format)); + ++num_inputs_; + } + std::vector scales(num_inputs_, 1.0); - auto dst_md = mkldnn::memory::desc( - dst_tz, platform::MKLDNNGetDataType(), MKLDNNMemoryFormat::any); + auto dst_md = mkldnn::memory::desc(dst_tz, platform::MKLDNNGetDataType(), + MKLDNNMemoryFormat::any); - this->AcquireForwardPrimitiveDescriptor(dst_md, scales, srcs_md); - } + this->AcquireForwardPrimitiveDescriptor(dst_md, scales, srcs_md); } // (jczaja) sum oneDNN prim is not having .desc attribute so @@ -90,37 +81,27 @@ class SumMKLDNNHandler : public platform::MKLDNNHandlerT { void AcquireForwardPrimitiveDescriptor( const mkldnn::memory::desc& dst_md, const std::vector& scales, const std::vector& srcs_md) { - // Sum op does not have backward so no passing from FWD to BWD is needed - const std::string key_pd = this->key_ + "@fwd_pd"; - this->fwd_pd_ = std::static_pointer_cast( - this->dev_ctx_.GetBlob(key_pd)); - if (this->fwd_pd_ == nullptr) { - this->fwd_pd_.reset(new dnnl::sum::primitive_desc(dst_md, scales, srcs_md, - this->engine_)); - this->dev_ctx_.SetBlob(key_pd, this->fwd_pd_); - } + this->fwd_pd_.reset( + new dnnl::sum::primitive_desc(dst_md, scales, srcs_md, this->engine_)); } std::shared_ptr AcquireSrcMemory( const framework::Tensor& input, int i) { const T* input_data = input.data(); return this->AcquireMemoryFromPrimitive(this->fwd_pd_->src_desc(i), - to_void_cast(input_data), - "@src_mem_p" + srcs_suffix_[i]); + to_void_cast(input_data)); } - using platform::MKLDNNHandlerT::AcquireDstMemory; + using platform::MKLDNNHandlerNoCachingT::AcquireDstMemory; std::shared_ptr AcquireDstMemory(void) { - return this->AcquireMemoryFromPrimitive(this->fwd_pd_->dst_desc(), - "@dst_mem_p"); + return this->AcquireMemoryFromPrimitive(this->fwd_pd_->dst_desc()); } inline int GetNumInputs(void) { return num_inputs_; } private: int num_inputs_; - std::vector srcs_suffix_; }; template @@ -131,6 +112,7 @@ class SumMKLDNNOpKernel : public paddle::framework::OpKernel { paddle::platform::errors::PreconditionNotMet( "Operator DNNL Sum must use CPUPlace")); auto& dev_ctx = ctx.template device_context(); + const auto& mkldnn_engine = dev_ctx.GetEngine(); auto in_vars = ctx.MultiInputVar("X"); PADDLE_ENFORCE_NE(in_vars.empty(), true, platform::errors::InvalidArgument( @@ -140,8 +122,7 @@ class SumMKLDNNOpKernel : public paddle::framework::OpKernel { bool in_place = (input0.numel() > 0) && input0.IsSharedBufferWith(*output); - SumMKLDNNHandler handler(dev_ctx, ctx.GetPlace(), in_vars, output, - ctx.OutputName("Out")); + SumMKLDNNHandler handler(mkldnn_engine, ctx.GetPlace(), in_vars, output); // Create list of SRC MEMs std::vector> srcs_mem; diff --git a/paddle/fluid/operators/mkldnn/transpose_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/transpose_mkldnn_op.cc index 4c46a92700996a..a92e8e6cb047f9 100644 --- a/paddle/fluid/operators/mkldnn/transpose_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/transpose_mkldnn_op.cc @@ -24,6 +24,70 @@ namespace operators { using Tensor = framework::Tensor; using framework::DataLayout; +template +class TransposeMKLDNNHandler { + public: + TransposeMKLDNNHandler(std::vector& dims, // NOLINT + std::vector& axis, // NOLINT + mkldnn::engine engine) + : dims_(dims), + axis_(axis), + logical_axis_(dims.size(), 0), + engine_(engine) {} + + std::shared_ptr AcquireSrcMemory( + const MKLDNNMemoryFormat& fmt, void* ptr) { + // Make memory descriptor using input format, unless it + // cannot be trusted (nchw) then make up memory fmt manually + for (size_t i = 0; i < this->logical_axis_.size(); ++i) { + this->logical_axis_[i] = i; + } + + auto src_md = fmt != MKLDNNMemoryFormat::nchw + ? platform::MKLDNNMemDesc( + dims_, platform::MKLDNNGetDataType(), fmt) + : Axis2MemoryDesc(dims_, logical_axis_); + return std::make_shared(src_md, engine_, ptr); + } + + std::shared_ptr AcquireDstMemory(framework::Tensor* output, + platform::Place place) { + auto dst_md = Axis2MemoryDesc(dims_, axis_); + auto dst_data = output->mutable_data(place, dst_md.get_size()); + return std::make_shared(dst_md, engine_, dst_data); + } + + std::shared_ptr AcquireTranspose( + std::shared_ptr dst_memory_p, + std::shared_ptr src_memory_p) { + return std::make_shared(*(src_memory_p), *(dst_memory_p)); + } + + protected: + mkldnn::memory::desc Axis2MemoryDesc(std::vector& nchw_tz, // NOLINT + std::vector& axis // NOLINT + ) { + size_t ndims = axis.size(); + + std::vector strides(ndims); + unsigned int total_stride = 1; + for (int i = ndims - 1; i >= 0; --i) { + strides[axis[i]] = total_stride; + total_stride *= nchw_tz[axis[i]]; + } + mkldnn::memory::desc mem_d(nchw_tz, platform::MKLDNNGetDataType(), + strides); + + return mem_d; + } + + private: + std::vector dims_; + std::vector axis_; + std::vector logical_axis_; + mkldnn::engine engine_; +}; + template class TransposeMKLDNNOpKernel : public paddle::framework::OpKernel { public: @@ -48,11 +112,7 @@ class TransposeMKLDNNOpKernel : public paddle::framework::OpKernel { auto nchw_tz = paddle::framework::vectorize(input->dims()); - const std::string key = - platform::CreateKey(dev_ctx, nchw_tz, ctx.OutputName("Out")); - - platform::TransposeMKLDNNHandler handler(nchw_tz, axis, dev_ctx, - mkldnn_engine, key); + TransposeMKLDNNHandler handler(nchw_tz, axis, mkldnn_engine); auto transpose_src_memory_p = handler.AcquireSrcMemory( input->format(), platform::to_void_cast(input_data)); @@ -103,11 +163,7 @@ class TransposeMKLDNNGradOpKernel : public paddle::framework::OpKernel { auto nchw_tz = paddle::framework::vectorize(out_grad->dims()); - const std::string key = platform::CreateKey( - dev_ctx, nchw_tz, ctx.OutputName(framework::GradVarName("X"))); - - platform::TransposeMKLDNNHandler handler(nchw_tz, reversed_axis, dev_ctx, - mkldnn_engine, key); + TransposeMKLDNNHandler handler(nchw_tz, reversed_axis, mkldnn_engine); auto transpose_src_memory_p = handler.AcquireSrcMemory( out_grad->format(), platform::to_void_cast(out_grad_data)); diff --git a/paddle/fluid/platform/mkldnn_reuse.h b/paddle/fluid/platform/mkldnn_reuse.h index 95b8e0c610b1d4..d4a504c131b684 100644 --- a/paddle/fluid/platform/mkldnn_reuse.h +++ b/paddle/fluid/platform/mkldnn_reuse.h @@ -1072,98 +1072,6 @@ class ActivationMKLDNNHandler } }; -template -class TransposeMKLDNNHandler : public MKLDNNHandler { - public: - TransposeMKLDNNHandler(std::vector& dims, // NOLINT - std::vector& axis, // NOLINT - const platform::MKLDNNDeviceContext& dev_ctx, - mkldnn::engine engine, const std::string& base_key) - : platform::MKLDNNHandler(dev_ctx, engine, base_key), - dims_(dims), - axis_(axis), - logical_axis_(dims.size(), 0) {} - - std::shared_ptr AcquireSrcMemory( - const MKLDNNMemoryFormat& fmt, void* ptr) { - auto local_key = key_ + "@user_src_mem_p"; - auto mem_p = - std::static_pointer_cast(dev_ctx_.GetBlob(local_key)); - if (mem_p == nullptr) { - // Make memory descriptor using input format, unless it - // cannot be trusted (nchw) then make up memory fmt manually - for (size_t i = 0; i < logical_axis_.size(); ++i) { - logical_axis_[i] = i; - } - - auto src_md = fmt != MKLDNNMemoryFormat::nchw - ? platform::MKLDNNMemDesc( - dims_, platform::MKLDNNGetDataType(), fmt) - : Axis2MemoryDesc(dims_, logical_axis_); - mem_p = std::make_shared(src_md, engine_, ptr); - dev_ctx_.SetBlob(local_key, mem_p); - } else { - mem_p->set_data_handle(ptr); - } - return mem_p; - } - - std::shared_ptr AcquireDstMemory(framework::Tensor* output, - platform::Place place) { - auto local_key = key_ + "@user_dst_mem_p"; - auto mem_p = - std::static_pointer_cast(dev_ctx_.GetBlob(local_key)); - if (mem_p == nullptr) { - auto dst_md = Axis2MemoryDesc(dims_, axis_); - - auto dst_data = output->mutable_data(place, dst_md.get_size()); - - mem_p = std::make_shared(dst_md, engine_, dst_data); - dev_ctx_.SetBlob(local_key, mem_p); - } else { - auto dst_data = output->mutable_data(place); - mem_p->set_data_handle(dst_data); - } - return mem_p; - } - - std::shared_ptr AcquireTranspose( - std::shared_ptr dst_memory_p, - std::shared_ptr src_memory_p) { - auto prim_key = key_ + "@transpose_p"; - auto transpose_p = - std::static_pointer_cast(dev_ctx_.GetBlob(prim_key)); - if (transpose_p == nullptr) { - transpose_p = - std::make_shared(*(src_memory_p), *(dst_memory_p)); - dev_ctx_.SetBlob(prim_key, transpose_p); - } - return transpose_p; - } - - protected: - mkldnn::memory::desc Axis2MemoryDesc(std::vector& nchw_tz, // NOLINT - std::vector& axis // NOLINT - ) { - size_t ndims = axis.size(); - - std::vector strides(ndims); - unsigned int total_stride = 1; - for (int i = ndims - 1; i >= 0; --i) { - strides[axis[i]] = total_stride; - total_stride *= nchw_tz[axis[i]]; - } - mkldnn::memory::desc mem_d(nchw_tz, platform::MKLDNNGetDataType(), - strides); - - return mem_d; - } - - private: - std::vector dims_; - std::vector axis_; - std::vector logical_axis_; -}; class ReorderMKLDNNHandler : public MKLDNNHandler { public: From f53bf6e3f2229e7c36af6f73e404e08c8e200bff Mon Sep 17 00:00:00 2001 From: Jacek Czaja Date: Wed, 11 Aug 2021 17:21:29 +0200 Subject: [PATCH 2/3] - LRN with disabled cache --- .../fluid/operators/mkldnn/lrn_mkldnn_op.cc | 136 +++++++++--------- 1 file changed, 64 insertions(+), 72 deletions(-) diff --git a/paddle/fluid/operators/mkldnn/lrn_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/lrn_mkldnn_op.cc index 5b563e666af0aa..8a89499e4b5744 100644 --- a/paddle/fluid/operators/mkldnn/lrn_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/lrn_mkldnn_op.cc @@ -21,86 +21,78 @@ using paddle::framework::Tensor; using paddle::platform::MKLDNNDeviceContext; template -class LRNMKLDNNHandler : public platform::MKLDNNHandlerT { +class LRNMKLDNNHandler + : public platform::MKLDNNHandlerNoCachingT { public: LRNMKLDNNHandler(const framework::ExecutionContext& ctx, - const MKLDNNDeviceContext& dev_ctx, const mkldnn::engine mkldnn_engine, - platform::Place cpu_place, const Tensor* input, - const std::string& unique_name) - - : platform::MKLDNNHandlerT( - dev_ctx, mkldnn_engine, cpu_place, - platform::CreateKey(dev_ctx, framework::vectorize(input->dims()), - unique_name)) { - if (!this->isCached()) { - const int n = ctx.Attr("n"); - // MKL-DNN implements LRN in a caffe way: - // http://caffe.berkeleyvision.org/tutorial/layers/lrn.html - // Where sum of squares is divided by size of normalization window - // this is not the case for PaddlePaddle LRN. - // Hence we need to compensate for this diffrence by - // multipliing alpha by size of window(n) - const float alpha = ctx.Attr("alpha") * static_cast(n); - const float beta = ctx.Attr("beta"); - const float k = ctx.Attr("k"); - bool is_test = ctx.Attr("is_test"); - - auto dims = framework::vectorize(input->dims()); - - auto src_md = mkldnn::memory::desc(dims, platform::MKLDNNGetDataType(), - input->format()); - - this->AcquireForwardPrimitiveDescriptor( - is_test ? mkldnn::prop_kind::forward_inference - : mkldnn::prop_kind::forward_training, - mkldnn::algorithm::lrn_across_channels, src_md, n, alpha, beta, k); - } + platform::Place cpu_place, const Tensor* input) + + : platform::MKLDNNHandlerNoCachingT(mkldnn_engine, + cpu_place) { + const int n = ctx.Attr("n"); + // MKL-DNN implements LRN in a caffe way: + // http://caffe.berkeleyvision.org/tutorial/layers/lrn.html + // Where sum of squares is divided by size of normalization window + // this is not the case for PaddlePaddle LRN. + // Hence we need to compensate for this diffrence by + // multipliing alpha by size of window(n) + const float alpha = ctx.Attr("alpha") * static_cast(n); + const float beta = ctx.Attr("beta"); + const float k = ctx.Attr("k"); + bool is_test = ctx.Attr("is_test"); + + auto dims = framework::vectorize(input->dims()); + + auto src_md = mkldnn::memory::desc(dims, platform::MKLDNNGetDataType(), + input->format()); + + this->AcquireForwardPrimitiveDescriptor( + is_test ? mkldnn::prop_kind::forward_inference + : mkldnn::prop_kind::forward_training, + mkldnn::algorithm::lrn_across_channels, src_md, n, alpha, beta, k); } LRNMKLDNNHandler(const framework::ExecutionContext& ctx, - const MKLDNNDeviceContext& dev_ctx, + const mkldnn::engine mkldnn_engine, platform::Place cpu_place, const Tensor* in_x, - const Tensor* out_grad, Tensor* in_x_grad, - const std::string& unique_name) - : platform::MKLDNNHandlerT( - dev_ctx, dev_ctx.GetEngine(), cpu_place, - platform::CreateKey(dev_ctx, framework::vectorize(in_x->dims()), - unique_name)) { - if (!this->isBwdCached()) { - PADDLE_ENFORCE_EQ( - ctx.Attr("is_test"), false, - platform::errors::PreconditionNotMet( - "is_test attribute should be set to False in training phase.")); - - const int n = ctx.Attr("n"); - const float alpha = ctx.Attr("alpha") * static_cast(n); - const float beta = ctx.Attr("beta"); - const float k = ctx.Attr("k"); - - auto dims = framework::vectorize(in_x->dims()); - - auto src_md = mkldnn::memory::desc(dims, platform::MKLDNNGetDataType(), - in_x->format()); - auto diff_md = mkldnn::memory::desc( - dims, platform::MKLDNNGetDataType(), out_grad->format()); - - this->AcquireForwardPrimitiveDescriptor( - mkldnn::prop_kind::forward_training, - mkldnn::algorithm::lrn_across_channels, src_md, n, alpha, beta, k); - - this->AcquireBackwardPrimitiveDescriptor( - mkldnn::algorithm::lrn_across_channels, src_md, diff_md, n, alpha, - beta, k); - } + const Tensor* out_grad, Tensor* in_x_grad) + : platform::MKLDNNHandlerNoCachingT(mkldnn_engine, + cpu_place) { + PADDLE_ENFORCE_EQ( + ctx.Attr("is_test"), false, + platform::errors::PreconditionNotMet( + "is_test attribute should be set to False in training phase.")); + + const int n = ctx.Attr("n"); + const float alpha = ctx.Attr("alpha") * static_cast(n); + const float beta = ctx.Attr("beta"); + const float k = ctx.Attr("k"); + + auto dims = framework::vectorize(in_x->dims()); + + auto src_md = mkldnn::memory::desc(dims, platform::MKLDNNGetDataType(), + in_x->format()); + auto diff_md = mkldnn::memory::desc(dims, platform::MKLDNNGetDataType(), + out_grad->format()); + + this->AcquireForwardPrimitiveDescriptor( + mkldnn::prop_kind::forward_training, + mkldnn::algorithm::lrn_across_channels, src_md, n, alpha, beta, k); + + this->AcquireBackwardPrimitiveDescriptor( + mkldnn::algorithm::lrn_across_channels, src_md, diff_md, n, alpha, beta, + k); } std::shared_ptr AcquireWorkspaceMemory(Tensor* workspace) { T* ptr = workspace->mutable_data( this->place_, this->fwd_pd_->workspace_desc().get_size()); return this->AcquireMemoryFromPrimitive(this->fwd_pd_->workspace_desc(), - ptr, "@wrk_mem_p"); + ptr); } std::shared_ptr AcquireBackwardWorkspaceMemory( @@ -108,7 +100,7 @@ class LRNMKLDNNHandler : public platform::MKLDNNHandlerTdata(); return this->AcquireMemoryFromPrimitive( this->fwd_pd_->workspace_desc(), - platform::to_void_cast(workspace_data), "@bwd-wrk_mem_p"); + platform::to_void_cast(workspace_data)); } }; @@ -131,8 +123,7 @@ class LRNMKLDNNOpKernel : public paddle::framework::OpKernel { auto out = ctx.Output("Out"); auto mid = ctx.Output("MidOut"); - LRNMKLDNNHandler handler(ctx, dev_ctx, mkldnn_engine, ctx.GetPlace(), x, - ctx.OutputName("Out")); + LRNMKLDNNHandler handler(ctx, mkldnn_engine, ctx.GetPlace(), x); auto src_memory = handler.AcquireSrcMemory(x); auto dst_memory = handler.AcquireDstMemory(out); @@ -178,9 +169,10 @@ class LRNMKLDNNGradOpKernel : public paddle::framework::OpKernel { auto in_x_grad = ctx.Output(framework::GradVarName("X")); auto& dev_ctx = ctx.template device_context(); + const auto& mkldnn_engine = dev_ctx.GetEngine(); - LRNMKLDNNHandler handler(ctx, dev_ctx, ctx.GetPlace(), in_x, out_grad, - in_x_grad, ctx.InputName("Out")); + LRNMKLDNNHandler handler(ctx, mkldnn_engine, ctx.GetPlace(), in_x, + out_grad, in_x_grad); auto src_memory = handler.AcquireSrcMemory(in_x); auto workspace = handler.AcquireBackwardWorkspaceMemory(mid); From c62e05f5751b1429424961852fab2dca682e871b Mon Sep 17 00:00:00 2001 From: Jacek Czaja Date: Mon, 16 Aug 2021 10:40:18 +0200 Subject: [PATCH 3/3] lint fixes --- paddle/fluid/platform/mkldnn_reuse.h | 1 - 1 file changed, 1 deletion(-) diff --git a/paddle/fluid/platform/mkldnn_reuse.h b/paddle/fluid/platform/mkldnn_reuse.h index d4a504c131b684..0b7e96a25477b9 100644 --- a/paddle/fluid/platform/mkldnn_reuse.h +++ b/paddle/fluid/platform/mkldnn_reuse.h @@ -1072,7 +1072,6 @@ class ActivationMKLDNNHandler } }; - class ReorderMKLDNNHandler : public MKLDNNHandler { public: ReorderMKLDNNHandler(std::vector& dims, // NOLINT