From e9ccd4daaf1a6433b98e88e3825465c39f29720c Mon Sep 17 00:00:00 2001 From: Jacek Czaja Date: Tue, 17 Aug 2021 14:53:42 +0200 Subject: [PATCH 01/18] - concat refactoring draft --- .../operators/mkldnn/concat_mkldnn_op.cc | 223 +++++------------- 1 file changed, 64 insertions(+), 159 deletions(-) diff --git a/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc index 8901c0afb369ad..d381b626cb6ec7 100644 --- a/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc @@ -29,6 +29,62 @@ using mkldnn::concat; using mkldnn::stream; using platform::to_void_cast; +template +class ConcatMKLDNNHandler + : public platform::MKLDNNHandlerNoCachingT { + public: + ConcatMKLDNNHandler(const paddle::framework::ExecutionContext& ctx, + const mkldnn::engine mkldnn_engine, + const std::vector inputs, Tensor* output) + : platform::MKLDNNHandlerNoCachingT(mkldnn_engine, + ctx.GetPlace()) { + int concat_axis = ctx.Attr("axis"); + const int rank = multi_input[0]->dims().size(); + PADDLE_ENFORCE_EQ( + concat_axis >= -rank && concat_axis < rank, true, + platform::errors::InvalidArgument( + "The axis is expected to be in range of [%d, %d), but got %d", + -rank, rank, concat_axis)); + + if (ctx.HasInput("AxisTensor")) { + auto* axis_tensor = ctx.Input("AxisTensor"); + concat_axis = GetDataFromTensor(axis_tensor)[0]; + auto out_dims = multi_input[0]->dims(); + for (size_t i = 1; i < multi_input.size(); ++i) { + out_dims[concat_axis] += multi_input[i]->dims()[concat_axis]; + } + output->Resize(out_dims); + } + + if (concat_axis < 0) { + concat_axis = concat_axis + rank; + } + + memory::data_type dt = + paddle::framework::ToMKLDNNDataType(multi_input[0]->type()); + std::vector srcs_md(multi_input.size()); + + // Create memory descriptors for each of inputs + const auto dims = + paddle::framework::vectorize(multi_input[0].dims()); + for (size_t i = 0; i < multi_input.size(); i++) { + srcs_md.emplace_back(memory::desc(dims, dt, multi_input[i].format())); + } + + auto dst_dims = paddle::framework::vectorize(output->dims()); + auto dst_md = memory::desc(dst_dims, dt, MKLDNNMemoryFormat::any); + + this->AcquireForwardPrimitiveDescriptor(dst_md, concat_axis, srcs_md); + } + + std::shared_ptr AcquireSrcMemory( + const framework::Tensor& input, int i) { + const T* input_data = input.data(); + return this->AcquireMemoryFromPrimitive(this->fwd_pd_->src_desc(i), + to_void_cast(input_data)); + } +}; + static void EnforceLayouts(const std::vector inputs) { for (auto* input : inputs) { PADDLE_ENFORCE_EQ( @@ -40,28 +96,6 @@ static void EnforceLayouts(const std::vector inputs) { } } -static memory::desc CreateMemDesc(const Tensor& input, - const memory::data_type& dt) { - const auto dims = paddle::framework::vectorize(input.dims()); - const auto format = input.format(); - auto mem_desc = memory::desc(dims, dt, format); - return mem_desc; -} - -static platform::CPUPlace GetCpuPlace( - const paddle::framework::ExecutionContext& ctx) { - auto place = ctx.GetPlace(); - PADDLE_ENFORCE(paddle::platform::is_cpu_place(place), - platform::errors::InvalidArgument("It must use CPUPlace.")); - return BOOST_GET_CONST(platform::CPUPlace, place); -} - -static const mkldnn::engine& GetMKLDNNEngine( - const paddle::framework::ExecutionContext& ctx) { - auto& dev_ctx = ctx.template device_context(); - return dev_ctx.GetEngine(); -} - // From a multi-input, gather only nonempty inputs static const std::vector ReduceMultiInput( const std::vector& inputs) { @@ -72,159 +106,30 @@ static const std::vector ReduceMultiInput( return reduced; } -static const std::vector GetDimsForKey( - const std::vector& inputs) { - auto dims_key = paddle::framework::vectorize(inputs[0]->dims()); - for (auto it = std::next(inputs.begin()); it != inputs.end(); ++it) { - dims_key.push_back((*it)->dims()[0]); - } - return dims_key; -} - -template -class ConcatPrimitiveFactory { - public: - concat::primitive_desc CreateConcatPrimDescriptor( - const std::vector multi_input, Tensor* output, - int concat_axis, const mkldnn::engine& mkldnn_engine, - const memory::data_type& dt = memory::data_type::f32) { - CreateSourcesDescriptors(multi_input, mkldnn_engine, dt); - auto dst_desc = CreateDstMemDescriptor(output, dt); - return concat::primitive_desc(dst_desc, concat_axis, srcs_d, mkldnn_engine); - } - - concat CreateConcatPrimitive(const concat::primitive_desc& concat_pd, - Tensor* output, platform::CPUPlace place, - const mkldnn::engine& mkldnn_engine) { - dst_mem = mkldnn::memory( - concat_pd.dst_desc(), mkldnn_engine, - output->mutable_data(place, concat_pd.dst_desc().get_size())); - - return concat(concat_pd); - } - - void SetSrcDataHandleByIndex(const std::vector& srcs, const size_t& i, - void* handler) { - srcs[i].set_data_handle(handler); - } - - void SetDstDataHandle(const memory& dst_mem, void* handler) { - dst_mem.set_data_handle(handler); - } - - std::vector GetSrcs() { return srcs; } - - memory GetDst() { return dst_mem.get(); } - - private: - memory::desc CreateDstMemDescriptor(Tensor* output, - const memory::data_type& dt) { - auto dst_dims = paddle::framework::vectorize(output->dims()); - return memory::desc(dst_dims, dt, MKLDNNMemoryFormat::any); - } - - void CreateSourcesDescriptors(const std::vector multi_input, - const mkldnn::engine& mkldnn_engine, - const memory::data_type& dt) { - for (size_t i = 0; i < multi_input.size(); i++) { - auto mem_desc = CreateMemDesc(*multi_input[i], dt); - srcs_d.push_back(mem_desc); - srcs.push_back(memory(mem_desc, mkldnn_engine, - to_void_cast(multi_input[i]->data()))); - } - } - - private: - std::vector srcs_d; - std::vector srcs; - paddle::optional dst_mem; -}; - template class ConcatMKLDNNOpKernel : public paddle::framework::OpKernel { public: void Compute(const paddle::framework::ExecutionContext& ctx) const override { + auto& dev_ctx = + ctx.template device_context(); + const auto& mkldnn_engine = dev_ctx.GetEngine(); // If any of the multiple inputs of concat has an input size of 0, the // actual size of the multi_input will change auto multi_input = ReduceMultiInput(ctx.MultiInput("X")); EnforceLayouts(multi_input); Tensor* output = ctx.Output("Out"); - int concat_axis = ctx.Attr("axis"); - const int rank = multi_input[0]->dims().size(); - PADDLE_ENFORCE_EQ( - concat_axis >= -rank && concat_axis < rank, true, - platform::errors::InvalidArgument( - "The axis is expected to be in range of [%d, %d), but got %d", - -rank, rank, concat_axis)); - platform::MKLDNNDeviceContext::tls().log_lib_version(); - if (ctx.HasInput("AxisTensor")) { - auto* axis_tensor = ctx.Input("AxisTensor"); - concat_axis = GetDataFromTensor(axis_tensor)[0]; - auto out_dims = multi_input[0]->dims(); - for (size_t i = 1; i < multi_input.size(); ++i) { - out_dims[concat_axis] += multi_input[i]->dims()[concat_axis]; - } - output->Resize(out_dims); - } + ConcatMKLDNNHandler handler(ctx, mkldnn_engine, multi_input, output); - if (concat_axis < 0) { - concat_axis = concat_axis + rank; - } - auto& dev_ctx = - ctx.template device_context(); - auto place = GetCpuPlace(ctx); - - memory::data_type dt = - paddle::framework::ToMKLDNNDataType(multi_input[0]->type()); + std::shared_ptr> srcs(multi_input.size()); - ConcatPrimitiveFactory prim_creator; - std::string key = - platform::CreateKey(dev_ctx, GetDimsForKey(multi_input), - multi_input.size(), ctx.OutputName("Out"), dt); - key = platform::ExtendKeyWithThreadInfoIfNeeded(dev_ctx, key); - - const std::string key_prim = key + "@concat_p"; - const std::string key_concat_pd = key + "@concat_pd"; - const std::string key_srcs = key + "@concat_srcs"; - const std::string key_dst = key + "@concat_dst"; - - std::shared_ptr concat_pd; - std::shared_ptr> srcs; - std::shared_ptr dst_mem; - auto concat_p = std::static_pointer_cast(dev_ctx.GetBlob(key_prim)); - - const auto& mkldnn_engine = dev_ctx.GetEngine(); - if (concat_p == nullptr) { - concat_pd = std::make_shared( - prim_creator.CreateConcatPrimDescriptor( - multi_input, output, concat_axis, mkldnn_engine, dt)); - concat_p = std::make_shared(prim_creator.CreateConcatPrimitive( - *concat_pd, output, place, mkldnn_engine)); - srcs = std::make_shared>(prim_creator.GetSrcs()); - dst_mem = std::make_shared(prim_creator.GetDst()); - dev_ctx.SetBlob(key_prim, concat_p); - dev_ctx.SetBlob(key_concat_pd, concat_pd); - dev_ctx.SetBlob(key_srcs, srcs); - dev_ctx.SetBlob(key_dst, dst_mem); - } else { - srcs = std::static_pointer_cast>( - dev_ctx.GetBlob(key_srcs)); - dst_mem = std::static_pointer_cast(dev_ctx.GetBlob(key_dst)); - concat_pd = std::static_pointer_cast( - dev_ctx.GetBlob(key_concat_pd)); - for (size_t i = 0; i < multi_input.size(); i++) { - prim_creator.SetSrcDataHandleByIndex( - *srcs, i, to_void_cast(multi_input[i]->data())); - } - prim_creator.SetDstDataHandle( - *dst_mem, - output->mutable_data(place, concat_pd->dst_desc().get_size())); - } + auto dst_mem = handler.AcquireDstMemory(output); + auto concat_p = handler.AcquireForwardPrimitive(); auto& astream = platform::MKLDNNDeviceContext::tls().get_stream(); std::unordered_map args; for (size_t i = 0; i < multi_input.size(); ++i) { + srcs[i] = handler.AcquireSrcMemory(multi_input[i], i); args.insert({MKLDNN_ARG_MULTIPLE_SRC + i, (*srcs).at(i)}); } args.insert({MKLDNN_ARG_DST, *dst_mem}); From 3e28487761f71cf5a5551f6b946746478ba7bad0 Mon Sep 17 00:00:00 2001 From: Jacek Czaja Date: Tue, 17 Aug 2021 15:05:06 +0200 Subject: [PATCH 02/18] - cmpilation fixes --- .../operators/mkldnn/concat_mkldnn_op.cc | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc index d381b626cb6ec7..60867e664ec8fb 100644 --- a/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc @@ -39,7 +39,7 @@ class ConcatMKLDNNHandler : platform::MKLDNNHandlerNoCachingT(mkldnn_engine, ctx.GetPlace()) { int concat_axis = ctx.Attr("axis"); - const int rank = multi_input[0]->dims().size(); + const int rank = inputs[0]->dims().size(); PADDLE_ENFORCE_EQ( concat_axis >= -rank && concat_axis < rank, true, platform::errors::InvalidArgument( @@ -49,9 +49,9 @@ class ConcatMKLDNNHandler if (ctx.HasInput("AxisTensor")) { auto* axis_tensor = ctx.Input("AxisTensor"); concat_axis = GetDataFromTensor(axis_tensor)[0]; - auto out_dims = multi_input[0]->dims(); - for (size_t i = 1; i < multi_input.size(); ++i) { - out_dims[concat_axis] += multi_input[i]->dims()[concat_axis]; + auto out_dims = inputs[0]->dims(); + for (size_t i = 1; i < inputs.size(); ++i) { + out_dims[concat_axis] += inputs[i]->dims()[concat_axis]; } output->Resize(out_dims); } @@ -61,14 +61,13 @@ class ConcatMKLDNNHandler } memory::data_type dt = - paddle::framework::ToMKLDNNDataType(multi_input[0]->type()); - std::vector srcs_md(multi_input.size()); + paddle::framework::ToMKLDNNDataType(inputs[0]->type()); + std::vector srcs_md(inputs.size()); // Create memory descriptors for each of inputs - const auto dims = - paddle::framework::vectorize(multi_input[0].dims()); - for (size_t i = 0; i < multi_input.size(); i++) { - srcs_md.emplace_back(memory::desc(dims, dt, multi_input[i].format())); + const auto dims = paddle::framework::vectorize(inputs[0].dims()); + for (size_t i = 0; i < inputs.size(); i++) { + srcs_md.emplace_back(memory::desc(dims, dt, inputs[i].format())); } auto dst_dims = paddle::framework::vectorize(output->dims()); From 0d3c0caa614add2153dc85279ce293f5a4c076d3 Mon Sep 17 00:00:00 2001 From: Jacek Czaja Date: Tue, 17 Aug 2021 15:24:54 +0200 Subject: [PATCH 03/18] - yet another compilation fix --- paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc index 60867e664ec8fb..ea4b8a80ecae08 100644 --- a/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc @@ -65,9 +65,9 @@ class ConcatMKLDNNHandler std::vector srcs_md(inputs.size()); // Create memory descriptors for each of inputs - const auto dims = paddle::framework::vectorize(inputs[0].dims()); - for (size_t i = 0; i < inputs.size(); i++) { - srcs_md.emplace_back(memory::desc(dims, dt, inputs[i].format())); + const auto dims = paddle::framework::vectorize(inputs[0]->dims()); + for (size_t i = 0; i < inputs->size(); i++) { + srcs_md.emplace_back(memory::desc(dims, dt, inputs[i]->format())); } auto dst_dims = paddle::framework::vectorize(output->dims()); From 64404bb006daa2718fd0693c84423bdeeb3855a6 Mon Sep 17 00:00:00 2001 From: Jacek Czaja Date: Tue, 17 Aug 2021 15:26:47 +0200 Subject: [PATCH 04/18] - fix --- paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc index ea4b8a80ecae08..f393ea60d0a5bd 100644 --- a/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc @@ -66,7 +66,7 @@ class ConcatMKLDNNHandler // Create memory descriptors for each of inputs const auto dims = paddle::framework::vectorize(inputs[0]->dims()); - for (size_t i = 0; i < inputs->size(); i++) { + for (size_t i = 0; i < inputs.size(); i++) { srcs_md.emplace_back(memory::desc(dims, dt, inputs[i]->format())); } From 886939de83f06328cfa7830067d9f870d05a41a3 Mon Sep 17 00:00:00 2001 From: Jacek Czaja Date: Tue, 17 Aug 2021 15:28:53 +0200 Subject: [PATCH 05/18] - compilation fix --- paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc index f393ea60d0a5bd..976c46e1836648 100644 --- a/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc @@ -128,7 +128,7 @@ class ConcatMKLDNNOpKernel : public paddle::framework::OpKernel { auto& astream = platform::MKLDNNDeviceContext::tls().get_stream(); std::unordered_map args; for (size_t i = 0; i < multi_input.size(); ++i) { - srcs[i] = handler.AcquireSrcMemory(multi_input[i], i); + srcs.push_back(handler.AcquireSrcMemory(multi_input[i], i)); args.insert({MKLDNN_ARG_MULTIPLE_SRC + i, (*srcs).at(i)}); } args.insert({MKLDNN_ARG_DST, *dst_mem}); From be5686d21ed20ae7dbdff9992bd2a0c4b4896e4d Mon Sep 17 00:00:00 2001 From: Jacek Czaja Date: Tue, 17 Aug 2021 15:31:09 +0200 Subject: [PATCH 06/18] - fixes to compilation --- paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc index 976c46e1836648..29343d5629bf56 100644 --- a/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc @@ -120,7 +120,7 @@ class ConcatMKLDNNOpKernel : public paddle::framework::OpKernel { ConcatMKLDNNHandler handler(ctx, mkldnn_engine, multi_input, output); - std::shared_ptr> srcs(multi_input.size()); + std::vector> srcs(multi_input.size()); auto dst_mem = handler.AcquireDstMemory(output); auto concat_p = handler.AcquireForwardPrimitive(); From 09e9d01dc7c9e6ed068f3cd5b67eb5359cbd816b Mon Sep 17 00:00:00 2001 From: Jacek Czaja Date: Tue, 17 Aug 2021 15:33:03 +0200 Subject: [PATCH 07/18] - another compilation fix --- paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc index 29343d5629bf56..ea6156e74ec8cb 100644 --- a/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc @@ -129,7 +129,7 @@ class ConcatMKLDNNOpKernel : public paddle::framework::OpKernel { std::unordered_map args; for (size_t i = 0; i < multi_input.size(); ++i) { srcs.push_back(handler.AcquireSrcMemory(multi_input[i], i)); - args.insert({MKLDNN_ARG_MULTIPLE_SRC + i, (*srcs).at(i)}); + args.insert({MKLDNN_ARG_MULTIPLE_SRC + i, *(srcs.at(i))}); } args.insert({MKLDNN_ARG_DST, *dst_mem}); From 0750402e47ae3a0aa3c219cefff4aa810ede6065 Mon Sep 17 00:00:00 2001 From: Jacek Czaja Date: Tue, 17 Aug 2021 15:35:29 +0200 Subject: [PATCH 08/18] - fix --- paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc index ea6156e74ec8cb..5903650236480f 100644 --- a/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc @@ -128,7 +128,7 @@ class ConcatMKLDNNOpKernel : public paddle::framework::OpKernel { auto& astream = platform::MKLDNNDeviceContext::tls().get_stream(); std::unordered_map args; for (size_t i = 0; i < multi_input.size(); ++i) { - srcs.push_back(handler.AcquireSrcMemory(multi_input[i], i)); + srcs.push_back(handler.AcquireSrcMemory(*(multi_input[i]), i)); args.insert({MKLDNN_ARG_MULTIPLE_SRC + i, *(srcs.at(i))}); } args.insert({MKLDNN_ARG_DST, *dst_mem}); From 85a056a37ee38cdf3d915328ad4098d1f60badc9 Mon Sep 17 00:00:00 2001 From: Jacek Czaja Date: Tue, 17 Aug 2021 15:41:37 +0200 Subject: [PATCH 09/18] - Added overloaded AcquirePrimitiveDesc for concat --- paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc index 5903650236480f..855edfa4ba5138 100644 --- a/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc @@ -76,6 +76,15 @@ class ConcatMKLDNNHandler this->AcquireForwardPrimitiveDescriptor(dst_md, concat_axis, srcs_md); } + // (jczaja) concat oneDNN prim is not having .desc attribute so + // we cannot use base AcquireForwardPrimitiveDescriptor + void AcquireForwardPrimitiveDescriptor( + const mkldnn::memory::desc& dst_md, const int concat_axis, + const std::vector& srcs_md) { + this->fwd_pd_.reset(new dnnl::sum::primitive_desc(dst_md, concat_axis, + srcs_md, this->engine_)); + } + std::shared_ptr AcquireSrcMemory( const framework::Tensor& input, int i) { const T* input_data = input.data(); From 9bcbc44374ef2c8935159d5c5c29fc624391d733 Mon Sep 17 00:00:00 2001 From: Jacek Czaja Date: Tue, 17 Aug 2021 15:42:55 +0200 Subject: [PATCH 10/18] - fix --- paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc index 855edfa4ba5138..f1493490f0d9c5 100644 --- a/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc @@ -81,7 +81,7 @@ class ConcatMKLDNNHandler void AcquireForwardPrimitiveDescriptor( const mkldnn::memory::desc& dst_md, const int concat_axis, const std::vector& srcs_md) { - this->fwd_pd_.reset(new dnnl::sum::primitive_desc(dst_md, concat_axis, + this->fwd_pd_.reset(new dnnl::concat::primitive_desc(dst_md, concat_axis, srcs_md, this->engine_)); } From be97ba942241ae490b336556c100fd985319e37c Mon Sep 17 00:00:00 2001 From: Jacek Czaja Date: Tue, 17 Aug 2021 15:50:09 +0200 Subject: [PATCH 11/18] - reserve introduced --- paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc index f1493490f0d9c5..da1bad48885463 100644 --- a/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc @@ -62,7 +62,8 @@ class ConcatMKLDNNHandler memory::data_type dt = paddle::framework::ToMKLDNNDataType(inputs[0]->type()); - std::vector srcs_md(inputs.size()); + std::vector srcs_md; + srcs_md.resize(inputs.size()); // Create memory descriptors for each of inputs const auto dims = paddle::framework::vectorize(inputs[0]->dims()); @@ -81,8 +82,8 @@ class ConcatMKLDNNHandler void AcquireForwardPrimitiveDescriptor( const mkldnn::memory::desc& dst_md, const int concat_axis, const std::vector& srcs_md) { - this->fwd_pd_.reset(new dnnl::concat::primitive_desc(dst_md, concat_axis, - srcs_md, this->engine_)); + this->fwd_pd_.reset(new dnnl::concat::primitive_desc( + dst_md, concat_axis, srcs_md, this->engine_)); } std::shared_ptr AcquireSrcMemory( @@ -107,7 +108,8 @@ static void EnforceLayouts(const std::vector inputs) { // From a multi-input, gather only nonempty inputs static const std::vector ReduceMultiInput( const std::vector& inputs) { - std::vector reduced(inputs.size()); + std::vector reduced; + reduced.resize(inputs.size()); auto end_it = std::copy_if(inputs.begin(), inputs.end(), reduced.begin(), [](const Tensor* t) { return t->numel() > 0; }); reduced.resize(std::distance(reduced.begin(), end_it)); @@ -129,7 +131,8 @@ class ConcatMKLDNNOpKernel : public paddle::framework::OpKernel { ConcatMKLDNNHandler handler(ctx, mkldnn_engine, multi_input, output); - std::vector> srcs(multi_input.size()); + std::vector> srcs; + srcs.reserve(multi_input.size()); auto dst_mem = handler.AcquireDstMemory(output); auto concat_p = handler.AcquireForwardPrimitive(); From 753b29290d9d438457c519115128864902ae9fa6 Mon Sep 17 00:00:00 2001 From: Jacek Czaja Date: Tue, 17 Aug 2021 16:10:19 +0200 Subject: [PATCH 12/18] - UT fixes --- .../fluid/tests/unittests/mkldnn/test_concat_mkldnn_op.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_concat_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_concat_mkldnn_op.py index 4f3dece5be3422..4900b42d3618d1 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_concat_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_concat_mkldnn_op.py @@ -87,4 +87,6 @@ def init_kernel_type(self): if __name__ == '__main__': + from paddle import enable_static + enable_static() unittest.main() From ef3341365defb0c82dd95a56479103f7a7af8dc9 Mon Sep 17 00:00:00 2001 From: Jacek Czaja Date: Tue, 17 Aug 2021 16:16:02 +0200 Subject: [PATCH 13/18] - test concat int8 improved --- .../fluid/tests/unittests/mkldnn/test_concat_int8_mkldnn_op.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_concat_int8_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_concat_int8_mkldnn_op.py index ca15ea2aaf9750..ef2fa1c1cc268e 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_concat_int8_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_concat_int8_mkldnn_op.py @@ -122,4 +122,6 @@ def init_shape(self): create_test_int8_class(TestConcatOp2) if __name__ == '__main__': + from paddle import enable_static + enable_static() unittest.main() From e273d29b09092ba70dc7e4c5856ddd0ed3b4890e Mon Sep 17 00:00:00 2001 From: Jacek Czaja Date: Tue, 17 Aug 2021 16:46:16 +0200 Subject: [PATCH 14/18] - fixes --- paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc index da1bad48885463..79857ab9681686 100644 --- a/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc @@ -63,7 +63,7 @@ class ConcatMKLDNNHandler memory::data_type dt = paddle::framework::ToMKLDNNDataType(inputs[0]->type()); std::vector srcs_md; - srcs_md.resize(inputs.size()); + srcs_md.reserve(inputs.size()); // Create memory descriptors for each of inputs const auto dims = paddle::framework::vectorize(inputs[0]->dims()); @@ -109,7 +109,7 @@ static void EnforceLayouts(const std::vector inputs) { static const std::vector ReduceMultiInput( const std::vector& inputs) { std::vector reduced; - reduced.resize(inputs.size()); + reduced.reserve(inputs.size()); auto end_it = std::copy_if(inputs.begin(), inputs.end(), reduced.begin(), [](const Tensor* t) { return t->numel() > 0; }); reduced.resize(std::distance(reduced.begin(), end_it)); From 7861573e95eb49c90910fe66c70c9c1d98ef1625 Mon Sep 17 00:00:00 2001 From: Jacek Czaja Date: Tue, 17 Aug 2021 17:02:24 +0200 Subject: [PATCH 15/18] - fix to crash --- paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc index 79857ab9681686..cc16a2ddab6bb5 100644 --- a/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc @@ -108,8 +108,7 @@ static void EnforceLayouts(const std::vector inputs) { // From a multi-input, gather only nonempty inputs static const std::vector ReduceMultiInput( const std::vector& inputs) { - std::vector reduced; - reduced.reserve(inputs.size()); + std::vector reduced(inputs.size()); auto end_it = std::copy_if(inputs.begin(), inputs.end(), reduced.begin(), [](const Tensor* t) { return t->numel() > 0; }); reduced.resize(std::distance(reduced.begin(), end_it)); From f393f0c8ae797faef404eb528f4f57d2f974ae4a Mon Sep 17 00:00:00 2001 From: Jacek Czaja Date: Wed, 18 Aug 2021 16:27:15 +0200 Subject: [PATCH 16/18] - lint fixes --- paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc index cc16a2ddab6bb5..d582a8fdb7b976 100644 --- a/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc @@ -35,7 +35,7 @@ class ConcatMKLDNNHandler public: ConcatMKLDNNHandler(const paddle::framework::ExecutionContext& ctx, const mkldnn::engine mkldnn_engine, - const std::vector inputs, Tensor* output) + const std::vector& inputs, Tensor* output) : platform::MKLDNNHandlerNoCachingT(mkldnn_engine, ctx.GetPlace()) { int concat_axis = ctx.Attr("axis"); @@ -66,8 +66,9 @@ class ConcatMKLDNNHandler srcs_md.reserve(inputs.size()); // Create memory descriptors for each of inputs - const auto dims = paddle::framework::vectorize(inputs[0]->dims()); for (size_t i = 0; i < inputs.size(); i++) { + const auto dims = + paddle::framework::vectorize(inputs[i]->dims()); srcs_md.emplace_back(memory::desc(dims, dt, inputs[i]->format())); } From 6832c49b0a51219cfbb86084c4ef6d5764626834 Mon Sep 17 00:00:00 2001 From: Jacek Czaja Date: Thu, 19 Aug 2021 14:45:26 +0200 Subject: [PATCH 17/18] - fixes after review --- .../operators/mkldnn/concat_mkldnn_op.cc | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc index d582a8fdb7b976..ae30fe67787518 100644 --- a/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc @@ -33,7 +33,7 @@ template class ConcatMKLDNNHandler : public platform::MKLDNNHandlerNoCachingT { public: - ConcatMKLDNNHandler(const paddle::framework::ExecutionContext& ctx, + ConcatMKLDNNHandler(const framework::ExecutionContext& ctx, const mkldnn::engine mkldnn_engine, const std::vector& inputs, Tensor* output) : platform::MKLDNNHandlerNoCachingT(mkldnn_engine, @@ -60,19 +60,17 @@ class ConcatMKLDNNHandler concat_axis = concat_axis + rank; } - memory::data_type dt = - paddle::framework::ToMKLDNNDataType(inputs[0]->type()); + memory::data_type dt = framework::ToMKLDNNDataType(inputs[0]->type()); std::vector srcs_md; srcs_md.reserve(inputs.size()); // Create memory descriptors for each of inputs - for (size_t i = 0; i < inputs.size(); i++) { - const auto dims = - paddle::framework::vectorize(inputs[i]->dims()); + for (size_t i = 0; i < inputs.size(); ++i) { + const auto dims = framework::vectorize(inputs[i]->dims()); srcs_md.emplace_back(memory::desc(dims, dt, inputs[i]->format())); } - auto dst_dims = paddle::framework::vectorize(output->dims()); + auto dst_dims = framework::vectorize(output->dims()); auto dst_md = memory::desc(dst_dims, dt, MKLDNNMemoryFormat::any); this->AcquireForwardPrimitiveDescriptor(dst_md, concat_axis, srcs_md); @@ -81,14 +79,13 @@ class ConcatMKLDNNHandler // (jczaja) concat oneDNN prim is not having .desc attribute so // we cannot use base AcquireForwardPrimitiveDescriptor void AcquireForwardPrimitiveDescriptor( - const mkldnn::memory::desc& dst_md, const int concat_axis, - const std::vector& srcs_md) { + const memory::desc& dst_md, const int concat_axis, + const std::vector& srcs_md) { this->fwd_pd_.reset(new dnnl::concat::primitive_desc( dst_md, concat_axis, srcs_md, this->engine_)); } - std::shared_ptr AcquireSrcMemory( - const framework::Tensor& input, int i) { + std::shared_ptr AcquireSrcMemory(const Tensor& input, int i) { const T* input_data = input.data(); return this->AcquireMemoryFromPrimitive(this->fwd_pd_->src_desc(i), to_void_cast(input_data)); From 4ad092a1a4b32e20ac87a520eb416e2d2501351e Mon Sep 17 00:00:00 2001 From: Jacek Czaja Date: Thu, 19 Aug 2021 14:52:39 +0200 Subject: [PATCH 18/18] - some other fixes from review --- paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc index ae30fe67787518..57a56776736ff9 100644 --- a/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc @@ -118,7 +118,7 @@ class ConcatMKLDNNOpKernel : public paddle::framework::OpKernel { public: void Compute(const paddle::framework::ExecutionContext& ctx) const override { auto& dev_ctx = - ctx.template device_context(); + ctx.template device_context(); const auto& mkldnn_engine = dev_ctx.GetEngine(); // If any of the multiple inputs of concat has an input size of 0, the // actual size of the multi_input will change