Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file added log
Binary file not shown.
6 changes: 2 additions & 4 deletions paddle/fluid/framework/data_layout_transform.cc
Original file line number Diff line number Diff line change
Expand Up @@ -179,11 +179,9 @@ void innerTransDataLayoutFromMKLDNN(DataLayout in_layout, DataLayout out_layout,

if ((in_format != out_format) || always_copy) {
void* in_data = GetDataFromTensor(in, in_type);
std::string key =
platform::CreateKey(*dev_ctx, in_tz, in_format, out_format, in_type);

platform::ReorderMKLDNNHandler handler(in_tz, in.type(), in_type, *dev_ctx,
cpu_engine, key);
platform::ReorderMKLDNNHandler handler(in_tz, in.type(), in_type,
cpu_engine);

auto reorder_src_memory_p = handler.AcquireSrcMemory(in_format, in_data);
auto reorder_dst_memory_p =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,10 +43,8 @@ class EltwiseAddMKLDNNGradKernel : public ElemwiseGradKernel<T> {

auto tz = paddle::framework::vectorize<int64_t>(dout->dims());
memory::data_type dout_type = framework::ToMKLDNNDataType(dout->type());
std::string key = platform::CreateKey(dev_ctx, tz, dout->format(),
dout->format(), dout_type);
platform::ReorderMKLDNNHandler handler(tz, dout->type(), dout_type, dev_ctx,
onednn_engine, key);
platform::ReorderMKLDNNHandler handler(tz, dout->type(), dout_type,
onednn_engine);

auto& astream = platform::MKLDNNDeviceContext::tls().get_stream();
auto reorder_src_memory_p = handler.AcquireSrcMemory(
Expand Down
8 changes: 3 additions & 5 deletions paddle/fluid/operators/mkldnn/cast_mkldnn_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -43,11 +43,9 @@ class CastMKLDNNKernel : public framework::OpKernel<T> {

auto x_tz = framework::vectorize(x->dims());

std::string key =
platform::CreateKey(dev_ctx, x_tz, x->format(), x->format(), x_type);
platform::ReorderMKLDNNHandler reorder_handler(
x_tz, x_paddle_type, x_type, out_paddle_type, out_type, dev_ctx,
dev_ctx.GetEngine(), key);
platform::ReorderMKLDNNHandler reorder_handler(x_tz, x_paddle_type, x_type,
out_paddle_type, out_type,
dev_ctx.GetEngine());

auto reorder_src_memory_p = reorder_handler.AcquireSrcMemory(
x->format(), platform::to_void_cast(x->data<T>()));
Expand Down
8 changes: 2 additions & 6 deletions paddle/fluid/operators/mkldnn/conv_mkldnn_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1125,12 +1125,8 @@ class ConvMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
mkldnn::memory::format_tag out_format =
weights_tz.size() == 6 ? mkldnn::memory::format_tag::goidhw
: mkldnn::memory::format_tag::goihw;
std::string key = platform::CreateKey(dev_ctx, weights_tz, filter_fmt,
out_format, in_type);
key = platform::ExtendKeyWithThreadInfoIfNeeded(dev_ctx, key);

platform::ReorderMKLDNNHandler handler(
weights_tz, filter->type(), in_type, dev_ctx, mkldnn_engine, key);
platform::ReorderMKLDNNHandler handler(weights_tz, filter->type(),
in_type, mkldnn_engine);
auto reorder_dst_memory_p =
handler.AcquireDstMemory(filter_grad, out_format, ctx.GetPlace());

Expand Down
4 changes: 1 addition & 3 deletions paddle/fluid/operators/mkldnn/expand_v2_mkldnn_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -114,10 +114,8 @@ class ExpandGradMKLDNNKernel : public paddle::framework::OpKernel<T> {
if (dout_vec_dims == dx_vec_dims) {
mkldnn::memory::data_type dout_type =
paddle::framework::ToMKLDNNDataType(dout->type());
std::string key = paddle::platform::CreateKey(
dev_ctx, dout_vec_dims, dout->format(), dout->format(), dout_type);
paddle::platform::ReorderMKLDNNHandler reorder_handler(
dout_vec_dims, dout->type(), dout_type, dev_ctx, onednn_engine, key);
dout_vec_dims, dout->type(), dout_type, onednn_engine);

auto reorder_src_memory_p = reorder_handler.AcquireSrcMemory(
dout->format(), paddle::platform::to_void_cast(dout->data<T>()));
Expand Down
5 changes: 1 addition & 4 deletions paddle/fluid/operators/mkldnn/matmul_mkldnn_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -58,11 +58,8 @@ static Tensor FoldFirstAndLastDims(const MKLDNNDeviceContext& dev_ctx,

memory::data_type input_type =
paddle::framework::ToMKLDNNDataType(input->type());
std::string key = paddle::platform::CreateKey(
dev_ctx, input_dims, input->format(), input->format(), input_type);
paddle::platform::ReorderMKLDNNHandler reorder_handler(
output_dims, input->type(), input_type, dev_ctx, dev_ctx.GetEngine(),
key);
output_dims, input->type(), input_type, dev_ctx.GetEngine());

auto reorder_src_memory_p = reorder_handler.AcquireSrcMemory(
memory::format_tag::abc,
Expand Down
13 changes: 4 additions & 9 deletions paddle/fluid/operators/mkldnn/reshape_mkldnn_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -93,10 +93,8 @@ class ReshapeMKLDNNKernel : public framework::OpKernel<T> {
}

mkldnn::memory::data_type x_type = framework::ToMKLDNNDataType(x->type());
std::string key =
platform::CreateKey(dev_ctx, x_vec_dims, x->format(), x_type);
platform::ReorderMKLDNNHandler reorder_handler(
x_vec_dims, x->type(), x_type, dev_ctx, onednn_engine, key);
platform::ReorderMKLDNNHandler reorder_handler(x_vec_dims, x->type(),
x_type, onednn_engine);

auto reorder_src_memory_p = reorder_handler.AcquireSrcMemory(
x->format(), platform::to_void_cast(x->data<T>()));
Expand Down Expand Up @@ -253,11 +251,8 @@ class ReshapeGradMKLDNNKernel : public ReshapeMKLDNNKernel<T> {

mkldnn::memory::data_type dout_type =
framework::ToMKLDNNDataType(dout->type());
std::string key =
platform::CreateKey(dev_ctx, dout_vec_dims, this->getPlainFormatTag(dx),
dx->format(), dout_type);
platform::ReorderMKLDNNHandler reorder_handler(
dout_vec_dims, dout->type(), dout_type, dev_ctx, onednn_engine, key);
platform::ReorderMKLDNNHandler reorder_handler(dout_vec_dims, dout->type(),
dout_type, onednn_engine);

auto reorder_src_memory_p = reorder_handler.AcquireSrcMemory(
dout->format(), platform::to_void_cast(dout->data<T>()));
Expand Down
17 changes: 6 additions & 11 deletions paddle/fluid/operators/mkldnn/slice_mkldnn_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -98,18 +98,16 @@ class SliceMKLDNNKernel : public framework::OpKernel<T> {
out->Resize(framework::make_ddim(slice_dims));

mkldnn::memory::data_type x_type = framework::ToMKLDNNDataType(x->type());
auto key = platform::CreateKey(dev_ctx, x_vec_dims, axes, starts, ends,
x->format(), x_type);

platform::ReorderMKLDNNHandler reorder_handler(
x_vec_dims, x->type(), x_type, dev_ctx, onednn_engine, key);
platform::ReorderMKLDNNHandler reorder_handler(x_vec_dims, x->type(),
x_type, onednn_engine);

auto reorder_src_memory_p = reorder_handler.AcquireSrcMemory(
x->format(), platform::to_void_cast(x->data<T>()));
auto slice_mem_p = reorder_handler.AcquireSubmemory(slice_dims, offsets,
reorder_src_memory_p);
auto reorder_dst_memory_p = reorder_handler.AcquireDstMemory(
out, slice_dims, 0, get_plain_format_tag(x), ctx.GetPlace());
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please, revert it

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My bad when resolving conflicts. Good catch! Thanks

out, slice_dims, get_plain_format_tag(x), ctx.GetPlace());

auto reorder_p =
reorder_handler.AcquireReorder(reorder_dst_memory_p, slice_mem_p);
Expand Down Expand Up @@ -201,16 +199,13 @@ class SliceGradMKLDNNKernel : public framework::OpKernel<T> {
mkldnn::memory::format_tag reorder_format_tag =
platform::GetMKLDNNFormat(md.reshape(slice_dims));

auto key = platform::CreateKey(dev_ctx, dout_vec_dims, axes, starts, ends,
reorder_format_tag, dout_type);

platform::ReorderMKLDNNHandler reorder_handler(
slice_dims, dout->type(), dout_type, dev_ctx, onednn_engine, key);
platform::ReorderMKLDNNHandler reorder_handler(slice_dims, dout->type(),
dout_type, onednn_engine);

auto reorder_src_memory_p = reorder_handler.AcquireSrcMemory(
reorder_format_tag, platform::to_void_cast(dout->data<T>()));
auto reorder_dst_memory_p = reorder_handler.AcquireDstMemory(
dx, dx_vec_dims, 0, reorder_format_tag, ctx.GetPlace());
dx, dx_vec_dims, reorder_format_tag, ctx.GetPlace());
memset(dx->data<T>(), 0, reorder_dst_memory_p->get_desc().get_size());

auto slice_mem_p = reorder_handler.AcquireSubmemory(slice_dims, offsets,
Expand Down
14 changes: 6 additions & 8 deletions paddle/fluid/operators/mkldnn/split_mkldnn_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -91,27 +91,25 @@ class SplitMKLDNNKernel : public framework::OpKernel<T> {
auto x_vec_dims = framework::vectorize(x_dims);

mkldnn::memory::data_type x_type = framework::ToMKLDNNDataType(x->type());
auto key = platform::CreateKey(dev_ctx, x_vec_dims, axis, num, sections,
x->format(), x_type);

auto& astream = platform::MKLDNNDeviceContext::tls().get_stream();

std::vector<int64_t> offset(x_vec_dims.size(), 0);

platform::ReorderMKLDNNHandler reorder_handler(
x_vec_dims, x->type(), x_type, dev_ctx, onednn_engine, key);
platform::ReorderMKLDNNHandler reorder_handler(x_vec_dims, x->type(),
x_type, onednn_engine);
auto reorder_src_memory_p = reorder_handler.AcquireSrcMemory(
x->format(), platform::to_void_cast(x->data<T>()));

for (size_t i = 0; i < outs_number; ++i) {
auto out_vec_dims = framework::vectorize(outs[i]->dims());
auto slice_mem_p = reorder_handler.AcquireSubmemory(
out_vec_dims, offset, reorder_src_memory_p, i);
auto slice_mem_p = reorder_handler.AcquireSubmemory(out_vec_dims, offset,
reorder_src_memory_p);

auto reorder_dst_memory_p = reorder_handler.AcquireDstMemory(
outs[i], out_vec_dims, i, x->format(), ctx.GetPlace());
outs[i], out_vec_dims, x->format(), ctx.GetPlace());
auto reorder_p =
reorder_handler.AcquireReorder(reorder_dst_memory_p, slice_mem_p, i);
reorder_handler.AcquireReorder(reorder_dst_memory_p, slice_mem_p);

reorder_p->execute(astream, *slice_mem_p, *reorder_dst_memory_p);

Expand Down
6 changes: 1 addition & 5 deletions paddle/fluid/operators/mkldnn/sum_mkldnn_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -155,15 +155,11 @@ class SumMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
// For in-place execution which sum does not have we need to fake it
// so from oneDNN dst memory we reorder data into input
if (in_place) {
const std::string reorder_key =
platform::CreateKey(dev_ctx, framework::vectorize(output->dims()),
ctx.OutputName("Out") + "-I");

auto& in_out = in_vars[0]->Get<framework::LoDTensor>();
auto output_tz = framework::vectorize<int64_t>(output->dims());
platform::ReorderMKLDNNHandler reorder_handler(
output_tz, output->type(), framework::ToMKLDNNDataType(in_out.type()),
dev_ctx, dev_ctx.GetEngine(), reorder_key);
dev_ctx.GetEngine());

auto target_mem = reorder_handler.AcquireDstMemory(
output, in_out.format(), ctx.GetPlace());
Expand Down
6 changes: 2 additions & 4 deletions paddle/fluid/operators/reduce_ops/mkldnn/reduce_mkldnn_op.h
Original file line number Diff line number Diff line change
Expand Up @@ -71,10 +71,8 @@ class ReduceMKLDNNKernel : public framework::OpKernel<T> {
if (input_dims == output_dims) {
mkldnn::memory::data_type input_type =
framework::ToMKLDNNDataType(input->type());
std::string key = platform::CreateKey(
dev_ctx, input_dims, input->format(), input->format(), input_type);
platform::ReorderMKLDNNHandler reorder_handler(
input_dims, input->type(), input_type, dev_ctx, onednn_engine, key);
platform::ReorderMKLDNNHandler reorder_handler(input_dims, input->type(),
input_type, onednn_engine);

auto reorder_src_memory_p = reorder_handler.AcquireSrcMemory(
input->format(), platform::to_void_cast(input->data<T>()));
Expand Down
113 changes: 24 additions & 89 deletions paddle/fluid/platform/mkldnn_reuse.h
Original file line number Diff line number Diff line change
Expand Up @@ -1071,138 +1071,73 @@ class ActivationMKLDNNHandler
}
};

class ReorderMKLDNNHandler : public MKLDNNHandler {
class ReorderMKLDNNHandler {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would encourage you to extract this class to a separate file. It's not a template, so it would make the binary size smaller and compilation faster.

public:
ReorderMKLDNNHandler(std::vector<int64_t>& dims, // NOLINT
framework::proto::VarType::Type vtype,
mkldnn::memory::data_type dtype,
const platform::MKLDNNDeviceContext& dev_ctx,
mkldnn::engine engine, const std::string& base_key)
: platform::MKLDNNHandler(dev_ctx, engine, base_key),
dims_(dims),
mkldnn::memory::data_type dtype, mkldnn::engine engine)
: dims_(dims),
vtype_(vtype),
vtype_dst_(vtype),
dtype_(dtype),
dtype_dst_(dtype) {}
dtype_dst_(dtype),
engine_(engine) {}

ReorderMKLDNNHandler(std::vector<int64_t>& dims, // NOLINT
framework::proto::VarType::Type vtype,
mkldnn::memory::data_type dtype,
framework::proto::VarType::Type vtype_dst,
mkldnn::memory::data_type dtype_dst,
const platform::MKLDNNDeviceContext& dev_ctx,
mkldnn::engine engine, const std::string& base_key)
: platform::MKLDNNHandler(dev_ctx, engine, base_key),
dims_(dims),
mkldnn::engine engine)
: dims_(dims),
vtype_(vtype),
vtype_dst_(vtype_dst),
dtype_(dtype),
dtype_dst_(dtype_dst) {}
dtype_dst_(dtype_dst),
engine_(engine) {}

std::shared_ptr<mkldnn::memory> AcquireSrcMemory(
const MKLDNNMemoryFormat& fmt, void* ptr) {
return this->AcquireMemory(dims_, dtype_, fmt, ptr, "@user_src_mem_p");
auto md = mkldnn::memory::desc(dims_, dtype_, fmt);
return std::make_shared<mkldnn::memory>(md, engine_, ptr);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we're no longer caching those dnnl::memory objects, then why return them wrapped in shared_ptr? Why not just return the plain object?

}

std::shared_ptr<mkldnn::memory> AcquireSubmemory(
const std::vector<int64_t>& dims, const std::vector<int64_t>& offset,
const std::shared_ptr<mkldnn::memory>& mem_p, int submemory_number = 0) {
std::string local_key = key_;
local_key.append("@submem")
.append(std::to_string(submemory_number))
.append("_p");

auto sub_mem_p =
std::static_pointer_cast<mkldnn::memory>(dev_ctx_.GetBlob(local_key));
if (sub_mem_p == nullptr) {
auto sub_md = mem_p->get_desc().submemory_desc(dims, {offset});
sub_mem_p = std::make_shared<mkldnn::memory>(sub_md, engine_,
mem_p->get_data_handle());
dev_ctx_.SetBlob(local_key, sub_mem_p);
} else {
sub_mem_p->set_data_handle(mem_p->get_data_handle());
}
const std::shared_ptr<mkldnn::memory>& mem_p) {
auto sub_md = mem_p->get_desc().submemory_desc(dims, {offset});
auto sub_mem_p = std::make_shared<mkldnn::memory>(sub_md, engine_,
mem_p->get_data_handle());
return sub_mem_p;
}

std::shared_ptr<mkldnn::memory> AcquireDstMemory(
framework::Tensor* output, const MKLDNNMemoryFormat& fmt,
platform::Place place) {
auto local_key = key_ + "@user_dst_mem_p";
auto mem_p =
std::static_pointer_cast<mkldnn::memory>(dev_ctx_.GetBlob(local_key));
if (mem_p == nullptr) {
auto dst_md = platform::MKLDNNMemDesc(dims_, dtype_dst_, fmt);
auto dst_data =
output->mutable_data(place, vtype_dst_, dst_md.get_size());

mem_p = std::make_shared<mkldnn::memory>(dst_md, engine_, dst_data);
dev_ctx_.SetBlob(local_key, mem_p);
} else {
// Even if memory object exists, we may be using it for a different tensor
auto dst_data =
output->mutable_data(place, vtype_dst_, mem_p->get_desc().get_size());
mem_p->set_data_handle(dst_data);
}
return mem_p;
auto dst_md = platform::MKLDNNMemDesc(dims_, dtype_dst_, fmt);
auto dst_data = output->mutable_data(place, vtype_dst_, dst_md.get_size());
return std::make_shared<mkldnn::memory>(dst_md, engine_, dst_data);
}

std::shared_ptr<mkldnn::memory> AcquireDstMemory(
framework::Tensor* output, const std::vector<int64_t>& dims,
const int memory_number, const MKLDNNMemoryFormat& fmt,
platform::Place place) {
auto local_key =
key_ + "@user_dst_mem" + std::to_string(memory_number) + "_p";
auto mem_p =
std::static_pointer_cast<mkldnn::memory>(dev_ctx_.GetBlob(local_key));
if (mem_p == nullptr) {
auto dst_md = platform::MKLDNNMemDesc(dims, dtype_dst_, fmt);
auto dst_data =
output->mutable_data(place, vtype_dst_, dst_md.get_size());

mem_p = std::make_shared<mkldnn::memory>(dst_md, engine_, dst_data);
dev_ctx_.SetBlob(local_key, mem_p);
} else {
// Even if memory object exists, we may be using it for a different tensor
auto dst_data =
output->mutable_data(place, vtype_dst_, mem_p->get_desc().get_size());
mem_p->set_data_handle(dst_data);
}
return mem_p;
}

std::shared_ptr<mkldnn::reorder> AcquireReorder(
std::shared_ptr<mkldnn::memory> dst_memory_p,
std::shared_ptr<mkldnn::memory> src_memory_p, int reorder_number) {
auto prim_key = key_ + "@reorder" + std::to_string(reorder_number) + "_p";
auto reorder_p =
std::static_pointer_cast<mkldnn::reorder>(dev_ctx_.GetBlob(prim_key));
if (reorder_p == nullptr) {
reorder_p =
std::make_shared<mkldnn::reorder>(*(src_memory_p), *(dst_memory_p));
dev_ctx_.SetBlob(prim_key, reorder_p);
}
return reorder_p;
const MKLDNNMemoryFormat& fmt, platform::Place place) {
auto dst_md = platform::MKLDNNMemDesc(dims, dtype_dst_, fmt);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In the above overload of this function you use the class member dims_, but here you use the parameter dims. Why this discrepancy? Why can you create the ReorderMKLDNNHandler class object with dims passed in the constructor and then use it with different dims? Why then store output dims as a member?

auto dst_data = output->mutable_data(place, vtype_dst_, dst_md.get_size());
return std::make_shared<mkldnn::memory>(dst_md, engine_, dst_data);
}

std::shared_ptr<mkldnn::reorder> AcquireReorder(
std::shared_ptr<mkldnn::memory> dst_memory_p,
std::shared_ptr<mkldnn::memory> src_memory_p) {
auto prim_key = key_ + "@reorder_p";
auto reorder_p =
std::static_pointer_cast<mkldnn::reorder>(dev_ctx_.GetBlob(prim_key));
if (reorder_p == nullptr) {
reorder_p =
std::make_shared<mkldnn::reorder>(*(src_memory_p), *(dst_memory_p));
dev_ctx_.SetBlob(prim_key, reorder_p);
}
return reorder_p;
return std::make_shared<mkldnn::reorder>(*(src_memory_p), *(dst_memory_p));
}

private:
std::vector<int64_t> dims_;
framework::proto::VarType::Type vtype_, vtype_dst_;
mkldnn::memory::data_type dtype_, dtype_dst_;
Comment on lines 1138 to 1139
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is the benefit of storing both vtype_ and dtype_? Why not just vtype_ since we can easily convert VarType::Type to oneDNN type?

mkldnn::engine engine_;
};

template <typename T>
Expand Down