From 1ad247846327a79c63fa1e02c4e9bb8514f96aaa Mon Sep 17 00:00:00 2001 From: wozna Date: Wed, 2 Jun 2021 13:23:04 +0200 Subject: [PATCH 1/6] Small changes related to BF16 fusion_gru and fusion_lstm --- paddle/fluid/framework/ir/CMakeLists.txt | 2 + paddle/fluid/framework/ir/fc_gru_fuse_pass.cc | 9 +- .../framework/ir/fc_gru_fuse_pass_tester.cc | 71 +------------- .../framework/ir/fc_gru_fuse_pass_tester.h | 93 ++++++++++++++++++ .../fluid/framework/ir/fc_lstm_fuse_pass.cc | 8 +- .../framework/ir/fc_lstm_fuse_pass_tester.cc | 71 +------------- .../framework/ir/fc_lstm_fuse_pass_tester.h | 96 +++++++++++++++++++ .../framework/ir/graph_pattern_detector.cc | 10 +- .../mkldnn/mkldnn_fc_rnn_fuse_pass_tester.cc | 70 ++++++++++++++ .../analyzer_lexical_analysis_gru_tester.cc | 1 - .../fluid/operators/fused/fusion_lstm_op.cc | 5 + .../mkldnn/test_fusion_gru_bf16_mkldnn_op.py | 6 +- .../mkldnn/test_fusion_gru_int8_mkldnn_op.py | 2 + .../mkldnn/test_fusion_lstm_bf16_mkldnn_op.py | 6 +- .../mkldnn/test_fusion_lstm_int8_mkldnn_op.py | 2 + 15 files changed, 304 insertions(+), 148 deletions(-) create mode 100644 paddle/fluid/framework/ir/fc_gru_fuse_pass_tester.h create mode 100644 paddle/fluid/framework/ir/fc_lstm_fuse_pass_tester.h create mode 100644 paddle/fluid/framework/ir/mkldnn/mkldnn_fc_rnn_fuse_pass_tester.cc diff --git a/paddle/fluid/framework/ir/CMakeLists.txt b/paddle/fluid/framework/ir/CMakeLists.txt index fb478bb6e8978e..8202f9ecc6173c 100644 --- a/paddle/fluid/framework/ir/CMakeLists.txt +++ b/paddle/fluid/framework/ir/CMakeLists.txt @@ -188,4 +188,6 @@ endif() cc_test(test_cpu_bfloat16_pass SRCS mkldnn/cpu_bfloat16_pass_tester.cc DEPS cpu_bfloat16_pass) cc_test(test_multi_gru_fuse_pass SRCS mkldnn/multi_gru_fuse_pass_tester.cc DEPS multi_gru_fuse_pass) cc_test(test_multi_gru_seq_fuse_pass SRCS mkldnn/multi_gru_seq_fuse_pass_tester.cc DEPS multi_gru_seq_fuse_pass) + set(TEST_FC_RNN_PASS_DEPS fc_gru_fuse_pass fc_lstm_fuse_pass mkldnn_placement_pass) + cc_test(test_fc_rnn_mkldnn_fuse_pass SRCS mkldnn/mkldnn_fc_rnn_fuse_pass_tester.cc DEPS ${TEST_FC_RNN_PASS_DEPS}) endif () diff --git a/paddle/fluid/framework/ir/fc_gru_fuse_pass.cc b/paddle/fluid/framework/ir/fc_gru_fuse_pass.cc index b1c62d40d4d7c7..ad3b8deba42b93 100644 --- a/paddle/fluid/framework/ir/fc_gru_fuse_pass.cc +++ b/paddle/fluid/framework/ir/fc_gru_fuse_pass.cc @@ -48,7 +48,8 @@ static int BuildFusion(Graph* graph, const std::string& name_scope, // Create New OpDesc auto gru_creater = [&](Node* gru, Node* x, Node* weight_x, Node* weight_h, - Node* bias, Node* hidden, Node* fc_bias) { + Node* bias, Node* hidden, Node* fc_bias, + const bool& use_mkldnn) { OpDesc op_desc; op_desc.SetType("fusion_gru"); @@ -67,6 +68,7 @@ static int BuildFusion(Graph* graph, const std::string& name_scope, gru->Op()->GetAttrIfExists("origin_mode")); // TODO(TJ): This should be a option for infer op_desc.SetAttr("use_seq", true); + op_desc.SetAttr("use_mkldnn", use_mkldnn); op_desc.SetAttr("activation", gru->Op()->GetAttr("activation")); op_desc.SetAttr("gate_activation", gru->Op()->GetAttr("gate_activation")); @@ -149,6 +151,7 @@ static int BuildFusion(Graph* graph, const std::string& name_scope, LOG(INFO) << "fc_gru_fuse_pass not supported when origin_mode=True."; return; } + const bool use_mkldnn = mul->Op()->GetAttrIfExists("use_mkldnn"); if (with_fc_bias) { GET_IR_NODE_FROM_SUBGRAPH(mul_out, mul_out, fc_pattern); @@ -156,14 +159,14 @@ static int BuildFusion(Graph* graph, const std::string& name_scope, GET_IR_NODE_FROM_SUBGRAPH(elementwise_add, 
elementwise_add, fc_pattern); GET_IR_NODE_FROM_SUBGRAPH(fc_out, elementwise_add_out, fc_pattern); - gru_creater(gru, x_n, w, Weight, Bias, Hidden, fc_bias); + gru_creater(gru, x_n, w, Weight, Bias, Hidden, fc_bias, use_mkldnn); // Remove unneeded nodes. std::unordered_set marked_nodes( {mul, gru, elementwise_add, fc_out, mul_out, BatchGate, BatchResetHiddenPrev, BatchHidden}); GraphSafeRemoveNodes(graph, marked_nodes); } else { - gru_creater(gru, x_n, w, Weight, Bias, Hidden, nullptr); + gru_creater(gru, x_n, w, Weight, Bias, Hidden, nullptr, use_mkldnn); // Remove unneeded nodes. std::unordered_set marked_nodes( {mul, gru, BatchGate, BatchResetHiddenPrev, BatchHidden}); diff --git a/paddle/fluid/framework/ir/fc_gru_fuse_pass_tester.cc b/paddle/fluid/framework/ir/fc_gru_fuse_pass_tester.cc index 70351b8aafffa1..6ec47fae26a932 100644 --- a/paddle/fluid/framework/ir/fc_gru_fuse_pass_tester.cc +++ b/paddle/fluid/framework/ir/fc_gru_fuse_pass_tester.cc @@ -12,77 +12,15 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "paddle/fluid/framework/ir/fc_gru_fuse_pass.h" - -#include -#include "paddle/fluid/framework/ir/pass_tester_helper.h" +#include "paddle/fluid/framework/ir/fc_gru_fuse_pass_tester.h" namespace paddle { namespace framework { namespace ir { -void AddVarToScope(Scope* param_scope, const std::string& name, - const DDim& dims) { - auto* tensor = param_scope->Var(name)->GetMutable(); - tensor->Resize(dims); - tensor->mutable_data(platform::CPUPlace()); -} - -Scope* CreateParamScope() { - auto param_scope = new Scope(); - AddVarToScope(param_scope, "gru_fc_w", {}); - AddVarToScope(param_scope, "gru_fc_b", {}); - AddVarToScope(param_scope, "gru_w", {}); - AddVarToScope(param_scope, "gru_b", {}); - AddVarToScope(param_scope, "gru_batch_gate_0", {}); - AddVarToScope(param_scope, "gru_batch_reset_hidden_prev_0", {}); - AddVarToScope(param_scope, "gru_batch_hidden_0", {}); - AddVarToScope(param_scope, "gru_hidden_0", {}); - AddVarToScope(param_scope, "gru_batch_gate_1", {}); - AddVarToScope(param_scope, "gru_batch_reset_hidden_prev_1", {}); - AddVarToScope(param_scope, "gru_batch_hidden_1", {}); - AddVarToScope(param_scope, "gru_hidden_1", {}); - return param_scope; -} - -TEST(FCFusePass, basic) { - // inputs operator output - // -------------------------------------------------------- - // (a, gru_fc_w) mul -> fc_0_tmp_0 - // (fc_0_tmp_0, gru_fc_b) elementwise_add -> fc_0_tmp_1 - // (fc_0_tmp_1,gru_w,gru_b gru -> gru_out_0 - - // (b, gru_fc_w) mul -> fc_1_tmp_0 - // (fc_1_tmp_0, gru_fc_b) elementwise_add -> fc_1_tmp_1 - // (fc_1_tmp_1,gru_w,gru_b) gru -> gru_out_1 - Layers layers; - auto* a = layers.data("a"); - auto* b = layers.data("b"); - auto* fc_w = layers.data("gru_fc_w", {}, true); - auto* fc_b = layers.data("gru_fc_b", {}, true); - auto* gru_w = layers.data("gru_w", {}, true); - auto* gru_b = layers.data("gru_b", {}, true); - auto* fc_0_tmp0 = layers.mul(a, fc_w); - auto* fc_0_tmp1 = layers.elementwise_add(fc_0_tmp0, fc_b); - auto* gru_batch_gate_0 = layers.data("gru_batch_gate_0", {}, false); - auto* gru_batch_reset_hidden_prev_0 = - layers.data("gru_batch_reset_hidden_prev_0", {}, false); - auto* gru_batch_hidden_0 = layers.data("gru_batch_hidden_0", {}, false); - auto* gru_hidden_0 = layers.data("gru_hidden_0", {}, false); - layers.gru(fc_0_tmp1, gru_w, gru_b, gru_batch_gate_0, - gru_batch_reset_hidden_prev_0, gru_batch_hidden_0, gru_hidden_0); - - auto* fc_1_tmp0 = layers.mul(b, fc_w); - auto* fc_1_tmp1 = 
layers.elementwise_add(fc_1_tmp0, fc_b); - auto* gru_batch_gate_1 = layers.data("gru_batch_gate_1", {}, false); - auto* gru_batch_reset_hidden_prev_1 = - layers.data("gru_batch_reset_hidden_prev_1", {}, false); - auto* gru_batch_hidden_1 = layers.data("gru_batch_hidden_1", {}, false); - auto* gru_hidden_1 = layers.data("gru_hidden_1", {}, false); - layers.gru(fc_1_tmp1, gru_w, gru_b, gru_batch_gate_1, - gru_batch_reset_hidden_prev_1, gru_batch_hidden_1, gru_hidden_1); - - std::unique_ptr graph(new ir::Graph(layers.main_program())); +namespace fc_gru_test { +TEST(FcGruFusePass, basic) { + std::unique_ptr graph = PrepareGraph(); auto pass = PassRegistry::Instance().Get("fc_gru_fuse_pass"); pass->Set("use_gpu", new bool(true)); graph->Set("__param_scope__", CreateParamScope()); @@ -109,6 +47,7 @@ TEST(FCFusePass, basic) { "expectations after fuse")); } +} // namespace fc_gru_test } // namespace ir } // namespace framework } // namespace paddle diff --git a/paddle/fluid/framework/ir/fc_gru_fuse_pass_tester.h b/paddle/fluid/framework/ir/fc_gru_fuse_pass_tester.h new file mode 100644 index 00000000000000..b9288318fcfea6 --- /dev/null +++ b/paddle/fluid/framework/ir/fc_gru_fuse_pass_tester.h @@ -0,0 +1,93 @@ +// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+#pragma once + +#include "paddle/fluid/framework/ir/fc_gru_fuse_pass.h" + +#include +#include "paddle/fluid/framework/ir/pass_tester_helper.h" + +namespace paddle { +namespace framework { +namespace ir { + +namespace fc_gru_test { +void AddVarToScope(Scope* param_scope, const std::string& name, + const DDim& dims) { + auto* tensor = param_scope->Var(name)->GetMutable(); + tensor->Resize(dims); + tensor->mutable_data(platform::CPUPlace()); +} + +Scope* CreateParamScope() { + auto param_scope = new Scope(); + AddVarToScope(param_scope, "gru_fc_w", {}); + AddVarToScope(param_scope, "gru_fc_b", {}); + AddVarToScope(param_scope, "gru_w", {}); + AddVarToScope(param_scope, "gru_b", {}); + AddVarToScope(param_scope, "gru_batch_gate_0", {}); + AddVarToScope(param_scope, "gru_batch_reset_hidden_prev_0", {}); + AddVarToScope(param_scope, "gru_batch_hidden_0", {}); + AddVarToScope(param_scope, "gru_hidden_0", {}); + AddVarToScope(param_scope, "gru_batch_gate_1", {}); + AddVarToScope(param_scope, "gru_batch_reset_hidden_prev_1", {}); + AddVarToScope(param_scope, "gru_batch_hidden_1", {}); + AddVarToScope(param_scope, "gru_hidden_1", {}); + return param_scope; +} + +std::unique_ptr PrepareGraph() { + // inputs operator output + // -------------------------------------------------------- + // (a, gru_fc_w) mul -> fc_0_tmp_0 + // (fc_0_tmp_0, gru_fc_b) elementwise_add -> fc_0_tmp_1 + // (fc_0_tmp_1,gru_w,gru_b gru -> gru_out_0 + + // (b, gru_fc_w) mul -> fc_1_tmp_0 + // (fc_1_tmp_0, gru_fc_b) elementwise_add -> fc_1_tmp_1 + // (fc_1_tmp_1,gru_w,gru_b) gru -> gru_out_1 + Layers layers; + auto* a = layers.data("a"); + auto* b = layers.data("b"); + auto* fc_w = layers.data("gru_fc_w", {}, true); + auto* fc_b = layers.data("gru_fc_b", {}, true); + auto* gru_w = layers.data("gru_w", {}, true); + auto* gru_b = layers.data("gru_b", {}, true); + auto* fc_0_tmp0 = layers.mul(a, fc_w); + auto* fc_0_tmp1 = layers.elementwise_add(fc_0_tmp0, fc_b); + auto* gru_batch_gate_0 = layers.data("gru_batch_gate_0", {}, false); + auto* gru_batch_reset_hidden_prev_0 = + layers.data("gru_batch_reset_hidden_prev_0", {}, false); + auto* gru_batch_hidden_0 = layers.data("gru_batch_hidden_0", {}, false); + auto* gru_hidden_0 = layers.data("gru_hidden_0", {}, false); + layers.gru(fc_0_tmp1, gru_w, gru_b, gru_batch_gate_0, + gru_batch_reset_hidden_prev_0, gru_batch_hidden_0, gru_hidden_0); + + auto* fc_1_tmp0 = layers.mul(b, fc_w); + auto* fc_1_tmp1 = layers.elementwise_add(fc_1_tmp0, fc_b); + auto* gru_batch_gate_1 = layers.data("gru_batch_gate_1", {}, false); + auto* gru_batch_reset_hidden_prev_1 = + layers.data("gru_batch_reset_hidden_prev_1", {}, false); + auto* gru_batch_hidden_1 = layers.data("gru_batch_hidden_1", {}, false); + auto* gru_hidden_1 = layers.data("gru_hidden_1", {}, false); + layers.gru(fc_1_tmp1, gru_w, gru_b, gru_batch_gate_1, + gru_batch_reset_hidden_prev_1, gru_batch_hidden_1, gru_hidden_1); + + std::unique_ptr graph(new ir::Graph(layers.main_program())); + return std::move(graph); +} +} // namespace fc_gru_test +} // namespace ir +} // namespace framework +} // namespace paddle diff --git a/paddle/fluid/framework/ir/fc_lstm_fuse_pass.cc b/paddle/fluid/framework/ir/fc_lstm_fuse_pass.cc index 1c1289124506ab..1823df615665b2 100644 --- a/paddle/fluid/framework/ir/fc_lstm_fuse_pass.cc +++ b/paddle/fluid/framework/ir/fc_lstm_fuse_pass.cc @@ -47,7 +47,7 @@ int BuildFusion(Graph* graph, const std::string& name_scope, Scope* scope, // Create New OpDesc auto lstm_creator = [&](Node* lstm, Node* input, Node* weight_x, 
Node* weight_h, Node* bias, Node* hidden, Node* cell, - Node* xx, Node* fc_bias) { + Node* xx, Node* fc_bias, const bool& use_mkldnn) { OpDesc op_desc; op_desc.SetType("fusion_lstm"); #define SET_IN(Key, node__) op_desc.SetInput(#Key, {node__->Name()}); @@ -88,6 +88,7 @@ int BuildFusion(Graph* graph, const std::string& name_scope, Scope* scope, op_desc.SetOutput("XX", {xx->Name()}); op_desc.SetAttr("is_reverse", lstm->Op()->GetAttr("is_reverse")); op_desc.SetAttr("use_peepholes", lstm->Op()->GetAttr("use_peepholes")); + op_desc.SetAttr("use_mkldnn", use_mkldnn); // TODO(TJ): get from attr op_desc.SetAttr("use_seq", true); @@ -148,13 +149,14 @@ int BuildFusion(Graph* graph, const std::string& name_scope, Scope* scope, GET_IR_NODE_FROM_SUBGRAPH(Cell, Cell, lstm_pattern); GET_IR_NODE_FROM_SUBGRAPH(w, w, fc_pattern); GET_IR_NODE_FROM_SUBGRAPH(mul, mul, fc_pattern); + const bool use_mkldnn = mul->Op()->GetAttrIfExists("use_mkldnn"); if (with_fc_bias) { GET_IR_NODE_FROM_SUBGRAPH(fc_out, elementwise_add_out, fc_pattern); GET_IR_NODE_FROM_SUBGRAPH(fc_bias, bias, fc_pattern); GET_IR_NODE_FROM_SUBGRAPH(mul_out, mul_out, fc_pattern); GET_IR_NODE_FROM_SUBGRAPH(elementwise_add, elementwise_add, fc_pattern); lstm_creator(lstm, subgraph.at(x), w, Weight, Bias, Hidden, Cell, fc_out, - fc_bias); + fc_bias, use_mkldnn); // Remove unneeded nodes. std::unordered_set marked_nodes( {mul, lstm, elementwise_add, mul_out, BatchGate, BatchCellPreAct}); @@ -162,7 +164,7 @@ int BuildFusion(Graph* graph, const std::string& name_scope, Scope* scope, } else { GET_IR_NODE_FROM_SUBGRAPH(fc_out, mul_out, fc_pattern); lstm_creator(lstm, subgraph.at(x), w, Weight, Bias, Hidden, Cell, fc_out, - nullptr); + nullptr, use_mkldnn); // Remove unneeded nodes. std::unordered_set marked_nodes( {mul, lstm, BatchGate, BatchCellPreAct}); diff --git a/paddle/fluid/framework/ir/fc_lstm_fuse_pass_tester.cc b/paddle/fluid/framework/ir/fc_lstm_fuse_pass_tester.cc index 0de8d4684fecd4..92de86e52bc0a5 100644 --- a/paddle/fluid/framework/ir/fc_lstm_fuse_pass_tester.cc +++ b/paddle/fluid/framework/ir/fc_lstm_fuse_pass_tester.cc @@ -12,77 +12,16 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "paddle/fluid/framework/ir/fc_lstm_fuse_pass.h" - -#include -#include "paddle/fluid/framework/ir/pass_tester_helper.h" +#include "paddle/fluid/framework/ir/fc_lstm_fuse_pass_tester.h" namespace paddle { namespace framework { namespace ir { -void AddVarToScope(Scope* param_scope, const std::string& name, - const DDim& dims) { - auto* tensor = param_scope->Var(name)->GetMutable(); - tensor->Resize(dims); - tensor->mutable_data(platform::CPUPlace()); -} - -Scope* CreateParamScope() { - auto param_scope = new Scope(); - AddVarToScope(param_scope, "lstm_fc_w", {}); - AddVarToScope(param_scope, "lstm_fc_b", {}); - AddVarToScope(param_scope, "lstm_w", {}); - AddVarToScope(param_scope, "lstm_b", {}); - AddVarToScope(param_scope, "lstm_cell_0", {}); - AddVarToScope(param_scope, "lstm_batch_gate_0", {}); - AddVarToScope(param_scope, "lstm_batch_cell_pre_gate_0", {}); - AddVarToScope(param_scope, "lstm_hidden_0", {}); - AddVarToScope(param_scope, "lstm_cell_1", {}); - AddVarToScope(param_scope, "lstm_batch_gate_1", {}); - AddVarToScope(param_scope, "lstm_batch_cell_pre_gate_1", {}); - AddVarToScope(param_scope, "lstm_hidden_1", {}); - return param_scope; -} - -TEST(FCLSTMFusePass, basic) { - // inputs operator output - // -------------------------------------------------------- - // (a, lstm_fc_w) mul -> fc_0_tmp_0 - // (fc_0_tmp_0, lstm_fc_b) elementwise_add -> fc_0_tmp_1 - // fc_0_tmp_1,lstm_w,lstm_b lstm -> lstm_out_0 - - // (b, lstm_fc_w) mul -> fc_1_tmp_0 - // (fc_1_tmp_0, lstm_fc_b) elementwise_add -> fc_1_tmp_1 - // (fc_1_tmp_1,lstm_w,lstm_b) lstm -> lstm_out_1 - Layers layers; - auto* a = layers.data("a"); - auto* b = layers.data("b"); - auto* fc_w = layers.data("lstm_fc_w", {}, true); - auto* fc_b = layers.data("lstm_fc_b", {}, true); - auto* lstm_w = layers.data("lstm_w", {}, true); - auto* lstm_b = layers.data("lstm_b", {}, true); - auto* fc_0_tmp0 = layers.mul(a, fc_w); - auto* fc_0_tmp1 = layers.elementwise_add(fc_0_tmp0, fc_b); - auto* lstm_cell_0 = layers.data("lstm_cell_0", {}, false); - auto* lstm_batch_gate_0 = layers.data("lstm_batch_gate_0", {}, false); - auto* lstm_batch_cell_pre_gate_0 = - layers.data("lstm_batch_cell_pre_gate_0", {}, false); - auto* lstm_hidden_0 = layers.data("lstm_hidden_0", {}, false); - layers.lstm(fc_0_tmp1, lstm_w, lstm_b, lstm_cell_0, lstm_batch_gate_0, - lstm_hidden_0, lstm_batch_cell_pre_gate_0); +namespace fc_lstm_test { - auto* fc_1_tmp0 = layers.mul(b, fc_w); - auto* fc_1_tmp1 = layers.elementwise_add(fc_1_tmp0, fc_b); - auto* lstm_cell_1 = layers.data("lstm_cell_1", {}, false); - auto* lstm_batch_gate_1 = layers.data("lstm_batch_gate_1", {}, false); - auto* lstm_batch_cell_pre_gate_1 = - layers.data("lstm_batch_cell_pre_gate_1", {}, false); - auto* lstm_hidden_1 = layers.data("lstm_hidden_1", {}, false); - layers.lstm(fc_1_tmp1, lstm_w, lstm_b, lstm_cell_1, lstm_batch_gate_1, - lstm_hidden_1, lstm_batch_cell_pre_gate_1); - - std::unique_ptr graph(new ir::Graph(layers.main_program())); +TEST(FcLstmFusePass, basic) { + std::unique_ptr graph = PrepareGraph(); auto pass = PassRegistry::Instance().Get("fc_lstm_fuse_pass"); pass->Set("use_gpu", new bool(false)); graph->Set("__param_scope__", CreateParamScope()); @@ -108,7 +47,7 @@ TEST(FCLSTMFusePass, basic) { "The number of fusion_gru nodes does " "not meet expectations after fuse")); } - +} // namespace fc_lstm_test } // namespace ir } // namespace framework } // namespace paddle diff --git a/paddle/fluid/framework/ir/fc_lstm_fuse_pass_tester.h 
b/paddle/fluid/framework/ir/fc_lstm_fuse_pass_tester.h new file mode 100644 index 00000000000000..9c1e064e7380bf --- /dev/null +++ b/paddle/fluid/framework/ir/fc_lstm_fuse_pass_tester.h @@ -0,0 +1,96 @@ +// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "paddle/fluid/framework/ir/fc_lstm_fuse_pass.h" + +#include +#include "paddle/fluid/framework/ir/pass_tester_helper.h" + +namespace paddle { +namespace framework { +namespace ir { + +namespace fc_lstm_test { + +void AddVarToScope(Scope* param_scope, const std::string& name, + const DDim& dims) { + auto* tensor = param_scope->Var(name)->GetMutable(); + tensor->Resize(dims); + tensor->mutable_data(platform::CPUPlace()); +} + +Scope* CreateParamScope() { + auto param_scope = new Scope(); + AddVarToScope(param_scope, "lstm_fc_w", {}); + AddVarToScope(param_scope, "lstm_fc_b", {}); + AddVarToScope(param_scope, "lstm_w", {}); + AddVarToScope(param_scope, "lstm_b", {}); + AddVarToScope(param_scope, "lstm_cell_0", {}); + AddVarToScope(param_scope, "lstm_batch_gate_0", {}); + AddVarToScope(param_scope, "lstm_batch_cell_pre_gate_0", {}); + AddVarToScope(param_scope, "lstm_hidden_0", {}); + AddVarToScope(param_scope, "lstm_cell_1", {}); + AddVarToScope(param_scope, "lstm_batch_gate_1", {}); + AddVarToScope(param_scope, "lstm_batch_cell_pre_gate_1", {}); + AddVarToScope(param_scope, "lstm_hidden_1", {}); + return param_scope; +} + +std::unique_ptr PrepareGraph() { + // inputs operator output + // -------------------------------------------------------- + // (a, lstm_fc_w) mul -> fc_0_tmp_0 + // (fc_0_tmp_0, lstm_fc_b) elementwise_add -> fc_0_tmp_1 + // fc_0_tmp_1,lstm_w,lstm_b lstm -> lstm_out_0 + + // (b, lstm_fc_w) mul -> fc_1_tmp_0 + // (fc_1_tmp_0, lstm_fc_b) elementwise_add -> fc_1_tmp_1 + // (fc_1_tmp_1,lstm_w,lstm_b) lstm -> lstm_out_1 + Layers layers; + auto* a = layers.data("a"); + auto* b = layers.data("b"); + auto* fc_w = layers.data("lstm_fc_w", {}, true); + auto* fc_b = layers.data("lstm_fc_b", {}, true); + auto* lstm_w = layers.data("lstm_w", {}, true); + auto* lstm_b = layers.data("lstm_b", {}, true); + auto* fc_0_tmp0 = layers.mul(a, fc_w); + auto* fc_0_tmp1 = layers.elementwise_add(fc_0_tmp0, fc_b); + auto* lstm_cell_0 = layers.data("lstm_cell_0", {}, false); + auto* lstm_batch_gate_0 = layers.data("lstm_batch_gate_0", {}, false); + auto* lstm_batch_cell_pre_gate_0 = + layers.data("lstm_batch_cell_pre_gate_0", {}, false); + auto* lstm_hidden_0 = layers.data("lstm_hidden_0", {}, false); + layers.lstm(fc_0_tmp1, lstm_w, lstm_b, lstm_cell_0, lstm_batch_gate_0, + lstm_hidden_0, lstm_batch_cell_pre_gate_0); + + auto* fc_1_tmp0 = layers.mul(b, fc_w); + auto* fc_1_tmp1 = layers.elementwise_add(fc_1_tmp0, fc_b); + auto* lstm_cell_1 = layers.data("lstm_cell_1", {}, false); + auto* lstm_batch_gate_1 = layers.data("lstm_batch_gate_1", {}, false); + auto* lstm_batch_cell_pre_gate_1 = + layers.data("lstm_batch_cell_pre_gate_1", {}, false); + auto* 
lstm_hidden_1 = layers.data("lstm_hidden_1", {}, false);
+  layers.lstm(fc_1_tmp1, lstm_w, lstm_b, lstm_cell_1, lstm_batch_gate_1,
+              lstm_hidden_1, lstm_batch_cell_pre_gate_1);
+
+  std::unique_ptr<ir::Graph> graph(new ir::Graph(layers.main_program()));
+  return std::move(graph);
+}
+
+}  // namespace fc_lstm_test
+}  // namespace ir
+}  // namespace framework
+}  // namespace paddle
diff --git a/paddle/fluid/framework/ir/graph_pattern_detector.cc b/paddle/fluid/framework/ir/graph_pattern_detector.cc
index 064da3d941602e..3476ce8610ee34 100644
--- a/paddle/fluid/framework/ir/graph_pattern_detector.cc
+++ b/paddle/fluid/framework/ir/graph_pattern_detector.cc
@@ -2262,11 +2262,11 @@ PDNode *patterns::QuantizePlacement::operator()(
 PDNode *patterns::Bfloat16Placement::operator()(
     const std::unordered_set<std::string> &bfloat16_enabled_op_types) {
   std::unordered_set<std::string> supported_op_types =
-      std::unordered_set<std::string>({"concat", "conv2d", "conv2d_transpose",
-                                       "elementwise_add", "elementwise_mul",
-                                       "fc", "fusion_gru", "gelu", "layer_norm",
-                                       "matmul", "pool2d", "relu", "reshape2",
-                                       "softmax", "sum", "transpose2"});
+      std::unordered_set<std::string>(
+          {"concat", "conv2d", "conv2d_transpose", "elementwise_add",
+           "elementwise_mul", "fc", "fusion_gru", "fusion_lstm", "gelu",
+           "layer_norm", "matmul", "pool2d", "relu", "reshape2", "softmax",
+           "sum", "transpose2"});
   if (!bfloat16_enabled_op_types.empty()) {
     supported_op_types = bfloat16_enabled_op_types;
   }
diff --git a/paddle/fluid/framework/ir/mkldnn/mkldnn_fc_rnn_fuse_pass_tester.cc b/paddle/fluid/framework/ir/mkldnn/mkldnn_fc_rnn_fuse_pass_tester.cc
new file mode 100644
index 00000000000000..2feb9312a9483a
--- /dev/null
+++ b/paddle/fluid/framework/ir/mkldnn/mkldnn_fc_rnn_fuse_pass_tester.cc
@@ -0,0 +1,70 @@
+// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <gtest/gtest.h>
+#include "paddle/fluid/framework/ir/fc_gru_fuse_pass_tester.h"
+#include "paddle/fluid/framework/ir/fc_lstm_fuse_pass_tester.h"
+#include "paddle/fluid/framework/ir/mkldnn/mkldnn_placement_pass.h"
+#include "paddle/fluid/framework/ir/pass_tester_helper.h"
+
+namespace paddle {
+namespace framework {
+namespace ir {
+
+void TestFcRNNFusePass(const std::string& pass_name) {
+  std::unique_ptr<ir::Graph> graph =
+      (pass_name == "fc_gru_fuse_pass" ? fc_gru_test::PrepareGraph()
+                                       : fc_lstm_test::PrepareGraph());
+  auto mkldnn_placement_pass_ =
+      PassRegistry::Instance().Get("mkldnn_placement_pass");
+  mkldnn_placement_pass_->Set("mkldnn_enabled_op_types",
+                              new std::unordered_set<std::string>({}));
+  graph->Set("__param_scope__", (pass_name == "fc_gru_fuse_pass"
+                                     ?
fc_gru_test::CreateParamScope()
+                                     : fc_lstm_test::CreateParamScope()));
+  graph.reset(mkldnn_placement_pass_->Apply(graph.release()));
+
+  auto check_num_mkldnn_nodes = [&](const std::unique_ptr<ir::Graph>& graph) {
+    int nodes_cout = 0;
+    for (auto* node : graph->Nodes()) {
+      if (node->IsOp()) {
+        auto* op = node->Op();
+        if (op->GetAttrIfExists<bool>("use_mkldnn")) nodes_cout++;
+      }
+    }
+    return nodes_cout;
+  };
+  int num_mkldnn_nodes_before = check_num_mkldnn_nodes(graph);
+
+  auto fc_rnn_fuse_pass_ = PassRegistry::Instance().Get(pass_name);
+  graph.reset(fc_rnn_fuse_pass_->Apply(graph.release()));
+  int num_mkldnn_nodes_after = check_num_mkldnn_nodes(graph);
+
+  PADDLE_ENFORCE_EQ(num_mkldnn_nodes_before, num_mkldnn_nodes_after,
+                    platform::errors::PreconditionNotMet(
+                        "The number of nodes with \"use_mkldnn\" attr before "
+                        "and after the fuse are not equal"));
+}
+
+TEST(FcGruFusePass, use_mkldnn) { TestFcRNNFusePass("fc_gru_fuse_pass"); }
+
+TEST(FcLstmFusePass, use_mkldnn) { TestFcRNNFusePass("fc_lstm_fuse_pass"); }
+
+}  // namespace ir
+}  // namespace framework
+}  // namespace paddle
+
+USE_PASS(mkldnn_placement_pass);
+USE_PASS(fc_gru_fuse_pass);
+USE_PASS(fc_lstm_fuse_pass);
diff --git a/paddle/fluid/inference/tests/api/analyzer_lexical_analysis_gru_tester.cc b/paddle/fluid/inference/tests/api/analyzer_lexical_analysis_gru_tester.cc
index 024313837e0b63..720c90090cf746 100644
--- a/paddle/fluid/inference/tests/api/analyzer_lexical_analysis_gru_tester.cc
+++ b/paddle/fluid/inference/tests/api/analyzer_lexical_analysis_gru_tester.cc
@@ -38,7 +38,6 @@ void SetAnalysisConfig(AnalysisConfig *cfg,
   cfg->SwitchSpecifyInputNames(false);
   cfg->SetCpuMathLibraryNumThreads(num_threads);
   cfg->EnableMKLDNN();
-  cfg->pass_builder()->AppendPass("mkldnn_placement_pass");
 }
 
 std::vector<std::vector<size_t>> ReadSentenceLod(std::ifstream &file, size_t offset,
diff --git a/paddle/fluid/operators/fused/fusion_lstm_op.cc b/paddle/fluid/operators/fused/fusion_lstm_op.cc
index 6cca6b5a9729a7..42bf784b2af4fb 100644
--- a/paddle/fluid/operators/fused/fusion_lstm_op.cc
+++ b/paddle/fluid/operators/fused/fusion_lstm_op.cc
@@ -249,6 +249,11 @@ void FusionLSTMOpMaker::Make() {
   AddAttr<bool>("use_mkldnn",
                 "(bool, default false) Only used in mkldnn kernel")
       .SetDefault(false);
+  AddAttr<std::string>(
+      "mkldnn_data_type",
+      "(string, default \"float32\"). Data type of mkldnn kernel")
+      .SetDefault("float32")
+      .InEnum({"float32", "int8", "bfloat16"});
   AddAttr<float>("Scale_data",
                  "Scale to be used for int8 input/output data."
"Only used with MKL-DNN INT8.") diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_fusion_gru_bf16_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_fusion_gru_bf16_mkldnn_op.py index 7320efd259f459..fa9a93452dffde 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_fusion_gru_bf16_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_fusion_gru_bf16_mkldnn_op.py @@ -27,7 +27,7 @@ "place does not support BF16 evaluation") class TestFusionGRUBF16MKLDNNOp(OpTest): def set_confs(self): - self.mkldnn_data_type = False + pass def test_check_output(self): for use_seq in {True, False}: @@ -48,6 +48,7 @@ def setUp(self): self.act_gate = 'sigmoid' self.origin_mode = False self.use_mkldnn = True + self.mkldnn_data_type = "bfloat16" self.force_fp32_output = False self.weights_dtype = 'fp32' self.set_confs() @@ -113,7 +114,8 @@ def setUp(self): 'is_reverse': self.is_reverse, 'origin_mode': self.origin_mode, 'force_fp32_output': self.force_fp32_output, - 'use_mkldnn': self.use_mkldnn + 'use_mkldnn': self.use_mkldnn, + 'mkldnn_data_type': self.mkldnn_data_type, } diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_fusion_gru_int8_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_fusion_gru_int8_mkldnn_op.py index 2d3caf0be97c95..4fda51e9e05f48 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_fusion_gru_int8_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_fusion_gru_int8_mkldnn_op.py @@ -35,6 +35,7 @@ def setUp(self): self.act_gate = 'sigmoid' self.origin_mode = True self.use_mkldnn = True + self.mkldnn_data_type = "int8" self.force_fp32_output = True self.error_margin = 1e-5 self.set_confs() @@ -115,6 +116,7 @@ def setUp(self): 'is_reverse': self.is_reverse, 'origin_mode': self.origin_mode, 'use_mkldnn': self.use_mkldnn, + 'mkldnn_data_type': self.mkldnn_data_type, 'force_fp32_output': self.force_fp32_output, 'Scale_data': scale_data, 'Shift_data': shift_data, diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_fusion_lstm_bf16_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_fusion_lstm_bf16_mkldnn_op.py index d65919aa434c38..d07eda3259960c 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_fusion_lstm_bf16_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_fusion_lstm_bf16_mkldnn_op.py @@ -27,7 +27,7 @@ "place does not support BF16 evaluation") class TestFusionLSTMBF16ONEDNNOp(OpTest): def set_confs(self): - self.mkldnn_data_type = False + pass def test_check_output(self): for use_seq in {True, False}: @@ -48,6 +48,7 @@ def setUp(self): self.act_cell = 'tanh' self.act_cand = 'tanh' self.use_mkldnn = True + self.mkldnn_data_type = "bfloat16" self.force_fp32_output = False self.weights_dtype = 'fp32' self.set_confs() @@ -130,7 +131,8 @@ def setUp(self): 'cell_activation': self.act_cell, 'candidate_activation': self.act_cand, 'force_fp32_output': self.force_fp32_output, - 'use_mkldnn': self.use_mkldnn + 'use_mkldnn': self.use_mkldnn, + 'mkldnn_data_type': self.mkldnn_data_type, } diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_fusion_lstm_int8_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_fusion_lstm_int8_mkldnn_op.py index 93dc45f2650f53..12f8c01783d9c3 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_fusion_lstm_int8_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_fusion_lstm_int8_mkldnn_op.py @@ -34,6 +34,7 @@ def setUp(self): self.act_cand = 'tanh' self.use_peepholes = False # LSTM u8 doesn't 
support peepholes self.use_mkldnn = True + self.mkldnn_data_type = "int8" self.force_fp32_output = False self.error_margin = 1e-5 self.set_confs() @@ -117,6 +118,7 @@ def setUp(self): 'is_reverse': self.is_reverse, 'use_peepholes': self.use_peepholes, 'use_mkldnn': self.use_mkldnn, + 'mkldnn_data_type': self.mkldnn_data_type, 'force_fp32_output': self.force_fp32_output, 'Scale_data': scale_data, 'Shift_data': shift_data, From ad5c8b063f896103846cdb8f142b3366e09a0854 Mon Sep 17 00:00:00 2001 From: wozna Date: Wed, 2 Jun 2021 14:21:17 +0200 Subject: [PATCH 2/6] Correct to pass arg by value --- paddle/fluid/framework/ir/fc_gru_fuse_pass.cc | 2 +- paddle/fluid/framework/ir/fc_lstm_fuse_pass.cc | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/paddle/fluid/framework/ir/fc_gru_fuse_pass.cc b/paddle/fluid/framework/ir/fc_gru_fuse_pass.cc index ad3b8deba42b93..a650bb00638066 100644 --- a/paddle/fluid/framework/ir/fc_gru_fuse_pass.cc +++ b/paddle/fluid/framework/ir/fc_gru_fuse_pass.cc @@ -49,7 +49,7 @@ static int BuildFusion(Graph* graph, const std::string& name_scope, // Create New OpDesc auto gru_creater = [&](Node* gru, Node* x, Node* weight_x, Node* weight_h, Node* bias, Node* hidden, Node* fc_bias, - const bool& use_mkldnn) { + const bool use_mkldnn) { OpDesc op_desc; op_desc.SetType("fusion_gru"); diff --git a/paddle/fluid/framework/ir/fc_lstm_fuse_pass.cc b/paddle/fluid/framework/ir/fc_lstm_fuse_pass.cc index 1823df615665b2..5c5e6840bd02c9 100644 --- a/paddle/fluid/framework/ir/fc_lstm_fuse_pass.cc +++ b/paddle/fluid/framework/ir/fc_lstm_fuse_pass.cc @@ -47,7 +47,7 @@ int BuildFusion(Graph* graph, const std::string& name_scope, Scope* scope, // Create New OpDesc auto lstm_creator = [&](Node* lstm, Node* input, Node* weight_x, Node* weight_h, Node* bias, Node* hidden, Node* cell, - Node* xx, Node* fc_bias, const bool& use_mkldnn) { + Node* xx, Node* fc_bias, const bool use_mkldnn) { OpDesc op_desc; op_desc.SetType("fusion_lstm"); #define SET_IN(Key, node__) op_desc.SetInput(#Key, {node__->Name()}); From 419d486856c012da28144908935d0fe1e881cfe1 Mon Sep 17 00:00:00 2001 From: wozna Date: Mon, 7 Jun 2021 10:59:26 +0200 Subject: [PATCH 3/6] Add conditions to rnn op --- paddle/fluid/framework/ir/fc_gru_fuse_pass.cc | 5 ++++- paddle/fluid/framework/ir/fc_lstm_fuse_pass.cc | 9 ++++++++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/paddle/fluid/framework/ir/fc_gru_fuse_pass.cc b/paddle/fluid/framework/ir/fc_gru_fuse_pass.cc index a650bb00638066..eba8399eef4259 100644 --- a/paddle/fluid/framework/ir/fc_gru_fuse_pass.cc +++ b/paddle/fluid/framework/ir/fc_gru_fuse_pass.cc @@ -151,7 +151,10 @@ static int BuildFusion(Graph* graph, const std::string& name_scope, LOG(INFO) << "fc_gru_fuse_pass not supported when origin_mode=True."; return; } - const bool use_mkldnn = mul->Op()->GetAttrIfExists("use_mkldnn"); + const bool use_mkldnn = + mul->Op()->GetAttrIfExists("use_mkldnn") && + gru->Op()->GetAttrIfExists("activation") == "tahn" && + gru->Op()->GetAttrIfExists("gate_activation") == "sigmoid"; if (with_fc_bias) { GET_IR_NODE_FROM_SUBGRAPH(mul_out, mul_out, fc_pattern); diff --git a/paddle/fluid/framework/ir/fc_lstm_fuse_pass.cc b/paddle/fluid/framework/ir/fc_lstm_fuse_pass.cc index 5c5e6840bd02c9..154ca2e45f012e 100644 --- a/paddle/fluid/framework/ir/fc_lstm_fuse_pass.cc +++ b/paddle/fluid/framework/ir/fc_lstm_fuse_pass.cc @@ -149,7 +149,14 @@ int BuildFusion(Graph* graph, const std::string& name_scope, Scope* scope, GET_IR_NODE_FROM_SUBGRAPH(Cell, Cell, lstm_pattern); 
GET_IR_NODE_FROM_SUBGRAPH(w, w, fc_pattern); GET_IR_NODE_FROM_SUBGRAPH(mul, mul, fc_pattern); - const bool use_mkldnn = mul->Op()->GetAttrIfExists("use_mkldnn"); + const bool use_mkldnn = + mul->Op()->GetAttrIfExists("use_mkldnn") && + lstm->Op()->GetAttrIfExists("gate_activation") == + "sigmoid" && + lstm->Op()->GetAttrIfExists("cell_activation") == "tahn" && + lstm->Op()->GetAttrIfExists("candidate_activation") == + "tahn"; + if (with_fc_bias) { GET_IR_NODE_FROM_SUBGRAPH(fc_out, elementwise_add_out, fc_pattern); GET_IR_NODE_FROM_SUBGRAPH(fc_bias, bias, fc_pattern); From 0c1ba3b4edd84bbad6c1b5585a498008d9836cb5 Mon Sep 17 00:00:00 2001 From: wozna Date: Wed, 9 Jun 2021 15:59:59 +0200 Subject: [PATCH 4/6] Correct the spelling mistake --- paddle/fluid/framework/ir/fc_gru_fuse_pass.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/paddle/fluid/framework/ir/fc_gru_fuse_pass.cc b/paddle/fluid/framework/ir/fc_gru_fuse_pass.cc index eba8399eef4259..5345d0cae953f8 100644 --- a/paddle/fluid/framework/ir/fc_gru_fuse_pass.cc +++ b/paddle/fluid/framework/ir/fc_gru_fuse_pass.cc @@ -47,7 +47,7 @@ static int BuildFusion(Graph* graph, const std::string& name_scope, gru_pattern(fc_out); // Create New OpDesc - auto gru_creater = [&](Node* gru, Node* x, Node* weight_x, Node* weight_h, + auto gru_creator = [&](Node* gru, Node* x, Node* weight_x, Node* weight_h, Node* bias, Node* hidden, Node* fc_bias, const bool use_mkldnn) { OpDesc op_desc; @@ -162,14 +162,14 @@ static int BuildFusion(Graph* graph, const std::string& name_scope, GET_IR_NODE_FROM_SUBGRAPH(elementwise_add, elementwise_add, fc_pattern); GET_IR_NODE_FROM_SUBGRAPH(fc_out, elementwise_add_out, fc_pattern); - gru_creater(gru, x_n, w, Weight, Bias, Hidden, fc_bias, use_mkldnn); + gru_creator(gru, x_n, w, Weight, Bias, Hidden, fc_bias, use_mkldnn); // Remove unneeded nodes. std::unordered_set marked_nodes( {mul, gru, elementwise_add, fc_out, mul_out, BatchGate, BatchResetHiddenPrev, BatchHidden}); GraphSafeRemoveNodes(graph, marked_nodes); } else { - gru_creater(gru, x_n, w, Weight, Bias, Hidden, nullptr, use_mkldnn); + gru_creator(gru, x_n, w, Weight, Bias, Hidden, nullptr, use_mkldnn); // Remove unneeded nodes. 
std::unordered_set marked_nodes( {mul, gru, BatchGate, BatchResetHiddenPrev, BatchHidden}); From e552ac31c61dde17f0d39db6c94b05b1e0fb9376 Mon Sep 17 00:00:00 2001 From: wozna Date: Thu, 10 Jun 2021 12:12:54 +0200 Subject: [PATCH 5/6] Improving the test with checking activation --- paddle/fluid/framework/ir/fc_gru_fuse_pass.cc | 7 ++-- .../framework/ir/fc_gru_fuse_pass_tester.h | 9 +++-- .../fluid/framework/ir/fc_lstm_fuse_pass.cc | 13 ++++---- .../framework/ir/fc_lstm_fuse_pass_tester.h | 12 ++++--- .../mkldnn/mkldnn_fc_rnn_fuse_pass_tester.cc | 33 +++++++++++++++---- .../fluid/framework/ir/pass_tester_helper.h | 10 ++++-- 6 files changed, 59 insertions(+), 25 deletions(-) diff --git a/paddle/fluid/framework/ir/fc_gru_fuse_pass.cc b/paddle/fluid/framework/ir/fc_gru_fuse_pass.cc index 5345d0cae953f8..921e1ea513961d 100644 --- a/paddle/fluid/framework/ir/fc_gru_fuse_pass.cc +++ b/paddle/fluid/framework/ir/fc_gru_fuse_pass.cc @@ -152,9 +152,10 @@ static int BuildFusion(Graph* graph, const std::string& name_scope, return; } const bool use_mkldnn = - mul->Op()->GetAttrIfExists("use_mkldnn") && - gru->Op()->GetAttrIfExists("activation") == "tahn" && - gru->Op()->GetAttrIfExists("gate_activation") == "sigmoid"; + (mul->Op()->GetAttrIfExists("use_mkldnn") && + gru->Op()->GetAttrIfExists("activation") == "tanh" && + gru->Op()->GetAttrIfExists("gate_activation") == + "sigmoid"); if (with_fc_bias) { GET_IR_NODE_FROM_SUBGRAPH(mul_out, mul_out, fc_pattern); diff --git a/paddle/fluid/framework/ir/fc_gru_fuse_pass_tester.h b/paddle/fluid/framework/ir/fc_gru_fuse_pass_tester.h index b9288318fcfea6..a862755d604e44 100644 --- a/paddle/fluid/framework/ir/fc_gru_fuse_pass_tester.h +++ b/paddle/fluid/framework/ir/fc_gru_fuse_pass_tester.h @@ -47,7 +47,8 @@ Scope* CreateParamScope() { return param_scope; } -std::unique_ptr PrepareGraph() { +std::unique_ptr PrepareGraph( + std::string activation = "tanh", std::string gate_activation = "sigmoid") { // inputs operator output // -------------------------------------------------------- // (a, gru_fc_w) mul -> fc_0_tmp_0 @@ -72,7 +73,8 @@ std::unique_ptr PrepareGraph() { auto* gru_batch_hidden_0 = layers.data("gru_batch_hidden_0", {}, false); auto* gru_hidden_0 = layers.data("gru_hidden_0", {}, false); layers.gru(fc_0_tmp1, gru_w, gru_b, gru_batch_gate_0, - gru_batch_reset_hidden_prev_0, gru_batch_hidden_0, gru_hidden_0); + gru_batch_reset_hidden_prev_0, gru_batch_hidden_0, gru_hidden_0, + nullptr, false, false, activation, gate_activation); auto* fc_1_tmp0 = layers.mul(b, fc_w); auto* fc_1_tmp1 = layers.elementwise_add(fc_1_tmp0, fc_b); @@ -82,7 +84,8 @@ std::unique_ptr PrepareGraph() { auto* gru_batch_hidden_1 = layers.data("gru_batch_hidden_1", {}, false); auto* gru_hidden_1 = layers.data("gru_hidden_1", {}, false); layers.gru(fc_1_tmp1, gru_w, gru_b, gru_batch_gate_1, - gru_batch_reset_hidden_prev_1, gru_batch_hidden_1, gru_hidden_1); + gru_batch_reset_hidden_prev_1, gru_batch_hidden_1, gru_hidden_1, + nullptr, false, false, activation, gate_activation); std::unique_ptr graph(new ir::Graph(layers.main_program())); return std::move(graph); diff --git a/paddle/fluid/framework/ir/fc_lstm_fuse_pass.cc b/paddle/fluid/framework/ir/fc_lstm_fuse_pass.cc index 154ca2e45f012e..6bd956ef0d53c9 100644 --- a/paddle/fluid/framework/ir/fc_lstm_fuse_pass.cc +++ b/paddle/fluid/framework/ir/fc_lstm_fuse_pass.cc @@ -150,12 +150,13 @@ int BuildFusion(Graph* graph, const std::string& name_scope, Scope* scope, GET_IR_NODE_FROM_SUBGRAPH(w, w, fc_pattern); GET_IR_NODE_FROM_SUBGRAPH(mul, 
mul, fc_pattern); const bool use_mkldnn = - mul->Op()->GetAttrIfExists("use_mkldnn") && - lstm->Op()->GetAttrIfExists("gate_activation") == - "sigmoid" && - lstm->Op()->GetAttrIfExists("cell_activation") == "tahn" && - lstm->Op()->GetAttrIfExists("candidate_activation") == - "tahn"; + (mul->Op()->GetAttrIfExists("use_mkldnn") && + lstm->Op()->GetAttrIfExists("gate_activation") == + "sigmoid" && + lstm->Op()->GetAttrIfExists("cell_activation") == + "tanh" && + lstm->Op()->GetAttrIfExists("candidate_activation") == + "tanh"); if (with_fc_bias) { GET_IR_NODE_FROM_SUBGRAPH(fc_out, elementwise_add_out, fc_pattern); diff --git a/paddle/fluid/framework/ir/fc_lstm_fuse_pass_tester.h b/paddle/fluid/framework/ir/fc_lstm_fuse_pass_tester.h index 9c1e064e7380bf..f681a2b7ff8eb0 100644 --- a/paddle/fluid/framework/ir/fc_lstm_fuse_pass_tester.h +++ b/paddle/fluid/framework/ir/fc_lstm_fuse_pass_tester.h @@ -49,7 +49,10 @@ Scope* CreateParamScope() { return param_scope; } -std::unique_ptr PrepareGraph() { +std::unique_ptr PrepareGraph( + std::string gate_activation = "sigmoid", + std::string cell_activation = "tanh", + std::string candidate_activation = "tanh") { // inputs operator output // -------------------------------------------------------- // (a, lstm_fc_w) mul -> fc_0_tmp_0 @@ -74,8 +77,8 @@ std::unique_ptr PrepareGraph() { layers.data("lstm_batch_cell_pre_gate_0", {}, false); auto* lstm_hidden_0 = layers.data("lstm_hidden_0", {}, false); layers.lstm(fc_0_tmp1, lstm_w, lstm_b, lstm_cell_0, lstm_batch_gate_0, - lstm_hidden_0, lstm_batch_cell_pre_gate_0); - + lstm_hidden_0, lstm_batch_cell_pre_gate_0, nullptr, nullptr, true, + false, gate_activation, cell_activation, candidate_activation); auto* fc_1_tmp0 = layers.mul(b, fc_w); auto* fc_1_tmp1 = layers.elementwise_add(fc_1_tmp0, fc_b); auto* lstm_cell_1 = layers.data("lstm_cell_1", {}, false); @@ -84,7 +87,8 @@ std::unique_ptr PrepareGraph() { layers.data("lstm_batch_cell_pre_gate_1", {}, false); auto* lstm_hidden_1 = layers.data("lstm_hidden_1", {}, false); layers.lstm(fc_1_tmp1, lstm_w, lstm_b, lstm_cell_1, lstm_batch_gate_1, - lstm_hidden_1, lstm_batch_cell_pre_gate_1); + lstm_hidden_1, lstm_batch_cell_pre_gate_1, nullptr, nullptr, true, + false, gate_activation, cell_activation, candidate_activation); std::unique_ptr graph(new ir::Graph(layers.main_program())); return std::move(graph); diff --git a/paddle/fluid/framework/ir/mkldnn/mkldnn_fc_rnn_fuse_pass_tester.cc b/paddle/fluid/framework/ir/mkldnn/mkldnn_fc_rnn_fuse_pass_tester.cc index 2feb9312a9483a..c4770a322db50c 100644 --- a/paddle/fluid/framework/ir/mkldnn/mkldnn_fc_rnn_fuse_pass_tester.cc +++ b/paddle/fluid/framework/ir/mkldnn/mkldnn_fc_rnn_fuse_pass_tester.cc @@ -22,10 +22,15 @@ namespace paddle { namespace framework { namespace ir { -void TestFcRNNFusePass(const std::string& pass_name) { +void TestFcRNNFusePass(const std::string& pass_name, + std::string activation = "tanh", + std::string gate_activation = "sigmoid", + std::string candidate_activation = "tanh") { std::unique_ptr graph = - (pass_name == "fc_gru_fuse_pass" ? fc_gru_test::PrepareGraph() - : fc_lstm_test::PrepareGraph()); + (pass_name == "fc_gru_fuse_pass" + ? 
fc_gru_test::PrepareGraph(activation, gate_activation) + : fc_lstm_test::PrepareGraph(gate_activation, activation, + candidate_activation)); auto mkldnn_placement_pass_ = PassRegistry::Instance().Get("mkldnn_placement_pass"); mkldnn_placement_pass_->Set("mkldnn_enabled_op_types", @@ -46,21 +51,37 @@ void TestFcRNNFusePass(const std::string& pass_name) { return nodes_cout; }; int num_mkldnn_nodes_before = check_num_mkldnn_nodes(graph); + int removed_mkldnn_nodes = 2; + + // OneDNN fusion_gru and fusion_lstm supports only sigmoid as a gate + // activation and tanh as an activation and candidate_activation + if (activation != "tanh" || gate_activation != "sigmoid" || + candidate_activation != "tanh") + removed_mkldnn_nodes += 2; auto fc_rnn_fuse_pass_ = PassRegistry::Instance().Get(pass_name); graph.reset(fc_rnn_fuse_pass_->Apply(graph.release())); int num_mkldnn_nodes_after = check_num_mkldnn_nodes(graph); - PADDLE_ENFORCE_EQ(num_mkldnn_nodes_before, num_mkldnn_nodes_after, + PADDLE_ENFORCE_EQ(num_mkldnn_nodes_before - removed_mkldnn_nodes, + num_mkldnn_nodes_after, platform::errors::PreconditionNotMet( - "The number of nodes with \"use_mkldnn\" attr before " - "and after the fuse are not equal")); + "The number of nodes with \"use_mkldnn\" attr after " + "passes is not as expected")); } TEST(FcGruFusePass, use_mkldnn) { TestFcRNNFusePass("fc_gru_fuse_pass"); } +TEST(FcGruFusePass, gru_unsupported_activations) { + TestFcRNNFusePass("fc_gru_fuse_pass", "relu", "sigmoid"); +} + TEST(FcLstmFusePass, use_mkldnn) { TestFcRNNFusePass("fc_lstm_fuse_pass"); } +TEST(FcLstmFusePass, lstm_unsupported_activations) { + TestFcRNNFusePass("fc_lstm_fuse_pass", "tanh", "relu", "tanh"); +} + } // namespace ir } // namespace framework } // namespace paddle diff --git a/paddle/fluid/framework/ir/pass_tester_helper.h b/paddle/fluid/framework/ir/pass_tester_helper.h index 6b187e538d1c08..70ee714cba457c 100644 --- a/paddle/fluid/framework/ir/pass_tester_helper.h +++ b/paddle/fluid/framework/ir/pass_tester_helper.h @@ -194,14 +194,18 @@ struct Layers { } VarDesc* mul(VarDesc* x, VarDesc* y, VarDesc* out = nullptr, - int x_num_col_dims = 1) { + int x_num_col_dims = 1, bool use_mkldnn = false) { AttributeMap attrs; attrs["x_num_col_dims"] = 1; + attrs["use_mkldnn"] = use_mkldnn; return binary_op("mul", x, y, out, &attrs); } - VarDesc* elementwise_add(VarDesc* x, VarDesc* y, VarDesc* out = nullptr) { - return binary_op("elementwise_add", x, y, out); + VarDesc* elementwise_add(VarDesc* x, VarDesc* y, VarDesc* out = nullptr, + bool use_mkldnn = false) { + AttributeMap attrs; + attrs["use_mkldnn"] = use_mkldnn; + return binary_op("elementwise_add", x, y, out, &attrs); } VarDesc* elementwise_mul(VarDesc* x, VarDesc* y, VarDesc* out = nullptr, From e731d53868abcc618eadd3c9a87a9a6e9f7c8370 Mon Sep 17 00:00:00 2001 From: wozna Date: Thu, 10 Jun 2021 13:51:09 +0200 Subject: [PATCH 6/6] Trigger CI
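Editor's note (not part of the patch series): once fusion_gru and fusion_lstm carry the use_mkldnn and mkldnn_data_type attributes added above, the bfloat16 placement logic can select them during inference. The following is only a rough sketch of how a BF16 inference config might be built on the C++ analysis API; it assumes the AnalysisConfig::EnableMKLDNN() call already used in analyzer_lexical_analysis_gru_tester.cc and an EnableMkldnnBfloat16() toggle are available in this Paddle version, and the model path is a placeholder.

// Sketch only: enable the oneDNN BF16 path for a model containing fused RNN ops.
// Assumes EnableMkldnnBfloat16() exists on AnalysisConfig in this Paddle build.
#include "paddle/fluid/inference/api/paddle_inference_api.h"

paddle::AnalysisConfig MakeBf16RnnConfig(const std::string& model_dir) {
  paddle::AnalysisConfig cfg;
  cfg.SetModel(model_dir);      // placeholder model directory
  cfg.SwitchIrOptim(true);      // run fc_gru_fuse_pass / fc_lstm_fuse_pass
  cfg.EnableMKLDNN();           // mark supported ops with use_mkldnn
  cfg.EnableMkldnnBfloat16();   // let the bfloat16 placement pass pick fusion_gru/fusion_lstm
  return cfg;
}

With such a config, the placement and cpu_bfloat16 passes decide which of the marked ops actually run in bfloat16; the unit tests in this series only verify that the fuse passes preserve the use_mkldnn markings and skip unsupported activations.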