From 1ad247846327a79c63fa1e02c4e9bb8514f96aaa Mon Sep 17 00:00:00 2001 From: wozna Date: Wed, 2 Jun 2021 13:23:04 +0200 Subject: [PATCH 1/6] Small changes related to BF16 fusion_gru and fusion_lstm --- paddle/fluid/framework/ir/CMakeLists.txt | 2 + paddle/fluid/framework/ir/fc_gru_fuse_pass.cc | 9 +- .../framework/ir/fc_gru_fuse_pass_tester.cc | 71 +------------- .../framework/ir/fc_gru_fuse_pass_tester.h | 93 ++++++++++++++++++ .../fluid/framework/ir/fc_lstm_fuse_pass.cc | 8 +- .../framework/ir/fc_lstm_fuse_pass_tester.cc | 71 +------------- .../framework/ir/fc_lstm_fuse_pass_tester.h | 96 +++++++++++++++++++ .../framework/ir/graph_pattern_detector.cc | 10 +- .../mkldnn/mkldnn_fc_rnn_fuse_pass_tester.cc | 70 ++++++++++++++ .../analyzer_lexical_analysis_gru_tester.cc | 1 - .../fluid/operators/fused/fusion_lstm_op.cc | 5 + .../mkldnn/test_fusion_gru_bf16_mkldnn_op.py | 6 +- .../mkldnn/test_fusion_gru_int8_mkldnn_op.py | 2 + .../mkldnn/test_fusion_lstm_bf16_mkldnn_op.py | 6 +- .../mkldnn/test_fusion_lstm_int8_mkldnn_op.py | 2 + 15 files changed, 304 insertions(+), 148 deletions(-) create mode 100644 paddle/fluid/framework/ir/fc_gru_fuse_pass_tester.h create mode 100644 paddle/fluid/framework/ir/fc_lstm_fuse_pass_tester.h create mode 100644 paddle/fluid/framework/ir/mkldnn/mkldnn_fc_rnn_fuse_pass_tester.cc diff --git a/paddle/fluid/framework/ir/CMakeLists.txt b/paddle/fluid/framework/ir/CMakeLists.txt index fb478bb6e8978e..8202f9ecc6173c 100644 --- a/paddle/fluid/framework/ir/CMakeLists.txt +++ b/paddle/fluid/framework/ir/CMakeLists.txt @@ -188,4 +188,6 @@ endif() cc_test(test_cpu_bfloat16_pass SRCS mkldnn/cpu_bfloat16_pass_tester.cc DEPS cpu_bfloat16_pass) cc_test(test_multi_gru_fuse_pass SRCS mkldnn/multi_gru_fuse_pass_tester.cc DEPS multi_gru_fuse_pass) cc_test(test_multi_gru_seq_fuse_pass SRCS mkldnn/multi_gru_seq_fuse_pass_tester.cc DEPS multi_gru_seq_fuse_pass) + set(TEST_FC_RNN_PASS_DEPS fc_gru_fuse_pass fc_lstm_fuse_pass mkldnn_placement_pass) + cc_test(test_fc_rnn_mkldnn_fuse_pass SRCS mkldnn/mkldnn_fc_rnn_fuse_pass_tester.cc DEPS ${TEST_FC_RNN_PASS_DEPS}) endif () diff --git a/paddle/fluid/framework/ir/fc_gru_fuse_pass.cc b/paddle/fluid/framework/ir/fc_gru_fuse_pass.cc index b1c62d40d4d7c7..ad3b8deba42b93 100644 --- a/paddle/fluid/framework/ir/fc_gru_fuse_pass.cc +++ b/paddle/fluid/framework/ir/fc_gru_fuse_pass.cc @@ -48,7 +48,8 @@ static int BuildFusion(Graph* graph, const std::string& name_scope, // Create New OpDesc auto gru_creater = [&](Node* gru, Node* x, Node* weight_x, Node* weight_h, - Node* bias, Node* hidden, Node* fc_bias) { + Node* bias, Node* hidden, Node* fc_bias, + const bool& use_mkldnn) { OpDesc op_desc; op_desc.SetType("fusion_gru"); @@ -67,6 +68,7 @@ static int BuildFusion(Graph* graph, const std::string& name_scope, gru->Op()->GetAttrIfExists("origin_mode")); // TODO(TJ): This should be a option for infer op_desc.SetAttr("use_seq", true); + op_desc.SetAttr("use_mkldnn", use_mkldnn); op_desc.SetAttr("activation", gru->Op()->GetAttr("activation")); op_desc.SetAttr("gate_activation", gru->Op()->GetAttr("gate_activation")); @@ -149,6 +151,7 @@ static int BuildFusion(Graph* graph, const std::string& name_scope, LOG(INFO) << "fc_gru_fuse_pass not supported when origin_mode=True."; return; } + const bool use_mkldnn = mul->Op()->GetAttrIfExists("use_mkldnn"); if (with_fc_bias) { GET_IR_NODE_FROM_SUBGRAPH(mul_out, mul_out, fc_pattern); @@ -156,14 +159,14 @@ static int BuildFusion(Graph* graph, const std::string& name_scope, GET_IR_NODE_FROM_SUBGRAPH(elementwise_add, 
elementwise_add, fc_pattern); GET_IR_NODE_FROM_SUBGRAPH(fc_out, elementwise_add_out, fc_pattern); - gru_creater(gru, x_n, w, Weight, Bias, Hidden, fc_bias); + gru_creater(gru, x_n, w, Weight, Bias, Hidden, fc_bias, use_mkldnn); // Remove unneeded nodes. std::unordered_set marked_nodes( {mul, gru, elementwise_add, fc_out, mul_out, BatchGate, BatchResetHiddenPrev, BatchHidden}); GraphSafeRemoveNodes(graph, marked_nodes); } else { - gru_creater(gru, x_n, w, Weight, Bias, Hidden, nullptr); + gru_creater(gru, x_n, w, Weight, Bias, Hidden, nullptr, use_mkldnn); // Remove unneeded nodes. std::unordered_set marked_nodes( {mul, gru, BatchGate, BatchResetHiddenPrev, BatchHidden}); diff --git a/paddle/fluid/framework/ir/fc_gru_fuse_pass_tester.cc b/paddle/fluid/framework/ir/fc_gru_fuse_pass_tester.cc index 70351b8aafffa1..6ec47fae26a932 100644 --- a/paddle/fluid/framework/ir/fc_gru_fuse_pass_tester.cc +++ b/paddle/fluid/framework/ir/fc_gru_fuse_pass_tester.cc @@ -12,77 +12,15 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "paddle/fluid/framework/ir/fc_gru_fuse_pass.h" - -#include -#include "paddle/fluid/framework/ir/pass_tester_helper.h" +#include "paddle/fluid/framework/ir/fc_gru_fuse_pass_tester.h" namespace paddle { namespace framework { namespace ir { -void AddVarToScope(Scope* param_scope, const std::string& name, - const DDim& dims) { - auto* tensor = param_scope->Var(name)->GetMutable(); - tensor->Resize(dims); - tensor->mutable_data(platform::CPUPlace()); -} - -Scope* CreateParamScope() { - auto param_scope = new Scope(); - AddVarToScope(param_scope, "gru_fc_w", {}); - AddVarToScope(param_scope, "gru_fc_b", {}); - AddVarToScope(param_scope, "gru_w", {}); - AddVarToScope(param_scope, "gru_b", {}); - AddVarToScope(param_scope, "gru_batch_gate_0", {}); - AddVarToScope(param_scope, "gru_batch_reset_hidden_prev_0", {}); - AddVarToScope(param_scope, "gru_batch_hidden_0", {}); - AddVarToScope(param_scope, "gru_hidden_0", {}); - AddVarToScope(param_scope, "gru_batch_gate_1", {}); - AddVarToScope(param_scope, "gru_batch_reset_hidden_prev_1", {}); - AddVarToScope(param_scope, "gru_batch_hidden_1", {}); - AddVarToScope(param_scope, "gru_hidden_1", {}); - return param_scope; -} - -TEST(FCFusePass, basic) { - // inputs operator output - // -------------------------------------------------------- - // (a, gru_fc_w) mul -> fc_0_tmp_0 - // (fc_0_tmp_0, gru_fc_b) elementwise_add -> fc_0_tmp_1 - // (fc_0_tmp_1,gru_w,gru_b gru -> gru_out_0 - - // (b, gru_fc_w) mul -> fc_1_tmp_0 - // (fc_1_tmp_0, gru_fc_b) elementwise_add -> fc_1_tmp_1 - // (fc_1_tmp_1,gru_w,gru_b) gru -> gru_out_1 - Layers layers; - auto* a = layers.data("a"); - auto* b = layers.data("b"); - auto* fc_w = layers.data("gru_fc_w", {}, true); - auto* fc_b = layers.data("gru_fc_b", {}, true); - auto* gru_w = layers.data("gru_w", {}, true); - auto* gru_b = layers.data("gru_b", {}, true); - auto* fc_0_tmp0 = layers.mul(a, fc_w); - auto* fc_0_tmp1 = layers.elementwise_add(fc_0_tmp0, fc_b); - auto* gru_batch_gate_0 = layers.data("gru_batch_gate_0", {}, false); - auto* gru_batch_reset_hidden_prev_0 = - layers.data("gru_batch_reset_hidden_prev_0", {}, false); - auto* gru_batch_hidden_0 = layers.data("gru_batch_hidden_0", {}, false); - auto* gru_hidden_0 = layers.data("gru_hidden_0", {}, false); - layers.gru(fc_0_tmp1, gru_w, gru_b, gru_batch_gate_0, - gru_batch_reset_hidden_prev_0, gru_batch_hidden_0, gru_hidden_0); - - auto* fc_1_tmp0 = layers.mul(b, fc_w); - auto* fc_1_tmp1 = 
layers.elementwise_add(fc_1_tmp0, fc_b); - auto* gru_batch_gate_1 = layers.data("gru_batch_gate_1", {}, false); - auto* gru_batch_reset_hidden_prev_1 = - layers.data("gru_batch_reset_hidden_prev_1", {}, false); - auto* gru_batch_hidden_1 = layers.data("gru_batch_hidden_1", {}, false); - auto* gru_hidden_1 = layers.data("gru_hidden_1", {}, false); - layers.gru(fc_1_tmp1, gru_w, gru_b, gru_batch_gate_1, - gru_batch_reset_hidden_prev_1, gru_batch_hidden_1, gru_hidden_1); - - std::unique_ptr graph(new ir::Graph(layers.main_program())); +namespace fc_gru_test { +TEST(FcGruFusePass, basic) { + std::unique_ptr graph = PrepareGraph(); auto pass = PassRegistry::Instance().Get("fc_gru_fuse_pass"); pass->Set("use_gpu", new bool(true)); graph->Set("__param_scope__", CreateParamScope()); @@ -109,6 +47,7 @@ TEST(FCFusePass, basic) { "expectations after fuse")); } +} // namespace fc_gru_test } // namespace ir } // namespace framework } // namespace paddle diff --git a/paddle/fluid/framework/ir/fc_gru_fuse_pass_tester.h b/paddle/fluid/framework/ir/fc_gru_fuse_pass_tester.h new file mode 100644 index 00000000000000..b9288318fcfea6 --- /dev/null +++ b/paddle/fluid/framework/ir/fc_gru_fuse_pass_tester.h @@ -0,0 +1,93 @@ +// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+#pragma once + +#include "paddle/fluid/framework/ir/fc_gru_fuse_pass.h" + +#include +#include "paddle/fluid/framework/ir/pass_tester_helper.h" + +namespace paddle { +namespace framework { +namespace ir { + +namespace fc_gru_test { +void AddVarToScope(Scope* param_scope, const std::string& name, + const DDim& dims) { + auto* tensor = param_scope->Var(name)->GetMutable(); + tensor->Resize(dims); + tensor->mutable_data(platform::CPUPlace()); +} + +Scope* CreateParamScope() { + auto param_scope = new Scope(); + AddVarToScope(param_scope, "gru_fc_w", {}); + AddVarToScope(param_scope, "gru_fc_b", {}); + AddVarToScope(param_scope, "gru_w", {}); + AddVarToScope(param_scope, "gru_b", {}); + AddVarToScope(param_scope, "gru_batch_gate_0", {}); + AddVarToScope(param_scope, "gru_batch_reset_hidden_prev_0", {}); + AddVarToScope(param_scope, "gru_batch_hidden_0", {}); + AddVarToScope(param_scope, "gru_hidden_0", {}); + AddVarToScope(param_scope, "gru_batch_gate_1", {}); + AddVarToScope(param_scope, "gru_batch_reset_hidden_prev_1", {}); + AddVarToScope(param_scope, "gru_batch_hidden_1", {}); + AddVarToScope(param_scope, "gru_hidden_1", {}); + return param_scope; +} + +std::unique_ptr PrepareGraph() { + // inputs operator output + // -------------------------------------------------------- + // (a, gru_fc_w) mul -> fc_0_tmp_0 + // (fc_0_tmp_0, gru_fc_b) elementwise_add -> fc_0_tmp_1 + // (fc_0_tmp_1,gru_w,gru_b gru -> gru_out_0 + + // (b, gru_fc_w) mul -> fc_1_tmp_0 + // (fc_1_tmp_0, gru_fc_b) elementwise_add -> fc_1_tmp_1 + // (fc_1_tmp_1,gru_w,gru_b) gru -> gru_out_1 + Layers layers; + auto* a = layers.data("a"); + auto* b = layers.data("b"); + auto* fc_w = layers.data("gru_fc_w", {}, true); + auto* fc_b = layers.data("gru_fc_b", {}, true); + auto* gru_w = layers.data("gru_w", {}, true); + auto* gru_b = layers.data("gru_b", {}, true); + auto* fc_0_tmp0 = layers.mul(a, fc_w); + auto* fc_0_tmp1 = layers.elementwise_add(fc_0_tmp0, fc_b); + auto* gru_batch_gate_0 = layers.data("gru_batch_gate_0", {}, false); + auto* gru_batch_reset_hidden_prev_0 = + layers.data("gru_batch_reset_hidden_prev_0", {}, false); + auto* gru_batch_hidden_0 = layers.data("gru_batch_hidden_0", {}, false); + auto* gru_hidden_0 = layers.data("gru_hidden_0", {}, false); + layers.gru(fc_0_tmp1, gru_w, gru_b, gru_batch_gate_0, + gru_batch_reset_hidden_prev_0, gru_batch_hidden_0, gru_hidden_0); + + auto* fc_1_tmp0 = layers.mul(b, fc_w); + auto* fc_1_tmp1 = layers.elementwise_add(fc_1_tmp0, fc_b); + auto* gru_batch_gate_1 = layers.data("gru_batch_gate_1", {}, false); + auto* gru_batch_reset_hidden_prev_1 = + layers.data("gru_batch_reset_hidden_prev_1", {}, false); + auto* gru_batch_hidden_1 = layers.data("gru_batch_hidden_1", {}, false); + auto* gru_hidden_1 = layers.data("gru_hidden_1", {}, false); + layers.gru(fc_1_tmp1, gru_w, gru_b, gru_batch_gate_1, + gru_batch_reset_hidden_prev_1, gru_batch_hidden_1, gru_hidden_1); + + std::unique_ptr graph(new ir::Graph(layers.main_program())); + return std::move(graph); +} +} // namespace fc_gru_test +} // namespace ir +} // namespace framework +} // namespace paddle diff --git a/paddle/fluid/framework/ir/fc_lstm_fuse_pass.cc b/paddle/fluid/framework/ir/fc_lstm_fuse_pass.cc index 1c1289124506ab..1823df615665b2 100644 --- a/paddle/fluid/framework/ir/fc_lstm_fuse_pass.cc +++ b/paddle/fluid/framework/ir/fc_lstm_fuse_pass.cc @@ -47,7 +47,7 @@ int BuildFusion(Graph* graph, const std::string& name_scope, Scope* scope, // Create New OpDesc auto lstm_creator = [&](Node* lstm, Node* input, Node* weight_x, 
Node* weight_h, Node* bias, Node* hidden, Node* cell, - Node* xx, Node* fc_bias) { + Node* xx, Node* fc_bias, const bool& use_mkldnn) { OpDesc op_desc; op_desc.SetType("fusion_lstm"); #define SET_IN(Key, node__) op_desc.SetInput(#Key, {node__->Name()}); @@ -88,6 +88,7 @@ int BuildFusion(Graph* graph, const std::string& name_scope, Scope* scope, op_desc.SetOutput("XX", {xx->Name()}); op_desc.SetAttr("is_reverse", lstm->Op()->GetAttr("is_reverse")); op_desc.SetAttr("use_peepholes", lstm->Op()->GetAttr("use_peepholes")); + op_desc.SetAttr("use_mkldnn", use_mkldnn); // TODO(TJ): get from attr op_desc.SetAttr("use_seq", true); @@ -148,13 +149,14 @@ int BuildFusion(Graph* graph, const std::string& name_scope, Scope* scope, GET_IR_NODE_FROM_SUBGRAPH(Cell, Cell, lstm_pattern); GET_IR_NODE_FROM_SUBGRAPH(w, w, fc_pattern); GET_IR_NODE_FROM_SUBGRAPH(mul, mul, fc_pattern); + const bool use_mkldnn = mul->Op()->GetAttrIfExists("use_mkldnn"); if (with_fc_bias) { GET_IR_NODE_FROM_SUBGRAPH(fc_out, elementwise_add_out, fc_pattern); GET_IR_NODE_FROM_SUBGRAPH(fc_bias, bias, fc_pattern); GET_IR_NODE_FROM_SUBGRAPH(mul_out, mul_out, fc_pattern); GET_IR_NODE_FROM_SUBGRAPH(elementwise_add, elementwise_add, fc_pattern); lstm_creator(lstm, subgraph.at(x), w, Weight, Bias, Hidden, Cell, fc_out, - fc_bias); + fc_bias, use_mkldnn); // Remove unneeded nodes. std::unordered_set marked_nodes( {mul, lstm, elementwise_add, mul_out, BatchGate, BatchCellPreAct}); @@ -162,7 +164,7 @@ int BuildFusion(Graph* graph, const std::string& name_scope, Scope* scope, } else { GET_IR_NODE_FROM_SUBGRAPH(fc_out, mul_out, fc_pattern); lstm_creator(lstm, subgraph.at(x), w, Weight, Bias, Hidden, Cell, fc_out, - nullptr); + nullptr, use_mkldnn); // Remove unneeded nodes. std::unordered_set marked_nodes( {mul, lstm, BatchGate, BatchCellPreAct}); diff --git a/paddle/fluid/framework/ir/fc_lstm_fuse_pass_tester.cc b/paddle/fluid/framework/ir/fc_lstm_fuse_pass_tester.cc index 0de8d4684fecd4..92de86e52bc0a5 100644 --- a/paddle/fluid/framework/ir/fc_lstm_fuse_pass_tester.cc +++ b/paddle/fluid/framework/ir/fc_lstm_fuse_pass_tester.cc @@ -12,77 +12,16 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "paddle/fluid/framework/ir/fc_lstm_fuse_pass.h" - -#include -#include "paddle/fluid/framework/ir/pass_tester_helper.h" +#include "paddle/fluid/framework/ir/fc_lstm_fuse_pass_tester.h" namespace paddle { namespace framework { namespace ir { -void AddVarToScope(Scope* param_scope, const std::string& name, - const DDim& dims) { - auto* tensor = param_scope->Var(name)->GetMutable(); - tensor->Resize(dims); - tensor->mutable_data(platform::CPUPlace()); -} - -Scope* CreateParamScope() { - auto param_scope = new Scope(); - AddVarToScope(param_scope, "lstm_fc_w", {}); - AddVarToScope(param_scope, "lstm_fc_b", {}); - AddVarToScope(param_scope, "lstm_w", {}); - AddVarToScope(param_scope, "lstm_b", {}); - AddVarToScope(param_scope, "lstm_cell_0", {}); - AddVarToScope(param_scope, "lstm_batch_gate_0", {}); - AddVarToScope(param_scope, "lstm_batch_cell_pre_gate_0", {}); - AddVarToScope(param_scope, "lstm_hidden_0", {}); - AddVarToScope(param_scope, "lstm_cell_1", {}); - AddVarToScope(param_scope, "lstm_batch_gate_1", {}); - AddVarToScope(param_scope, "lstm_batch_cell_pre_gate_1", {}); - AddVarToScope(param_scope, "lstm_hidden_1", {}); - return param_scope; -} - -TEST(FCLSTMFusePass, basic) { - // inputs operator output - // -------------------------------------------------------- - // (a, lstm_fc_w) mul -> fc_0_tmp_0 - // (fc_0_tmp_0, lstm_fc_b) elementwise_add -> fc_0_tmp_1 - // fc_0_tmp_1,lstm_w,lstm_b lstm -> lstm_out_0 - - // (b, lstm_fc_w) mul -> fc_1_tmp_0 - // (fc_1_tmp_0, lstm_fc_b) elementwise_add -> fc_1_tmp_1 - // (fc_1_tmp_1,lstm_w,lstm_b) lstm -> lstm_out_1 - Layers layers; - auto* a = layers.data("a"); - auto* b = layers.data("b"); - auto* fc_w = layers.data("lstm_fc_w", {}, true); - auto* fc_b = layers.data("lstm_fc_b", {}, true); - auto* lstm_w = layers.data("lstm_w", {}, true); - auto* lstm_b = layers.data("lstm_b", {}, true); - auto* fc_0_tmp0 = layers.mul(a, fc_w); - auto* fc_0_tmp1 = layers.elementwise_add(fc_0_tmp0, fc_b); - auto* lstm_cell_0 = layers.data("lstm_cell_0", {}, false); - auto* lstm_batch_gate_0 = layers.data("lstm_batch_gate_0", {}, false); - auto* lstm_batch_cell_pre_gate_0 = - layers.data("lstm_batch_cell_pre_gate_0", {}, false); - auto* lstm_hidden_0 = layers.data("lstm_hidden_0", {}, false); - layers.lstm(fc_0_tmp1, lstm_w, lstm_b, lstm_cell_0, lstm_batch_gate_0, - lstm_hidden_0, lstm_batch_cell_pre_gate_0); +namespace fc_lstm_test { - auto* fc_1_tmp0 = layers.mul(b, fc_w); - auto* fc_1_tmp1 = layers.elementwise_add(fc_1_tmp0, fc_b); - auto* lstm_cell_1 = layers.data("lstm_cell_1", {}, false); - auto* lstm_batch_gate_1 = layers.data("lstm_batch_gate_1", {}, false); - auto* lstm_batch_cell_pre_gate_1 = - layers.data("lstm_batch_cell_pre_gate_1", {}, false); - auto* lstm_hidden_1 = layers.data("lstm_hidden_1", {}, false); - layers.lstm(fc_1_tmp1, lstm_w, lstm_b, lstm_cell_1, lstm_batch_gate_1, - lstm_hidden_1, lstm_batch_cell_pre_gate_1); - - std::unique_ptr graph(new ir::Graph(layers.main_program())); +TEST(FcLstmFusePass, basic) { + std::unique_ptr graph = PrepareGraph(); auto pass = PassRegistry::Instance().Get("fc_lstm_fuse_pass"); pass->Set("use_gpu", new bool(false)); graph->Set("__param_scope__", CreateParamScope()); @@ -108,7 +47,7 @@ TEST(FCLSTMFusePass, basic) { "The number of fusion_gru nodes does " "not meet expectations after fuse")); } - +} // namespace fc_lstm_test } // namespace ir } // namespace framework } // namespace paddle diff --git a/paddle/fluid/framework/ir/fc_lstm_fuse_pass_tester.h 
b/paddle/fluid/framework/ir/fc_lstm_fuse_pass_tester.h new file mode 100644 index 00000000000000..9c1e064e7380bf --- /dev/null +++ b/paddle/fluid/framework/ir/fc_lstm_fuse_pass_tester.h @@ -0,0 +1,96 @@ +// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "paddle/fluid/framework/ir/fc_lstm_fuse_pass.h" + +#include +#include "paddle/fluid/framework/ir/pass_tester_helper.h" + +namespace paddle { +namespace framework { +namespace ir { + +namespace fc_lstm_test { + +void AddVarToScope(Scope* param_scope, const std::string& name, + const DDim& dims) { + auto* tensor = param_scope->Var(name)->GetMutable(); + tensor->Resize(dims); + tensor->mutable_data(platform::CPUPlace()); +} + +Scope* CreateParamScope() { + auto param_scope = new Scope(); + AddVarToScope(param_scope, "lstm_fc_w", {}); + AddVarToScope(param_scope, "lstm_fc_b", {}); + AddVarToScope(param_scope, "lstm_w", {}); + AddVarToScope(param_scope, "lstm_b", {}); + AddVarToScope(param_scope, "lstm_cell_0", {}); + AddVarToScope(param_scope, "lstm_batch_gate_0", {}); + AddVarToScope(param_scope, "lstm_batch_cell_pre_gate_0", {}); + AddVarToScope(param_scope, "lstm_hidden_0", {}); + AddVarToScope(param_scope, "lstm_cell_1", {}); + AddVarToScope(param_scope, "lstm_batch_gate_1", {}); + AddVarToScope(param_scope, "lstm_batch_cell_pre_gate_1", {}); + AddVarToScope(param_scope, "lstm_hidden_1", {}); + return param_scope; +} + +std::unique_ptr PrepareGraph() { + // inputs operator output + // -------------------------------------------------------- + // (a, lstm_fc_w) mul -> fc_0_tmp_0 + // (fc_0_tmp_0, lstm_fc_b) elementwise_add -> fc_0_tmp_1 + // fc_0_tmp_1,lstm_w,lstm_b lstm -> lstm_out_0 + + // (b, lstm_fc_w) mul -> fc_1_tmp_0 + // (fc_1_tmp_0, lstm_fc_b) elementwise_add -> fc_1_tmp_1 + // (fc_1_tmp_1,lstm_w,lstm_b) lstm -> lstm_out_1 + Layers layers; + auto* a = layers.data("a"); + auto* b = layers.data("b"); + auto* fc_w = layers.data("lstm_fc_w", {}, true); + auto* fc_b = layers.data("lstm_fc_b", {}, true); + auto* lstm_w = layers.data("lstm_w", {}, true); + auto* lstm_b = layers.data("lstm_b", {}, true); + auto* fc_0_tmp0 = layers.mul(a, fc_w); + auto* fc_0_tmp1 = layers.elementwise_add(fc_0_tmp0, fc_b); + auto* lstm_cell_0 = layers.data("lstm_cell_0", {}, false); + auto* lstm_batch_gate_0 = layers.data("lstm_batch_gate_0", {}, false); + auto* lstm_batch_cell_pre_gate_0 = + layers.data("lstm_batch_cell_pre_gate_0", {}, false); + auto* lstm_hidden_0 = layers.data("lstm_hidden_0", {}, false); + layers.lstm(fc_0_tmp1, lstm_w, lstm_b, lstm_cell_0, lstm_batch_gate_0, + lstm_hidden_0, lstm_batch_cell_pre_gate_0); + + auto* fc_1_tmp0 = layers.mul(b, fc_w); + auto* fc_1_tmp1 = layers.elementwise_add(fc_1_tmp0, fc_b); + auto* lstm_cell_1 = layers.data("lstm_cell_1", {}, false); + auto* lstm_batch_gate_1 = layers.data("lstm_batch_gate_1", {}, false); + auto* lstm_batch_cell_pre_gate_1 = + layers.data("lstm_batch_cell_pre_gate_1", {}, false); + auto* 
lstm_hidden_1 = layers.data("lstm_hidden_1", {}, false);
+  layers.lstm(fc_1_tmp1, lstm_w, lstm_b, lstm_cell_1, lstm_batch_gate_1,
+              lstm_hidden_1, lstm_batch_cell_pre_gate_1);
+
+  std::unique_ptr<ir::Graph> graph(new ir::Graph(layers.main_program()));
+  return std::move(graph);
+}
+
+}  // namespace fc_lstm_test
+}  // namespace ir
+}  // namespace framework
+}  // namespace paddle
diff --git a/paddle/fluid/framework/ir/graph_pattern_detector.cc b/paddle/fluid/framework/ir/graph_pattern_detector.cc
index 064da3d941602e..3476ce8610ee34 100644
--- a/paddle/fluid/framework/ir/graph_pattern_detector.cc
+++ b/paddle/fluid/framework/ir/graph_pattern_detector.cc
@@ -2262,11 +2262,11 @@ PDNode *patterns::QuantizePlacement::operator()(
 PDNode *patterns::Bfloat16Placement::operator()(
     const std::unordered_set<std::string> &bfloat16_enabled_op_types) {
   std::unordered_set<std::string> supported_op_types =
-      std::unordered_set<std::string>({"concat", "conv2d", "conv2d_transpose",
-                                       "elementwise_add", "elementwise_mul",
-                                       "fc", "fusion_gru", "gelu", "layer_norm",
-                                       "matmul", "pool2d", "relu", "reshape2",
-                                       "softmax", "sum", "transpose2"});
+      std::unordered_set<std::string>(
+          {"concat", "conv2d", "conv2d_transpose", "elementwise_add",
+           "elementwise_mul", "fc", "fusion_gru", "fusion_lstm", "gelu",
+           "layer_norm", "matmul", "pool2d", "relu", "reshape2", "softmax",
+           "sum", "transpose2"});
   if (!bfloat16_enabled_op_types.empty()) {
     supported_op_types = bfloat16_enabled_op_types;
   }
diff --git a/paddle/fluid/framework/ir/mkldnn/mkldnn_fc_rnn_fuse_pass_tester.cc b/paddle/fluid/framework/ir/mkldnn/mkldnn_fc_rnn_fuse_pass_tester.cc
new file mode 100644
index 00000000000000..2feb9312a9483a
--- /dev/null
+++ b/paddle/fluid/framework/ir/mkldnn/mkldnn_fc_rnn_fuse_pass_tester.cc
@@ -0,0 +1,70 @@
+// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <gtest/gtest.h>
+#include "paddle/fluid/framework/ir/fc_gru_fuse_pass_tester.h"
+#include "paddle/fluid/framework/ir/fc_lstm_fuse_pass_tester.h"
+#include "paddle/fluid/framework/ir/mkldnn/mkldnn_placement_pass.h"
+#include "paddle/fluid/framework/ir/pass_tester_helper.h"
+
+namespace paddle {
+namespace framework {
+namespace ir {
+
+void TestFcRNNFusePass(const std::string& pass_name) {
+  std::unique_ptr<ir::Graph> graph =
+      (pass_name == "fc_gru_fuse_pass" ? fc_gru_test::PrepareGraph()
+                                       : fc_lstm_test::PrepareGraph());
+  auto mkldnn_placement_pass_ =
+      PassRegistry::Instance().Get("mkldnn_placement_pass");
+  mkldnn_placement_pass_->Set("mkldnn_enabled_op_types",
+                              new std::unordered_set<std::string>({}));
+  graph->Set("__param_scope__", (pass_name == "fc_gru_fuse_pass"
+                                     ?
fc_gru_test::CreateParamScope()
+                                     : fc_lstm_test::CreateParamScope()));
+  graph.reset(mkldnn_placement_pass_->Apply(graph.release()));
+
+  auto check_num_mkldnn_nodes = [&](const std::unique_ptr<ir::Graph>& graph) {
+    int nodes_cout = 0;
+    for (auto* node : graph->Nodes()) {
+      if (node->IsOp()) {
+        auto* op = node->Op();
+        if (op->GetAttrIfExists<bool>("use_mkldnn")) nodes_cout++;
+      }
+    }
+    return nodes_cout;
+  };
+  int num_mkldnn_nodes_before = check_num_mkldnn_nodes(graph);
+
+  auto fc_rnn_fuse_pass_ = PassRegistry::Instance().Get(pass_name);
+  graph.reset(fc_rnn_fuse_pass_->Apply(graph.release()));
+  int num_mkldnn_nodes_after = check_num_mkldnn_nodes(graph);
+
+  PADDLE_ENFORCE_EQ(num_mkldnn_nodes_before, num_mkldnn_nodes_after,
+                    platform::errors::PreconditionNotMet(
+                        "The number of nodes with \"use_mkldnn\" attr before "
+                        "and after the fuse are not equal"));
+}
+
+TEST(FcGruFusePass, use_mkldnn) { TestFcRNNFusePass("fc_gru_fuse_pass"); }
+
+TEST(FcLstmFusePass, use_mkldnn) { TestFcRNNFusePass("fc_lstm_fuse_pass"); }
+
+}  // namespace ir
+}  // namespace framework
+}  // namespace paddle
+
+USE_PASS(mkldnn_placement_pass);
+USE_PASS(fc_gru_fuse_pass);
+USE_PASS(fc_lstm_fuse_pass);
diff --git a/paddle/fluid/inference/tests/api/analyzer_lexical_analysis_gru_tester.cc b/paddle/fluid/inference/tests/api/analyzer_lexical_analysis_gru_tester.cc
index 024313837e0b63..720c90090cf746 100644
--- a/paddle/fluid/inference/tests/api/analyzer_lexical_analysis_gru_tester.cc
+++ b/paddle/fluid/inference/tests/api/analyzer_lexical_analysis_gru_tester.cc
@@ -38,7 +38,6 @@ void SetAnalysisConfig(AnalysisConfig *cfg,
   cfg->SwitchSpecifyInputNames(false);
   cfg->SetCpuMathLibraryNumThreads(num_threads);
   cfg->EnableMKLDNN();
-  cfg->pass_builder()->AppendPass("mkldnn_placement_pass");
 }
 
 std::vector<std::vector<size_t>> ReadSentenceLod(std::ifstream &file, size_t offset,
diff --git a/paddle/fluid/operators/fused/fusion_lstm_op.cc b/paddle/fluid/operators/fused/fusion_lstm_op.cc
index 6cca6b5a9729a7..42bf784b2af4fb 100644
--- a/paddle/fluid/operators/fused/fusion_lstm_op.cc
+++ b/paddle/fluid/operators/fused/fusion_lstm_op.cc
@@ -249,6 +249,11 @@ void FusionLSTMOpMaker::Make() {
   AddAttr<bool>("use_mkldnn",
                 "(bool, default false) Only used in mkldnn kernel")
       .SetDefault(false);
+  AddAttr<std::string>(
+      "mkldnn_data_type",
+      "(string, default \"float32\"). Data type of mkldnn kernel")
+      .SetDefault("float32")
+      .InEnum({"float32", "int8", "bfloat16"});
   AddAttr<float>("Scale_data",
                  "Scale to be used for int8 input/output data."
"Only used with MKL-DNN INT8.") diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_fusion_gru_bf16_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_fusion_gru_bf16_mkldnn_op.py index 7320efd259f459..fa9a93452dffde 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_fusion_gru_bf16_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_fusion_gru_bf16_mkldnn_op.py @@ -27,7 +27,7 @@ "place does not support BF16 evaluation") class TestFusionGRUBF16MKLDNNOp(OpTest): def set_confs(self): - self.mkldnn_data_type = False + pass def test_check_output(self): for use_seq in {True, False}: @@ -48,6 +48,7 @@ def setUp(self): self.act_gate = 'sigmoid' self.origin_mode = False self.use_mkldnn = True + self.mkldnn_data_type = "bfloat16" self.force_fp32_output = False self.weights_dtype = 'fp32' self.set_confs() @@ -113,7 +114,8 @@ def setUp(self): 'is_reverse': self.is_reverse, 'origin_mode': self.origin_mode, 'force_fp32_output': self.force_fp32_output, - 'use_mkldnn': self.use_mkldnn + 'use_mkldnn': self.use_mkldnn, + 'mkldnn_data_type': self.mkldnn_data_type, } diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_fusion_gru_int8_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_fusion_gru_int8_mkldnn_op.py index 2d3caf0be97c95..4fda51e9e05f48 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_fusion_gru_int8_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_fusion_gru_int8_mkldnn_op.py @@ -35,6 +35,7 @@ def setUp(self): self.act_gate = 'sigmoid' self.origin_mode = True self.use_mkldnn = True + self.mkldnn_data_type = "int8" self.force_fp32_output = True self.error_margin = 1e-5 self.set_confs() @@ -115,6 +116,7 @@ def setUp(self): 'is_reverse': self.is_reverse, 'origin_mode': self.origin_mode, 'use_mkldnn': self.use_mkldnn, + 'mkldnn_data_type': self.mkldnn_data_type, 'force_fp32_output': self.force_fp32_output, 'Scale_data': scale_data, 'Shift_data': shift_data, diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_fusion_lstm_bf16_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_fusion_lstm_bf16_mkldnn_op.py index d65919aa434c38..d07eda3259960c 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_fusion_lstm_bf16_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_fusion_lstm_bf16_mkldnn_op.py @@ -27,7 +27,7 @@ "place does not support BF16 evaluation") class TestFusionLSTMBF16ONEDNNOp(OpTest): def set_confs(self): - self.mkldnn_data_type = False + pass def test_check_output(self): for use_seq in {True, False}: @@ -48,6 +48,7 @@ def setUp(self): self.act_cell = 'tanh' self.act_cand = 'tanh' self.use_mkldnn = True + self.mkldnn_data_type = "bfloat16" self.force_fp32_output = False self.weights_dtype = 'fp32' self.set_confs() @@ -130,7 +131,8 @@ def setUp(self): 'cell_activation': self.act_cell, 'candidate_activation': self.act_cand, 'force_fp32_output': self.force_fp32_output, - 'use_mkldnn': self.use_mkldnn + 'use_mkldnn': self.use_mkldnn, + 'mkldnn_data_type': self.mkldnn_data_type, } diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_fusion_lstm_int8_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_fusion_lstm_int8_mkldnn_op.py index 93dc45f2650f53..12f8c01783d9c3 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_fusion_lstm_int8_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_fusion_lstm_int8_mkldnn_op.py @@ -34,6 +34,7 @@ def setUp(self): self.act_cand = 'tanh' self.use_peepholes = False # LSTM u8 doesn't 
support peepholes self.use_mkldnn = True + self.mkldnn_data_type = "int8" self.force_fp32_output = False self.error_margin = 1e-5 self.set_confs() @@ -117,6 +118,7 @@ def setUp(self): 'is_reverse': self.is_reverse, 'use_peepholes': self.use_peepholes, 'use_mkldnn': self.use_mkldnn, + 'mkldnn_data_type': self.mkldnn_data_type, 'force_fp32_output': self.force_fp32_output, 'Scale_data': scale_data, 'Shift_data': shift_data, From ad5c8b063f896103846cdb8f142b3366e09a0854 Mon Sep 17 00:00:00 2001 From: wozna Date: Wed, 2 Jun 2021 14:21:17 +0200 Subject: [PATCH 2/6] Correct to pass arg by value --- paddle/fluid/framework/ir/fc_gru_fuse_pass.cc | 2 +- paddle/fluid/framework/ir/fc_lstm_fuse_pass.cc | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/paddle/fluid/framework/ir/fc_gru_fuse_pass.cc b/paddle/fluid/framework/ir/fc_gru_fuse_pass.cc index ad3b8deba42b93..a650bb00638066 100644 --- a/paddle/fluid/framework/ir/fc_gru_fuse_pass.cc +++ b/paddle/fluid/framework/ir/fc_gru_fuse_pass.cc @@ -49,7 +49,7 @@ static int BuildFusion(Graph* graph, const std::string& name_scope, // Create New OpDesc auto gru_creater = [&](Node* gru, Node* x, Node* weight_x, Node* weight_h, Node* bias, Node* hidden, Node* fc_bias, - const bool& use_mkldnn) { + const bool use_mkldnn) { OpDesc op_desc; op_desc.SetType("fusion_gru"); diff --git a/paddle/fluid/framework/ir/fc_lstm_fuse_pass.cc b/paddle/fluid/framework/ir/fc_lstm_fuse_pass.cc index 1823df615665b2..5c5e6840bd02c9 100644 --- a/paddle/fluid/framework/ir/fc_lstm_fuse_pass.cc +++ b/paddle/fluid/framework/ir/fc_lstm_fuse_pass.cc @@ -47,7 +47,7 @@ int BuildFusion(Graph* graph, const std::string& name_scope, Scope* scope, // Create New OpDesc auto lstm_creator = [&](Node* lstm, Node* input, Node* weight_x, Node* weight_h, Node* bias, Node* hidden, Node* cell, - Node* xx, Node* fc_bias, const bool& use_mkldnn) { + Node* xx, Node* fc_bias, const bool use_mkldnn) { OpDesc op_desc; op_desc.SetType("fusion_lstm"); #define SET_IN(Key, node__) op_desc.SetInput(#Key, {node__->Name()}); From 419d486856c012da28144908935d0fe1e881cfe1 Mon Sep 17 00:00:00 2001 From: wozna Date: Mon, 7 Jun 2021 10:59:26 +0200 Subject: [PATCH 3/6] Add conditions to rnn op --- paddle/fluid/framework/ir/fc_gru_fuse_pass.cc | 5 ++++- paddle/fluid/framework/ir/fc_lstm_fuse_pass.cc | 9 ++++++++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/paddle/fluid/framework/ir/fc_gru_fuse_pass.cc b/paddle/fluid/framework/ir/fc_gru_fuse_pass.cc index a650bb00638066..eba8399eef4259 100644 --- a/paddle/fluid/framework/ir/fc_gru_fuse_pass.cc +++ b/paddle/fluid/framework/ir/fc_gru_fuse_pass.cc @@ -151,7 +151,10 @@ static int BuildFusion(Graph* graph, const std::string& name_scope, LOG(INFO) << "fc_gru_fuse_pass not supported when origin_mode=True."; return; } - const bool use_mkldnn = mul->Op()->GetAttrIfExists("use_mkldnn"); + const bool use_mkldnn = + mul->Op()->GetAttrIfExists("use_mkldnn") && + gru->Op()->GetAttrIfExists("activation") == "tahn" && + gru->Op()->GetAttrIfExists("gate_activation") == "sigmoid"; if (with_fc_bias) { GET_IR_NODE_FROM_SUBGRAPH(mul_out, mul_out, fc_pattern); diff --git a/paddle/fluid/framework/ir/fc_lstm_fuse_pass.cc b/paddle/fluid/framework/ir/fc_lstm_fuse_pass.cc index 5c5e6840bd02c9..154ca2e45f012e 100644 --- a/paddle/fluid/framework/ir/fc_lstm_fuse_pass.cc +++ b/paddle/fluid/framework/ir/fc_lstm_fuse_pass.cc @@ -149,7 +149,14 @@ int BuildFusion(Graph* graph, const std::string& name_scope, Scope* scope, GET_IR_NODE_FROM_SUBGRAPH(Cell, Cell, lstm_pattern); 
GET_IR_NODE_FROM_SUBGRAPH(w, w, fc_pattern); GET_IR_NODE_FROM_SUBGRAPH(mul, mul, fc_pattern); - const bool use_mkldnn = mul->Op()->GetAttrIfExists("use_mkldnn"); + const bool use_mkldnn = + mul->Op()->GetAttrIfExists("use_mkldnn") && + lstm->Op()->GetAttrIfExists("gate_activation") == + "sigmoid" && + lstm->Op()->GetAttrIfExists("cell_activation") == "tahn" && + lstm->Op()->GetAttrIfExists("candidate_activation") == + "tahn"; + if (with_fc_bias) { GET_IR_NODE_FROM_SUBGRAPH(fc_out, elementwise_add_out, fc_pattern); GET_IR_NODE_FROM_SUBGRAPH(fc_bias, bias, fc_pattern); From 0c1ba3b4edd84bbad6c1b5585a498008d9836cb5 Mon Sep 17 00:00:00 2001 From: wozna Date: Wed, 9 Jun 2021 15:59:59 +0200 Subject: [PATCH 4/6] Correct the spelling mistake --- paddle/fluid/framework/ir/fc_gru_fuse_pass.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/paddle/fluid/framework/ir/fc_gru_fuse_pass.cc b/paddle/fluid/framework/ir/fc_gru_fuse_pass.cc index eba8399eef4259..5345d0cae953f8 100644 --- a/paddle/fluid/framework/ir/fc_gru_fuse_pass.cc +++ b/paddle/fluid/framework/ir/fc_gru_fuse_pass.cc @@ -47,7 +47,7 @@ static int BuildFusion(Graph* graph, const std::string& name_scope, gru_pattern(fc_out); // Create New OpDesc - auto gru_creater = [&](Node* gru, Node* x, Node* weight_x, Node* weight_h, + auto gru_creator = [&](Node* gru, Node* x, Node* weight_x, Node* weight_h, Node* bias, Node* hidden, Node* fc_bias, const bool use_mkldnn) { OpDesc op_desc; @@ -162,14 +162,14 @@ static int BuildFusion(Graph* graph, const std::string& name_scope, GET_IR_NODE_FROM_SUBGRAPH(elementwise_add, elementwise_add, fc_pattern); GET_IR_NODE_FROM_SUBGRAPH(fc_out, elementwise_add_out, fc_pattern); - gru_creater(gru, x_n, w, Weight, Bias, Hidden, fc_bias, use_mkldnn); + gru_creator(gru, x_n, w, Weight, Bias, Hidden, fc_bias, use_mkldnn); // Remove unneeded nodes. std::unordered_set marked_nodes( {mul, gru, elementwise_add, fc_out, mul_out, BatchGate, BatchResetHiddenPrev, BatchHidden}); GraphSafeRemoveNodes(graph, marked_nodes); } else { - gru_creater(gru, x_n, w, Weight, Bias, Hidden, nullptr, use_mkldnn); + gru_creator(gru, x_n, w, Weight, Bias, Hidden, nullptr, use_mkldnn); // Remove unneeded nodes. 
std::unordered_set marked_nodes( {mul, gru, BatchGate, BatchResetHiddenPrev, BatchHidden}); From e552ac31c61dde17f0d39db6c94b05b1e0fb9376 Mon Sep 17 00:00:00 2001 From: wozna Date: Thu, 10 Jun 2021 12:12:54 +0200 Subject: [PATCH 5/6] Improving the test with checking activation --- paddle/fluid/framework/ir/fc_gru_fuse_pass.cc | 7 ++-- .../framework/ir/fc_gru_fuse_pass_tester.h | 9 +++-- .../fluid/framework/ir/fc_lstm_fuse_pass.cc | 13 ++++---- .../framework/ir/fc_lstm_fuse_pass_tester.h | 12 ++++--- .../mkldnn/mkldnn_fc_rnn_fuse_pass_tester.cc | 33 +++++++++++++++---- .../fluid/framework/ir/pass_tester_helper.h | 10 ++++-- 6 files changed, 59 insertions(+), 25 deletions(-) diff --git a/paddle/fluid/framework/ir/fc_gru_fuse_pass.cc b/paddle/fluid/framework/ir/fc_gru_fuse_pass.cc index 5345d0cae953f8..921e1ea513961d 100644 --- a/paddle/fluid/framework/ir/fc_gru_fuse_pass.cc +++ b/paddle/fluid/framework/ir/fc_gru_fuse_pass.cc @@ -152,9 +152,10 @@ static int BuildFusion(Graph* graph, const std::string& name_scope, return; } const bool use_mkldnn = - mul->Op()->GetAttrIfExists("use_mkldnn") && - gru->Op()->GetAttrIfExists("activation") == "tahn" && - gru->Op()->GetAttrIfExists("gate_activation") == "sigmoid"; + (mul->Op()->GetAttrIfExists("use_mkldnn") && + gru->Op()->GetAttrIfExists("activation") == "tanh" && + gru->Op()->GetAttrIfExists("gate_activation") == + "sigmoid"); if (with_fc_bias) { GET_IR_NODE_FROM_SUBGRAPH(mul_out, mul_out, fc_pattern); diff --git a/paddle/fluid/framework/ir/fc_gru_fuse_pass_tester.h b/paddle/fluid/framework/ir/fc_gru_fuse_pass_tester.h index b9288318fcfea6..a862755d604e44 100644 --- a/paddle/fluid/framework/ir/fc_gru_fuse_pass_tester.h +++ b/paddle/fluid/framework/ir/fc_gru_fuse_pass_tester.h @@ -47,7 +47,8 @@ Scope* CreateParamScope() { return param_scope; } -std::unique_ptr PrepareGraph() { +std::unique_ptr PrepareGraph( + std::string activation = "tanh", std::string gate_activation = "sigmoid") { // inputs operator output // -------------------------------------------------------- // (a, gru_fc_w) mul -> fc_0_tmp_0 @@ -72,7 +73,8 @@ std::unique_ptr PrepareGraph() { auto* gru_batch_hidden_0 = layers.data("gru_batch_hidden_0", {}, false); auto* gru_hidden_0 = layers.data("gru_hidden_0", {}, false); layers.gru(fc_0_tmp1, gru_w, gru_b, gru_batch_gate_0, - gru_batch_reset_hidden_prev_0, gru_batch_hidden_0, gru_hidden_0); + gru_batch_reset_hidden_prev_0, gru_batch_hidden_0, gru_hidden_0, + nullptr, false, false, activation, gate_activation); auto* fc_1_tmp0 = layers.mul(b, fc_w); auto* fc_1_tmp1 = layers.elementwise_add(fc_1_tmp0, fc_b); @@ -82,7 +84,8 @@ std::unique_ptr PrepareGraph() { auto* gru_batch_hidden_1 = layers.data("gru_batch_hidden_1", {}, false); auto* gru_hidden_1 = layers.data("gru_hidden_1", {}, false); layers.gru(fc_1_tmp1, gru_w, gru_b, gru_batch_gate_1, - gru_batch_reset_hidden_prev_1, gru_batch_hidden_1, gru_hidden_1); + gru_batch_reset_hidden_prev_1, gru_batch_hidden_1, gru_hidden_1, + nullptr, false, false, activation, gate_activation); std::unique_ptr graph(new ir::Graph(layers.main_program())); return std::move(graph); diff --git a/paddle/fluid/framework/ir/fc_lstm_fuse_pass.cc b/paddle/fluid/framework/ir/fc_lstm_fuse_pass.cc index 154ca2e45f012e..6bd956ef0d53c9 100644 --- a/paddle/fluid/framework/ir/fc_lstm_fuse_pass.cc +++ b/paddle/fluid/framework/ir/fc_lstm_fuse_pass.cc @@ -150,12 +150,13 @@ int BuildFusion(Graph* graph, const std::string& name_scope, Scope* scope, GET_IR_NODE_FROM_SUBGRAPH(w, w, fc_pattern); GET_IR_NODE_FROM_SUBGRAPH(mul, 
mul, fc_pattern); const bool use_mkldnn = - mul->Op()->GetAttrIfExists("use_mkldnn") && - lstm->Op()->GetAttrIfExists("gate_activation") == - "sigmoid" && - lstm->Op()->GetAttrIfExists("cell_activation") == "tahn" && - lstm->Op()->GetAttrIfExists("candidate_activation") == - "tahn"; + (mul->Op()->GetAttrIfExists("use_mkldnn") && + lstm->Op()->GetAttrIfExists("gate_activation") == + "sigmoid" && + lstm->Op()->GetAttrIfExists("cell_activation") == + "tanh" && + lstm->Op()->GetAttrIfExists("candidate_activation") == + "tanh"); if (with_fc_bias) { GET_IR_NODE_FROM_SUBGRAPH(fc_out, elementwise_add_out, fc_pattern); diff --git a/paddle/fluid/framework/ir/fc_lstm_fuse_pass_tester.h b/paddle/fluid/framework/ir/fc_lstm_fuse_pass_tester.h index 9c1e064e7380bf..f681a2b7ff8eb0 100644 --- a/paddle/fluid/framework/ir/fc_lstm_fuse_pass_tester.h +++ b/paddle/fluid/framework/ir/fc_lstm_fuse_pass_tester.h @@ -49,7 +49,10 @@ Scope* CreateParamScope() { return param_scope; } -std::unique_ptr PrepareGraph() { +std::unique_ptr PrepareGraph( + std::string gate_activation = "sigmoid", + std::string cell_activation = "tanh", + std::string candidate_activation = "tanh") { // inputs operator output // -------------------------------------------------------- // (a, lstm_fc_w) mul -> fc_0_tmp_0 @@ -74,8 +77,8 @@ std::unique_ptr PrepareGraph() { layers.data("lstm_batch_cell_pre_gate_0", {}, false); auto* lstm_hidden_0 = layers.data("lstm_hidden_0", {}, false); layers.lstm(fc_0_tmp1, lstm_w, lstm_b, lstm_cell_0, lstm_batch_gate_0, - lstm_hidden_0, lstm_batch_cell_pre_gate_0); - + lstm_hidden_0, lstm_batch_cell_pre_gate_0, nullptr, nullptr, true, + false, gate_activation, cell_activation, candidate_activation); auto* fc_1_tmp0 = layers.mul(b, fc_w); auto* fc_1_tmp1 = layers.elementwise_add(fc_1_tmp0, fc_b); auto* lstm_cell_1 = layers.data("lstm_cell_1", {}, false); @@ -84,7 +87,8 @@ std::unique_ptr PrepareGraph() { layers.data("lstm_batch_cell_pre_gate_1", {}, false); auto* lstm_hidden_1 = layers.data("lstm_hidden_1", {}, false); layers.lstm(fc_1_tmp1, lstm_w, lstm_b, lstm_cell_1, lstm_batch_gate_1, - lstm_hidden_1, lstm_batch_cell_pre_gate_1); + lstm_hidden_1, lstm_batch_cell_pre_gate_1, nullptr, nullptr, true, + false, gate_activation, cell_activation, candidate_activation); std::unique_ptr graph(new ir::Graph(layers.main_program())); return std::move(graph); diff --git a/paddle/fluid/framework/ir/mkldnn/mkldnn_fc_rnn_fuse_pass_tester.cc b/paddle/fluid/framework/ir/mkldnn/mkldnn_fc_rnn_fuse_pass_tester.cc index 2feb9312a9483a..c4770a322db50c 100644 --- a/paddle/fluid/framework/ir/mkldnn/mkldnn_fc_rnn_fuse_pass_tester.cc +++ b/paddle/fluid/framework/ir/mkldnn/mkldnn_fc_rnn_fuse_pass_tester.cc @@ -22,10 +22,15 @@ namespace paddle { namespace framework { namespace ir { -void TestFcRNNFusePass(const std::string& pass_name) { +void TestFcRNNFusePass(const std::string& pass_name, + std::string activation = "tanh", + std::string gate_activation = "sigmoid", + std::string candidate_activation = "tanh") { std::unique_ptr graph = - (pass_name == "fc_gru_fuse_pass" ? fc_gru_test::PrepareGraph() - : fc_lstm_test::PrepareGraph()); + (pass_name == "fc_gru_fuse_pass" + ? 
fc_gru_test::PrepareGraph(activation, gate_activation) + : fc_lstm_test::PrepareGraph(gate_activation, activation, + candidate_activation)); auto mkldnn_placement_pass_ = PassRegistry::Instance().Get("mkldnn_placement_pass"); mkldnn_placement_pass_->Set("mkldnn_enabled_op_types", @@ -46,21 +51,37 @@ void TestFcRNNFusePass(const std::string& pass_name) { return nodes_cout; }; int num_mkldnn_nodes_before = check_num_mkldnn_nodes(graph); + int removed_mkldnn_nodes = 2; + + // OneDNN fusion_gru and fusion_lstm supports only sigmoid as a gate + // activation and tanh as an activation and candidate_activation + if (activation != "tanh" || gate_activation != "sigmoid" || + candidate_activation != "tanh") + removed_mkldnn_nodes += 2; auto fc_rnn_fuse_pass_ = PassRegistry::Instance().Get(pass_name); graph.reset(fc_rnn_fuse_pass_->Apply(graph.release())); int num_mkldnn_nodes_after = check_num_mkldnn_nodes(graph); - PADDLE_ENFORCE_EQ(num_mkldnn_nodes_before, num_mkldnn_nodes_after, + PADDLE_ENFORCE_EQ(num_mkldnn_nodes_before - removed_mkldnn_nodes, + num_mkldnn_nodes_after, platform::errors::PreconditionNotMet( - "The number of nodes with \"use_mkldnn\" attr before " - "and after the fuse are not equal")); + "The number of nodes with \"use_mkldnn\" attr after " + "passes is not as expected")); } TEST(FcGruFusePass, use_mkldnn) { TestFcRNNFusePass("fc_gru_fuse_pass"); } +TEST(FcGruFusePass, gru_unsupported_activations) { + TestFcRNNFusePass("fc_gru_fuse_pass", "relu", "sigmoid"); +} + TEST(FcLstmFusePass, use_mkldnn) { TestFcRNNFusePass("fc_lstm_fuse_pass"); } +TEST(FcLstmFusePass, lstm_unsupported_activations) { + TestFcRNNFusePass("fc_lstm_fuse_pass", "tanh", "relu", "tanh"); +} + } // namespace ir } // namespace framework } // namespace paddle diff --git a/paddle/fluid/framework/ir/pass_tester_helper.h b/paddle/fluid/framework/ir/pass_tester_helper.h index 6b187e538d1c08..70ee714cba457c 100644 --- a/paddle/fluid/framework/ir/pass_tester_helper.h +++ b/paddle/fluid/framework/ir/pass_tester_helper.h @@ -194,14 +194,18 @@ struct Layers { } VarDesc* mul(VarDesc* x, VarDesc* y, VarDesc* out = nullptr, - int x_num_col_dims = 1) { + int x_num_col_dims = 1, bool use_mkldnn = false) { AttributeMap attrs; attrs["x_num_col_dims"] = 1; + attrs["use_mkldnn"] = use_mkldnn; return binary_op("mul", x, y, out, &attrs); } - VarDesc* elementwise_add(VarDesc* x, VarDesc* y, VarDesc* out = nullptr) { - return binary_op("elementwise_add", x, y, out); + VarDesc* elementwise_add(VarDesc* x, VarDesc* y, VarDesc* out = nullptr, + bool use_mkldnn = false) { + AttributeMap attrs; + attrs["use_mkldnn"] = use_mkldnn; + return binary_op("elementwise_add", x, y, out, &attrs); } VarDesc* elementwise_mul(VarDesc* x, VarDesc* y, VarDesc* out = nullptr, From e731d53868abcc618eadd3c9a87a9a6e9f7c8370 Mon Sep 17 00:00:00 2001 From: wozna Date: Thu, 10 Jun 2021 13:51:09 +0200 Subject: [PATCH 6/6] Trigger CI
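Editor's note (not part of the patch series): once fusion_gru and fusion_lstm carry the use_mkldnn and mkldnn_data_type attributes added above, the bfloat16 placement logic can select them during inference. The following is only a rough sketch of how a BF16 inference config might be built on the C++ analysis API; it assumes the AnalysisConfig::EnableMKLDNN() call already used in analyzer_lexical_analysis_gru_tester.cc and an EnableMkldnnBfloat16() toggle are available in this Paddle version, and the model path is a placeholder.

// Sketch only: enable the oneDNN BF16 path for a model containing fused RNN ops.
// Assumes EnableMkldnnBfloat16() exists on AnalysisConfig in this Paddle build.
#include "paddle/fluid/inference/api/paddle_inference_api.h"

paddle::AnalysisConfig MakeBf16RnnConfig(const std::string& model_dir) {
  paddle::AnalysisConfig cfg;
  cfg.SetModel(model_dir);      // placeholder model directory
  cfg.SwitchIrOptim(true);      // run fc_gru_fuse_pass / fc_lstm_fuse_pass
  cfg.EnableMKLDNN();           // mark supported ops with use_mkldnn
  cfg.EnableMkldnnBfloat16();   // let the bfloat16 placement pass pick fusion_gru/fusion_lstm
  return cfg;
}

With such a config, the placement and cpu_bfloat16 passes decide which of the marked ops actually run in bfloat16; the unit tests in this series only verify that the fuse passes preserve the use_mkldnn markings and skip unsupported activations.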