3 changes: 0 additions & 3 deletions paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass_tester.cc
@@ -59,9 +59,6 @@ void SetOp(ProgramDesc* prog,
op->SetAttr("fuse_residual_connection", false);
}
op->SetOutput("Output", {outputs[0]});
op->SetAttr("Scale_in", 1.0f);
op->SetAttr("Scale_out", 1.0f);
op->SetAttr("Scale_weights", std::vector<float>{1.0f});
} else if (type == "pool2d" || type == "transpose2" || type == "reshape2" ||
type == "nearest_interp" || type == "nearest_interp_v2") {
op->SetInput("X", {inputs[0]});
4 changes: 3 additions & 1 deletion paddle/fluid/framework/ir/mkldnn/cpu_quantize_squash_pass.cc
@@ -354,7 +354,9 @@ void CPUQuantizeSquashPass::OpDequantSquash(Graph* graph) const {
FindOutputNameByVarName(any_op->Op(), dequant_in->Name());

if (output_name.empty()) return;

if (any_op->Op()->Type() == "conv2d") {
any_op->Op()->SetType("fused_conv2d");
Member: So all int8-oneDNN kernels should be executed as fused kernels by default?

Contributor Author (@zyfncg, Dec 28, 2022): Yes. We are trying to remove the extra inputs and attributes from the base op, so some of the extra attributes used by the int8-oneDNN kernel are removed as well; for now we have to route them through the fused kernel because there is no better choice. I think a good way to execute the int8-oneDNN kernel would be to create a dedicated int8-oneDNN kernel, but that is difficult to implement at the current stage; maybe we can come up with a better solution in the future.
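
An editor's sketch (not part of this patch) of what that implies for the quantization passes: once plain conv2d no longer declares the int8/oneDNN extras, a pass must first retype the op to fused_conv2d, which still declares them, before setting the quantization attributes. The attribute names are the ones used elsewhere in this diff; the surrounding pass context (any_op) is assumed.

// Hypothetical pass fragment mirroring the OpDequantSquash change above.
if (any_op->Op()->Type() == "conv2d") {
  // Plain conv2d no longer carries the int8/oneDNN extras, so switch to the fused op.
  any_op->Op()->SetType("fused_conv2d");
}
// These attributes are only declared for fused_conv2d after this change.
any_op->Op()->SetAttr("Scale_in", 1.0f);
any_op->Op()->SetAttr("Scale_out", 1.0f);
any_op->Op()->SetAttr("Scale_weights", std::vector<float>{1.0f});
any_op->Op()->SetAttr("force_fp32_output", true);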

Member: OK, thank you for explaining.

}
any_op->Op()->SetAttr("force_fp32_output", true);
any_op->Op()->SetOutput(output_name,
std::vector<std::string>({dequant_out->Name()}));
11 changes: 8 additions & 3 deletions paddle/fluid/framework/ir/quant_conv2d_dequant_fuse_pass.cc
@@ -411,6 +411,7 @@ void QuantDequantFusePass::FuseDequant(ir::Graph* graph,
std::string input_name = "";
if (quantized_op_type == "conv2d" ||
quantized_op_type == "depthwise_conv2d" ||
quantized_op_type == "fused_conv2d" ||
quantized_op_type == "conv2d_fusion" ||
quantized_op_type == "conv2d_transpose") {
weight_name = "Filter";
@@ -424,9 +425,10 @@ void QuantDequantFusePass::FuseDequant(ir::Graph* graph,
input_name = "Input";
} else {
PADDLE_THROW(platform::errors::Unimplemented(
"QuantDequantFuse: We only support conv2d, conv2d_fusion, "
"conv2d_transpose, fc, mul, matmul, matmul_v2 for "
"now."));
"QuantDequantFuse: We only support conv2d, conv2d_fusion, fused_conv2d, "
"conv2d_transpose, fc, mul, matmul, matmul_v2 for now, but received: "
"%s.",
quantized_op_type));
}
const std::string pattern_name = "dequant_fuse";
GraphPatternDetector gpd;
@@ -559,6 +561,7 @@ void QuantDequantFusePass::FuseDequant(ir::Graph* graph,
}
}
} else if (quantized_op_type == "conv2d" ||
quantized_op_type == "fused_conv2d" ||
quantized_op_type == "depthwise_conv2d") {
PADDLE_ENFORCE_EQ(
dequant_type,
@@ -642,6 +645,7 @@ void QuantDequantFusePass::FuseDequant(ir::Graph* graph,
new_op_desc.SetType(quantized_op_type);
new_op_desc.SetAttr("enable_int8", true);
if (quantized_op_type == "conv2d" || quantized_op_type == "conv2d_fusion" ||
quantized_op_type == "fused_conv2d" ||
quantized_op_type == "depthwise_conv2d" ||
quantized_op_type == "conv2d_transpose") {
new_op_desc.SetInput("Input", {new_input});
@@ -677,6 +681,7 @@ void QuantDequantFusePass::ApplyImpl(ir::Graph* graph) const {
"fake_quantize_range_abs_max", "fake_quantize_moving_average_abs_max"};
std::unordered_set<std::string> quantized_op_types = {
"conv2d",
"fused_conv2d",
"mul",
"matmul",
"depthwise_conv2d",
5 changes: 5 additions & 0 deletions paddle/fluid/framework/op_desc.cc
@@ -671,6 +671,11 @@ void OpDesc::SetAttr(const std::string &name, const Attribute &v) {
if (extra_attr_iter != extra_attr_map.end()) {
is_runtime_attr = true;
attrs_ptr = &(this->runtime_attrs_);
// When an attribute is found in both attrs and runtime_attrs, it must
// be a runtime attribute, so its value in attrs should be removed.
if (this->attrs_.find(name) != this->attrs_.end()) {
this->attrs_.erase(name);
}
}
// NOTICE(minqiyang): pybind11 will take the empty list in python as
// the std::vector<int> type in C++; so we have to change the attr's type
50 changes: 0 additions & 50 deletions paddle/fluid/operators/compat/conv2d.pbtxt
@@ -6,12 +6,6 @@ def {
inputs {
name: "Filter"
}
inputs {
name: "Bias"
}
inputs {
name: "ResidualData"
}
outputs {
name: "Output"
}
@@ -69,54 +63,10 @@ extra {
name: "skip_quant"
type: BOOLEAN
}
attrs {
name: "fuse_relu_before_depthwise_conv"
type: BOOLEAN
}
attrs {
name: "fuse_relu"
type: BOOLEAN
}
attrs {
name: "fuse_activation"
type: STRING
}
attrs {
name: "fuse_alpha"
type: FLOAT
}
attrs {
name: "fuse_beta"
type: FLOAT
}
attrs {
name: "use_addto"
type: BOOLEAN
}
attrs {
name: "fuse_residual_connection"
type: BOOLEAN
}
attrs {
name: "Scale_in"
type: FLOAT
}
attrs {
name: "Scale_out"
type: FLOAT
}
attrs {
name: "Scale_in_eltwise"
type: FLOAT
}
attrs {
name: "Scale_weights"
type: FLOATS
}
attrs {
name: "force_fp32_output"
type: BOOLEAN
}
attrs {
name: "workspace_size_MB"
type: INT
46 changes: 0 additions & 46 deletions paddle/fluid/operators/compat/depthwise_conv2d.pbtxt
@@ -6,12 +6,6 @@ def {
inputs {
name: "Filter"
}
inputs {
name: "Bias"
}
inputs {
name: "ResidualData"
}
outputs {
name: "Output"
}
@@ -65,50 +59,10 @@ extra {
name: "fuse_relu_before_depthwise_conv"
type: BOOLEAN
}
attrs {
name: "fuse_relu"
type: BOOLEAN
}
attrs {
name: "fuse_activation"
type: STRING
}
attrs {
name: "fuse_alpha"
type: FLOAT
}
attrs {
name: "fuse_beta"
type: FLOAT
}
attrs {
name: "use_addto"
type: BOOLEAN
}
attrs {
name: "fuse_residual_connection"
type: BOOLEAN
}
attrs {
name: "Scale_in"
type: FLOAT
}
attrs {
name: "Scale_out"
type: FLOAT
}
attrs {
name: "Scale_in_eltwise"
type: FLOAT
}
attrs {
name: "Scale_weights"
type: FLOATS
}
attrs {
name: "force_fp32_output"
type: BOOLEAN
}
attrs {
name: "workspace_size_MB"
type: INT
12 changes: 0 additions & 12 deletions paddle/fluid/operators/conv_op.cc
@@ -251,18 +251,6 @@ void Conv2DOpMaker::Make() {
"H is the height of the filter, and W is the width of the filter. "
"If the groups attribute is greater than 1, C equals the number of "
"input image channels divided by the groups.");
AddInput("Bias",
"(Tensor) Bias to be added to each output of filter application."
"The format of output tensor is X (one-dimensional) of size equal"
"to the number of output channels. Only used with MKL-DNN.")
.AsDispensable()
.AsExtra();
AddInput("ResidualData",
"(Tensor) Tensor with residual data "
"to which convolution output will be added."
"Used with fuse_residual_connection fusion.")
.AsDispensable()
.AsExtra();
AddOutput("Output",
"(Tensor) The output tensor of convolution operator. "
"It has the same data format and data type as the Input.");
10 changes: 10 additions & 0 deletions paddle/fluid/operators/fused/conv_fusion_op.cc
@@ -32,6 +32,16 @@ namespace operators {
class Conv2DFusionOpMaker : public Conv2DOpMaker {
protected:
void Apply() override {
AddInput("Bias",
"(Tensor) Bias to be added to each output of filter application. "
"The format of output tensor is X (one-dimensional) of size equal "
"to the number of output channels. Only used with MKL-DNN.")
.AsDispensable();
AddInput("ResidualData",
"(Tensor) Tensor with residual data "
"to which convolution output will be added. "
"Used with fuse_residual_connection fusion.")
.AsDispensable();
AddAttr<std::string>(
"activation",
"The activation type can be 'identity', 'sigmoid', 'relu', 'relu6' "
10 changes: 10 additions & 0 deletions paddle/fluid/operators/fused/fused_conv2d_op.cc
@@ -23,6 +23,16 @@ namespace operators {
class FusedConvOpMaker : public Conv2DOpMaker {
protected:
void Apply() override {
AddInput("Bias",
"(Tensor) Bias to be added to each output of filter application. "
"The format of output tensor is X (one-dimensional) of size equal "
"to the number of output channels. Only used with MKL-DNN.")
.AsDispensable();
AddInput("ResidualData",
"(Tensor) Tensor with residual data "
"to which convolution output will be added. "
"Used with fuse_residual_connection fusion.")
.AsDispensable();
AddAttr<std::string>(
"mkldnn_data_type",
"(string, default \"float32\"). Data type of mkldnn kernel")
4 changes: 1 addition & 3 deletions paddle/fluid/operators/ops_extra_info.h
@@ -224,9 +224,7 @@ class ExtraInfoUtils {

// TODO(chenweihang): move these extra inputs into op_compat.yaml
std::unordered_map<std::string, std::vector<std::string>>
g_extra_input_names_map_ = {{"conv2d", {"Bias", "ResidualData"}},
{"conv2d_transpose", {"Bias"}},
{"conv2d_grad", {"Bias"}},
g_extra_input_names_map_ = {{"conv2d_transpose", {"Bias"}},
{"matmul_v2", {"ResidualData"}}};
std::vector<std::string> empty_extra_input_names_;
};
12 changes: 7 additions & 5 deletions paddle/phi/api/yaml/op_compat.yaml
@@ -210,11 +210,8 @@
- op : conv2d
backward : conv2d_grad
extra :
attrs : [bool is_test = false, bool use_cudnn = true, bool fuse_relu_before_depthwise_conv = false, bool use_mkldnn = false,
bool use_quantizer = false, str mkldnn_data_type = "float32", bool fuse_relu = false,
str fuse_activation = "", float fuse_alpha = 0.0f, float fuse_beta = 0.0f, bool use_addto = false,
bool fuse_residual_connection = false, float Scale_in = 1.0f, float Scale_out = 1.0f,
float Scale_in_eltwise = 1.0f, 'float[] Scale_weights = {1.0f}', bool force_fp32_output = false,
attrs : [bool is_test = false, bool use_cudnn = true, bool use_mkldnn = false, bool use_addto = false,
str mkldnn_data_type = "float32", bool force_fp32_output = false,
int workspace_size_MB = phi::backends::gpu::GetDefaultConvWorkspaceSizeLimitMB(), bool exhaustive_search = false]

- op : conv2d_fusion
@@ -556,6 +553,11 @@
extra :
attrs : [bool use_mkldnn = false]

- op : fused_conv2d
Member: fused_conv2d has its own operator with the extra attributes declared. Is it still necessary to add an op_compat entry?

Contributor Author: Yes, it is necessary. The extra attributes declared in fused_conv2d.pbtxt are used by the IR passes, but the executor retrieves the extra-attribute info from op_compat.yaml when it runs the kernel, so an op_compat entry for these extra attributes is still required.
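
An editor's sketch of how such a runtime attribute is then read at execution time, following the dev_ctx.HasDnnAttr / GetDnnAttr pattern that appears in conv_kernel.cc further down this diff; per the explanation above, without the op_compat.yaml entry the attribute would not be forwarded and the fallback branch would be taken.

// Hypothetical kernel fragment; dev_ctx is the oneDNN device context.
float scale_in =
    dev_ctx.HasDnnAttr("Scale_in")
        ? PADDLE_GET_CONST(float, dev_ctx.GetDnnAttr("Scale_in"))
        : 1.0f;  // fallback when the attribute is not present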

extra :
attrs : [bool use_cudnn = false, float fuse_alpha = 0.0f, float fuse_beta = 0.0f, float Scale_in = 1.0f,
float Scale_out = 1.0f, float Scale_in_eltwise = 1.0f, 'float[] Scale_weights = {1.0f}']

- op : gather
backward : gather_grad
extra :
23 changes: 5 additions & 18 deletions paddle/phi/kernels/onednn/conv_kernel.cc
@@ -41,29 +41,16 @@ void ConvKernel(const Context& dev_ctx,
dev_ctx.GetDnnAttr("mkldnn_data_type")) ==
"bfloat16"
: false;
const auto* bias =
dev_ctx.HasDnnInput("Bias") ? dev_ctx.GetDnnInput("Bias") : nullptr;
const auto* residual_param = dev_ctx.HasDnnInput("ResidualData")
? dev_ctx.GetDnnInput("ResidualData")
: nullptr;
bool fuse_residual_conn =
dev_ctx.HasDnnAttr("fuse_residual_connection")
? PADDLE_GET_CONST(bool,
dev_ctx.GetDnnAttr("fuse_residual_connection"))
: false;
const std::string& fuse_activation =
dev_ctx.HasDnnAttr("fuse_activation")
? PADDLE_GET_CONST(std::string, dev_ctx.GetDnnAttr("fuse_activation"))
: "";
bool force_fp32_output =
dev_ctx.HasDnnAttr("force_fp32_output")
? PADDLE_GET_CONST(bool, dev_ctx.GetDnnAttr("force_fp32_output"))
: false;

ConvOnednn<T>(dev_ctx,
&input,
&filter,
bias,
residual_param,
nullptr,
nullptr,
strides,
paddings,
padding_algorithm,
@@ -72,8 +59,8 @@
data_format,
is_test,
is_BFLOAT16,
fuse_activation,
fuse_residual_conn,
"",
false,
force_fp32_output,
out);
}
@@ -164,7 +164,7 @@ def _transform_to_conv_mkldnn(self, graph, op_node):
}

conv_op_node = graph.create_op_node(
op_type='conv2d',
op_type='fused_conv2d',
attrs=attrs,
inputs={'Input': input_var_node, 'Filter': weight_var_node},
outputs={'Output': output_var_node},
@@ -104,6 +104,7 @@ def setUp(self):
}

if self.fuse_residual:
self.op_type = "fused_conv2d"
self.inputs['ResidualData'] = OpTest.np_dtype_to_fluid_dtype(
convert_float_to_uint16(self.input_residual)
)
@@ -158,6 +158,9 @@ def residual_helper(init_low, init_high, output_):
input_residual
)

if self.fuse_activation != "" or self.fuse_residual:
self.op_type = "fused_conv2d"

self.attrs = {
'strides': self.stride,
'paddings': self.pad,
@@ -341,6 +344,7 @@ def init_group(self):


def init_data_type_with_fusion(self, input_dt, fuse_activation, fuse_residual):
self.op_type = "fused_conv2d"
self.srctype = input_dt
self.dsttype = np.uint8 if fuse_activation == "relu" else np.int8

@@ -99,6 +99,13 @@ def setUp(self):
output = np.minimum(np.maximum(output, 0), self.fuse_alpha).astype(
self.dsttype
)
if (
self.fuse_activation != ""
or self.fuse_bias
or self.fuse_residual_connection
):
self.op_type = 'fused_conv2d'

output = output.astype(self.dtype)

self.attrs['fuse_bias'] = self.fuse_bias