From c6870b6f922bb4d3a66af0ec986771fa371cef5a Mon Sep 17 00:00:00 2001
From: xiaoxiaohehe001 <hiteezsf@163.com>
Date: Tue, 31 Aug 2021 06:34:57 +0000
Subject: [PATCH 1/4] add anchor_generator test

---
 .../test_trt_convert_anchor_generator.py      | 73 +++++++++++++++++++
 1 file changed, 73 insertions(+)
 create mode 100644 python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_anchor_generator.py

diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_anchor_generator.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_anchor_generator.py
new file mode 100644
index 00000000000000..c48a531b8e6d77
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_anchor_generator.py
@@ -0,0 +1,73 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+# 
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# 
+#     http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from trt_layer_auto_scan_test import TrtLayerAutoScanTest
+from program_config import TensorConfig
+import numpy as np
+import paddle.inference as paddle_infer
+
+
+class TrtConvertDropoutTest(TrtLayerAutoScanTest):
+    def setUp(self):
+        self.ops_config = [{
+            "op_type": "anchor_generator",
+            "op_inputs": {
+                "Input": ["input_data"],
+            },
+            "op_outputs": {
+                "Anchors": ["output_anchors"],
+                "Variances": ["output_variances"]
+            },
+            "op_attrs": {
+                "anchor_sizes": [[64.0, 128.0, 256.0, 512.0]],
+                "aspect_ratios": [[0.5, 1 ,2]],
+                "variances": [[1.0, 1.0, 1.0, 1.0]],
+                "stride": [[16.0, 16.0]],
+                "offset": [0.5]
+            }
+        }]
+        self.batch_size_set = [1, 2, 4]
+
+    def update_program_input_and_weight_with_attr(self, op_attr_list):
+        input_data = TensorConfig(shape=[-1, 3, 64, 64])
+        self.program_weights = {}
+        self.program_inputs = {"input_data": input_data}
+        self.program_outputs = ["output_anchors", "output_variances"]
+
+    def test_check_fp32_output(self):
+        self.trt_param.precision == paddle_infer.PrecisionType.Float32
+        # the fused tensorrt engine num is 1, and paddle op num is 2(feed and fetch).
+        self.run_test(trt_engine_num=1, paddle_op_num=3, threshold=1e-5)
+
+    def test_check_fp16_output(self):
+        self.trt_param.precision == paddle_infer.PrecisionType.Half
+        self.run_test(trt_engine_num=1, paddle_op_num=3, threshold=1e-2)
+
+    def test_dynamic_shape_fp32_check_output(self):
+        self.trt_param.precision = paddle_infer.PrecisionType.Float32
+        self.dynamic_shape.min_input_shape = {"input_data": [1, 3, 32, 32]}
+        self.dynamic_shape.max_input_shape = {"input_data": [4, 3, 64, 64]}
+        self.dynamic_shape.opt_input_shape = {"input_data": [1, 3, 64, 64]}
+        self.run_test(trt_engine_num=1, paddle_op_num=3, threshold=1e-5)
+
+    def test_dynamic_shape_fp16_check_output(self):
+        self.trt_param.precision = paddle_infer.PrecisionType.Half
+        self.dynamic_shape.min_input_shape = {"input_data": [1, 3, 32, 32]}
+        self.dynamic_shape.max_input_shape = {"input_data": [4, 3, 64, 64]}
+        self.dynamic_shape.opt_input_shape = {"input_data": [1, 3, 64, 64]}
+        self.run_test(trt_engine_num=1, paddle_op_num=3, threshold=1e-2)
+
+
+if __name__ == "__main__":
+    unittest.main()

From 892b62c52298478be2b3b178527d1748341f02e9 Mon Sep 17 00:00:00 2001
From: xiaoxiaohehe001 <hiteezsf@163.com>
Date: Tue, 31 Aug 2021 06:47:06 +0000
Subject: [PATCH 2/4] add anchor_generator test

---
 .../framework/ir/map_matmul_to_mul_pass.cc    |  25 ++-
 paddle/scripts/paddle_build.bat               |   1 +
 .../quantization/imperative/fuse_utils.py     | 175 ++++++++++++++++++
 .../slim/quantization/imperative/ptq.py       |  17 +-
 .../slim/tests/imperative_test_utils.py       |  60 ++++++
 .../contrib/slim/tests/test_imperative_ptq.py |  94 +++++++++-
 6 files changed, 353 insertions(+), 19 deletions(-)
 create mode 100644 python/paddle/fluid/contrib/slim/quantization/imperative/fuse_utils.py

diff --git a/paddle/fluid/framework/ir/map_matmul_to_mul_pass.cc b/paddle/fluid/framework/ir/map_matmul_to_mul_pass.cc
index 613768284735c1..b8666c1c73ee02 100644
--- a/paddle/fluid/framework/ir/map_matmul_to_mul_pass.cc
+++ b/paddle/fluid/framework/ir/map_matmul_to_mul_pass.cc
@@ -264,10 +264,6 @@ void Squeeze2MatmulFusePass::ApplyImpl(ir::Graph* graph) const {
   auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
                      Graph* g) {
     VLOG(4) << "fuse squeeze2+matmul to mul";
-    if (!IsCompat(subgraph, g)) {
-      LOG(WARNING) << "Pass in op compat failed.";
-      return;
-    }
     GET_IR_NODE_FROM_SUBGRAPH(squeeze2_in_x, squeeze2_in_x, fuse_pattern);
     GET_IR_NODE_FROM_SUBGRAPH(squeeze2_op, squeeze2_op, fuse_pattern);
     GET_IR_NODE_FROM_SUBGRAPH(matmul_in_x, matmul_in_x, fuse_pattern);
@@ -299,6 +295,10 @@ void Squeeze2MatmulFusePass::ApplyImpl(ir::Graph* graph) const {
            next_ops[0]->Name() == "elementwise_add";
 
     if (flag) {
+      if (!IsCompat(subgraph, g)) {
+        LOG(WARNING) << "Pass in op compat failed.";
+        return;
+      }
       OpDesc desc;
       desc.SetType("mul");
       desc.SetInput("X", {squeeze2_in_x->Name()});
@@ -403,10 +403,6 @@ void Reshape2MatmulFusePass::ApplyImpl(ir::Graph* graph) const {
   int found_count = 0;
   auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
                      Graph* g) {
-    if (!IsCompat(subgraph, g)) {
-      LOG(WARNING) << "Pass in op compat failed.";
-      return;
-    }
     VLOG(4) << "fuse reshape2+matmul to mul";
     GET_IR_NODE_FROM_SUBGRAPH(reshape2_in_x, reshape2_in_x, fuse_pattern);
     GET_IR_NODE_FROM_SUBGRAPH(reshape2_op, reshape2_op, fuse_pattern);
@@ -441,6 +437,10 @@ void Reshape2MatmulFusePass::ApplyImpl(ir::Graph* graph) const {
            next_ops[0]->Name() == "elementwise_add";
 
     if (flag) {
+      if (!IsCompat(subgraph, g)) {
+        LOG(WARNING) << "Pass in op compat failed.";
+        return;
+      }
       OpDesc desc;
       desc.SetType("mul");
       desc.SetInput("X", {reshape2_in_x->Name()});
@@ -483,11 +483,6 @@ void Flatten2MatmulFusePass::ApplyImpl(ir::Graph* graph) const {
   int found_count = 0;
   auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
                      Graph* g) {
-    if (!IsCompat(subgraph, g)) {
-      LOG(WARNING) << "Pass in op compat failed.";
-      return;
-    }
-
     VLOG(4) << "fuse flatten2+matmul to mul";
     GET_IR_NODE_FROM_SUBGRAPH(flatten2_in_x, flatten2_in_x, fuse_pattern);
     GET_IR_NODE_FROM_SUBGRAPH(flatten2_op, flatten2_op, fuse_pattern);
@@ -527,6 +522,10 @@ void Flatten2MatmulFusePass::ApplyImpl(ir::Graph* graph) const {
                     next_ops[0]->Name() == "elementwise_add";
 
     if (pattern_found) {
+      if (!IsCompat(subgraph, g)) {
+        LOG(WARNING) << "Pass in op compat failed.";
+        return;
+      }
       OpDesc desc;
       desc.SetType("mul");
       desc.SetInput("X", {flatten2_in_x->Name()});
diff --git a/paddle/scripts/paddle_build.bat b/paddle/scripts/paddle_build.bat
index 75871fe6d36eaa..0ad2d7999603de 100644
--- a/paddle/scripts/paddle_build.bat
+++ b/paddle/scripts/paddle_build.bat
@@ -433,6 +433,7 @@ if %ERRORLEVEL% NEQ 0 (
 echo Build third_party successfully!
 
 set build_times=1
+
 :build_paddle
 :: reset clcache zero stats for collect PR's actual hit rate
 rem clcache.exe -z
diff --git a/python/paddle/fluid/contrib/slim/quantization/imperative/fuse_utils.py b/python/paddle/fluid/contrib/slim/quantization/imperative/fuse_utils.py
new file mode 100644
index 00000000000000..14282df23d3650
--- /dev/null
+++ b/python/paddle/fluid/contrib/slim/quantization/imperative/fuse_utils.py
@@ -0,0 +1,175 @@
+#   Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import copy
+import paddle
+import paddle.nn as nn
+from . import utils
+
+
+class Identity(nn.Layer):
+    '''Pass-through layer used in place of a layer absorbed by fusion.'''
+
+    def __init__(self, *args, **kwargs):  # extra arguments are ignored
+        super(Identity, self).__init__()
+
+    def forward(self, input):  # returns its input unchanged
+        return input
+
+
+def fuse_layers(model, layers_to_fuse, inplace=False):
+    '''
+       Fuse each group of layers named in layers_to_fuse.
+
+       Args:
+           model(paddle.nn.Layer): The model to be fused.
+           layers_to_fuse(list): Groups of layer names to fuse, for
+               example [["conv1", "bn1"], ["conv2", "bn2"]]. Every
+               group is handed to _fuse_layers in turn. A TypeError
+               is raised when it is None, because None cannot be
+               iterated.
+           inplace(bool): Whether apply fusing to the input model.
+                          Default: False (a deep copy is fused).
+
+       Return
+           fused_model(paddle.nn.Layer): The fused model.
+    '''
+    if not inplace:
+        model = copy.deepcopy(model)
+    for layers in layers_to_fuse:
+        _fuse_layers(model, layers)
+    return model
+
+
+def _fuse_layers(model, layers_list):
+    '''Swap the layers named in layers_list for their fused replacements.'''
+    find = utils.find_parent_layer_and_sub_name
+    originals = []
+    for name in layers_list:
+        parent, attr = find(model, name)
+        originals.append(getattr(parent, attr))
+    replacements = _fuse_func(originals)
+    # Entry idx of the result replaces the layer named at position idx.
+    for idx, name in enumerate(layers_list):
+        parent, attr = find(model, name)
+        setattr(parent, attr, replacements[idx])
+
+
+def _fuse_func(layer_list):
+    '''Fuse layer_list into one layer; return it followed by Identity layers.'''
+    types = tuple(type(m) for m in layer_list)
+    fusion_method = types_to_fusion_method.get(types, None)
+    if fusion_method is None:
+        raise TypeError("No fusion method for layer types %s" % (types, ))
+    fused_layer = fusion_method(*layer_list)
+    # list() snapshots the hooks so deleting inside the loop is safe.
+    for handle_id, hook in list(layer_list[0]._forward_pre_hooks.items()):
+        fused_layer.register_forward_pre_hook(hook)
+        del layer_list[0]._forward_pre_hooks[handle_id]
+    for handle_id, hook in list(layer_list[-1]._forward_post_hooks.items()):
+        fused_layer.register_forward_post_hook(hook)
+        del layer_list[-1]._forward_post_hooks[handle_id]
+    new_layers = [fused_layer] + [Identity() for _ in layer_list[1:]]
+    for identity in new_layers[1:]:
+        identity.training = layer_list[0].training
+    return new_layers
+
+
+def _fuse_conv_bn(conv, bn):
+    '''Fuse conv with bn; only eval mode is implemented.'''
+    assert conv.training == bn.training, (
+        "Conv and BN both must be in the same mode (train or eval).")
+    if not conv.training:
+        return _fuse_conv_bn_eval(conv, bn)
+    # Training mode: the channel counts are checked, then fusion is refused.
+    assert bn._num_features == conv._out_channels, 'Output channel of Conv2d must match num_features of BatchNorm2d'
+    raise NotImplementedError
+
+
+def _fuse_conv_bn_eval(conv, bn):
+    '''Return a deep copy of conv with bn's statistics folded in (eval only).'''
+    assert not conv.training and not bn.training, "Fusion only for eval!"
+    merged = copy.deepcopy(conv)
+    new_w, new_b = _fuse_conv_bn_weights(merged.weight, merged.bias, bn._mean,
+                                         bn._variance, bn._epsilon, bn.weight,
+                                         bn.bias)
+    merged.weight.set_value(new_w)
+    # A conv without bias gets a new bias parameter to hold the fused bias.
+    if merged.bias is None:
+        merged.bias = paddle.create_parameter(
+            shape=[merged._out_channels], is_bias=True, dtype=bn.bias.dtype)
+    merged.bias.set_value(new_b)
+    return merged
+
+
+def _fuse_conv_bn_weights(conv_w, conv_b, bn_rm, bn_rv, bn_eps, bn_w, bn_b):
+    '''Return (weight, bias) of a conv with the given BN parameters folded in.'''
+    # Missing tensors fall back to zeros/ones shaped like bn_rm.
+    conv_b = paddle.zeros_like(bn_rm) if conv_b is None else conv_b
+    bn_w = paddle.ones_like(bn_rm) if bn_w is None else bn_w
+    bn_b = paddle.zeros_like(bn_rm) if bn_b is None else bn_b
+    inv_std = paddle.rsqrt(bn_rv + bn_eps)
+    # Reshape the factor to [-1, 1, ..., 1] so it broadcasts along axis 0.
+    channel_shape = [-1] + [1] * (len(conv_w.shape) - 1)
+    per_channel = (bn_w * inv_std).reshape(channel_shape)
+    fused_w = conv_w * per_channel
+    fused_b = (conv_b - bn_rm) * inv_std * bn_w + bn_b
+    return fused_w, fused_b
+
+
+def _fuse_linear_bn(linear, bn):
+    '''Fuse linear with bn; only eval mode is implemented.'''
+    assert linear.training == bn.training, (
+        "Linear and BN both must be in the same mode (train or eval).")
+    if not linear.training:
+        return _fuse_linear_bn_eval(linear, bn)
+    # Training mode: the feature counts are checked, then fusion is refused.
+    assert bn._num_features == linear.weight.shape[1], (
+        'Output channel of Linear must match num_features of BatchNorm')
+    raise NotImplementedError
+
+
+def _fuse_linear_bn_eval(linear, bn):
+    '''Return a deep copy of linear with bn's statistics folded in (eval only).'''
+    assert not linear.training and not bn.training, "Fusion only for eval!"
+    merged = copy.deepcopy(linear)
+    new_w, new_b = _fuse_linear_bn_weights(merged.weight, merged.bias, bn._mean,
+                                           bn._variance, bn._epsilon,
+                                           bn.weight, bn.bias)
+    merged.weight.set_value(new_w)
+    # A linear without bias gets a new bias parameter to hold the fused bias.
+    if merged.bias is None:
+        # The bias length is taken from axis 1 of the weight.
+        bias_shape = [merged.weight.shape[1]]
+        merged.bias = paddle.create_parameter(
+            shape=bias_shape, is_bias=True, dtype=bn.bias.dtype)
+    merged.bias.set_value(new_b)
+    return merged
+
+
+def _fuse_linear_bn_weights(linear_w, linear_b, bn_rm, bn_rv, bn_eps, bn_w,
+                            bn_b):
+    '''Return (weight, bias) of a linear layer with the BN parameters folded in.'''
+    linear_b = paddle.zeros_like(bn_rm) if linear_b is None else linear_b
+    # Axis 1 of linear_w holds the output features (the bias is sized from
+    # weight.shape[1]), so the per-feature scale must broadcast along axis 0.
+    bn_scale = bn_w * paddle.rsqrt(bn_rv + bn_eps)
+    fused_w = linear_w * bn_scale.unsqueeze(0)
+    return fused_w, (linear_b - bn_rm) * bn_scale + bn_b
+
+
+types_to_fusion_method = {  # looked up by the exact tuple of layer types
+    (nn.Conv2D, nn.BatchNorm2D): _fuse_conv_bn,
+    (nn.Linear, nn.BatchNorm1D): _fuse_linear_bn,
+}
diff --git a/python/paddle/fluid/contrib/slim/quantization/imperative/ptq.py b/python/paddle/fluid/contrib/slim/quantization/imperative/ptq.py
index 3a536ab1d20376..64d9cd321016c5 100644
--- a/python/paddle/fluid/contrib/slim/quantization/imperative/ptq.py
+++ b/python/paddle/fluid/contrib/slim/quantization/imperative/ptq.py
@@ -22,6 +22,7 @@
 from paddle.fluid.log_helper import get_logger
 from paddle.fluid.dygraph.io import INFER_MODEL_SUFFIX, INFER_PARAMS_SUFFIX
 
+from . import fuse_utils
 from . import utils
 from . import ptq_hooks
 from . import ptq_config
@@ -55,7 +56,7 @@ def __init__(self, quant_config=ptq_config.default_ptq_config):
 
         self._quant_config = quant_config
 
-    def quantize(self, model, inplace=False):
+    def quantize(self, model, inplace=False, fuse=False, fuse_list=None):
         """
         Add quant config and hook to the target layer.
 
@@ -63,15 +64,23 @@ def quantize(self, model, inplace=False):
             model(paddle.nn.Layer): The model to be quantized.
             inplace(bool): Whether apply quantization to the input model.
                            Default: False.
-        Returns:
+            fuse(bool): Whether to fuse layers.
+                        Default: False.
+            fuse_list(list): The layers' names to be fused. For example,
+                "fuse_list = [["conv1", "bn1"], ["conv2", "bn2"]]".
+                A TypeError would be raised if "fuse" was set as
+                True but "fuse_list" was None.
+                Default: None.
+        Return
             quantized_model(paddle.nn.Layer): The quantized model.
         """
         assert isinstance(model, paddle.nn.Layer), \
             "The model must be the instance of paddle.nn.Layer."
-
         if not inplace:
             model = copy.deepcopy(model)
-
+        if fuse:
+            model.eval()
+            model = fuse_utils.fuse_layers(model, fuse_list)
         for name, layer in model.named_sublayers():
             if PTQRegistry.is_supported_layer(layer) \
                 and utils.is_leaf_layer(layer) \
diff --git a/python/paddle/fluid/contrib/slim/tests/imperative_test_utils.py b/python/paddle/fluid/contrib/slim/tests/imperative_test_utils.py
index 5c91f01d0bdda4..466cc14eae0984 100644
--- a/python/paddle/fluid/contrib/slim/tests/imperative_test_utils.py
+++ b/python/paddle/fluid/contrib/slim/tests/imperative_test_utils.py
@@ -20,6 +20,7 @@
 from paddle.fluid.dygraph.container import Sequential
 from paddle.nn import ReLU, ReLU6, LeakyReLU, Sigmoid, Softmax, PReLU
 from paddle.nn import Linear, Conv2D, Softmax, BatchNorm2D, MaxPool2D
+from paddle.nn import BatchNorm1D
 
 from paddle.fluid.log_helper import get_logger
 
@@ -43,6 +44,15 @@ def fix_model_dict(model):
     return model
 
 
+def pre_hook(layer, input):
+    # Returns the first element of input multiplied by two; layer is unused.
+    return input[0] * 2
+
+
+def post_hook(layer, input, output):  # layer and input are unused
+    return output * 2  # the output multiplied by two
+
+
 def train_lenet(lenet, reader, optimizer):
     loss_list = []
     lenet.train()
@@ -224,3 +234,53 @@ def forward(self, inputs):
         x = self.softmax_0(x)
 
         return x
+
+
+class ImperativeLinearBn(fluid.dygraph.Layer):
+    """Linear(10 -> 10) followed by BatchNorm1D(10), constant initializers."""
+
+    def __init__(self):
+        super(ImperativeLinearBn, self).__init__()
+        constant = paddle.nn.initializer.Constant
+        linear_weight_attr = paddle.ParamAttr(
+            name="fc_weight", initializer=constant(value=0.5))
+        linear_bias_attr = paddle.ParamAttr(
+            name="fc_bias", initializer=constant(value=1.0))
+        norm_weight_attr = paddle.ParamAttr(
+            name="bn_weight", initializer=constant(value=0.5))
+
+        linear_kwargs = {
+            "in_features": 10,
+            "out_features": 10,
+            "weight_attr": linear_weight_attr,
+            "bias_attr": linear_bias_attr,
+        }
+        self.linear = Linear(**linear_kwargs)
+        self.bn = BatchNorm1D(10, weight_attr=norm_weight_attr)
+
+    def forward(self, inputs):
+        # Apply the linear layer first, then batch norm on its output.
+        hidden = self.linear(inputs)
+        return self.bn(hidden)
+
+
+class ImperativeLinearBn_hook(fluid.dygraph.Layer):
+    """Linear(10 -> 10) + BatchNorm1D(10) with pre_hook/post_hook attached."""
+
+    def __init__(self):
+        super(ImperativeLinearBn_hook, self).__init__()
+        weight_init = paddle.nn.initializer.Constant(value=0.5)
+        linear_weight_attr = paddle.ParamAttr(
+            name="linear_weight", initializer=weight_init)
+
+        self.linear = Linear(
+            in_features=10, out_features=10, weight_attr=linear_weight_attr)
+        self.bn = BatchNorm1D(10)
+
+        # pre_hook is registered as a forward pre-hook on linear, post_hook
+        # as a forward post-hook on bn; the returned handles are not kept.
+        self.linear.register_forward_pre_hook(pre_hook)
+        self.bn.register_forward_post_hook(post_hook)
+
+    def forward(self, inputs):
+        return self.bn(self.linear(inputs))
diff --git a/python/paddle/fluid/contrib/slim/tests/test_imperative_ptq.py b/python/paddle/fluid/contrib/slim/tests/test_imperative_ptq.py
index 575a91642a7e76..fb92b12cb0d870 100644
--- a/python/paddle/fluid/contrib/slim/tests/test_imperative_ptq.py
+++ b/python/paddle/fluid/contrib/slim/tests/test_imperative_ptq.py
@@ -23,18 +23,48 @@
 import copy
 import logging
 
+import paddle.nn as nn
 import paddle
 import paddle.fluid as fluid
 from paddle.fluid.contrib.slim.quantization import *
 from paddle.fluid.log_helper import get_logger
 from paddle.dataset.common import download
 
-from imperative_test_utils import fix_model_dict, ImperativeLenet
+from imperative_test_utils import fix_model_dict, ImperativeLenet, ImperativeLinearBn
+from imperative_test_utils import ImperativeLinearBn_hook
 
 _logger = get_logger(
     __name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s')
 
 
+class TestFuseLinearBn(unittest.TestCase):
+    """
+    Fuse the linear and bn layers, and then quantize the model.
+    """
+
+    def test_fuse(self):
+        model = ImperativeLinearBn()
+        model_h = ImperativeLinearBn_hook()
+        inputs = paddle.randn((3, 10), dtype="float32")
+        config = PTQConfig(AbsmaxQuantizer(), AbsmaxQuantizer())
+        ptq = ImperativePTQ(config)
+        f_l = [['linear', 'bn']]  # groups of layer names to fuse
+        quant_model = ptq.quantize(model, fuse=True, fuse_list=f_l)
+        quant_h = ptq.quantize(model_h, fuse=True, fuse_list=f_l)
+        for name, layer in quant_model.named_sublayers():
+            if any(name in group for group in f_l):  # f_l is nested
+                assert not (isinstance(layer, nn.BatchNorm1D) or
+                            isinstance(layer, nn.BatchNorm2D))
+        out = model(inputs)
+        out_h = model_h(inputs)
+        out_quant = quant_model(inputs)
+        out_quant_h = quant_h(inputs)
+        cos_sim_func = nn.CosineSimilarity(axis=0)
+        print('fuse linear+bn',
+              cos_sim_func(out.flatten(), out_quant.flatten()))
+        print(cos_sim_func(out_h.flatten(), out_quant_h.flatten()))
+
+
 class TestImperativePTQ(unittest.TestCase):
     """
     """
@@ -177,7 +207,6 @@ def test_ptq(self):
         model = ImperativeLenet()
         model_state_dict = paddle.load(params_path)
         model.set_state_dict(model_state_dict)
-
         # Quantize, calibrate and save
         quant_model = self.ptq.quantize(model)
         before_acc_top1 = self.model_test(quant_model, self.batch_num,
@@ -216,6 +245,67 @@ def test_ptq(self):
         print("total time: %ss \n" % (end_time - start_time))
 
 
+class TestImperativePTQfuse(TestImperativePTQ):
+    def test_ptq(self):
+        start_time = time.time()
+
+        self.set_vars()
+
+        # Load model
+        params_path = self.download_model(self.lenet_url, self.lenet_md5,
+                                          "lenet")
+        params_path += "/lenet_pretrained/lenet.pdparams"
+
+        model = ImperativeLenet()
+        model_state_dict = paddle.load(params_path)
+        model.set_state_dict(model_state_dict)
+        # Fuse the listed layer groups, quantize, calibrate and save
+        f_l = [['features.0', 'features.1'], ['features.4', 'features.5']]
+        quant_model = self.ptq.quantize(model, fuse=True, fuse_list=f_l)
+        for name, layer in quant_model.named_sublayers():
+            if any(name in group for group in f_l):  # f_l is nested
+                assert not (isinstance(layer, nn.BatchNorm1D) or
+                            isinstance(layer, nn.BatchNorm2D))
+        before_acc_top1 = self.model_test(quant_model, self.batch_num,
+                                          self.batch_size)
+
+        input_spec = [
+            paddle.static.InputSpec(
+                shape=[None, 1, 28, 28], dtype='float32')
+        ]
+        self.ptq.save_quantized_model(
+            model=quant_model, path=self.save_path, input_spec=input_spec)
+        print('Quantized model saved in {%s}' % self.save_path)
+
+        after_acc_top1 = self.model_test(quant_model, self.batch_num,
+                                         self.batch_size)
+
+        paddle.enable_static()
+        infer_acc_top1 = self.program_test(self.save_path, self.batch_num,
+                                           self.batch_size)
+        paddle.disable_static()
+
+        # Check
+        print('Before converted acc_top1: %s' % before_acc_top1)
+        print('After converted acc_top1: %s' % after_acc_top1)
+        print('Infer acc_top1: %s' % infer_acc_top1)
+
+        # Check whether the quant_model is correct after converting.
+        # Its accuracy must not be lower than self.eval_acc_top1.
+        self.assertTrue(
+            after_acc_top1 >= self.eval_acc_top1,
+            msg="The test acc {%f} is less than {%f}." %
+            (after_acc_top1, self.eval_acc_top1))
+        # Check the saved infer_model. The accuracy of the infer model
+        # should not be lower than the one of the dygraph model.
+        self.assertTrue(
+            infer_acc_top1 >= after_acc_top1,
+            msg='The acc is lower after converting model.')
+
+        end_time = time.time()
+        print("total time: %ss \n" % (end_time - start_time))
+
+
 class TestImperativePTQHist(TestImperativePTQ):
     def set_vars(self):
         config = PTQConfig(HistQuantizer(), AbsmaxQuantizer())

From 324cf9945fc07bed20dc78945759f2825973aee0 Mon Sep 17 00:00:00 2001
From: JZZ-NOTE <jzz2446071400@163.com>
Date: Mon, 13 Sep 2021 11:52:56 +0000
Subject: [PATCH 3/4] add clip convert test

---
 paddle/fluid/inference/tensorrt/op_teller.cc  |  24 +++
 .../ir/inference/test_trt_convert_clip.py     | 154 ++++++++++++++++++
 2 files changed, 178 insertions(+)
 create mode 100644 python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_clip.py

diff --git a/paddle/fluid/inference/tensorrt/op_teller.cc b/paddle/fluid/inference/tensorrt/op_teller.cc
index 9ece7b39c99aa0..dc0ec4a547d3e7 100644
--- a/paddle/fluid/inference/tensorrt/op_teller.cc
+++ b/paddle/fluid/inference/tensorrt/op_teller.cc
@@ -770,6 +770,30 @@ bool OpTeller::Tell(const framework::ir::Node* node, bool use_no_calib_int8,
       if (!with_dynamic_shape && shape[0] == -1) return false;
     }
 
+    if (op_type == "clip") {
+      // Paddle-TRT does not support the input tensors: Min and Max
+      auto clip_inputs = desc.Inputs();
+      if (clip_inputs.find("Min") != clip_inputs.end()) {
+        if (desc.Input("Min").size() >= 1) {
+          return false;
+        }
+      }
+      if (clip_inputs.find("Max") != clip_inputs.end()) {
+        if (desc.Input("Max").size() >= 1) {
+          return false;
+        }
+      }
+
+      auto* block = desc.Block();
+      auto x_var_name = desc.Input("X")[0];
+      auto* x_var_desc = block->FindVar(x_var_name);
+      const auto x_shape = x_var_desc->GetShape();
+      if (x_shape.size() == 1) {
+        VLOG(3) << "clip op does not support input's dim is 1 in tensorrt.";
+        return false;
+      }
+    }
+
     if (op_type == "reduce_sum" || op_type == "reduce_mean") {
       if (!(desc.HasAttr("keep_dim") && desc.HasAttr("dim") &&
             desc.HasAttr("reduce_all"))) {
diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_clip.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_clip.py
new file mode 100644
index 00000000000000..95b4fb83d5bfde
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_clip.py
@@ -0,0 +1,154 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+# 
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# 
+#     http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from trt_layer_auto_scan_test import TrtLayerAutoScanTest, SkipReasons
+from program_config import TensorConfig, ProgramConfig
+import numpy as np
+import paddle.inference as paddle_infer
+from functools import partial
+from typing import Optional, List, Callable, Dict, Any, Set
+
+
+class TrtConvertClipTest(TrtLayerAutoScanTest):  # auto-scan test for clip
+    def is_program_valid(self, program_config: ProgramConfig) -> bool:
+        return True
+
+    def sample_program_configs(self):
+        def generate_input1(dims, batch, attrs: List[Dict[str, Any]]):
+            if dims == 1:
+                return np.ones([64]).astype(np.float32)
+            elif dims == 2:
+                return np.ones([3, 64]).astype(np.float32)
+            elif dims == 3:
+                return np.ones([3, 64, 64]).astype(np.float32)
+            else:  # only this 4-D case uses batch
+                return np.ones([batch, 3, 64, 64]).astype(np.float32)
+
+        def generate_weight1(attrs: List[Dict[str, Any]]):
+            return np.array([np.random.uniform(1, 10)]).astype("float32")
+
+        def generate_weight2(attrs: List[Dict[str, Any]]):
+            return np.array([np.random.uniform(10, 20)]).astype("float32")
+
+        for dims in [1, 2, 3, 4]:
+            for batch in [1, 2, 4]:
+                for op_inputs in [{
+                        "X": ["input_data"]
+                }, {
+                        "X": ["input_data"],
+                        "Min": ["Min_"],
+                        "Max": ["Max_"]
+                }]:
+                    self.input_num = len(op_inputs)  # 1 (X) or 3 (X, Min, Max)
+                    self.dims = dims  # read by sample_predictor_configs
+                    dics = [{
+                        "min": np.random.uniform(1, 10),
+                        "max": np.random.uniform(10, 20)
+                    }, {
+                        "op_inputs": op_inputs
+                    }]
+                    ops_config = [{
+                        "op_type": "clip",
+                        "op_inputs": op_inputs,
+                        "op_outputs": {
+                            "Out": ["output_data"]
+                        },
+                        "op_attrs": dics[0]
+                    }]
+                    ops = self.generate_op_config(ops_config)
+
+                    program_config = ProgramConfig(
+                        ops=ops,
+                        weights={
+                            "Min_": TensorConfig(data_gen=partial(
+                                generate_weight1, dics)),
+                            "Max_": TensorConfig(data_gen=partial(
+                                generate_weight2, dics))
+                        },
+                        inputs={
+                            "input_data": TensorConfig(data_gen=partial(
+                                generate_input1, dims, batch, dics))
+                        },
+                        outputs=["output_data"])
+
+                    yield program_config
+
+    def sample_predictor_configs(
+            self, program_config) -> (paddle_infer.Config, List[int], float):
+        def generate_dynamic_shape(attrs):
+            if self.dims == 1:
+                self.dynamic_shape.min_input_shape = {"input_data": [1]}
+                self.dynamic_shape.max_input_shape = {"input_data": [128]}
+                self.dynamic_shape.opt_input_shape = {"input_data": [64]}
+            elif self.dims == 2:
+                self.dynamic_shape.min_input_shape = {"input_data": [1, 32]}
+                self.dynamic_shape.max_input_shape = {"input_data": [4, 64]}
+                self.dynamic_shape.opt_input_shape = {"input_data": [3, 64]}
+            elif self.dims == 3:
+                self.dynamic_shape.min_input_shape = {"input_data": [1, 32, 32]}
+                self.dynamic_shape.max_input_shape = {
+                    "input_data": [10, 64, 64]
+                }
+                self.dynamic_shape.opt_input_shape = {"input_data": [3, 64, 64]}
+            else:
+                self.dynamic_shape.min_input_shape = {
+                    "input_data": [1, 3, 32, 32]
+                }
+                self.dynamic_shape.max_input_shape = {
+                    "input_data": [4, 3, 64, 64]
+                }
+                self.dynamic_shape.opt_input_shape = {
+                    "input_data": [1, 3, 64, 64]
+                }
+
+        def clear_dynamic_shape():
+            self.dynamic_shape.min_input_shape = {}
+            self.dynamic_shape.max_input_shape = {}
+            self.dynamic_shape.opt_input_shape = {}
+
+        def generate_trt_nodes_num(attrs, dynamic_shape):
+            if self.input_num == 3 or self.dims == 1:
+                return 0, 3  # presumably (trt engine num, paddle op num)
+            else:
+                return 1, 2
+
+        attrs = [
+            program_config.ops[i].attrs
+            for i in range(len(program_config.ops))
+        ]
+
+        # static shape (shape ranges cleared): Float32 first, then Half
+        clear_dynamic_shape()
+        self.trt_param.precision = paddle_infer.PrecisionType.Float32
+        yield self.create_inference_config(), generate_trt_nodes_num(
+            attrs, False), 1e-5
+        self.trt_param.precision = paddle_infer.PrecisionType.Half
+        yield self.create_inference_config(), generate_trt_nodes_num(
+            attrs, False), 1e-5
+
+        # dynamic shape (ranges chosen by self.dims): Float32, then Half
+        generate_dynamic_shape(attrs)
+        self.trt_param.precision = paddle_infer.PrecisionType.Float32
+        yield self.create_inference_config(), generate_trt_nodes_num(attrs,
+                                                                     True), 1e-5
+        self.trt_param.precision = paddle_infer.PrecisionType.Half
+        yield self.create_inference_config(), generate_trt_nodes_num(attrs,
+                                                                     True), 1e-5
+
+    def test(self):
+        self.run_test()
+
+if __name__ == "__main__":
+    import unittest  # this file has no module-level unittest import
+    unittest.main()

From cc3b4b3c56b67335f9a76ebb7419a6a64ebf07f3 Mon Sep 17 00:00:00 2001
From: JZZ-NOTE <jzz2446071400@163.com>
Date: Tue, 14 Sep 2021 07:12:42 +0000
Subject: [PATCH 4/4] delete wrong file

---
 .../test_trt_convert_anchor_generator.py      | 73 -------------------
 1 file changed, 73 deletions(-)
 delete mode 100644 python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_anchor_generator.py

diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_anchor_generator.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_anchor_generator.py
deleted file mode 100644
index c48a531b8e6d77..00000000000000
--- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_anchor_generator.py
+++ /dev/null
@@ -1,73 +0,0 @@
-# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
-# 
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-# 
-#     http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from trt_layer_auto_scan_test import TrtLayerAutoScanTest
-from program_config import TensorConfig
-import numpy as np
-import paddle.inference as paddle_infer
-
-
-class TrtConvertDropoutTest(TrtLayerAutoScanTest):
-    def setUp(self):
-        self.ops_config = [{
-            "op_type": "anchor_generator",
-            "op_inputs": {
-                "Input": ["input_data"],
-            },
-            "op_outputs": {
-                "Anchors": ["output_anchors"],
-                "Variances": ["output_variances"]
-            },
-            "op_attrs": {
-                "anchor_sizes": [[64.0, 128.0, 256.0, 512.0]],
-                "aspect_ratios": [[0.5, 1 ,2]],
-                "variances": [[1.0, 1.0, 1.0, 1.0]],
-                "stride": [[16.0, 16.0]],
-                "offset": [0.5]
-            }
-        }]
-        self.batch_size_set = [1, 2, 4]
-
-    def update_program_input_and_weight_with_attr(self, op_attr_list):
-        input_data = TensorConfig(shape=[-1, 3, 64, 64])
-        self.program_weights = {}
-        self.program_inputs = {"input_data": input_data}
-        self.program_outputs = ["output_anchors", "output_variances"]
-
-    def test_check_fp32_output(self):
-        self.trt_param.precision == paddle_infer.PrecisionType.Float32
-        # the fused tensorrt engine num is 1, and paddle op num is 2(feed and fetch).
-        self.run_test(trt_engine_num=1, paddle_op_num=3, threshold=1e-5)
-
-    def test_check_fp16_output(self):
-        self.trt_param.precision == paddle_infer.PrecisionType.Half
-        self.run_test(trt_engine_num=1, paddle_op_num=3, threshold=1e-2)
-
-    def test_dynamic_shape_fp32_check_output(self):
-        self.trt_param.precision = paddle_infer.PrecisionType.Float32
-        self.dynamic_shape.min_input_shape = {"input_data": [1, 3, 32, 32]}
-        self.dynamic_shape.max_input_shape = {"input_data": [4, 3, 64, 64]}
-        self.dynamic_shape.opt_input_shape = {"input_data": [1, 3, 64, 64]}
-        self.run_test(trt_engine_num=1, paddle_op_num=3, threshold=1e-5)
-
-    def test_dynamic_shape_fp16_check_output(self):
-        self.trt_param.precision = paddle_infer.PrecisionType.Half
-        self.dynamic_shape.min_input_shape = {"input_data": [1, 3, 32, 32]}
-        self.dynamic_shape.max_input_shape = {"input_data": [4, 3, 64, 64]}
-        self.dynamic_shape.opt_input_shape = {"input_data": [1, 3, 64, 64]}
-        self.run_test(trt_engine_num=1, paddle_op_num=3, threshold=1e-2)
-
-
-if __name__ == "__main__":
-    unittest.main()