[dygraph qat] Refine saving output scale to infer program #31784
@@ -251,8 +251,8 @@ def __init__(self,
         super(ImperativeQuantizeInputs, self).__init__()

         self._quantizable_layer_type = tuple(
-            utils.supported_quant_layers_map[layer]
-            if layer in utils.supported_quant_layers_map else layer
+            utils.quant_input_layers_map[layer]
+            if layer in utils.quant_input_layers_map else layer
             for layer in quantizable_layer_type)
         for layer in self._quantizable_layer_type:
             assert not isinstance(layer, str), \
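For reference, the renamed map only normalizes user-supplied layer types, which may arrive as names or as classes. A minimal sketch of that pattern, assuming a toy two-entry map rather than the full Paddle one:

```python
import paddle

# Toy stand-in for utils.quant_input_layers_map (the real map has
# many more entries).
quant_input_layers_map = {
    'Conv2D': paddle.nn.Conv2D,
    'Linear': paddle.nn.Linear,
}

# Users may mix layer names and layer classes; both normalize to classes.
quantizable_layer_type = ['Conv2D', paddle.nn.Linear]
normalized = tuple(
    quant_input_layers_map[layer]
    if layer in quant_input_layers_map else layer
    for layer in quantizable_layer_type)

for layer in normalized:
    assert not isinstance(layer, str)  # every entry is now a class
```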
@@ -324,12 +324,11 @@ def apply(self, model):
             target = name[last_idx:idx]

             quant_layer = self._get_quantized_layer(layer)
-            setattr(quant_layer, "layer_name", layer.full_name())
             setattr(obj, target, quant_layer)

     def _get_quantized_layer(self, layer):
         quant_layer_name = None
-        for key, value in utils.supported_quant_layers_map.items():
+        for key, value in utils.quant_input_layers_map.items():
             if isinstance(layer, value):
                 quant_layer_name = 'Quantized' + key
                 break
@@ -372,6 +371,9 @@ def apply(self, model):
         """
         assert isinstance(model, dygraph.Layer), \
             "The model must be the instance of dygraph.Layer."

+        # Calculate the output scale of the target ops, and don't
+        # consider the skip_quant attr
         for _, layer in model.named_sublayers():
             if self._is_target_layer(layer):
                 self._init_scale_params(layer)
@@ -411,24 +413,21 @@ def save_quantized_model(self, layer, path, input_spec=None, **config):
         assert isinstance(layer, dygraph.Layer), \
             "The model must be the instance of dygraph.Layer."

-        with dygraph.guard():
-            layer.eval()
-            for handle in self._register_hook_handle_list:
-                handle.remove()
-            for _, sub_layer in layer.named_sublayers():
-                if self._is_target_layer(sub_layer):
-                    if hasattr(sub_layer, "layer_name"):
-                        layer_name = sub_layer.layer_name
-                    else:
-                        layer_name = sub_layer.full_name()
-                    if hasattr(sub_layer, "_quant_out_scale"):
-                        self._out_scale_dict[layer_name] = float(
-                            sub_layer._quant_out_scale)
+        # remove handles and collect output scales
+        self._gather_output_scale(layer)

+        # save the quantized model that doesn't have output scales
         paddle.jit.save(layer=layer, path=path, input_spec=input_spec, **config)

+        if len(self._out_scale_dict) == 0:
+            warnings.warn("No Layer of the model to be saved contains "
+                          "the out_threshold attribute, so the generated "
+                          "inference model would not contain the "
+                          "out_threshold.")
+            return

         # load static model
         is_dynamic_mode = False
         if paddle.in_dynamic_mode():
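For orientation, here is a hedged usage sketch of the saving flow above; `imperative_out_scale` and `model` are placeholder names for this sketch, not the PR's API surface:

```python
import paddle
from paddle.static import InputSpec

# `imperative_out_scale` stands for the object whose save_quantized_model
# is shown above; `model` is a trained quantized dygraph model. Both are
# placeholder names.
imperative_out_scale.save_quantized_model(
    layer=model,
    path="./quant_model/lenet",
    input_spec=[InputSpec(shape=[None, 1, 28, 28], dtype='float32')])
# Expected artifacts: ./quant_model/lenet.pdmodel and .pdiparams, with
# out_threshold attrs attached to the matched ops in the program.
```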
@@ -443,96 +442,103 @@ def save_quantized_model(self, layer, path, input_spec=None, **config):
         basename = os.path.basename(path)
         model_filename = basename + INFER_MODEL_SUFFIX
         params_filename = basename + INFER_PARAMS_SUFFIX

-        [inference_program, feed_target_names, fetch_targets] = (
+        [infer_program, feed_target_names, fetch_targets] = (
             load_inference_model(
                 dirname=dirname,
                 executor=exe,
                 model_filename=model_filename,
                 params_filename=params_filename))

+        # TODO(jc): analyse whether the dygraph model has
+        # several blocks before applying qat
+        assert infer_program.num_blocks == 1, \
+            "Quantization aware training (QAT) requires the program " \
+            "to have only one block for now. When the model contains " \
+            "if-else or while, the program will have several blocks."
-        # set output scales to the static model
-        check_behind_op = False
-        op_count = 0
-        ops_list = [key for key, _ in self._out_scale_dict.items()]
-        if len(ops_list) == 0:
-            warnings.warn(
-                "Warning: No Layer of the model while to be saved contains "
-                "the out_threshold attribute, so the generated inference "
-                "model would not contain the out_threshold.")
-        else:
-            # Because the Layer in dygraph may correspond to multiple ops
-            # in static program after being saved. To ensure correctness,
-            # the outscale collected for output of dygraph Layer can only
-            # be set to the last op in the corresponding ops in static program.
-            #
-            # We can judge the execution order of the ops which corresponding
-            # to dygraph Layer by check_behind_op
-            forward_op = None
-            for block in inference_program.blocks:
-                for op in block.ops:
-                    if op.type in utils.op_real_in_out_name:
-                        if op_count > len(ops_list):
-                            warnings.warn(
-                                "The number of Layer which has "
-                                "out_threshold attribute should be bigger than "
-                                "the op in inference model")
-                            break
-                        if check_behind_op:
-                            check_behind_op = False
-                            if op.type == "elementwise_add":
-                                if self._is_op_matched(ops_list[op_count], op,
-                                                       block):
-                                    op._set_attr("out_threshold",
-                                                 self._out_scale_dict[ops_list[
-                                                     op_count]])
-                                    op_count += 1
-                                    forward_op = None
-                                continue
-                            else:
-                                if forward_op is None:
-                                    raise ValueError(
-                                        "forward_op should not be None")
-                                if self._is_op_matched(ops_list[op_count],
-                                                       forward_op, block):
-                                    forward_op._set_attr(
-                                        "out_threshold", self._out_scale_dict[
-                                            ops_list[op_count]])
-                                    op_count += 1
-                                forward_op = None
-
-                        if op.type in ["conv2d", "depthwise_conv2d", "matmul"]:
-                            check_behind_op = True
-                            forward_op = op
-                            continue
-                        if op_count >= len(ops_list):
-                            warnings.warn(
-                                "The number of Layer which has out_threshold "
-                                "attribute should be bigger than the op in "
-                                "inference model")
-                            break
-                        if self._is_op_matched(ops_list[op_count], op, block):
-                            op._set_attr(
-                                "out_threshold",
-                                self._out_scale_dict[ops_list[op_count]])
-                            op_count += 1
-
-        self._set_skip_quant_attr(inference_program)
+        self._save_output_scale(infer_program)

+        # process skip quant
+        self._set_skip_quant_attr(infer_program)
+        # save the final quantized model that has output scales
         save_inference_model(
             dirname=dirname,
             feeded_var_names=feed_target_names,
             target_vars=fetch_targets,
             executor=exe,
-            main_program=inference_program.clone(),
+            main_program=infer_program.clone(),
             model_filename=model_filename,
             params_filename=params_filename)

         if is_dynamic_mode:
             paddle.disable_static()
+    def _gather_output_scale(self, layer):
+        """
+        Gather all output scales to self._out_scale_dict
+        """
+        with dygraph.guard():
+            layer.eval()
+            for _, sub_layer in layer.named_sublayers():
+                if self._is_target_layer(sub_layer):
+                    layer_name = sub_layer.full_name()
+                    if hasattr(sub_layer, "_quant_out_scale"):
+                        self._out_scale_dict[layer_name] = float(
+                            sub_layer._quant_out_scale)
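A minimal sketch of what the gathering step does, assuming a toy model whose sublayer already carries a recorded `_quant_out_scale` (in the real flow the attribute is set by the QAT hooks, not by hand):

```python
import paddle

class Demo(paddle.nn.Layer):
    def __init__(self):
        super(Demo, self).__init__()
        self.conv = paddle.nn.Conv2D(1, 4, 3)
        # Pretend the QAT hooks already recorded an output scale.
        self.conv._quant_out_scale = paddle.to_tensor(0.25)

# Each target sublayer contributes one entry, keyed by its unique
# full_name().
out_scale_dict = {}
model = Demo()
model.eval()
for _, sub_layer in model.named_sublayers():
    if hasattr(sub_layer, "_quant_out_scale"):
        out_scale_dict[sub_layer.full_name()] = float(
            sub_layer._quant_out_scale)
print(out_scale_dict)  # e.g. {'conv2d_0': 0.25}
```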
+    def _save_output_scale(self, infer_program):
+        """
+        Save all output scales to the corresponding ops in the static
+        inference program.
+
+        Because a Layer in dygraph may correspond to multiple ops in
+        the static program after being saved, the output scale collected
+        for a dygraph Layer can only be set on the last of its
+        corresponding ops, to ensure correctness.
+        """
+        assert infer_program.num_blocks == 1, \
+            "The inference program should only have a block."
+
+        global_block = infer_program.global_block()
+        target_ops = global_block.ops
+
+        scale_idx = 0
+        op_idx = 0
+        attr_name = "out_threshold"
+
+        for scale_name, scale_value in self._out_scale_dict.items():
+            while True:
+                if op_idx >= len(target_ops):
+                    break
+
+                op = target_ops[op_idx]
+                if not self._is_scale_op_matched(scale_name, op, global_block):
+                    op_idx += 1
+                else:
+                    weight_ops = ["conv2d", "depthwise_conv2d", "matmul"]
+                    if op.type in weight_ops and op_idx + 1 < len(target_ops) \
+                        and target_ops[op_idx + 1].type == "elementwise_add":
+                        target_ops[op_idx + 1]._set_attr(attr_name, scale_value)
+                        op_idx += 2
+                    else:
+                        op._set_attr(attr_name, scale_value)
+                        op_idx += 1
+                    scale_idx += 1
+                    break
+
+        if scale_idx != len(self._out_scale_dict):
+            _logger.warning("The model has %s output scales, but only "
+                            "%s output scales are saved."
+                            % (len(self._out_scale_dict), scale_idx))
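To see why the scale must land on the last op, here is a small pure-Python simulation of the walk, with invented op lists and scale names (no Paddle required): a conv2d with bias is exported as conv2d followed by elementwise_add, so the collected scale belongs to the add's output.

```python
# Simulated program: Conv2D (with bias) became conv2d + elementwise_add.
ops = ["conv2d", "elementwise_add", "relu"]
scales = {"conv2d_0": 0.5, "re_lu_0": 0.9}
weight_ops = ["conv2d", "depthwise_conv2d", "matmul"]

def matches(scale_name, op_type):
    # Crude stand-in for _is_scale_op_matched: substring check after
    # mapping the static op name to the dygraph naming style.
    return op_type.replace("relu", "re_lu") in scale_name

out_threshold = {}
op_idx = 0
for scale_name, scale_value in scales.items():
    while op_idx < len(ops):
        op = ops[op_idx]
        if not matches(scale_name, op):
            op_idx += 1
            continue
        if op in weight_ops and op_idx + 1 < len(ops) \
                and ops[op_idx + 1] == "elementwise_add":
            out_threshold[op_idx + 1] = scale_value  # last op gets the scale
            op_idx += 2
        else:
            out_threshold[op_idx] = scale_value
            op_idx += 1
        break
print(out_threshold)  # {1: 0.5, 2: 0.9}
```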
     def _is_target_layer(self, layer):
-        return isinstance(layer, utils.out_scale_layers_list) \
-            or 'quantized_' in layer.full_name()
+        return isinstance(layer, tuple(utils.quant_output_layers_map.values())) \
+            or ('quantized_' in layer.full_name() and \
+                'quantized_noweight' not in layer.full_name())

     def _init_scale_params(self, layer, name=None):
         """
@@ -570,27 +576,37 @@ def _create_param(in_layer, first_name, last_name, dtype):
         layer._quant_out_accum = _create_param(layer, name, "accum", dtype)
         layer._quant_out_accum.stop_gradient = True

-    # Judge whether the op in program matches the Layer in dynamic model
-    def _is_op_matched(self, layer_name, op, block):
-        output_var_names = quantization_pass._get_op_output_var_names(op)
-        for output_var_name in output_var_names:
-            output_var_tensor = block.var(output_var_name)
-            if output_var_tensor.dtype not in [
-                    core.VarDesc.VarType.FP64, core.VarDesc.VarType.FP32
-            ]:
-                return False
-
-        # Because the naming styles of static and dynamic graph are different,
-        # in order to avoid mistakes, we unify the name here.
-        op_type = output_var_names[0].split(".")[0]
-        op_type = op_type.rsplit("_", 1)[0]
-        if op_type == 'depthwise_conv2d':
-            op_type = 'conv2d'
-        if 'prelu' in op_type:
-            op_type = op_type.replace('prelu', 'p_re_lu')
-        if 'relu' in op_type:
-            op_type = op_type.replace('relu', 're_lu')
-        return op_type in layer_name
+    def _is_scale_op_matched(self, scale_name, op, block):
+        """
+        Judge from the op name and attrs whether the op in the program
+        matches the scale_name. We must know the name correspondence
+        between the dygraph and static models.
+        """
+        fp_type = [core.VarDesc.VarType.FP64, core.VarDesc.VarType.FP32]
+        if op.type in quantization_pass._op_real_in_out_name.keys():
+            output_var_names = quantization_pass._get_op_output_var_names(op)
+            for output_var_name in output_var_names:
+                output_var_tensor = block.var(output_var_name)
+                if output_var_tensor.dtype not in fp_type:
+                    return False
+
+        # Note that the items in corresponding_dict are checked in
+        # priority order
+        corresponding_dict = {
+            'conv2d_transpose': [['conv2d_transpose'], None],
+            'conv2d': [['conv2d', 'depthwise_conv2d'], None],
+            'linear': [['matmul'], None],
+            're_lu6': [['relu6'], None],
+            'p_re_lu': [['prelu'], None],
+            'leaky_re_lu': [['leaky_relu'], None],
+            're_lu': [['relu'], None],
+        }
+
+        for key, value in corresponding_dict.items():
+            if key in scale_name:
+                return (op.type in value[0]) and \
+                    (len(value) == 1 or value[1] is None or value[1](op))
+
+        return op.type in scale_name
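A small sketch of why `corresponding_dict` is order-sensitive, using only the two ReLU entries: since 're_lu' is a substring of a ReLU6 layer's name, 're_lu6' must be checked first (scale names below are illustrative).

```python
# Python 3.7+ dicts preserve insertion order, which gives the priority.
corresponding_dict = {
    're_lu6': [['relu6'], None],
    're_lu': [['relu'], None],
}

def match(scale_name, op_type):
    for key, value in corresponding_dict.items():
        if key in scale_name:
            return op_type in value[0]
    return op_type in scale_name

print(match('re_lu6_0.tmp_0', 'relu6'))  # True: matched by 're_lu6' entry
print(match('re_lu6_0.tmp_0', 'relu'))   # False: 'relu' not in ['relu6']
print(match('re_lu_1.tmp_0', 'relu'))    # True: falls through to 're_lu'
```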
     def _set_skip_quant_attr(self, program):
         block = program.global_block()
The second changed file (the quantization utils module):
@@ -30,7 +30,7 @@
     "swish": [["X"], ["Out"]],
 }

-supported_quant_layers_map = {
+quant_input_layers_map = {
     'Conv2D': paddle.nn.Conv2D,
     'Linear': paddle.nn.Linear,
     'AdaptiveAvgPool2D': paddle.nn.AdaptiveAvgPool2D,
@@ -58,8 +58,28 @@
     "fake_quantize_dequantize_moving_average_abs_max"
 ]

-out_scale_layers_list = (
-    paddle.nn.Conv2D, paddle.nn.Linear, paddle.nn.MaxPool2D,
-    paddle.nn.BatchNorm, paddle.nn.BatchNorm2D, paddle.nn.SyncBatchNorm,
-    paddle.nn.LeakyReLU, paddle.nn.PReLU, paddle.nn.ReLU, paddle.nn.ReLU6,
-    paddle.nn.Sigmoid, paddle.nn.Softmax, paddle.nn.Tanh, paddle.nn.Swish)
+quant_output_layers_map = {
+    'Conv2D': paddle.nn.Conv2D,
+    'Conv2DTranspose': paddle.nn.Conv2DTranspose,
+    'Linear': paddle.nn.Linear,
+    'AdaptiveAvgPool2D': paddle.nn.AdaptiveAvgPool2D,
+    'AdaptiveMaxPool2D': paddle.nn.AdaptiveMaxPool2D,
+    'AvgPool2D': paddle.nn.AvgPool2D,
+    'MaxPool2D': paddle.nn.MaxPool2D,
+    'BatchNorm': paddle.nn.BatchNorm,
+    'BatchNorm2D': paddle.nn.BatchNorm2D,
+    'GroupNorm': paddle.nn.GroupNorm,
+    'InstanceNorm2D': paddle.nn.InstanceNorm2D,
+    'LayerNorm': paddle.nn.LayerNorm,
+    'SyncBatchNorm': paddle.nn.SyncBatchNorm,
+    'ELU': paddle.nn.ELU,
+    'GELU': paddle.nn.GELU,
+    'LeakyReLU': paddle.nn.LeakyReLU,
+    'PReLU': paddle.nn.PReLU,
+    'ReLU': paddle.nn.ReLU,
+    'ReLU6': paddle.nn.ReLU6,
+    'Sigmoid': paddle.nn.Sigmoid,
+    'Softmax': paddle.nn.Softmax,
+    'Tanh': paddle.nn.Tanh,
+    'Swish': paddle.nn.Swish,
+}
Review comment: conv2d_transpose and depthwise_conv2d_transpose still need to be added to weight_ops.

Reply: Done