From 7ab63d8b4da99087d55a8b7a23e28b99e853c619 Mon Sep 17 00:00:00 2001 From: XGZhang11 <46363693+XGZhang11@users.noreply.github.com> Date: Mon, 12 Apr 2021 09:34:56 +0000 Subject: [PATCH 1/3] add new post-quant methods --- .../post_training_quantization.py | 177 ++++++++++++++---- .../slim/quantization/quantization_pass.py | 38 +++- 2 files changed, 174 insertions(+), 41 deletions(-) diff --git a/python/paddle/fluid/contrib/slim/quantization/post_training_quantization.py b/python/paddle/fluid/contrib/slim/quantization/post_training_quantization.py index aba6005f0cfdf0..b89e1ae84765f4 100644 --- a/python/paddle/fluid/contrib/slim/quantization/post_training_quantization.py +++ b/python/paddle/fluid/contrib/slim/quantization/post_training_quantization.py @@ -55,7 +55,7 @@ def _set_variable_data(scope, place, var_name, np_value): Set the value of var node by name, if the node exits, ''' assert isinstance(np_value, np.ndarray), \ - 'The type of value should be numpy array.' + 'The type of value should be numpy array.' var_node = scope.find_var(var_name) if var_node != None: tensor = var_node.get_tensor() @@ -138,8 +138,10 @@ def __init__(self, batch_size=10, batch_nums=None, algo="KL", + hist_perc=0.99999, quantizable_op_type=["conv2d", "depthwise_conv2d", "mul"], is_full_quantize=False, + bias_correct=False, activation_bits=8, weight_bits=8, activation_quantize_type='range_abs_max', @@ -180,7 +182,13 @@ def __init__(self, get the KL threshold for quantized activations and get the abs_max value for quantized weights. If algo='abs_max', get the abs max value for activations and weights. If algo= 'min_max', get the min - and max value for quantized activations and weights. Default is KL. + and max value for quantized activations and weights. If algo='avg', + get the average value among the max values for activations. If + algo= 'hist', get the value of 'hist_perc' quantile as the threshold. + If algo='mse', get the value which makes the quantization mse loss + minimal. Default is KL. + hist_perc(float, optional): The threshold of algo 'hist' for activations. + Default is 0.99999. quantizable_op_type(list[str], optional): List the type of ops that will be quantized. Default is ["conv2d", "depthwise_conv2d", "mul"]. @@ -188,6 +196,8 @@ def __init__(self, apply quantization to all supported quantizable op type. If set is_full_quantized as False, only apply quantization to the op type according to the input quantizable_op_type. + bias_correct(bool, optional): If set as True, use the bias correction + method of https://arxiv.org/abs/1810.05723. Default is False. activation_bits(int): quantization bit number for activation. weight_bits(int, optional): quantization bit number for weights. activation_quantize_type(str): quantization type for activation, @@ -255,7 +265,7 @@ def __init__(self, 'range_abs_max', 'moving_average_abs_max', 'abs_max' ] self._support_weight_quantize_type = ['abs_max', 'channel_wise_abs_max'] - self._support_algo_type = ['KL', 'abs_max', 'min_max'] + self._support_algo_type = ['KL', 'hist', 'avg', 'mse', 'abs_max', 'min_max'] self._dynamic_quantize_op_type = ['lstm'] self._support_quantize_op_type = \ list(set(QuantizationTransformPass._supported_quantizable_op_type + @@ -270,7 +280,7 @@ def __init__(self, "cannot be None in the same time." assert batch_size > 0, "The batch_size should be greater than 0." assert algo in self._support_algo_type, \ - "The algo should be KL, abs_max or min_max." + "The algo should be KL, hist, mse, avg, abs_max or min_max." 
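For readers of this patch, the difference between the existing 'abs_max' rule and the new 'avg' rule is easiest to see in isolation. Below is a minimal NumPy sketch of the two sampling rules described in the docstring above (illustrative only, not the patched code; the random tensor stands in for one calibration batch):

```python
import numpy as np

# One calibration batch of activations, shape (batch, channels, H, W).
acts = np.random.randn(10, 3, 32, 32).astype(np.float32)

# 'abs_max': keep the largest absolute value seen across all batches.
abs_max_threshold = float(np.max(np.abs(acts)))

# 'avg': per batch, record the mean over samples of each sample's
# abs-max; the final threshold is the mean of those per-batch records.
per_sample_max = np.max(np.abs(acts.reshape(acts.shape[0], -1)), axis=1)
batch_record = float(np.mean(per_sample_max))
avg_threshold = np.array([batch_record]).mean()  # only one batch here

print(abs_max_threshold, avg_threshold)
```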
assert activation_quantize_type in self._support_activation_quantize_type, \ "The activation_quantize_type ({}) should in ({}).".format( activation_quantize_type, self._support_activation_quantize_type) @@ -279,6 +289,7 @@ def __init__(self, weight_quantize_type, self._support_weight_quantize_type) # Save input params + self._bias_correct = bias_correct self._executor = executor self._scope = global_scope() if scope == None else scope self._model_dir = model_dir @@ -289,6 +300,7 @@ def __init__(self, self._batch_size = batch_size self._batch_nums = batch_nums self._algo = algo + self._hist_perc = hist_perc self._activation_bits = activation_bits self._weight_bits = weight_bits self._activation_quantize_type = activation_quantize_type @@ -318,13 +330,17 @@ def __init__(self, self._sampling_act_abs_min_max = {} self._sampling_act_histogram = {} self._sampling_data = {} - self._quantized_var_kl_threshold = {} + self._quantized_var_threshold = {} self._histogram_bins = 2048 # The vars for algo = min_max self._quantized_var_min = {} self._quantized_var_max = {} - # The vars for algo = abs_max - self._quantized_var_abs_max = {} + # The vars for algo = avg + self._quantized_var_avg = {} + # The best loss of algo = mse + self._best_mse_loss = {} + # The threshold for algo = abs_max, mse or avg + self._quantized_threshold = {} def quantize(self): ''' @@ -341,7 +357,7 @@ def quantize(self): self._collect_target_varnames() self._set_activation_persistable() - if self._algo == "KL": + if self._algo in ["KL", "hist"]: _logger.info("Preparation stage ...") batch_id = 0 for data in self._data_loader(): @@ -373,14 +389,17 @@ def quantize(self): batch_id += 1 if self._batch_nums and batch_id >= self._batch_nums: break + + if self._algo == 'avg': + for var_name in self._quantized_act_var_name: + self._quantized_threshold[var_name] = np.array(self._quantized_var_avg[var_name]).mean() _logger.info("Finish sampling stage, all batch: " + str(batch_id)) self._reset_activation_persistable() - if self._algo == "KL": - self._calculate_kl_threshold() - - if self._algo in ["KL", "abs_max"]: + if self._algo in ["KL", "hist"]: + self._calculate_kl_hist_threshold() + if self._algo in ["KL", "abs_max", "hist", "avg", "mse"]: self._update_program() else: self._save_input_threhold() @@ -524,16 +543,18 @@ def _sampling(self): ''' Sample the min/max, abs_max or histogram in every iterations. 
''' - if self._algo == "abs_max": - self._sample_abs_max() + if self._algo in ["avg", "abs_max"]: + self._sample_abs_max_avg() elif self._algo == "min_max": self._sample_min_max() - elif self._algo == "KL": + elif self._algo == "mse": + self._sample_mse() + elif self._algo in ["KL", "hist"]: self._sample_histogram() - def _sample_abs_max(self): + def _sample_mse(self): # Only calculate abs_max value for weight for once - if self._quantized_var_abs_max == {}: + if self._quantized_threshold == {}: for var_name in self._quantized_weight_var_name: var_tensor = _load_variable_data(self._scope, var_name) if self._weight_quantize_type == "abs_max": @@ -549,14 +570,60 @@ def _sample_abs_max(self): for i in range(var_tensor.shape[0]): abs_max_value.append( float(np.max(np.abs(var_tensor[i])))) - self._quantized_var_abs_max[var_name] = abs_max_value + self._quantized_threshold[var_name] = abs_max_value + _logger.info("MSE searching stage ...") + for var_name in self._quantized_act_var_name: + var_tensor = _load_variable_data(self._scope, var_name) + var_tensor = var_tensor.flatten() + abs_max_value = float(np.max(np.abs(var_tensor))) + s = 0.3 + best_scale = 0.0 + if var_name not in self._best_mse_loss: + self._best_mse_loss[var_name] = 100000.0 + while s <= 1.0: + scale = s * abs_max_value + s += 0.02 + bins = 2 ** (self._activation_bits -1) - 1 + quant_dequant_var = np.round(np.clip(var_tensor, 0.0, scale) / scale * bins) / bins * scale + mse_loss = ((var_tensor - quant_dequant_var) ** 2).mean() + if mse_loss <= self._best_mse_loss[var_name]: + self._best_mse_loss[var_name] = mse_loss + best_scale = scale + if best_scale > 0.0: + self._quantized_threshold[var_name] = best_scale + + def _sample_abs_max_avg(self): + # Only calculate abs_max value for weight for once + if self._quantized_threshold == {}: + for var_name in self._quantized_weight_var_name: + var_tensor = _load_variable_data(self._scope, var_name) + if self._weight_quantize_type == "abs_max": + abs_max_value = float(np.max(np.abs(var_tensor))) + elif self._weight_quantize_type == "channel_wise_abs_max": + abs_max_value = [] + if self._weight_op_pairs[ + var_name] in _channelwise_quant_axis1_ops: + for i in range(var_tensor.shape[1]): + abs_max_value.append( + float(np.max(np.abs(var_tensor[:, i])))) + else: + for i in range(var_tensor.shape[0]): + abs_max_value.append( + float(np.max(np.abs(var_tensor[i])))) + self._quantized_threshold[var_name] = abs_max_value for var_name in self._quantized_act_var_name: var_tensor = _load_variable_data(self._scope, var_name) abs_max_value = float(np.max(np.abs(var_tensor))) - if (var_name not in self._quantized_var_abs_max) or \ - (abs_max_value > self._quantized_var_abs_max[var_name]): - self._quantized_var_abs_max[var_name] = abs_max_value + if self._algo == 'avg': + if (var_name not in self._quantized_var_avg): + self._quantized_var_avg[var_name] = [] + abs_avg_value = float(np.mean(np.max(np.abs(var_tensor.reshape(var_tensor.shape[0], -1)), axis=(1)))) + self._quantized_var_avg[var_name].append(abs_avg_value) + continue + if (var_name not in self._quantized_threshold) or \ + (abs_max_value > self._quantized_threshold[var_name]): + self._quantized_threshold[var_name] = abs_max_value def _sample_min_max(self): if self._quantized_var_min == {} and self._quantized_var_max == {}: @@ -646,12 +713,12 @@ def _init_sampling_act_histogram(self): [], bins=self._histogram_bins, range=(min_val, max_val)) self._sampling_act_histogram[var_name] = [hist, hist_edeges] - def _calculate_kl_threshold(self): + def 
_calculate_kl_hist_threshold(self): ''' - Calculate the KL threshold of quantized variables. + Calculate the KL or hist threshold of quantized variables. ''' - _logger.info("Calculate KL threshold ...") - assert self._algo == "KL", "The algo should be KL to calculate kl threshold." + _logger.info("Calculate {} threshold ...".format(self._algo)) + assert self._algo in ["KL", "hist"], "The algo should be KL to calculate kl threshold." # Abs_max threshold for weights for var_name in self._quantized_weight_var_name: @@ -669,12 +736,16 @@ def _calculate_kl_threshold(self): for i in range(weight_data.shape[0]): weight_threshold.append( float(np.max(np.abs(weight_data[i])))) - self._quantized_var_kl_threshold[var_name] = weight_threshold + self._quantized_var_threshold[var_name] = weight_threshold for var_name in self._quantized_act_var_name: hist, hist_edeges = self._sampling_act_histogram[var_name] - self._quantized_var_kl_threshold[var_name] = \ - self._get_kl_scaling_factor(hist, hist_edeges) + if self._algo == "KL": + self._quantized_var_threshold[var_name] = \ + self._get_kl_scaling_factor(hist, hist_edeges) + elif self._algo == "hist": + self._quantized_var_threshold[var_name] = \ + self._get_hist_scaling_factor(hist, hist_edeges) def _update_program(self): ''' @@ -712,10 +783,10 @@ def _update_program(self): add_quant_dequant_pass.apply(graph) # save abs_max or KL threshold to scale var node - if self._algo == "KL": - scale_dict = self._quantized_var_kl_threshold + if self._algo in ["KL", "hist"]: + scale_dict = self._quantized_var_threshold else: - scale_dict = self._quantized_var_abs_max + scale_dict = self._quantized_threshold for key, val in scale_dict.items(): _set_variable_data( self._scope, @@ -734,6 +805,7 @@ def _update_program(self): freeze_pass = QuantizationFreezePass( scope=self._scope, place=self._place, + bias_correct=self._bias_correct, weight_bits=self._weight_bits, activation_bits=self._activation_bits, weight_quantize_type=self._weight_quantize_type, @@ -762,19 +834,29 @@ def analysis_and_save_info(op_node, out_var_name): if self._algo == "KL": # For compatibility, we save output threshold by two methods. save_info(op_node, out_var_name, - self._quantized_var_kl_threshold, "out_threshold", + self._quantized_var_threshold, "out_threshold", "post_kl") save_info( - op_node, out_var_name, self._quantized_var_kl_threshold, + op_node, out_var_name, self._quantized_var_threshold, argname_index[0] + str(argname_index[1]) + "_threshold", "post_kl") - elif self._algo == "abs_max": - save_info(op_node, out_var_name, self._quantized_var_abs_max, - "out_threshold", "post_abs_max") + elif self._algo == "hist": + # For compatibility, we save output threshold by two methods. 
+ save_info(op_node, out_var_name, + self._quantized_var_threshold, "out_threshold", + "post_hist") save_info( - op_node, out_var_name, self._quantized_var_abs_max, + op_node, out_var_name, self._quantized_var_threshold, argname_index[0] + str(argname_index[1]) + "_threshold", - "post_kl") + "post_hist") + + elif self._algo in ["avg", "abs_max", "mse"]: + save_info(op_node, out_var_name, self._quantized_threshold, + "out_threshold", "post_absmax") + save_info( + op_node, out_var_name, self._quantized_threshold, + argname_index[0] + str(argname_index[1]) + "_threshold", + "post_absmax") elif self._algo == "min_max": save_info(op_node, out_var_name, self._quantized_var_min, "out_min", "post_min_max") @@ -817,10 +899,27 @@ def _collect_dynamic_quantize_op_threshold(self, target_ops_type): op._set_attr("quantization_type", quantization_type) op._set_attr("bit_length", self._weight_bits) - def _get_kl_scaling_factor(self, hist, hist_edeges, num_quantized_bins=255): + def _get_hist_scaling_factor(self, hist, hist_edges): + ''' + Using the hist method to get the scaling factor. + ''' + threshold_rate = self._hist_perc + hist = hist / float(sum(hist)) + hist_sum = 0 + hist_index = 0 + for i in range(len(hist)): + hist_sum += hist[i] + if hist_sum >= threshold_rate: + hist_index = i + 1 + break + bin_width = hist_edges[1] - hist_edges[0] + return (hist_index - 0.5) * bin_width + + def _get_kl_scaling_factor(self, hist, hist_edeges): ''' Using the KL-divergenc method to get the more precise scaling factor. ''' + num_quantized_bins = 2 ** (self._activation_bits - 1) - 1 ending_iter = self._histogram_bins - 1 starting_iter = int(ending_iter * 0.7) bin_width = hist_edeges[1] - hist_edeges[0] diff --git a/python/paddle/fluid/contrib/slim/quantization/quantization_pass.py b/python/paddle/fluid/contrib/slim/quantization/quantization_pass.py index 3f9ff7295dd6bb..3545a7122f0976 100644 --- a/python/paddle/fluid/contrib/slim/quantization/quantization_pass.py +++ b/python/paddle/fluid/contrib/slim/quantization/quantization_pass.py @@ -1070,6 +1070,7 @@ class QuantizationFreezePass(object): def __init__(self, scope, place, + bias_correct=False, weight_bits=8, activation_bits=8, weight_quantize_type='abs_max', @@ -1085,6 +1086,8 @@ def __init__(self, scope(fluid.Scope): scope is used to get the weight tensor values. place(fluid.CPUPlace|fluid.CUDAPlace|str): place is used to restore the weight tensors. If it's string, It can be ``cpu``, and ``gpu:x``, where ``x`` is the index of the GPUs. + bias_correct(bool): whether use bias correction for post-training quantization. + https://arxiv.org/abs/1810.05723. weight_bits(int): quantization bit number for weights. activation_bits(int): quantization bit number for activation. weight_quantize_type(str): quantization type for weights, support 'abs_max' and @@ -1098,6 +1101,7 @@ def __init__(self, assert place is not None, \ 'The place cannot be set None.' 
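The 'hist' rule added in `_get_hist_scaling_factor` above amounts to walking a normalized histogram until `hist_perc` of the probability mass is covered, then placing the threshold half a bin below that edge. A self-contained sketch of the same arithmetic (the vectorized cumulative-sum search is a substitution for the patch's explicit loop):

```python
import numpy as np

def hist_scaling_factor(hist, hist_edges, percent=0.99999):
    # Normalize, then find the first bin whose cumulative mass reaches
    # `percent`; the threshold sits half a bin below that bin's right edge.
    hist = hist / float(hist.sum())
    cum = np.cumsum(hist)
    hist_index = int(np.searchsorted(cum, percent)) + 1
    bin_width = hist_edges[1] - hist_edges[0]
    return (hist_index - 0.5) * bin_width

acts = np.abs(np.random.randn(100000))
hist, edges = np.histogram(acts, bins=2048, range=(0.0, float(acts.max())))
print(hist_scaling_factor(hist, edges))
```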
self._scope = scope + self._bias_correct = bias_correct self._place = _get_paddle_place(place) self._weight_bits = weight_bits self._activation_bits = activation_bits @@ -1154,7 +1158,35 @@ def apply(self, graph): else: quant_axis = 0 quantized_param_v = self._quant( - param_v, scale_v, self._weight_bits, quant_axis) + param_v.copy(), scale_v, self._weight_bits, quant_axis) + if self._bias_correct == True: + eps = 1e-8 + bnt = (1 << (self._weight_bits - 1)) - 1 + if isinstance(scale_v, list): + if quant_axis == 0: + for i, s in enumerate(scale_v): + quantized_param_v[i] = quantized_param_v[i] * s / bnt + quant_bias = param_v - quantized_param_v + mean_bias = quant_bias.reshape(quant_bias.shape[0], -1).mean(-1) + std_orig = param_v.reshape(param_v.shape[0], -1).std(-1) + std_quant = quantized_param_v.reshape(quantized_param_v.shape[0], -1).std(-1) + std_bias = std_orig / (std_quant + eps) + + else: + for i, s in enumerate(scale_v): + quantized_param_v[:, i] = quantized_param_v[:, i] * s / bnt + quant_bias = param_v - quantized_param_v + mean_bias = np.array([quant_bias[:, i].mean() for i in range(quant_bias.shape[1])]) + std_orig = np.array([param_v[:, i].std() for i in range(param_v.shape[1])]) + std_quant = np.array([quantized_param_v[:, i].std() for i in range(quantized_param_v.shape[1])]) + std_bias = std_orig / (std_quant + eps) + + if mean_bias.ndim == 1: + std_bias = np.resize(std_bias, param_v.shape) + mean_bias = np.resize(mean_bias, param_v.shape) + + quantized_param_v = (mean_bias + quantized_param_v) * std_bias + quantized_param_v = self._quant(quantized_param_v, scale_v, self._weight_bits, quant_axis) self._restore_var(input_arg_name, quantized_param_v) self._remove_fake_quant_and_dequant_op(graph, op_node) @@ -1365,7 +1397,7 @@ def _is_float(self, v): def _quant(self, x, scale, num_bits, quant_axis): assert quant_axis in [0, 1], 'quant_axis should be 0 or 1 for now.' 
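The inline correction above follows the empirical rule of Banner et al. (https://arxiv.org/abs/1810.05723): shift each output channel of the dequantized weights so its mean matches the float weights, rescale so its standard deviation matches, then re-quantize. A condensed NumPy sketch of the quant_axis=0 branch, on weights flattened to (out_channels, fan_in):

```python
import numpy as np

def bias_correct_rows(w_fp, w_dq, eps=1e-8):
    # w_fp: float weights, w_dq: dequantized weights, both (out_ch, fan_in).
    # The shift restores each row's mean, the gain restores its std,
    # matching the (mean_bias + w) * std_bias form used in the pass.
    mean_bias = (w_fp - w_dq).mean(axis=1, keepdims=True)
    std_bias = w_fp.std(axis=1, keepdims=True) / \
        (w_dq.std(axis=1, keepdims=True) + eps)
    return (w_dq + mean_bias) * std_bias

bnt = (1 << 7) - 1                        # int8: 127
w = np.random.randn(8, 64)
scales = np.abs(w).max(axis=1, keepdims=True)
w_q = np.round(np.clip(w, -scales, scales) / scales * bnt)
w_dq = w_q * scales / bnt                 # dequantize per channel
w_corr = bias_correct_rows(w, w_dq)       # the pass then re-quantizes this
print(np.abs(w - w_dq).mean(), np.abs(w - w_corr).mean())
```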
bnt = (1 << (num_bits - 1)) - 1 - + eps = 1e-8 def _clip(x, scale): x[x > scale] = scale x[x < -scale] = -scale @@ -1373,6 +1405,8 @@ def _clip(x, scale): if isinstance(scale, list): for i, s in enumerate(scale): + if s == 0.0: + s = eps if quant_axis == 0: x[i] = _clip(x[i], s) x[i] = np.round(x[i] / s * bnt) From 216e9469945f69417d0871a2d6cd3c4e0330b92a Mon Sep 17 00:00:00 2001 From: XGZhang11 <46363693+XGZhang11@users.noreply.github.com> Date: Tue, 13 Apr 2021 09:34:49 +0000 Subject: [PATCH 2/3] add new methods and tests --- .../post_training_quantization.py | 97 ++++++++++++------- .../slim/quantization/quantization_pass.py | 76 ++++++++------- .../test_post_training_quantization_mnist.py | 60 ++++++++++++ ..._post_training_quantization_mobilenetv1.py | 62 ++++++++++++ .../slim/tests/test_quantization_pass.py | 18 +++- 5 files changed, 242 insertions(+), 71 deletions(-) diff --git a/python/paddle/fluid/contrib/slim/quantization/post_training_quantization.py b/python/paddle/fluid/contrib/slim/quantization/post_training_quantization.py index b89e1ae84765f4..5ccde7b4b604e1 100644 --- a/python/paddle/fluid/contrib/slim/quantization/post_training_quantization.py +++ b/python/paddle/fluid/contrib/slim/quantization/post_training_quantization.py @@ -138,10 +138,10 @@ def __init__(self, batch_size=10, batch_nums=None, algo="KL", - hist_perc=0.99999, + hist_percent=0.99999, quantizable_op_type=["conv2d", "depthwise_conv2d", "mul"], is_full_quantize=False, - bias_correct=False, + bias_correction=False, activation_bits=8, weight_bits=8, activation_quantize_type='range_abs_max', @@ -184,10 +184,10 @@ def __init__(self, value for activations and weights. If algo= 'min_max', get the min and max value for quantized activations and weights. If algo='avg', get the average value among the max values for activations. If - algo= 'hist', get the value of 'hist_perc' quantile as the threshold. + algo= 'hist', get the value of 'hist_percent' quantile as the threshold. If algo='mse', get the value which makes the quantization mse loss minimal. Default is KL. - hist_perc(float, optional): The threshold of algo 'hist' for activations. + hist_percent(float, optional): The threshold of algo 'hist' for activations. Default is 0.99999. quantizable_op_type(list[str], optional): List the type of ops that will be quantized. Default is ["conv2d", "depthwise_conv2d", @@ -196,8 +196,8 @@ def __init__(self, apply quantization to all supported quantizable op type. If set is_full_quantized as False, only apply quantization to the op type according to the input quantizable_op_type. - bias_correct(bool, optional): If set as True, use the bias correction - method of https://arxiv.org/abs/1810.05723. Default is False. + bias_correction(bool, optional): If set as True, use the bias correction + method of https://arxiv.org/abs/1810.05723. Default is False. activation_bits(int): quantization bit number for activation. weight_bits(int, optional): quantization bit number for weights. 
activation_quantize_type(str): quantization type for activation, @@ -289,7 +289,7 @@ def __init__(self, weight_quantize_type, self._support_weight_quantize_type) # Save input params - self._bias_correct = bias_correct + self._bias_correction = bias_correction self._executor = executor self._scope = global_scope() if scope == None else scope self._model_dir = model_dir @@ -300,7 +300,7 @@ def __init__(self, self._batch_size = batch_size self._batch_nums = batch_nums self._algo = algo - self._hist_perc = hist_perc + self._hist_percent = hist_percent self._activation_bits = activation_bits self._weight_bits = weight_bits self._activation_quantize_type = activation_quantize_type @@ -326,7 +326,7 @@ def __init__(self, self._quantized_weight_var_name = set() self._quantized_act_var_name = set() self._weight_op_pairs = {} - # The vars for alog = KL + # The vars for algo = KL or hist self._sampling_act_abs_min_max = {} self._sampling_act_histogram = {} self._sampling_data = {} @@ -390,13 +390,14 @@ def quantize(self): if self._batch_nums and batch_id >= self._batch_nums: break - if self._algo == 'avg': - for var_name in self._quantized_act_var_name: - self._quantized_threshold[var_name] = np.array(self._quantized_var_avg[var_name]).mean() _logger.info("Finish sampling stage, all batch: " + str(batch_id)) self._reset_activation_persistable() - + + if self._algo == 'avg': + for var_name in self._quantized_act_var_name: + self._quantized_threshold[var_name] = \ + np.array(self._quantized_var_avg[var_name]).mean() if self._algo in ["KL", "hist"]: self._calculate_kl_hist_threshold() if self._algo in ["KL", "abs_max", "hist", "avg", "mse"]: @@ -543,8 +544,10 @@ def _sampling(self): ''' Sample the min/max, abs_max or histogram in every iterations. ''' - if self._algo in ["avg", "abs_max"]: - self._sample_abs_max_avg() + if self._algo == "abs_max": + self._sample_abs_max() + elif self._algo == "avg": + self._sample_avg() elif self._algo == "min_max": self._sample_min_max() elif self._algo == "mse": @@ -553,7 +556,6 @@ def _sampling(self): self._sample_histogram() def _sample_mse(self): - # Only calculate abs_max value for weight for once if self._quantized_threshold == {}: for var_name in self._quantized_weight_var_name: var_tensor = _load_variable_data(self._scope, var_name) @@ -571,15 +573,16 @@ def _sample_mse(self): abs_max_value.append( float(np.max(np.abs(var_tensor[i])))) self._quantized_threshold[var_name] = abs_max_value + + #Search for the best threshold for activations _logger.info("MSE searching stage ...") for var_name in self._quantized_act_var_name: var_tensor = _load_variable_data(self._scope, var_name) var_tensor = var_tensor.flatten() abs_max_value = float(np.max(np.abs(var_tensor))) s = 0.3 - best_scale = 0.0 if var_name not in self._best_mse_loss: - self._best_mse_loss[var_name] = 100000.0 + self._best_mse_loss[var_name] = float('inf') while s <= 1.0: scale = s * abs_max_value s += 0.02 @@ -588,12 +591,9 @@ def _sample_mse(self): bins = 2 ** (self._activation_bits -1) - 1 quant_dequant_var = np.round(np.clip(var_tensor, 0.0, scale) / scale * bins) / bins * scale mse_loss = ((var_tensor - quant_dequant_var) ** 2).mean() if mse_loss <= self._best_mse_loss[var_name]: self._best_mse_loss[var_name] = mse_loss - best_scale = scale - if best_scale > 0.0: - self._quantized_threshold[var_name] = best_scale + self._quantized_threshold[var_name] = scale - def _sample_abs_max_avg(self): - # Only calculate abs_max value for weight for once + def _sample_avg(self): if self._quantized_threshold == {}: for var_name in self._quantized_weight_var_name: var_tensor = _load_variable_data(self._scope, var_name) @@ -611,16 +611,39 @@ def 
_sample_abs_max_avg(self): abs_max_value.append( float(np.max(np.abs(var_tensor[i])))) self._quantized_threshold[var_name] = abs_max_value - + + for var_name in self._quantized_act_var_name: + var_tensor = _load_variable_data(self._scope, var_name) + abs_max_value = float(np.max(np.abs(var_tensor))) + if (var_name not in self._quantized_var_avg): + self._quantized_var_avg[var_name] = [] + abs_avg_value = float(np.mean(np.max( \ + np.abs(var_tensor.reshape(var_tensor.shape[0], -1)), axis=(1)))) + self._quantized_var_avg[var_name].append(abs_avg_value) + continue + + def _sample_abs_max(self): + if self._quantized_threshold == {}: + for var_name in self._quantized_weight_var_name: + var_tensor = _load_variable_data(self._scope, var_name) + if self._weight_quantize_type == "abs_max": + abs_max_value = float(np.max(np.abs(var_tensor))) + elif self._weight_quantize_type == "channel_wise_abs_max": + abs_max_value = [] + if self._weight_op_pairs[ + var_name] in _channelwise_quant_axis1_ops: + for i in range(var_tensor.shape[1]): + abs_max_value.append( + float(np.max(np.abs(var_tensor[:, i])))) + else: + for i in range(var_tensor.shape[0]): + abs_max_value.append( + float(np.max(np.abs(var_tensor[i])))) + self._quantized_threshold[var_name] = abs_max_value + for var_name in self._quantized_act_var_name: + var_tensor = _load_variable_data(self._scope, var_name) + abs_max_value = float(np.max(np.abs(var_tensor))) - if self._algo == 'avg': - if (var_name not in self._quantized_var_avg): - self._quantized_var_avg[var_name] = [] - abs_avg_value = float(np.mean(np.max(np.abs(var_tensor.reshape(var_tensor.shape[0], -1)), axis=(1)))) - self._quantized_var_avg[var_name].append(abs_avg_value) - continue if (var_name not in self._quantized_threshold) or \ (abs_max_value > self._quantized_threshold[var_name]): self._quantized_threshold[var_name] = abs_max_value @@ -718,7 +741,7 @@ def _calculate_kl_hist_threshold(self): Calculate the KL or hist threshold of quantized variables. ''' _logger.info("Calculate {} threshold ...".format(self._algo)) - assert self._algo in ["KL", "hist"], "The algo should be KL to calculate kl threshold." + assert self._algo in ["KL", "hist"], "The algo should be KL or hist." # Abs_max threshold for weights for var_name in self._quantized_weight_var_name: @@ -751,7 +774,7 @@ def _update_program(self): ''' Use QuantizationTransformPass and AddQuantDequantPass to insert fake_quantize, fake_dequantize and fake_quant_dequant op. - Besides, save all kl threshold to the scale var node. + Besides, save all thresholds to the scale var node. 
''' _logger.info("Update the program ...") graph = IrGraph(core.Graph(self._program.desc), for_test=True) @@ -782,7 +805,7 @@ def _update_program(self): quantizable_op_type=minor_quantizable_op_types) add_quant_dequant_pass.apply(graph) - # save abs_max or KL threshold to scale var node + # save threshold to scale var node if self._algo in ["KL", "hist"]: scale_dict = self._quantized_var_threshold else: @@ -805,7 +828,7 @@ def _update_program(self): freeze_pass = QuantizationFreezePass( scope=self._scope, place=self._place, - bias_correct=self._bias_correct, + bias_correction=self._bias_correction, weight_bits=self._weight_bits, activation_bits=self._activation_bits, weight_quantize_type=self._weight_quantize_type, @@ -852,11 +875,11 @@ def analysis_and_save_info(op_node, out_var_name): elif self._algo in ["avg", "abs_max", "mse"]: save_info(op_node, out_var_name, self._quantized_threshold, - "out_threshold", "post_absmax") + "out_threshold", "post_" + str(self._algo)) save_info( op_node, out_var_name, self._quantized_threshold, argname_index[0] + str(argname_index[1]) + "_threshold", - "post_absmax") + "post_" + str(self._algo)) elif self._algo == "min_max": save_info(op_node, out_var_name, self._quantized_var_min, "out_min", "post_min_max") @@ -903,7 +926,7 @@ def _get_hist_scaling_factor(self, hist, hist_edges): ''' Using the hist method to get the scaling factor. ''' - threshold_rate = self._hist_perc + threshold_rate = self._hist_percent hist = hist / float(sum(hist)) hist_sum = 0 hist_index = 0 diff --git a/python/paddle/fluid/contrib/slim/quantization/quantization_pass.py b/python/paddle/fluid/contrib/slim/quantization/quantization_pass.py index 3545a7122f0976..b9f8bdcb715a83 100644 --- a/python/paddle/fluid/contrib/slim/quantization/quantization_pass.py +++ b/python/paddle/fluid/contrib/slim/quantization/quantization_pass.py @@ -1070,7 +1070,7 @@ class QuantizationFreezePass(object): def __init__(self, scope, place, - bias_correct=False, + bias_correction=False, weight_bits=8, activation_bits=8, weight_quantize_type='abs_max', @@ -1086,7 +1086,7 @@ def __init__(self, scope(fluid.Scope): scope is used to get the weight tensor values. place(fluid.CPUPlace|fluid.CUDAPlace|str): place is used to restore the weight tensors. If it's string, It can be ``cpu``, and ``gpu:x``, where ``x`` is the index of the GPUs. - bias_correct(bool): whether use bias correction for post-training quantization. + bias_correction(bool): whether use bias correction for post-training quantization. https://arxiv.org/abs/1810.05723. weight_bits(int): quantization bit number for weights. activation_bits(int): quantization bit number for activation. @@ -1101,7 +1101,7 @@ def __init__(self, assert place is not None, \ 'The place cannot be set None.' 
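For reference, the scale search that `_sample_mse` (patch 2 above) performs, in isolation: candidate scales sweep from 0.3 * abs_max to abs_max in steps of 0.02, and the quantize-dequantize scale with the lowest mean squared error wins. A runnable sketch; the one-sided clip to [0, scale] is copied from the patch as-is:

```python
import numpy as np

def mse_search_scale(x, activation_bits=8):
    x = x.flatten()
    abs_max = float(np.max(np.abs(x)))
    bins = 2 ** (activation_bits - 1) - 1
    best_loss, best_scale = float('inf'), abs_max
    s = 0.3
    while s <= 1.0:
        scale = s * abs_max
        s += 0.02
        # Quantize-dequantize with the candidate scale, as in the patch.
        x_qdq = np.round(np.clip(x, 0.0, scale) / scale * bins) / bins * scale
        loss = float(((x - x_qdq) ** 2).mean())
        if loss <= best_loss:
            best_loss, best_scale = loss, scale
    return best_scale

print(mse_search_scale(np.random.randn(4096)))
```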
self._scope = scope - self._bias_correct = bias_correct + self._bias_correction = bias_correction self._place = _get_paddle_place(place) self._weight_bits = weight_bits self._activation_bits = activation_bits @@ -1159,34 +1159,9 @@ def apply(self, graph): quant_axis = 0 quantized_param_v = self._quant( param_v.copy(), scale_v, self._weight_bits, quant_axis) - if self._bias_correct == True: - eps = 1e-8 - bnt = (1 << (self._weight_bits - 1)) - 1 - if isinstance(scale_v, list): - if quant_axis == 0: - for i, s in enumerate(scale_v): - quantized_param_v[i] = quantized_param_v[i] * s / bnt - quant_bias = param_v - quantized_param_v - mean_bias = quant_bias.reshape(quant_bias.shape[0], -1).mean(-1) - std_orig = param_v.reshape(param_v.shape[0], -1).std(-1) - std_quant = quantized_param_v.reshape(quantized_param_v.shape[0], -1).std(-1) - std_bias = std_orig / (std_quant + eps) - - else: - for i, s in enumerate(scale_v): - quantized_param_v[:, i] = quantized_param_v[:, i] * s / bnt - quant_bias = param_v - quantized_param_v - mean_bias = np.array([quant_bias[:, i].mean() for i in range(quant_bias.shape[1])]) - std_orig = np.array([param_v[:, i].std() for i in range(param_v.shape[1])]) - std_quant = np.array([quantized_param_v[:, i].std() for i in range(quantized_param_v.shape[1])]) - std_bias = std_orig / (std_quant + eps) - - if mean_bias.ndim == 1: - std_bias = np.resize(std_bias, param_v.shape) - mean_bias = np.resize(mean_bias, param_v.shape) - - quantized_param_v = (mean_bias + quantized_param_v) * std_bias - quantized_param_v = self._quant(quantized_param_v, scale_v, self._weight_bits, quant_axis) + if self._bias_correction == True: + quantized_param_v = self._bias_correction_w( + param_v, quantized_param_v, scale_v, quant_axis) self._restore_var(input_arg_name, quantized_param_v) self._remove_fake_quant_and_dequant_op(graph, op_node) @@ -1397,7 +1372,6 @@ def _is_float(self, v): def _quant(self, x, scale, num_bits, quant_axis): assert quant_axis in [0, 1], 'quant_axis should be 0 or 1 for now.' 
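The `_quant` helper, whose body continues below, gains a guard for all-zero channels: their abs-max scale is 0.0 and would otherwise divide by zero. A standalone version of the per-channel branch under that guard:

```python
import numpy as np

def quant_channelwise(x, scales, num_bits=8, quant_axis=0):
    # Per-channel abs-max quantization with the zero-scale guard
    # introduced in this patch.
    bnt = (1 << (num_bits - 1)) - 1
    x = x.astype(np.float64).copy()
    for i, s in enumerate(scales):
        if s == 0.0:
            s = 1e-8
        sl = x[i] if quant_axis == 0 else x[:, i]
        q = np.round(np.clip(sl, -s, s) / s * bnt)
        if quant_axis == 0:
            x[i] = q
        else:
            x[:, i] = q
    return x

w = np.random.randn(4, 8)
w[2] = 0.0                      # dead channel -> scale 0.0
print(quant_channelwise(w, np.abs(w).max(axis=1)))
```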
bnt = (1 << (num_bits - 1)) - 1 - eps = 1e-8 def _clip(x, scale): x[x > scale] = scale x[x < -scale] = -scale @@ -1406,7 +1380,7 @@ def _clip(x, scale): if isinstance(scale, list): for i, s in enumerate(scale): if s == 0.0: - s = eps + s = 1e-8 if quant_axis == 0: x[i] = _clip(x[i], s) x[i] = np.round(x[i] / s * bnt) @@ -1418,6 +1392,42 @@ def _clip(x, scale): x = np.round(x / scale * bnt) return x + def _bias_correction_w(self, x, x_quant, scale_v, quant_axis): + ''' + Bias correction for weight + ''' + eps = 1e-8 + bnt = (1 << (self._weight_bits - 1)) - 1 + x_dequant = x_quant.copy() + if isinstance(scale_v, list): + if quant_axis == 0: + for i, s in enumerate(scale_v): + x_dequant[i] = x_dequant[i] * s / bnt + quant_bias = x - x_dequant + mean_bias = quant_bias.reshape(quant_bias.shape[0], -1).mean(-1) + std_orig = x.reshape(x.shape[0], -1).std(-1) + std_quant = x_dequant.reshape(x_dequant.shape[0], -1).std(-1) + std_bias = std_orig / (std_quant + eps) + else: + for i, s in enumerate(scale_v): + x_dequant[:, i] = x_quant[:, i] * s / bnt + quant_bias = x - x_dequant + mean_bias = np.array([quant_bias[:, i].mean() for i in range(quant_bias.shape[1])]) + std_orig = np.array([x[:, i].std() for i in range(x.shape[1])]) + std_quant = np.array([x_dequant[:, i].std() for i in range(x_dequant.shape[1])]) + std_bias = std_orig / (std_quant + eps) + else: + x_dequant = x_quant * scale_v / bnt + mean_bias = (x - x_dequant).mean() + std_bias = x.std() / (x_dequant.std() + eps) + if mean_bias.ndim == 1: + std_bias = np.resize(std_bias, x.shape) + mean_bias = np.resize(mean_bias, x.shape) + + x_dequant = (mean_bias + x_dequant) * std_bias + quantized_param_v = self._quant(x_dequant, scale_v, self._weight_bits, quant_axis) + return quantized_param_v + class ConvertToInt8Pass(object): def __init__(self, scope, place, quantizable_op_type=None): diff --git a/python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_mnist.py b/python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_mnist.py index 3ea1c84f976a85..da5c5d6dc9441b 100644 --- a/python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_mnist.py +++ b/python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_mnist.py @@ -204,6 +204,66 @@ def test_post_training_kl(self): quant_iterations) +class TestPostTraininghistForMnist(TestPostTrainingQuantization): + def test_post_training_hist(self): + model_name = "mnist_model" + data_url = "http://paddle-inference-dist.bj.bcebos.com/int8/mnist_model.tar.gz" + data_md5 = "be71d3997ec35ac2a65ae8a145e2887c" + algo = "hist" + quantizable_op_type = ["conv2d", "depthwise_conv2d", "mul"] + is_full_quantize = False + is_use_cache_file = False + is_optimize_model = True + diff_threshold = 0.01 + batch_size = 10 + infer_iterations = 50 + quant_iterations = 5 + self.run_test(model_name, data_url, data_md5, algo, quantizable_op_type, + is_full_quantize, is_use_cache_file, is_optimize_model, + diff_threshold, batch_size, infer_iterations, + quant_iterations) + + +class TestPostTrainingmseForMnist(TestPostTrainingQuantization): + def test_post_training_mse(self): + model_name = "mnist_model" + data_url = "http://paddle-inference-dist.bj.bcebos.com/int8/mnist_model.tar.gz" + data_md5 = "be71d3997ec35ac2a65ae8a145e2887c" + algo = "mse" + quantizable_op_type = ["conv2d", "depthwise_conv2d", "mul"] + is_full_quantize = False + is_use_cache_file = False + is_optimize_model = True + diff_threshold = 0.01 + batch_size = 10 + infer_iterations = 50 + quant_iterations = 5 + 
self.run_test(model_name, data_url, data_md5, algo, quantizable_op_type, + is_full_quantize, is_use_cache_file, is_optimize_model, + diff_threshold, batch_size, infer_iterations, + quant_iterations) + + +class TestPostTrainingavgForMnist(TestPostTrainingQuantization): + def test_post_training_avg(self): + model_name = "mnist_model" + data_url = "http://paddle-inference-dist.bj.bcebos.com/int8/mnist_model.tar.gz" + data_md5 = "be71d3997ec35ac2a65ae8a145e2887c" + algo = "avg" + quantizable_op_type = ["conv2d", "depthwise_conv2d", "mul"] + is_full_quantize = False + is_use_cache_file = False + is_optimize_model = True + diff_threshold = 0.01 + batch_size = 10 + infer_iterations = 50 + quant_iterations = 5 + self.run_test(model_name, data_url, data_md5, algo, quantizable_op_type, + is_full_quantize, is_use_cache_file, is_optimize_model, + diff_threshold, batch_size, infer_iterations, + quant_iterations) + + class TestPostTrainingAbsMaxForMnist(TestPostTrainingQuantization): def test_post_training_abs_max(self): model_name = "mnist_model" diff --git a/python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_mobilenetv1.py b/python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_mobilenetv1.py index 18389d9433b9a5..2614804a3cc752 100644 --- a/python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_mobilenetv1.py +++ b/python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_mobilenetv1.py @@ -327,6 +327,68 @@ def test_post_training_kl_mobilenetv1(self): is_full_quantize, is_use_cache_file, is_optimize_model, diff_threshold) +class TestPostTrainingavgForMobilenetv1(TestPostTrainingQuantization): + def test_post_training_avg_mobilenetv1(self): + model = "MobileNet-V1" + algo = "avg" + data_urls = [ + 'http://paddle-inference-dist.bj.bcebos.com/int8/mobilenetv1_int8_model.tar.gz' + ] + data_md5s = ['13892b0716d26443a8cdea15b3c6438b'] + quantizable_op_type = [ + "conv2d", + "depthwise_conv2d", + "mul", + ] + is_full_quantize = False + is_use_cache_file = False + is_optimize_model = True + diff_threshold = 0.025 + self.run_test(model, algo, data_urls, data_md5s, quantizable_op_type, + is_full_quantize, is_use_cache_file, is_optimize_model, + diff_threshold) + +class TestPostTraininghistForMobilenetv1(TestPostTrainingQuantization): + def test_post_training_hist_mobilenetv1(self): + model = "MobileNet-V1" + algo = "hist" + data_urls = [ + 'http://paddle-inference-dist.bj.bcebos.com/int8/mobilenetv1_int8_model.tar.gz' + ] + data_md5s = ['13892b0716d26443a8cdea15b3c6438b'] + quantizable_op_type = [ + "conv2d", + "depthwise_conv2d", + "mul", + ] + is_full_quantize = False + is_use_cache_file = False + is_optimize_model = True + diff_threshold = 0.025 + self.run_test(model, algo, data_urls, data_md5s, quantizable_op_type, + is_full_quantize, is_use_cache_file, is_optimize_model, + diff_threshold) + +class TestPostTrainingmseForMobilenetv1(TestPostTrainingQuantization): + def test_post_training_mse_mobilenetv1(self): + model = "MobileNet-V1" + algo = "mse" + data_urls = [ + 'http://paddle-inference-dist.bj.bcebos.com/int8/mobilenetv1_int8_model.tar.gz' + ] + data_md5s = ['13892b0716d26443a8cdea15b3c6438b'] + quantizable_op_type = [ + "conv2d", + "depthwise_conv2d", + "mul", + ] + is_full_quantize = False + is_use_cache_file = False + is_optimize_model = True + diff_threshold = 0.025 + self.run_test(model, algo, data_urls, data_md5s, quantizable_op_type, + is_full_quantize, is_use_cache_file, is_optimize_model, + diff_threshold) class 
TestPostTrainingAbsMaxForMobilenetv1(TestPostTrainingQuantization): def test_post_training_abs_max_mobilenetv1(self): diff --git a/python/paddle/fluid/contrib/slim/tests/test_quantization_pass.py b/python/paddle/fluid/contrib/slim/tests/test_quantization_pass.py index 768a9ba7cfc3e7..790213d4b02924 100644 --- a/python/paddle/fluid/contrib/slim/tests/test_quantization_pass.py +++ b/python/paddle/fluid/contrib/slim/tests/test_quantization_pass.py @@ -257,6 +257,7 @@ def freeze_graph(self, use_cuda, seed, activation_quant_type, + bias_correction=False, weight_quant_type='abs_max', for_ci=True, quant_skip_pattern='skip_quant'): @@ -355,7 +356,8 @@ def build_program(main, startup, is_test): # Freeze graph for inference, but the weight of fc/conv is still float type. freeze_pass = QuantizationFreezePass( - scope=scope, place=place, weight_quantize_type=weight_quant_type) + scope=scope, place=place, bias_correction=bias_correction, \ + weight_quantize_type=weight_quant_type) freeze_pass.apply(test_graph) if not for_ci: marked_nodes = set() @@ -472,6 +474,13 @@ def test_freeze_graph_cpu_dynamic(self): def test_freeze_graph_cuda_static(self): if fluid.core.is_compiled_with_cuda(): with fluid.unique_name.guard(): + self.freeze_graph( + True, + seed=1, + activation_quant_type='range_abs_max', + bias_correction=True, + weight_quant_type='abs_max', + for_ci=True) self.freeze_graph( True, seed=1, @@ -496,6 +505,13 @@ def test_freeze_graph_cuda_static(self): activation_quant_type='moving_average_abs_max', weight_quant_type='channel_wise_abs_max', for_ci=True) + self.freeze_graph( + True, + seed=1, + activation_quant_type='moving_average_abs_max', + bias_correction=True, + weight_quant_type='channel_wise_abs_max', + for_ci=True) def test_freeze_graph_cpu_static(self): with fluid.unique_name.guard(): From 724e56c35d28e2355922a786adc8ee16746d7ceb Mon Sep 17 00:00:00 2001 From: XGZhang11 <46363693+XGZhang11@users.noreply.github.com> Date: Tue, 13 Apr 2021 17:54:43 +0000 Subject: [PATCH 3/3] code style changed --- .../post_training_quantization.py | 41 +++++++++---------- .../slim/quantization/quantization_pass.py | 11 +++-- ..._post_training_quantization_mobilenetv1.py | 22 +--------- 3 files changed, 29 insertions(+), 45 deletions(-) diff --git a/python/paddle/fluid/contrib/slim/quantization/post_training_quantization.py b/python/paddle/fluid/contrib/slim/quantization/post_training_quantization.py index 5ccde7b4b604e1..bc2e2dc9b6562c 100644 --- a/python/paddle/fluid/contrib/slim/quantization/post_training_quantization.py +++ b/python/paddle/fluid/contrib/slim/quantization/post_training_quantization.py @@ -265,7 +265,9 @@ def __init__(self, 'range_abs_max', 'moving_average_abs_max', 'abs_max' ] self._support_weight_quantize_type = ['abs_max', 'channel_wise_abs_max'] - self._support_algo_type = ['KL', 'hist', 'avg', 'mse', 'abs_max', 'min_max'] + self._support_algo_type = [ + 'KL', 'hist', 'avg', 'mse', 'abs_max', 'min_max' + ] self._dynamic_quantize_op_type = ['lstm'] self._support_quantize_op_type = \ list(set(QuantizationTransformPass._supported_quantizable_op_type + @@ -389,11 +391,8 @@ def quantize(self): batch_id += 1 if self._batch_nums and batch_id >= self._batch_nums: break - _logger.info("Finish sampling stage, all batch: " + str(batch_id)) - self._reset_activation_persistable() - if self._algo == 'avg': for var_name in self._quantized_act_var_name: self._quantized_threshold[var_name] = \ @@ -573,8 +572,6 @@ def _sample_mse(self): abs_max_value.append( float(np.max(np.abs(var_tensor[i])))) 
self._quantized_threshold[var_name] = abs_max_value - - #Search for the best threshold for activations _logger.info("MSE searching stage ...") for var_name in self._quantized_act_var_name: var_tensor = _load_variable_data(self._scope, var_name) @@ -586,13 +583,15 @@ def _sample_mse(self): while s <= 1.0: scale = s * abs_max_value s += 0.02 - bins = 2 ** (self._activation_bits -1) - 1 - quant_dequant_var = np.round(np.clip(var_tensor, 0.0, scale) / scale * bins) / bins * scale - mse_loss = ((var_tensor - quant_dequant_var) ** 2).mean() + bins = 2**(self._activation_bits - 1) - 1 + quant_dequant_var = np.round( + np.clip(var_tensor, 0.0, scale) / scale * + bins) / bins * scale + mse_loss = ((var_tensor - quant_dequant_var)**2).mean() if mse_loss <= self._best_mse_loss[var_name]: self._best_mse_loss[var_name] = mse_loss self._quantized_threshold[var_name] = scale - + def _sample_avg(self): if self._quantized_threshold == {}: for var_name in self._quantized_weight_var_name: @@ -611,7 +610,7 @@ def _sample_avg(self): abs_max_value.append( float(np.max(np.abs(var_tensor[i])))) self._quantized_threshold[var_name] = abs_max_value - + for var_name in self._quantized_act_var_name: var_tensor = _load_variable_data(self._scope, var_name) abs_max_value = float(np.max(np.abs(var_tensor))) @@ -621,8 +620,8 @@ def _sample_avg(self): np.abs(var_tensor.reshape(var_tensor.shape[0], -1)), axis=(1)))) self._quantized_var_avg[var_name].append(abs_avg_value) continue - - def _sample_abs_max(self): + + def _sample_abs_max(self): if self._quantized_threshold == {}: for var_name in self._quantized_weight_var_name: var_tensor = _load_variable_data(self._scope, var_name) @@ -640,7 +639,7 @@ def _sample_abs_max(self): abs_max_value.append( float(np.max(np.abs(var_tensor[i])))) self._quantized_threshold[var_name] = abs_max_value - + for var_name in self._quantized_act_var_name: var_tensor = _load_variable_data(self._scope, var_name) abs_max_value = float(np.max(np.abs(var_tensor))) @@ -856,18 +855,16 @@ def analysis_and_save_info(op_node, out_var_name): out_var_name + " is not the output of the op" if self._algo == "KL": # For compatibility, we save output threshold by two methods. - save_info(op_node, out_var_name, - self._quantized_var_threshold, "out_threshold", - "post_kl") + save_info(op_node, out_var_name, self._quantized_var_threshold, + "out_threshold", "post_kl") save_info( op_node, out_var_name, self._quantized_var_threshold, argname_index[0] + str(argname_index[1]) + "_threshold", "post_kl") elif self._algo == "hist": # For compatibility, we save output threshold by two methods. - save_info(op_node, out_var_name, - self._quantized_var_threshold, "out_threshold", - "post_hist") + save_info(op_node, out_var_name, self._quantized_var_threshold, + "out_threshold", "post_hist") save_info( op_node, out_var_name, self._quantized_var_threshold, argname_index[0] + str(argname_index[1]) + "_threshold", @@ -926,7 +923,7 @@ def _get_hist_scaling_factor(self, hist, hist_edges): ''' Using the hist method to get the scaling factor. ''' - threshold_rate = self._hist_percent + threshold_rate = self._hist_percent hist = hist / float(sum(hist)) hist_sum = 0 hist_index = 0 @@ -942,7 +939,7 @@ def _get_kl_scaling_factor(self, hist, hist_edeges): ''' Using the KL-divergenc method to get the more precise scaling factor. 
''' - num_quantized_bins = 2 ** (self._activation_bits - 1) - 1 + num_quantized_bins = 2**(self._activation_bits - 1) - 1 ending_iter = self._histogram_bins - 1 starting_iter = int(ending_iter * 0.7) bin_width = hist_edeges[1] - hist_edeges[0] diff --git a/python/paddle/fluid/contrib/slim/quantization/quantization_pass.py b/python/paddle/fluid/contrib/slim/quantization/quantization_pass.py index b9f8bdcb715a83..79aad8c8bc53d3 100644 --- a/python/paddle/fluid/contrib/slim/quantization/quantization_pass.py +++ b/python/paddle/fluid/contrib/slim/quantization/quantization_pass.py @@ -1372,6 +1372,7 @@ def _is_float(self, v): def _quant(self, x, scale, num_bits, quant_axis): assert quant_axis in [0, 1], 'quant_axis should be 0 or 1 for now.' bnt = (1 << (num_bits - 1)) - 1 + def _clip(x, scale): x[x > scale] = scale x[x < -scale] = -scale @@ -1412,9 +1413,12 @@ def _bias_correction_w(self, x, x_quant, scale_v, quant_axis): for i, s in enumerate(scale_v): x_dequant[:, i] = x_quant[:, i] * s / bnt quant_bias = x - x_dequant - mean_bias = np.array([quant_bias[:, i].mean() for i in range(quant_bias.shape[1])]) + mean_bias = np.array([ + quant_bias[:, i].mean() for i in range(quant_bias.shape[1]) + ]) std_orig = np.array([x[:, i].std() for i in range(x.shape[1])]) - std_quant = np.array([x_dequant[:, i].std() for i in range(x_dequant.shape[1])]) + std_quant = np.array( + [x_dequant[:, i].std() for i in range(x_dequant.shape[1])]) std_bias = std_orig / (std_quant + eps) else: x_dequant = x_quant * scale_v / bnt @@ -1425,7 +1429,8 @@ def _bias_correction_w(self, x, x_quant, scale_v, quant_axis): mean_bias = np.resize(mean_bias, x.shape) x_dequant = (mean_bias + x_dequant) * std_bias - quantized_param_v = self._quant(x_dequant, scale_v, self._weight_bits, quant_axis) + quantized_param_v = self._quant(x_dequant, scale_v, self._weight_bits, + quant_axis) return quantized_param_v diff --git a/python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_mobilenetv1.py b/python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_mobilenetv1.py index 2614804a3cc752..71611048610060 100644 --- a/python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_mobilenetv1.py +++ b/python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_mobilenetv1.py @@ -327,6 +327,7 @@ def test_post_training_kl_mobilenetv1(self): is_full_quantize, is_use_cache_file, is_optimize_model, diff_threshold) + class TestPostTrainingavgForMobilenetv1(TestPostTrainingQuantization): def test_post_training_avg_mobilenetv1(self): model = "MobileNet-V1" @@ -348,6 +349,7 @@ def test_post_training_avg_mobilenetv1(self): is_full_quantize, is_use_cache_file, is_optimize_model, diff_threshold) + class TestPostTraininghistForMobilenetv1(TestPostTrainingQuantization): def test_post_training_hist_mobilenetv1(self): model = "MobileNet-V1" @@ -369,26 +371,6 @@ def test_post_training_hist_mobilenetv1(self): is_full_quantize, is_use_cache_file, is_optimize_model, diff_threshold) -class TestPostTrainingmseForMobilenetv1(TestPostTrainingQuantization): - def test_post_training_mse_mobilenetv1(self): - model = "MobileNet-V1" - algo = "mse" - data_urls = [ - 'http://paddle-inference-dist.bj.bcebos.com/int8/mobilenetv1_int8_model.tar.gz' - ] - data_md5s = ['13892b0716d26443a8cdea15b3c6438b'] - quantizable_op_type = [ - "conv2d", - "depthwise_conv2d", - "mul", - ] - is_full_quantize = False - is_use_cache_file = False - is_optimize_model = True - diff_threshold = 0.025 - self.run_test(model, algo, data_urls, 
data_md5s, quantizable_op_type, - is_full_quantize, is_use_cache_file, is_optimize_model, - diff_threshold) class TestPostTrainingAbsMaxForMobilenetv1(TestPostTrainingQuantization): def test_post_training_abs_max_mobilenetv1(self):
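Taken together, a hypothetical calibration driver exercising the options this series adds might look as follows. The constructor argument names come from the docstring in patch 1; `sample_generator`, `model_dir`, and `save_path` are placeholders rather than anything introduced here, and `save_quantized_model` is assumed to be the class's existing save entry point:

```python
import paddle.fluid as fluid
from paddle.fluid.contrib.slim.quantization import PostTrainingQuantization

def quantize_model(model_dir, sample_generator, save_path):
    # model_dir / sample_generator / save_path are placeholders: a saved
    # float inference model, a calibration data reader, and an output dir.
    exe = fluid.Executor(fluid.CPUPlace())
    ptq = PostTrainingQuantization(
        executor=exe,
        model_dir=model_dir,
        sample_generator=sample_generator,
        batch_size=10,
        batch_nums=10,
        algo="mse",                  # new: 'hist', 'avg' and 'mse' accepted
        hist_percent=0.99999,        # consulted only when algo='hist'
        bias_correction=True,        # new: weight correction per 1810.05723
        quantizable_op_type=["conv2d", "depthwise_conv2d", "mul"])
    ptq.quantize()
    ptq.save_quantized_model(save_path)
```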