From 7ab63d8b4da99087d55a8b7a23e28b99e853c619 Mon Sep 17 00:00:00 2001 From: XGZhang11 <46363693+XGZhang11@users.noreply.github.com> Date: Mon, 12 Apr 2021 09:34:56 +0000 Subject: [PATCH 1/3] add new post-quant methods --- .../post_training_quantization.py | 177 ++++++++++++++---- .../slim/quantization/quantization_pass.py | 38 +++- 2 files changed, 174 insertions(+), 41 deletions(-) diff --git a/python/paddle/fluid/contrib/slim/quantization/post_training_quantization.py b/python/paddle/fluid/contrib/slim/quantization/post_training_quantization.py index aba6005f0cfdf0..b89e1ae84765f4 100644 --- a/python/paddle/fluid/contrib/slim/quantization/post_training_quantization.py +++ b/python/paddle/fluid/contrib/slim/quantization/post_training_quantization.py @@ -55,7 +55,7 @@ def _set_variable_data(scope, place, var_name, np_value): Set the value of var node by name, if the node exits, ''' assert isinstance(np_value, np.ndarray), \ - 'The type of value should be numpy array.' + 'The type of value should be numpy array.' var_node = scope.find_var(var_name) if var_node != None: tensor = var_node.get_tensor() @@ -138,8 +138,10 @@ def __init__(self, batch_size=10, batch_nums=None, algo="KL", + hist_perc=0.99999, quantizable_op_type=["conv2d", "depthwise_conv2d", "mul"], is_full_quantize=False, + bias_correct=False, activation_bits=8, weight_bits=8, activation_quantize_type='range_abs_max', @@ -180,7 +182,13 @@ def __init__(self, get the KL threshold for quantized activations and get the abs_max value for quantized weights. If algo='abs_max', get the abs max value for activations and weights. If algo= 'min_max', get the min - and max value for quantized activations and weights. Default is KL. + and max value for quantized activations and weights. If algo='avg', + get the average value among the max values for activations. If + algo= 'hist', get the value of 'hist_perc' quantile as the threshold. + If algo='mse', get the value which makes the quantization mse loss + minimal. Default is KL. + hist_perc(float, optional): The threshold of algo 'hist' for activations. + Default is 0.99999. quantizable_op_type(list[str], optional): List the type of ops that will be quantized. Default is ["conv2d", "depthwise_conv2d", "mul"]. @@ -188,6 +196,8 @@ def __init__(self, apply quantization to all supported quantizable op type. If set is_full_quantized as False, only apply quantization to the op type according to the input quantizable_op_type. + bias_correct(bool, optional): If set as True, use the bias correction + method of https://arxiv.org/abs/1810.05723. Default is False. activation_bits(int): quantization bit number for activation. weight_bits(int, optional): quantization bit number for weights. activation_quantize_type(str): quantization type for activation, @@ -255,7 +265,7 @@ def __init__(self, 'range_abs_max', 'moving_average_abs_max', 'abs_max' ] self._support_weight_quantize_type = ['abs_max', 'channel_wise_abs_max'] - self._support_algo_type = ['KL', 'abs_max', 'min_max'] + self._support_algo_type = ['KL', 'hist', 'avg', 'mse', 'abs_max', 'min_max'] self._dynamic_quantize_op_type = ['lstm'] self._support_quantize_op_type = \ list(set(QuantizationTransformPass._supported_quantizable_op_type + @@ -270,7 +280,7 @@ def __init__(self, "cannot be None in the same time." assert batch_size > 0, "The batch_size should be greater than 0." assert algo in self._support_algo_type, \ - "The algo should be KL, abs_max or min_max." + "The algo should be KL, hist, mse, avg, abs_max or min_max." 
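For readers of this patch, the difference between the existing 'abs_max' rule and the new 'avg' rule is easiest to see in isolation. Below is a minimal NumPy sketch of the two sampling rules described in the docstring above (illustrative only, not the patched code; the random tensor stands in for one calibration batch):

```python
import numpy as np

# One calibration batch of activations, shape (batch, channels, H, W).
acts = np.random.randn(10, 3, 32, 32).astype(np.float32)

# 'abs_max': keep the largest absolute value seen across all batches.
abs_max_threshold = float(np.max(np.abs(acts)))

# 'avg': per batch, record the mean over samples of each sample's
# abs-max; the final threshold is the mean of those per-batch records.
per_sample_max = np.max(np.abs(acts.reshape(acts.shape[0], -1)), axis=1)
batch_record = float(np.mean(per_sample_max))
avg_threshold = np.array([batch_record]).mean()  # only one batch here

print(abs_max_threshold, avg_threshold)
```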
assert activation_quantize_type in self._support_activation_quantize_type, \ "The activation_quantize_type ({}) should in ({}).".format( activation_quantize_type, self._support_activation_quantize_type) @@ -279,6 +289,7 @@ def __init__(self, weight_quantize_type, self._support_weight_quantize_type) # Save input params + self._bias_correct = bias_correct self._executor = executor self._scope = global_scope() if scope == None else scope self._model_dir = model_dir @@ -289,6 +300,7 @@ def __init__(self, self._batch_size = batch_size self._batch_nums = batch_nums self._algo = algo + self._hist_perc = hist_perc self._activation_bits = activation_bits self._weight_bits = weight_bits self._activation_quantize_type = activation_quantize_type @@ -318,13 +330,17 @@ def __init__(self, self._sampling_act_abs_min_max = {} self._sampling_act_histogram = {} self._sampling_data = {} - self._quantized_var_kl_threshold = {} + self._quantized_var_threshold = {} self._histogram_bins = 2048 # The vars for algo = min_max self._quantized_var_min = {} self._quantized_var_max = {} - # The vars for algo = abs_max - self._quantized_var_abs_max = {} + # The vars for algo = avg + self._quantized_var_avg = {} + # The best loss of algo = mse + self._best_mse_loss = {} + # The threshold for algo = abs_max, mse or avg + self._quantized_threshold = {} def quantize(self): ''' @@ -341,7 +357,7 @@ def quantize(self): self._collect_target_varnames() self._set_activation_persistable() - if self._algo == "KL": + if self._algo in ["KL", "hist"]: _logger.info("Preparation stage ...") batch_id = 0 for data in self._data_loader(): @@ -373,14 +389,17 @@ def quantize(self): batch_id += 1 if self._batch_nums and batch_id >= self._batch_nums: break + + if self._algo == 'avg': + for var_name in self._quantized_act_var_name: + self._quantized_threshold[var_name] = np.array(self._quantized_var_avg[var_name]).mean() _logger.info("Finish sampling stage, all batch: " + str(batch_id)) self._reset_activation_persistable() - if self._algo == "KL": - self._calculate_kl_threshold() - - if self._algo in ["KL", "abs_max"]: + if self._algo in ["KL", "hist"]: + self._calculate_kl_hist_threshold() + if self._algo in ["KL", "abs_max", "hist", "avg", "mse"]: self._update_program() else: self._save_input_threhold() @@ -524,16 +543,18 @@ def _sampling(self): ''' Sample the min/max, abs_max or histogram in every iterations. 
''' - if self._algo == "abs_max": - self._sample_abs_max() + if self._algo in ["avg", "abs_max"]: + self._sample_abs_max_avg() elif self._algo == "min_max": self._sample_min_max() - elif self._algo == "KL": + elif self._algo == "mse": + self._sample_mse() + elif self._algo in ["KL", "hist"]: self._sample_histogram() - def _sample_abs_max(self): + def _sample_mse(self): # Only calculate abs_max value for weight for once - if self._quantized_var_abs_max == {}: + if self._quantized_threshold == {}: for var_name in self._quantized_weight_var_name: var_tensor = _load_variable_data(self._scope, var_name) if self._weight_quantize_type == "abs_max": @@ -549,14 +570,60 @@ def _sample_abs_max(self): for i in range(var_tensor.shape[0]): abs_max_value.append( float(np.max(np.abs(var_tensor[i])))) - self._quantized_var_abs_max[var_name] = abs_max_value + self._quantized_threshold[var_name] = abs_max_value + _logger.info("MSE searching stage ...") + for var_name in self._quantized_act_var_name: + var_tensor = _load_variable_data(self._scope, var_name) + var_tensor = var_tensor.flatten() + abs_max_value = float(np.max(np.abs(var_tensor))) + s = 0.3 + best_scale = 0.0 + if var_name not in self._best_mse_loss: + self._best_mse_loss[var_name] = 100000.0 + while s <= 1.0: + scale = s * abs_max_value + s += 0.02 + bins = 2 ** (self._activation_bits -1) - 1 + quant_dequant_var = np.round(np.clip(var_tensor, 0.0, scale) / scale * bins) / bins * scale + mse_loss = ((var_tensor - quant_dequant_var) ** 2).mean() + if mse_loss <= self._best_mse_loss[var_name]: + self._best_mse_loss[var_name] = mse_loss + best_scale = scale + if best_scale > 0.0: + self._quantized_threshold[var_name] = best_scale + + def _sample_abs_max_avg(self): + # Only calculate abs_max value for weight for once + if self._quantized_threshold == {}: + for var_name in self._quantized_weight_var_name: + var_tensor = _load_variable_data(self._scope, var_name) + if self._weight_quantize_type == "abs_max": + abs_max_value = float(np.max(np.abs(var_tensor))) + elif self._weight_quantize_type == "channel_wise_abs_max": + abs_max_value = [] + if self._weight_op_pairs[ + var_name] in _channelwise_quant_axis1_ops: + for i in range(var_tensor.shape[1]): + abs_max_value.append( + float(np.max(np.abs(var_tensor[:, i])))) + else: + for i in range(var_tensor.shape[0]): + abs_max_value.append( + float(np.max(np.abs(var_tensor[i])))) + self._quantized_threshold[var_name] = abs_max_value for var_name in self._quantized_act_var_name: var_tensor = _load_variable_data(self._scope, var_name) abs_max_value = float(np.max(np.abs(var_tensor))) - if (var_name not in self._quantized_var_abs_max) or \ - (abs_max_value > self._quantized_var_abs_max[var_name]): - self._quantized_var_abs_max[var_name] = abs_max_value + if self._algo == 'avg': + if (var_name not in self._quantized_var_avg): + self._quantized_var_avg[var_name] = [] + abs_avg_value = float(np.mean(np.max(np.abs(var_tensor.reshape(var_tensor.shape[0], -1)), axis=(1)))) + self._quantized_var_avg[var_name].append(abs_avg_value) + continue + if (var_name not in self._quantized_threshold) or \ + (abs_max_value > self._quantized_threshold[var_name]): + self._quantized_threshold[var_name] = abs_max_value def _sample_min_max(self): if self._quantized_var_min == {} and self._quantized_var_max == {}: @@ -646,12 +713,12 @@ def _init_sampling_act_histogram(self): [], bins=self._histogram_bins, range=(min_val, max_val)) self._sampling_act_histogram[var_name] = [hist, hist_edeges] - def _calculate_kl_threshold(self): + def 
_calculate_kl_hist_threshold(self): ''' - Calculate the KL threshold of quantized variables. + Calculate the KL or hist threshold of quantized variables. ''' - _logger.info("Calculate KL threshold ...") - assert self._algo == "KL", "The algo should be KL to calculate kl threshold." + _logger.info("Calculate {} threshold ...".format(self._algo)) + assert self._algo in ["KL", "hist"], "The algo should be KL to calculate kl threshold." # Abs_max threshold for weights for var_name in self._quantized_weight_var_name: @@ -669,12 +736,16 @@ def _calculate_kl_threshold(self): for i in range(weight_data.shape[0]): weight_threshold.append( float(np.max(np.abs(weight_data[i])))) - self._quantized_var_kl_threshold[var_name] = weight_threshold + self._quantized_var_threshold[var_name] = weight_threshold for var_name in self._quantized_act_var_name: hist, hist_edeges = self._sampling_act_histogram[var_name] - self._quantized_var_kl_threshold[var_name] = \ - self._get_kl_scaling_factor(hist, hist_edeges) + if self._algo == "KL": + self._quantized_var_threshold[var_name] = \ + self._get_kl_scaling_factor(hist, hist_edeges) + elif self._algo == "hist": + self._quantized_var_threshold[var_name] = \ + self._get_hist_scaling_factor(hist, hist_edeges) def _update_program(self): ''' @@ -712,10 +783,10 @@ def _update_program(self): add_quant_dequant_pass.apply(graph) # save abs_max or KL threshold to scale var node - if self._algo == "KL": - scale_dict = self._quantized_var_kl_threshold + if self._algo in ["KL", "hist"]: + scale_dict = self._quantized_var_threshold else: - scale_dict = self._quantized_var_abs_max + scale_dict = self._quantized_threshold for key, val in scale_dict.items(): _set_variable_data( self._scope, @@ -734,6 +805,7 @@ def _update_program(self): freeze_pass = QuantizationFreezePass( scope=self._scope, place=self._place, + bias_correct=self._bias_correct, weight_bits=self._weight_bits, activation_bits=self._activation_bits, weight_quantize_type=self._weight_quantize_type, @@ -762,19 +834,29 @@ def analysis_and_save_info(op_node, out_var_name): if self._algo == "KL": # For compatibility, we save output threshold by two methods. save_info(op_node, out_var_name, - self._quantized_var_kl_threshold, "out_threshold", + self._quantized_var_threshold, "out_threshold", "post_kl") save_info( - op_node, out_var_name, self._quantized_var_kl_threshold, + op_node, out_var_name, self._quantized_var_threshold, argname_index[0] + str(argname_index[1]) + "_threshold", "post_kl") - elif self._algo == "abs_max": - save_info(op_node, out_var_name, self._quantized_var_abs_max, - "out_threshold", "post_abs_max") + elif self._algo == "hist": + # For compatibility, we save output threshold by two methods. 
+ save_info(op_node, out_var_name, + self._quantized_var_threshold, "out_threshold", + "post_hist") save_info( - op_node, out_var_name, self._quantized_var_abs_max, + op_node, out_var_name, self._quantized_var_threshold, argname_index[0] + str(argname_index[1]) + "_threshold", - "post_kl") + "post_hist") + + elif self._algo in ["avg", "abs_max", "mse"]: + save_info(op_node, out_var_name, self._quantized_threshold, + "out_threshold", "post_absmax") + save_info( + op_node, out_var_name, self._quantized_threshold, + argname_index[0] + str(argname_index[1]) + "_threshold", + "post_absmax") elif self._algo == "min_max": save_info(op_node, out_var_name, self._quantized_var_min, "out_min", "post_min_max") @@ -817,10 +899,27 @@ def _collect_dynamic_quantize_op_threshold(self, target_ops_type): op._set_attr("quantization_type", quantization_type) op._set_attr("bit_length", self._weight_bits) - def _get_kl_scaling_factor(self, hist, hist_edeges, num_quantized_bins=255): + def _get_hist_scaling_factor(self, hist, hist_edges): + ''' + Using the hist method to get the scaling factor. + ''' + threshold_rate = self._hist_perc + hist = hist / float(sum(hist)) + hist_sum = 0 + hist_index = 0 + for i in range(len(hist)): + hist_sum += hist[i] + if hist_sum >= threshold_rate: + hist_index = i + 1 + break + bin_width = hist_edges[1] - hist_edges[0] + return (hist_index - 0.5) * bin_width + + def _get_kl_scaling_factor(self, hist, hist_edeges): ''' Using the KL-divergenc method to get the more precise scaling factor. ''' + num_quantized_bins = 2 ** (self._activation_bits - 1) - 1 ending_iter = self._histogram_bins - 1 starting_iter = int(ending_iter * 0.7) bin_width = hist_edeges[1] - hist_edeges[0] diff --git a/python/paddle/fluid/contrib/slim/quantization/quantization_pass.py b/python/paddle/fluid/contrib/slim/quantization/quantization_pass.py index 3f9ff7295dd6bb..3545a7122f0976 100644 --- a/python/paddle/fluid/contrib/slim/quantization/quantization_pass.py +++ b/python/paddle/fluid/contrib/slim/quantization/quantization_pass.py @@ -1070,6 +1070,7 @@ class QuantizationFreezePass(object): def __init__(self, scope, place, + bias_correct=False, weight_bits=8, activation_bits=8, weight_quantize_type='abs_max', @@ -1085,6 +1086,8 @@ def __init__(self, scope(fluid.Scope): scope is used to get the weight tensor values. place(fluid.CPUPlace|fluid.CUDAPlace|str): place is used to restore the weight tensors. If it's string, It can be ``cpu``, and ``gpu:x``, where ``x`` is the index of the GPUs. + bias_correct(bool): whether use bias correction for post-training quantization. + https://arxiv.org/abs/1810.05723. weight_bits(int): quantization bit number for weights. activation_bits(int): quantization bit number for activation. weight_quantize_type(str): quantization type for weights, support 'abs_max' and @@ -1098,6 +1101,7 @@ def __init__(self, assert place is not None, \ 'The place cannot be set None.' 
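The 'hist' rule added in `_get_hist_scaling_factor` above amounts to walking a normalized histogram until `hist_perc` of the probability mass is covered, then placing the threshold half a bin below that edge. A self-contained sketch of the same arithmetic (the vectorized cumulative-sum search is a substitution for the patch's explicit loop):

```python
import numpy as np

def hist_scaling_factor(hist, hist_edges, percent=0.99999):
    # Normalize, then find the first bin whose cumulative mass reaches
    # `percent`; the threshold sits half a bin below that bin's right edge.
    hist = hist / float(hist.sum())
    cum = np.cumsum(hist)
    hist_index = int(np.searchsorted(cum, percent)) + 1
    bin_width = hist_edges[1] - hist_edges[0]
    return (hist_index - 0.5) * bin_width

acts = np.abs(np.random.randn(100000))
hist, edges = np.histogram(acts, bins=2048, range=(0.0, float(acts.max())))
print(hist_scaling_factor(hist, edges))
```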
self._scope = scope + self._bias_correct = bias_correct self._place = _get_paddle_place(place) self._weight_bits = weight_bits self._activation_bits = activation_bits @@ -1154,7 +1158,35 @@ def apply(self, graph): else: quant_axis = 0 quantized_param_v = self._quant( - param_v, scale_v, self._weight_bits, quant_axis) + param_v.copy(), scale_v, self._weight_bits, quant_axis) + if self._bias_correct == True: + eps = 1e-8 + bnt = (1 << (self._weight_bits - 1)) - 1 + if isinstance(scale_v, list): + if quant_axis == 0: + for i, s in enumerate(scale_v): + quantized_param_v[i] = quantized_param_v[i] * s / bnt + quant_bias = param_v - quantized_param_v + mean_bias = quant_bias.reshape(quant_bias.shape[0], -1).mean(-1) + std_orig = param_v.reshape(param_v.shape[0], -1).std(-1) + std_quant = quantized_param_v.reshape(quantized_param_v.shape[0], -1).std(-1) + std_bias = std_orig / (std_quant + eps) + + else: + for i, s in enumerate(scale_v): + quantized_param_v[:, i] = quantized_param_v[:, i] * s / bnt + quant_bias = param_v - quantized_param_v + mean_bias = np.array([quant_bias[:, i].mean() for i in range(quant_bias.shape[1])]) + std_orig = np.array([param_v[:, i].std() for i in range(param_v.shape[1])]) + std_quant = np.array([quantized_param_v[:, i].std() for i in range(quantized_param_v.shape[1])]) + std_bias = std_orig / (std_quant + eps) + + if mean_bias.ndim == 1: + std_bias = np.resize(std_bias, param_v.shape) + mean_bias = np.resize(mean_bias, param_v.shape) + + quantized_param_v = (mean_bias + quantized_param_v) * std_bias + quantized_param_v = self._quant(quantized_param_v, scale_v, self._weight_bits, quant_axis) self._restore_var(input_arg_name, quantized_param_v) self._remove_fake_quant_and_dequant_op(graph, op_node) @@ -1365,7 +1397,7 @@ def _is_float(self, v): def _quant(self, x, scale, num_bits, quant_axis): assert quant_axis in [0, 1], 'quant_axis should be 0 or 1 for now.' 
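The inline correction above follows the empirical rule of Banner et al. (https://arxiv.org/abs/1810.05723): shift each output channel of the dequantized weights so its mean matches the float weights, rescale so its standard deviation matches, then re-quantize. A condensed NumPy sketch of the quant_axis=0 branch, on weights flattened to (out_channels, fan_in):

```python
import numpy as np

def bias_correct_rows(w_fp, w_dq, eps=1e-8):
    # w_fp: float weights, w_dq: dequantized weights, both (out_ch, fan_in).
    # The shift restores each row's mean, the gain restores its std,
    # matching the (mean_bias + w) * std_bias form used in the pass.
    mean_bias = (w_fp - w_dq).mean(axis=1, keepdims=True)
    std_bias = w_fp.std(axis=1, keepdims=True) / \
        (w_dq.std(axis=1, keepdims=True) + eps)
    return (w_dq + mean_bias) * std_bias

bnt = (1 << 7) - 1                        # int8: 127
w = np.random.randn(8, 64)
scales = np.abs(w).max(axis=1, keepdims=True)
w_q = np.round(np.clip(w, -scales, scales) / scales * bnt)
w_dq = w_q * scales / bnt                 # dequantize per channel
w_corr = bias_correct_rows(w, w_dq)       # the pass then re-quantizes this
print(np.abs(w - w_dq).mean(), np.abs(w - w_corr).mean())
```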
bnt = (1 << (num_bits - 1)) - 1 - + eps = 1e-8 def _clip(x, scale): x[x > scale] = scale x[x < -scale] = -scale @@ -1373,6 +1405,8 @@ def _clip(x, scale): if isinstance(scale, list): for i, s in enumerate(scale): + if s == 0.0: + s = eps if quant_axis == 0: x[i] = _clip(x[i], s) x[i] = np.round(x[i] / s * bnt) From 216e9469945f69417d0871a2d6cd3c4e0330b92a Mon Sep 17 00:00:00 2001 From: XGZhang11 <46363693+XGZhang11@users.noreply.github.com> Date: Tue, 13 Apr 2021 09:34:49 +0000 Subject: [PATCH 2/3] add new methods and tests --- .../post_training_quantization.py | 97 ++++++++++++------- .../slim/quantization/quantization_pass.py | 76 ++++++++------- .../test_post_training_quantization_mnist.py | 60 ++++++++++++ ..._post_training_quantization_mobilenetv1.py | 62 ++++++++++++ .../slim/tests/test_quantization_pass.py | 18 +++- 5 files changed, 242 insertions(+), 71 deletions(-) diff --git a/python/paddle/fluid/contrib/slim/quantization/post_training_quantization.py b/python/paddle/fluid/contrib/slim/quantization/post_training_quantization.py index b89e1ae84765f4..5ccde7b4b604e1 100644 --- a/python/paddle/fluid/contrib/slim/quantization/post_training_quantization.py +++ b/python/paddle/fluid/contrib/slim/quantization/post_training_quantization.py @@ -138,10 +138,10 @@ def __init__(self, batch_size=10, batch_nums=None, algo="KL", - hist_perc=0.99999, + hist_percent=0.99999, quantizable_op_type=["conv2d", "depthwise_conv2d", "mul"], is_full_quantize=False, - bias_correct=False, + bias_correction=False, activation_bits=8, weight_bits=8, activation_quantize_type='range_abs_max', @@ -184,10 +184,10 @@ def __init__(self, value for activations and weights. If algo= 'min_max', get the min and max value for quantized activations and weights. If algo='avg', get the average value among the max values for activations. If - algo= 'hist', get the value of 'hist_perc' quantile as the threshold. + algo= 'hist', get the value of 'hist_percent' quantile as the threshold. If algo='mse', get the value which makes the quantization mse loss minimal. Default is KL. - hist_perc(float, optional): The threshold of algo 'hist' for activations. + hist_percent(float, optional): The threshold of algo 'hist' for activations. Default is 0.99999. quantizable_op_type(list[str], optional): List the type of ops that will be quantized. Default is ["conv2d", "depthwise_conv2d", @@ -196,8 +196,8 @@ def __init__(self, apply quantization to all supported quantizable op type. If set is_full_quantized as False, only apply quantization to the op type according to the input quantizable_op_type. - bias_correct(bool, optional): If set as True, use the bias correction - method of https://arxiv.org/abs/1810.05723. Default is False. + bias_correction(bool, optional): If set as True, use the bias correction + method of https://arxiv.org/abs/1810.05723. Default is False. activation_bits(int): quantization bit number for activation. weight_bits(int, optional): quantization bit number for weights. 
activation_quantize_type(str): quantization type for activation, @@ -289,7 +289,7 @@ def __init__(self, weight_quantize_type, self._support_weight_quantize_type) # Save input params - self._bias_correct = bias_correct + self._bias_correction = bias_correction self._executor = executor self._scope = global_scope() if scope == None else scope self._model_dir = model_dir @@ -300,7 +300,7 @@ def __init__(self, self._batch_size = batch_size self._batch_nums = batch_nums self._algo = algo - self._hist_perc = hist_perc + self._hist_percent = hist_percent self._activation_bits = activation_bits self._weight_bits = weight_bits self._activation_quantize_type = activation_quantize_type @@ -326,7 +326,7 @@ def __init__(self, self._quantized_weight_var_name = set() self._quantized_act_var_name = set() self._weight_op_pairs = {} - # The vars for alog = KL + # The vars for algo = KL or hist self._sampling_act_abs_min_max = {} self._sampling_act_histogram = {} self._sampling_data = {} @@ -390,13 +390,14 @@ def quantize(self): if self._batch_nums and batch_id >= self._batch_nums: break - if self._algo == 'avg': - for var_name in self._quantized_act_var_name: - self._quantized_threshold[var_name] = np.array(self._quantized_var_avg[var_name]).mean() _logger.info("Finish sampling stage, all batch: " + str(batch_id)) self._reset_activation_persistable() - + + if self._algo == 'avg': + for var_name in self._quantized_act_var_name: + self._quantized_threshold[var_name] = \ + np.array(self._quantized_var_avg[var_name]).mean() if self._algo in ["KL", "hist"]: self._calculate_kl_hist_threshold() if self._algo in ["KL", "abs_max", "hist", "avg", "mse"]: @@ -543,8 +544,10 @@ def _sampling(self): ''' Sample the min/max, abs_max or histogram in every iterations. ''' - if self._algo in ["avg", "abs_max"]: - self._sample_abs_max_avg() + if self._algo == "abs_max": + self._sample_abs_max() + elif self._algo == "avg": + self._sample_avg() elif self._algo == "min_max": self._sample_min_max() elif self._algo == "mse": @@ -553,7 +556,6 @@ def _sampling(self): self._sample_histogram() def _sample_mse(self): - # Only calculate abs_max value for weight for once if self._quantized_threshold == {}: for var_name in self._quantized_weight_var_name: var_tensor = _load_variable_data(self._scope, var_name) @@ -571,15 +573,16 @@ def _sample_mse(self): abs_max_value.append( float(np.max(np.abs(var_tensor[i])))) self._quantized_threshold[var_name] = abs_max_value + + #Search for the best threshold for activations _logger.info("MSE searching stage ...") for var_name in self._quantized_act_var_name: var_tensor = _load_variable_data(self._scope, var_name) var_tensor = var_tensor.flatten() abs_max_value = float(np.max(np.abs(var_tensor))) s = 0.3 - best_scale = 0.0 if var_name not in self._best_mse_loss: - self._best_mse_loss[var_name] = 100000.0 + self._best_mse_loss[var_name] = float('inf') while s <= 1.0: scale = s * abs_max_value s += 0.02 @@ -588,12 +591,9 @@ def _sample_mse(self): bins = 2 ** (self._activation_bits -1) - 1 quant_dequant_var = np.round(np.clip(var_tensor, 0.0, scale) / scale * bins) / bins * scale mse_loss = ((var_tensor - quant_dequant_var) ** 2).mean() if mse_loss <= self._best_mse_loss[var_name]: self._best_mse_loss[var_name] = mse_loss - best_scale = scale - if best_scale > 0.0: - self._quantized_threshold[var_name] = best_scale + self._quantized_threshold[var_name] = scale - def _sample_abs_max_avg(self): - # Only calculate abs_max value for weight for once + def _sample_avg(self): if self._quantized_threshold == {}: for var_name in self._quantized_weight_var_name: var_tensor = _load_variable_data(self._scope, var_name) @@ -611,16 +611,39 @@ def 
_sample_abs_max_avg(self): abs_max_value.append( float(np.max(np.abs(var_tensor[i])))) self._quantized_threshold[var_name] = abs_max_value - + + for var_name in self._quantized_act_var_name: + var_tensor = _load_variable_data(self._scope, var_name) + abs_max_value = float(np.max(np.abs(var_tensor))) + if (var_name not in self._quantized_var_avg): + self._quantized_var_avg[var_name] = [] + abs_avg_value = float(np.mean(np.max( \ + np.abs(var_tensor.reshape(var_tensor.shape[0], -1)), axis=(1)))) + self._quantized_var_avg[var_name].append(abs_avg_value) + continue + + def _sample_abs_max(self): + if self._quantized_threshold == {}: + for var_name in self._quantized_weight_var_name: + var_tensor = _load_variable_data(self._scope, var_name) + if self._weight_quantize_type == "abs_max": + abs_max_value = float(np.max(np.abs(var_tensor))) + elif self._weight_quantize_type == "channel_wise_abs_max": + abs_max_value = [] + if self._weight_op_pairs[ + var_name] in _channelwise_quant_axis1_ops: + for i in range(var_tensor.shape[1]): + abs_max_value.append( + float(np.max(np.abs(var_tensor[:, i])))) + else: + for i in range(var_tensor.shape[0]): + abs_max_value.append( + float(np.max(np.abs(var_tensor[i])))) + self._quantized_threshold[var_name] = abs_max_value + for var_name in self._quantized_act_var_name: + var_tensor = _load_variable_data(self._scope, var_name) + abs_max_value = float(np.max(np.abs(var_tensor))) - if self._algo == 'avg': - if (var_name not in self._quantized_var_avg): - self._quantized_var_avg[var_name] = [] - abs_avg_value = float(np.mean(np.max(np.abs(var_tensor.reshape(var_tensor.shape[0], -1)), axis=(1)))) - self._quantized_var_avg[var_name].append(abs_avg_value) - continue if (var_name not in self._quantized_threshold) or \ (abs_max_value > self._quantized_threshold[var_name]): self._quantized_threshold[var_name] = abs_max_value @@ -718,7 +741,7 @@ def _calculate_kl_hist_threshold(self): Calculate the KL or hist threshold of quantized variables. ''' _logger.info("Calculate {} threshold ...".format(self._algo)) - assert self._algo in ["KL", "hist"], "The algo should be KL to calculate kl threshold." + assert self._algo in ["KL", "hist"], "The algo should be KL or hist." # Abs_max threshold for weights for var_name in self._quantized_weight_var_name: @@ -751,7 +774,7 @@ def _update_program(self): ''' Use QuantizationTransformPass and AddQuantDequantPass to insert fake_quantize, fake_dequantize and fake_quant_dequant op. - Besides, save all kl threshold to the scale var node. + Besides, save all thresholds to the scale var node. 
''' _logger.info("Update the program ...") graph = IrGraph(core.Graph(self._program.desc), for_test=True) @@ -782,7 +805,7 @@ def _update_program(self): quantizable_op_type=minor_quantizable_op_types) add_quant_dequant_pass.apply(graph) - # save abs_max or KL threshold to scale var node + # save threshold to scale var node if self._algo in ["KL", "hist"]: scale_dict = self._quantized_var_threshold else: @@ -805,7 +828,7 @@ def _update_program(self): freeze_pass = QuantizationFreezePass( scope=self._scope, place=self._place, - bias_correct=self._bias_correct, + bias_correction=self._bias_correction, weight_bits=self._weight_bits, activation_bits=self._activation_bits, weight_quantize_type=self._weight_quantize_type, @@ -852,11 +875,11 @@ def analysis_and_save_info(op_node, out_var_name): elif self._algo in ["avg", "abs_max", "mse"]: save_info(op_node, out_var_name, self._quantized_threshold, - "out_threshold", "post_absmax") + "out_threshold", "post_" + str(self._algo)) save_info( op_node, out_var_name, self._quantized_threshold, argname_index[0] + str(argname_index[1]) + "_threshold", - "post_absmax") + "post_" + str(self._algo)) elif self._algo == "min_max": save_info(op_node, out_var_name, self._quantized_var_min, "out_min", "post_min_max") @@ -903,7 +926,7 @@ def _get_hist_scaling_factor(self, hist, hist_edges): ''' Using the hist method to get the scaling factor. ''' - threshold_rate = self._hist_perc + threshold_rate = self._hist_percent hist = hist / float(sum(hist)) hist_sum = 0 hist_index = 0 diff --git a/python/paddle/fluid/contrib/slim/quantization/quantization_pass.py b/python/paddle/fluid/contrib/slim/quantization/quantization_pass.py index 3545a7122f0976..b9f8bdcb715a83 100644 --- a/python/paddle/fluid/contrib/slim/quantization/quantization_pass.py +++ b/python/paddle/fluid/contrib/slim/quantization/quantization_pass.py @@ -1070,7 +1070,7 @@ class QuantizationFreezePass(object): def __init__(self, scope, place, - bias_correct=False, + bias_correction=False, weight_bits=8, activation_bits=8, weight_quantize_type='abs_max', @@ -1086,7 +1086,7 @@ def __init__(self, scope(fluid.Scope): scope is used to get the weight tensor values. place(fluid.CPUPlace|fluid.CUDAPlace|str): place is used to restore the weight tensors. If it's string, It can be ``cpu``, and ``gpu:x``, where ``x`` is the index of the GPUs. - bias_correct(bool): whether use bias correction for post-training quantization. + bias_correction(bool): whether use bias correction for post-training quantization. https://arxiv.org/abs/1810.05723. weight_bits(int): quantization bit number for weights. activation_bits(int): quantization bit number for activation. @@ -1101,7 +1101,7 @@ def __init__(self, assert place is not None, \ 'The place cannot be set None.' 
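For reference, the scale search that `_sample_mse` (patch 2 above) performs, in isolation: candidate scales sweep from 0.3 * abs_max to abs_max in steps of 0.02, and the quantize-dequantize scale with the lowest mean squared error wins. A runnable sketch; the one-sided clip to [0, scale] is copied from the patch as-is:

```python
import numpy as np

def mse_search_scale(x, activation_bits=8):
    x = x.flatten()
    abs_max = float(np.max(np.abs(x)))
    bins = 2 ** (activation_bits - 1) - 1
    best_loss, best_scale = float('inf'), abs_max
    s = 0.3
    while s <= 1.0:
        scale = s * abs_max
        s += 0.02
        # Quantize-dequantize with the candidate scale, as in the patch.
        x_qdq = np.round(np.clip(x, 0.0, scale) / scale * bins) / bins * scale
        loss = float(((x - x_qdq) ** 2).mean())
        if loss <= best_loss:
            best_loss, best_scale = loss, scale
    return best_scale

print(mse_search_scale(np.random.randn(4096)))
```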
self._scope = scope - self._bias_correct = bias_correct + self._bias_correction = bias_correction self._place = _get_paddle_place(place) self._weight_bits = weight_bits self._activation_bits = activation_bits @@ -1159,34 +1159,9 @@ def apply(self, graph): quant_axis = 0 quantized_param_v = self._quant( param_v.copy(), scale_v, self._weight_bits, quant_axis) - if self._bias_correct == True: - eps = 1e-8 - bnt = (1 << (self._weight_bits - 1)) - 1 - if isinstance(scale_v, list): - if quant_axis == 0: - for i, s in enumerate(scale_v): - quantized_param_v[i] = quantized_param_v[i] * s / bnt - quant_bias = param_v - quantized_param_v - mean_bias = quant_bias.reshape(quant_bias.shape[0], -1).mean(-1) - std_orig = param_v.reshape(param_v.shape[0], -1).std(-1) - std_quant = quantized_param_v.reshape(quantized_param_v.shape[0], -1).std(-1) - std_bias = std_orig / (std_quant + eps) - - else: - for i, s in enumerate(scale_v): - quantized_param_v[:, i] = quantized_param_v[:, i] * s / bnt - quant_bias = param_v - quantized_param_v - mean_bias = np.array([quant_bias[:, i].mean() for i in range(quant_bias.shape[1])]) - std_orig = np.array([param_v[:, i].std() for i in range(param_v.shape[1])]) - std_quant = np.array([quantized_param_v[:, i].std() for i in range(quantized_param_v.shape[1])]) - std_bias = std_orig / (std_quant + eps) - - if mean_bias.ndim == 1: - std_bias = np.resize(std_bias, param_v.shape) - mean_bias = np.resize(mean_bias, param_v.shape) - - quantized_param_v = (mean_bias + quantized_param_v) * std_bias - quantized_param_v = self._quant(quantized_param_v, scale_v, self._weight_bits, quant_axis) + if self._bias_correction == True: + quantized_param_v = self._bias_correction_w( + param_v, quantized_param_v, scale_v, quant_axis) self._restore_var(input_arg_name, quantized_param_v) self._remove_fake_quant_and_dequant_op(graph, op_node) @@ -1397,7 +1372,6 @@ def _is_float(self, v): def _quant(self, x, scale, num_bits, quant_axis): assert quant_axis in [0, 1], 'quant_axis should be 0 or 1 for now.' 
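The `_quant` helper, whose body continues below, gains a guard for all-zero channels: their abs-max scale is 0.0 and would otherwise divide by zero. A standalone version of the per-channel branch under that guard:

```python
import numpy as np

def quant_channelwise(x, scales, num_bits=8, quant_axis=0):
    # Per-channel abs-max quantization with the zero-scale guard
    # introduced in this patch.
    bnt = (1 << (num_bits - 1)) - 1
    x = x.astype(np.float64).copy()
    for i, s in enumerate(scales):
        if s == 0.0:
            s = 1e-8
        sl = x[i] if quant_axis == 0 else x[:, i]
        q = np.round(np.clip(sl, -s, s) / s * bnt)
        if quant_axis == 0:
            x[i] = q
        else:
            x[:, i] = q
    return x

w = np.random.randn(4, 8)
w[2] = 0.0                      # dead channel -> scale 0.0
print(quant_channelwise(w, np.abs(w).max(axis=1)))
```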
bnt = (1 << (num_bits - 1)) - 1 - eps = 1e-8 def _clip(x, scale): x[x > scale] = scale x[x < -scale] = -scale @@ -1406,7 +1380,7 @@ def _clip(x, scale): if isinstance(scale, list): for i, s in enumerate(scale): if s == 0.0: - s = eps + s = 1e-8 if quant_axis == 0: x[i] = _clip(x[i], s) x[i] = np.round(x[i] / s * bnt) @@ -1418,6 +1392,42 @@ def _clip(x, scale): x = np.round(x / scale * bnt) return x + def _bias_correction_w(self, x, x_quant, scale_v, quant_axis): + ''' + Bias correction for weight + ''' + eps = 1e-8 + bnt = (1 << (self._weight_bits - 1)) - 1 + x_dequant = x_quant.copy() + if isinstance(scale_v, list): + if quant_axis == 0: + for i, s in enumerate(scale_v): + x_dequant[i] = x_dequant[i] * s / bnt + quant_bias = x - x_dequant + mean_bias = quant_bias.reshape(quant_bias.shape[0], -1).mean(-1) + std_orig = x.reshape(x.shape[0], -1).std(-1) + std_quant = x_dequant.reshape(x_dequant.shape[0], -1).std(-1) + std_bias = std_orig / (std_quant + eps) + else: + for i, s in enumerate(scale_v): + x_dequant[:, i] = x_quant[:, i] * s / bnt + quant_bias = x - x_dequant + mean_bias = np.array([quant_bias[:, i].mean() for i in range(quant_bias.shape[1])]) + std_orig = np.array([x[:, i].std() for i in range(x.shape[1])]) + std_quant = np.array([x_dequant[:, i].std() for i in range(x_dequant.shape[1])]) + std_bias = std_orig / (std_quant + eps) + else: + x_dequant = x_quant * scale_v / bnt + mean_bias = (x - x_dequant).mean() + std_bias = x.std() / (x_dequant.std() + eps) + if mean_bias.ndim == 1: + std_bias = np.resize(std_bias, x.shape) + mean_bias = np.resize(mean_bias, x.shape) + + x_dequant = (mean_bias + x_dequant) * std_bias + quantized_param_v = self._quant(x_dequant, scale_v, self._weight_bits, quant_axis) + return quantized_param_v + class ConvertToInt8Pass(object): def __init__(self, scope, place, quantizable_op_type=None): diff --git a/python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_mnist.py b/python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_mnist.py index 3ea1c84f976a85..da5c5d6dc9441b 100644 --- a/python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_mnist.py +++ b/python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_mnist.py @@ -204,6 +204,66 @@ def test_post_training_kl(self): quant_iterations) +class TestPostTraininghistForMnist(TestPostTrainingQuantization): + def test_post_training_hist(self): + model_name = "mnist_model" + data_url = "http://paddle-inference-dist.bj.bcebos.com/int8/mnist_model.tar.gz" + data_md5 = "be71d3997ec35ac2a65ae8a145e2887c" + algo = "hist" + quantizable_op_type = ["conv2d", "depthwise_conv2d", "mul"] + is_full_quantize = False + is_use_cache_file = False + is_optimize_model = True + diff_threshold = 0.01 + batch_size = 10 + infer_iterations = 50 + quant_iterations = 5 + self.run_test(model_name, data_url, data_md5, algo, quantizable_op_type, + is_full_quantize, is_use_cache_file, is_optimize_model, + diff_threshold, batch_size, infer_iterations, + quant_iterations) + + +class TestPostTrainingmseForMnist(TestPostTrainingQuantization): + def test_post_training_mse(self): + model_name = "mnist_model" + data_url = "http://paddle-inference-dist.bj.bcebos.com/int8/mnist_model.tar.gz" + data_md5 = "be71d3997ec35ac2a65ae8a145e2887c" + algo = "mse" + quantizable_op_type = ["conv2d", "depthwise_conv2d", "mul"] + is_full_quantize = False + is_use_cache_file = False + is_optimize_model = True + diff_threshold = 0.01 + batch_size = 10 + infer_iterations = 50 + quant_iterations = 5 + 
self.run_test(model_name, data_url, data_md5, algo, quantizable_op_type, + is_full_quantize, is_use_cache_file, is_optimize_model, + diff_threshold, batch_size, infer_iterations, + quant_iterations) + + +class TestPostTrainingavgForMnist(TestPostTrainingQuantization): + def test_post_training_avg(self): + model_name = "mnist_model" + data_url = "http://paddle-inference-dist.bj.bcebos.com/int8/mnist_model.tar.gz" + data_md5 = "be71d3997ec35ac2a65ae8a145e2887c" + algo = "avg" + quantizable_op_type = ["conv2d", "depthwise_conv2d", "mul"] + is_full_quantize = False + is_use_cache_file = False + is_optimize_model = True + diff_threshold = 0.01 + batch_size = 10 + infer_iterations = 50 + quant_iterations = 5 + self.run_test(model_name, data_url, data_md5, algo, quantizable_op_type, + is_full_quantize, is_use_cache_file, is_optimize_model, + diff_threshold, batch_size, infer_iterations, + quant_iterations) + + class TestPostTrainingAbsMaxForMnist(TestPostTrainingQuantization): def test_post_training_abs_max(self): model_name = "mnist_model" diff --git a/python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_mobilenetv1.py b/python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_mobilenetv1.py index 18389d9433b9a5..2614804a3cc752 100644 --- a/python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_mobilenetv1.py +++ b/python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_mobilenetv1.py @@ -327,6 +327,68 @@ def test_post_training_kl_mobilenetv1(self): is_full_quantize, is_use_cache_file, is_optimize_model, diff_threshold) +class TestPostTrainingavgForMobilenetv1(TestPostTrainingQuantization): + def test_post_training_avg_mobilenetv1(self): + model = "MobileNet-V1" + algo = "avg" + data_urls = [ + 'http://paddle-inference-dist.bj.bcebos.com/int8/mobilenetv1_int8_model.tar.gz' + ] + data_md5s = ['13892b0716d26443a8cdea15b3c6438b'] + quantizable_op_type = [ + "conv2d", + "depthwise_conv2d", + "mul", + ] + is_full_quantize = False + is_use_cache_file = False + is_optimize_model = True + diff_threshold = 0.025 + self.run_test(model, algo, data_urls, data_md5s, quantizable_op_type, + is_full_quantize, is_use_cache_file, is_optimize_model, + diff_threshold) + +class TestPostTraininghistForMobilenetv1(TestPostTrainingQuantization): + def test_post_training_hist_mobilenetv1(self): + model = "MobileNet-V1" + algo = "hist" + data_urls = [ + 'http://paddle-inference-dist.bj.bcebos.com/int8/mobilenetv1_int8_model.tar.gz' + ] + data_md5s = ['13892b0716d26443a8cdea15b3c6438b'] + quantizable_op_type = [ + "conv2d", + "depthwise_conv2d", + "mul", + ] + is_full_quantize = False + is_use_cache_file = False + is_optimize_model = True + diff_threshold = 0.025 + self.run_test(model, algo, data_urls, data_md5s, quantizable_op_type, + is_full_quantize, is_use_cache_file, is_optimize_model, + diff_threshold) + +class TestPostTrainingmseForMobilenetv1(TestPostTrainingQuantization): + def test_post_training_mse_mobilenetv1(self): + model = "MobileNet-V1" + algo = "mse" + data_urls = [ + 'http://paddle-inference-dist.bj.bcebos.com/int8/mobilenetv1_int8_model.tar.gz' + ] + data_md5s = ['13892b0716d26443a8cdea15b3c6438b'] + quantizable_op_type = [ + "conv2d", + "depthwise_conv2d", + "mul", + ] + is_full_quantize = False + is_use_cache_file = False + is_optimize_model = True + diff_threshold = 0.025 + self.run_test(model, algo, data_urls, data_md5s, quantizable_op_type, + is_full_quantize, is_use_cache_file, is_optimize_model, + diff_threshold) class 
TestPostTrainingAbsMaxForMobilenetv1(TestPostTrainingQuantization): def test_post_training_abs_max_mobilenetv1(self): diff --git a/python/paddle/fluid/contrib/slim/tests/test_quantization_pass.py b/python/paddle/fluid/contrib/slim/tests/test_quantization_pass.py index 768a9ba7cfc3e7..790213d4b02924 100644 --- a/python/paddle/fluid/contrib/slim/tests/test_quantization_pass.py +++ b/python/paddle/fluid/contrib/slim/tests/test_quantization_pass.py @@ -257,6 +257,7 @@ def freeze_graph(self, use_cuda, seed, activation_quant_type, + bias_correction=False, weight_quant_type='abs_max', for_ci=True, quant_skip_pattern='skip_quant'): @@ -355,7 +356,8 @@ def build_program(main, startup, is_test): # Freeze graph for inference, but the weight of fc/conv is still float type. freeze_pass = QuantizationFreezePass( - scope=scope, place=place, weight_quantize_type=weight_quant_type) + scope=scope, place=place, bias_correction=bias_correction, \ + weight_quantize_type=weight_quant_type) freeze_pass.apply(test_graph) if not for_ci: marked_nodes = set() @@ -472,6 +474,13 @@ def test_freeze_graph_cpu_dynamic(self): def test_freeze_graph_cuda_static(self): if fluid.core.is_compiled_with_cuda(): with fluid.unique_name.guard(): + self.freeze_graph( + True, + seed=1, + activation_quant_type='range_abs_max', + bias_correction=True, + weight_quant_type='abs_max', + for_ci=True) self.freeze_graph( True, seed=1, @@ -496,6 +505,13 @@ def test_freeze_graph_cuda_static(self): activation_quant_type='moving_average_abs_max', weight_quant_type='channel_wise_abs_max', for_ci=True) + self.freeze_graph( + True, + seed=1, + activation_quant_type='moving_average_abs_max', + bias_correction=True, + weight_quant_type='channel_wise_abs_max', + for_ci=True) def test_freeze_graph_cpu_static(self): with fluid.unique_name.guard(): From 724e56c35d28e2355922a786adc8ee16746d7ceb Mon Sep 17 00:00:00 2001 From: XGZhang11 <46363693+XGZhang11@users.noreply.github.com> Date: Tue, 13 Apr 2021 17:54:43 +0000 Subject: [PATCH 3/3] code style changed --- .../post_training_quantization.py | 41 +++++++++---------- .../slim/quantization/quantization_pass.py | 11 +++-- ..._post_training_quantization_mobilenetv1.py | 22 +--------- 3 files changed, 29 insertions(+), 45 deletions(-) diff --git a/python/paddle/fluid/contrib/slim/quantization/post_training_quantization.py b/python/paddle/fluid/contrib/slim/quantization/post_training_quantization.py index 5ccde7b4b604e1..bc2e2dc9b6562c 100644 --- a/python/paddle/fluid/contrib/slim/quantization/post_training_quantization.py +++ b/python/paddle/fluid/contrib/slim/quantization/post_training_quantization.py @@ -265,7 +265,9 @@ def __init__(self, 'range_abs_max', 'moving_average_abs_max', 'abs_max' ] self._support_weight_quantize_type = ['abs_max', 'channel_wise_abs_max'] - self._support_algo_type = ['KL', 'hist', 'avg', 'mse', 'abs_max', 'min_max'] + self._support_algo_type = [ + 'KL', 'hist', 'avg', 'mse', 'abs_max', 'min_max' + ] self._dynamic_quantize_op_type = ['lstm'] self._support_quantize_op_type = \ list(set(QuantizationTransformPass._supported_quantizable_op_type + @@ -389,11 +391,8 @@ def quantize(self): batch_id += 1 if self._batch_nums and batch_id >= self._batch_nums: break - _logger.info("Finish sampling stage, all batch: " + str(batch_id)) - self._reset_activation_persistable() - if self._algo == 'avg': for var_name in self._quantized_act_var_name: self._quantized_threshold[var_name] = \ @@ -573,8 +572,6 @@ def _sample_mse(self): abs_max_value.append( float(np.max(np.abs(var_tensor[i])))) 
self._quantized_threshold[var_name] = abs_max_value - - #Search for the best threshold for activations _logger.info("MSE searching stage ...") for var_name in self._quantized_act_var_name: var_tensor = _load_variable_data(self._scope, var_name) @@ -586,13 +583,15 @@ def _sample_mse(self): while s <= 1.0: scale = s * abs_max_value s += 0.02 - bins = 2 ** (self._activation_bits -1) - 1 - quant_dequant_var = np.round(np.clip(var_tensor, 0.0, scale) / scale * bins) / bins * scale - mse_loss = ((var_tensor - quant_dequant_var) ** 2).mean() + bins = 2**(self._activation_bits - 1) - 1 + quant_dequant_var = np.round( + np.clip(var_tensor, 0.0, scale) / scale * + bins) / bins * scale + mse_loss = ((var_tensor - quant_dequant_var)**2).mean() if mse_loss <= self._best_mse_loss[var_name]: self._best_mse_loss[var_name] = mse_loss self._quantized_threshold[var_name] = scale - + def _sample_avg(self): if self._quantized_threshold == {}: for var_name in self._quantized_weight_var_name: @@ -611,7 +610,7 @@ def _sample_avg(self): abs_max_value.append( float(np.max(np.abs(var_tensor[i])))) self._quantized_threshold[var_name] = abs_max_value - + for var_name in self._quantized_act_var_name: var_tensor = _load_variable_data(self._scope, var_name) abs_max_value = float(np.max(np.abs(var_tensor))) @@ -621,8 +620,8 @@ def _sample_avg(self): np.abs(var_tensor.reshape(var_tensor.shape[0], -1)), axis=(1)))) self._quantized_var_avg[var_name].append(abs_avg_value) continue - - def _sample_abs_max(self): + + def _sample_abs_max(self): if self._quantized_threshold == {}: for var_name in self._quantized_weight_var_name: var_tensor = _load_variable_data(self._scope, var_name) @@ -640,7 +639,7 @@ def _sample_abs_max(self): abs_max_value.append( float(np.max(np.abs(var_tensor[i])))) self._quantized_threshold[var_name] = abs_max_value - + for var_name in self._quantized_act_var_name: var_tensor = _load_variable_data(self._scope, var_name) abs_max_value = float(np.max(np.abs(var_tensor))) @@ -856,18 +855,16 @@ def analysis_and_save_info(op_node, out_var_name): out_var_name + " is not the output of the op" if self._algo == "KL": # For compatibility, we save output threshold by two methods. - save_info(op_node, out_var_name, - self._quantized_var_threshold, "out_threshold", - "post_kl") + save_info(op_node, out_var_name, self._quantized_var_threshold, + "out_threshold", "post_kl") save_info( op_node, out_var_name, self._quantized_var_threshold, argname_index[0] + str(argname_index[1]) + "_threshold", "post_kl") elif self._algo == "hist": # For compatibility, we save output threshold by two methods. - save_info(op_node, out_var_name, - self._quantized_var_threshold, "out_threshold", - "post_hist") + save_info(op_node, out_var_name, self._quantized_var_threshold, + "out_threshold", "post_hist") save_info( op_node, out_var_name, self._quantized_var_threshold, argname_index[0] + str(argname_index[1]) + "_threshold", @@ -926,7 +923,7 @@ def _get_hist_scaling_factor(self, hist, hist_edges): ''' Using the hist method to get the scaling factor. ''' - threshold_rate = self._hist_percent + threshold_rate = self._hist_percent hist = hist / float(sum(hist)) hist_sum = 0 hist_index = 0 @@ -942,7 +939,7 @@ def _get_kl_scaling_factor(self, hist, hist_edeges): ''' Using the KL-divergenc method to get the more precise scaling factor. 
''' - num_quantized_bins = 2 ** (self._activation_bits - 1) - 1 + num_quantized_bins = 2**(self._activation_bits - 1) - 1 ending_iter = self._histogram_bins - 1 starting_iter = int(ending_iter * 0.7) bin_width = hist_edeges[1] - hist_edeges[0] diff --git a/python/paddle/fluid/contrib/slim/quantization/quantization_pass.py b/python/paddle/fluid/contrib/slim/quantization/quantization_pass.py index b9f8bdcb715a83..79aad8c8bc53d3 100644 --- a/python/paddle/fluid/contrib/slim/quantization/quantization_pass.py +++ b/python/paddle/fluid/contrib/slim/quantization/quantization_pass.py @@ -1372,6 +1372,7 @@ def _is_float(self, v): def _quant(self, x, scale, num_bits, quant_axis): assert quant_axis in [0, 1], 'quant_axis should be 0 or 1 for now.' bnt = (1 << (num_bits - 1)) - 1 + def _clip(x, scale): x[x > scale] = scale x[x < -scale] = -scale @@ -1412,9 +1413,12 @@ def _bias_correction_w(self, x, x_quant, scale_v, quant_axis): for i, s in enumerate(scale_v): x_dequant[:, i] = x_quant[:, i] * s / bnt quant_bias = x - x_dequant - mean_bias = np.array([quant_bias[:, i].mean() for i in range(quant_bias.shape[1])]) + mean_bias = np.array([ + quant_bias[:, i].mean() for i in range(quant_bias.shape[1]) + ]) std_orig = np.array([x[:, i].std() for i in range(x.shape[1])]) - std_quant = np.array([x_dequant[:, i].std() for i in range(x_dequant.shape[1])]) + std_quant = np.array( + [x_dequant[:, i].std() for i in range(x_dequant.shape[1])]) std_bias = std_orig / (std_quant + eps) else: x_dequant = x_quant * scale_v / bnt @@ -1425,7 +1429,8 @@ def _bias_correction_w(self, x, x_quant, scale_v, quant_axis): mean_bias = np.resize(mean_bias, x.shape) x_dequant = (mean_bias + x_dequant) * std_bias - quantized_param_v = self._quant(x_dequant, scale_v, self._weight_bits, quant_axis) + quantized_param_v = self._quant(x_dequant, scale_v, self._weight_bits, + quant_axis) return quantized_param_v diff --git a/python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_mobilenetv1.py b/python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_mobilenetv1.py index 2614804a3cc752..71611048610060 100644 --- a/python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_mobilenetv1.py +++ b/python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_mobilenetv1.py @@ -327,6 +327,7 @@ def test_post_training_kl_mobilenetv1(self): is_full_quantize, is_use_cache_file, is_optimize_model, diff_threshold) + class TestPostTrainingavgForMobilenetv1(TestPostTrainingQuantization): def test_post_training_avg_mobilenetv1(self): model = "MobileNet-V1" @@ -348,6 +349,7 @@ def test_post_training_avg_mobilenetv1(self): is_full_quantize, is_use_cache_file, is_optimize_model, diff_threshold) + class TestPostTraininghistForMobilenetv1(TestPostTrainingQuantization): def test_post_training_hist_mobilenetv1(self): model = "MobileNet-V1" @@ -369,26 +371,6 @@ def test_post_training_hist_mobilenetv1(self): is_full_quantize, is_use_cache_file, is_optimize_model, diff_threshold) -class TestPostTrainingmseForMobilenetv1(TestPostTrainingQuantization): - def test_post_training_mse_mobilenetv1(self): - model = "MobileNet-V1" - algo = "mse" - data_urls = [ - 'http://paddle-inference-dist.bj.bcebos.com/int8/mobilenetv1_int8_model.tar.gz' - ] - data_md5s = ['13892b0716d26443a8cdea15b3c6438b'] - quantizable_op_type = [ - "conv2d", - "depthwise_conv2d", - "mul", - ] - is_full_quantize = False - is_use_cache_file = False - is_optimize_model = True - diff_threshold = 0.025 - self.run_test(model, algo, data_urls, 
data_md5s, quantizable_op_type, - is_full_quantize, is_use_cache_file, is_optimize_model, - diff_threshold) class TestPostTrainingAbsMaxForMobilenetv1(TestPostTrainingQuantization): def test_post_training_abs_max_mobilenetv1(self):
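Taken together, a hypothetical calibration driver exercising the options this series adds might look as follows. The constructor argument names come from the docstring in patch 1; `sample_generator`, `model_dir`, and `save_path` are placeholders rather than anything introduced here, and `save_quantized_model` is assumed to be the class's existing save entry point:

```python
import paddle.fluid as fluid
from paddle.fluid.contrib.slim.quantization import PostTrainingQuantization

def quantize_model(model_dir, sample_generator, save_path):
    # model_dir / sample_generator / save_path are placeholders: a saved
    # float inference model, a calibration data reader, and an output dir.
    exe = fluid.Executor(fluid.CPUPlace())
    ptq = PostTrainingQuantization(
        executor=exe,
        model_dir=model_dir,
        sample_generator=sample_generator,
        batch_size=10,
        batch_nums=10,
        algo="mse",                  # new: 'hist', 'avg' and 'mse' accepted
        hist_percent=0.99999,        # consulted only when algo='hist'
        bias_correction=True,        # new: weight correction per 1810.05723
        quantizable_op_type=["conv2d", "depthwise_conv2d", "mul"])
    ptq.quantize()
    ptq.save_quantized_model(save_path)
```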