From f4f31f2396a7e6b168ece6527187da2be8911442 Mon Sep 17 00:00:00 2001 From: XGZhang <46363693+XGZhang11@users.noreply.github.com> Date: Mon, 2 Aug 2021 08:35:52 +0000 Subject: [PATCH 01/27] support quantization of conv2d_transpose --- .../slim/quantization/imperative/qat.py | 62 ++++++++----- .../slim/quantization/imperative/utils.py | 18 +++- python/paddle/nn/quant/quant_layers.py | 87 +++++++++++++++++++ 3 files changed, 140 insertions(+), 27 deletions(-) diff --git a/python/paddle/fluid/contrib/slim/quantization/imperative/qat.py b/python/paddle/fluid/contrib/slim/quantization/imperative/qat.py index b8c0e47e9bbc26..32a3ebfe047030 100644 --- a/python/paddle/fluid/contrib/slim/quantization/imperative/qat.py +++ b/python/paddle/fluid/contrib/slim/quantization/imperative/qat.py @@ -42,17 +42,18 @@ class ImperativeQuantAware(object): Applying quantization aware training (QAT) to the dgraph model. """ - def __init__(self, - quantizable_layer_type=['Conv2D', 'Linear'], - weight_quantize_type='abs_max', - activation_quantize_type='moving_average_abs_max', - weight_bits=8, - activation_bits=8, - moving_rate=0.9, - weight_preprocess_layer=None, - act_preprocess_layer=None, - weight_quantize_layer=None, - act_quantize_layer=None): + def __init__( + self, + quantizable_layer_type=['Conv2D', 'Linear', 'Conv2DTranspose'], + weight_quantize_type='abs_max', + activation_quantize_type='moving_average_abs_max', + weight_bits=8, + activation_bits=8, + moving_rate=0.9, + weight_preprocess_layer=None, + act_preprocess_layer=None, + weight_quantize_layer=None, + act_quantize_layer=None): """ The constructor for ImperativeQuantAware. @@ -232,17 +233,18 @@ class ImperativeQuantizeInputs(object): logic both for activation inputs and weight inputs. 
""" - def __init__(self, - quantizable_layer_type=['Conv2D', 'Linear'], - weight_quantize_type='abs_max', - activation_quantize_type='moving_average_abs_max', - weight_bits=8, - activation_bits=8, - moving_rate=0.9, - weight_preprocess_layer=None, - act_preprocess_layer=None, - weight_quantize_layer=None, - act_quantize_layer=None): + def __init__( + self, + quantizable_layer_type=['Conv2D', 'Linear', 'Conv2DTranspose'], + weight_quantize_type='abs_max', + activation_quantize_type='moving_average_abs_max', + weight_bits=8, + activation_bits=8, + moving_rate=0.9, + weight_preprocess_layer=None, + act_preprocess_layer=None, + weight_quantize_layer=None, + act_quantize_layer=None): """ The constructor for ImperativeQuantizeInputs. @@ -303,6 +305,18 @@ def __init__(self, } def apply(self, model): + """ + Quantize the weights and activations to calculate for specific + layers in the dygraph model. + + Args: + model(fluid.dygraph.Layer): The target model which would + calculate the input quantization scale. + + Returns: + None + """ + assert isinstance(model, dygraph.Layer), \ "The model must be the instance of dygraph.Layer." @@ -544,7 +558,9 @@ def _is_skip_quant_op(self, block, in_op): 1. the type of input op should be conv2d, depthwise_conv2d or matmul 2. 
the previous ops of the input op are not fake_quantize_dequantize ops """ - target_op_types = ["conv2d", "depthwise_conv2d", "matmul"] + target_op_types = [ + "conv2d", "depthwise_conv2d", "matmul", "conv2d_transpose" + ] if in_op.type not in target_op_types: return False diff --git a/python/paddle/fluid/contrib/slim/quantization/imperative/utils.py b/python/paddle/fluid/contrib/slim/quantization/imperative/utils.py index a9d52c5a87ad36..5a98ac80549f18 100644 --- a/python/paddle/fluid/contrib/slim/quantization/imperative/utils.py +++ b/python/paddle/fluid/contrib/slim/quantization/imperative/utils.py @@ -24,6 +24,7 @@ from ..quantization_pass import _get_input_name_index layer_name_map = { + 'Conv2DTranspose': paddle.nn.Conv2DTranspose, 'Conv2D': paddle.nn.Conv2D, 'Linear': paddle.nn.Linear, 'AdaptiveAvgPool2D': paddle.nn.AdaptiveAvgPool2D, @@ -47,7 +48,9 @@ # Apply fake quant for the inputs of these layers # TODO (jc): support paddle.nn.Conv2DTranspose -fake_quant_input_layers = [paddle.nn.Conv2D, paddle.nn.Linear] +fake_quant_input_layers = [ + paddle.nn.Conv2D, paddle.nn.Linear, paddle.nn.Conv2DTranspose +] # Apply fake quant for the output of these layers # TODO(jc): fix the problem of adding duplicate fake_quant ops @@ -65,7 +68,8 @@ ] fake_quant_wrap_layers = [ - quant_layers.QuantizedConv2D, quant_layers.QuantizedLinear + quant_layers.QuantizedConv2D, quant_layers.QuantizedLinear, + quant_layers.QuantizedConv2DTranspose ] # The weight format of these layers is Cin * Cout * H * W @@ -84,9 +88,9 @@ def load_variable_data(scope, var_name): - ''' + """ Load variable value from scope - ''' + """ var_node = scope.find_var(var_name) assert var_node is not None, \ "Can not find " + var_name + " in the scope." @@ -120,6 +124,12 @@ def find_parent_layer_and_sub_name(model, name): the sub_name of the layer. For example, if name is 'block_1/convbn_1/conv_1', the parent layer is 'block_1/convbn_1' and the sub_name is `conv_1`. 
+ Args: + model(fluid.dygraph.Layer): the model to be quantized. + name(string): the name of a layer + + Returns: + parent_layer, subname """ assert isinstance(model, paddle.nn.Layer), \ "The model must be the instance of paddle.nn.Layer." diff --git a/python/paddle/nn/quant/quant_layers.py b/python/paddle/nn/quant/quant_layers.py index 5573683ebd0458..cc98f73174b4b3 100644 --- a/python/paddle/nn/quant/quant_layers.py +++ b/python/paddle/nn/quant/quant_layers.py @@ -31,6 +31,7 @@ 'FakeQuantMovingAverageAbsMax', 'FakeQuantChannelWiseAbsMax', 'QuantizedConv2D', + 'QuantizedConv2DTranspose', 'QuantizedLinear', 'MovingAverageAbsMaxScale', 'MAOutputScaleLayer', @@ -481,6 +482,92 @@ def forward(self, input): data_format=self._data_format) +class QuantizedConv2DTranspose(layers.Layer): + """ + The computational logic of QuantizedConv2DTranspose is the same with Conv2DTranspose. + The only difference is that its inputs are all fake quantized. + """ + + def __init__(self, + layer, + weight_bits=8, + activation_bits=8, + moving_rate=0.9, + weight_quantize_type='abs_max', + activation_quantize_type='abs_max', + weight_pre_layer=None, + act_pre_layer=None, + weight_quant_layer=None, + act_quant_layer=None): + super(QuantizedConv2DTranspose, self).__init__() + # For Conv2DTranspose + self._groups = getattr(layer, '_groups') + self._stride = getattr(layer, '_stride') + self._padding = getattr(layer, '_padding') + self._output_padding = getattr(layer, 'output_padding') + self._dilation = getattr(layer, '_dilation') + self._data_format = getattr(layer, '_data_format') + self.weight = getattr(layer, 'weight') + self.bias = getattr(layer, 'bias') + # For FakeQuant + self._conv2d_transpose_quant_axis = 1 + if weight_quant_layer is not None: + self._fake_quant_weight = weight_quant_layer() + else: + self._fake_quant_weight = _get_fake_quant_type( + weight_quantize_type, + name=self.weight.name, + moving_rate=moving_rate, + quant_bits=weight_bits, + dtype=self._dtype, + 
quant_on_weight=True, + channel_num=self.weight.shape[ + self._conv2d_transpose_quant_axis], + quant_axis=self._conv2d_transpose_quant_axis) + if act_quant_layer is not None: + self._fake_quant_input = act_quant_layer() + else: + self._fake_quant_input = _get_fake_quant_type( + activation_quantize_type, + name=layer.full_name(), + moving_rate=moving_rate, + quant_bits=activation_bits, + dtype=self._dtype, + quant_on_weight=False) + + self._act_preprocess = act_pre_layer( + ) if act_pre_layer is not None else None + self._weight_preprocess = weight_pre_layer( + ) if weight_pre_layer is not None else None + + def forward(self, input, output_size=None): + if self._act_preprocess is not None: + input = self._act_preprocess(input) + quant_input = self._fake_quant_input(input) + + weight = self.weight + if self._weight_preprocess is not None: + weight = self._weight_preprocess(self.weight) + quant_weight = self._fake_quant_weight(weight) + + if output_size is None: + output_padding = self._output_padding + else: + output_padding = 0 + + return F.conv2d_transpose( + quant_input, + quant_weight, + bias=self.bias, + padding=self._padding, + output_padding=output_padding, + stride=self._stride, + dilation=self._dilation, + groups=self._groups, + output_size=output_size, + data_format=self._data_format) + + class QuantizedLinear(layers.Layer): """ The computational logic of QuantizedLinear is the same with Linear. 
From ac21a6041534a459c3dce4a0a75f375cb50e6e17 Mon Sep 17 00:00:00 2001 From: XGZhang <46363693+XGZhang11@users.noreply.github.com> Date: Thu, 5 Aug 2021 11:17:43 +0000 Subject: [PATCH 02/27] fix quantization bugs --- .../contrib/slim/quantization/post_training_quantization.py | 2 ++ .../fluid/contrib/slim/quantization/quantization_pass.py | 4 ++++ 2 files changed, 6 insertions(+) diff --git a/python/paddle/fluid/contrib/slim/quantization/post_training_quantization.py b/python/paddle/fluid/contrib/slim/quantization/post_training_quantization.py index 5996e752c8c22d..5272d9f59903d7 100644 --- a/python/paddle/fluid/contrib/slim/quantization/post_training_quantization.py +++ b/python/paddle/fluid/contrib/slim/quantization/post_training_quantization.py @@ -578,6 +578,8 @@ def _sample_mse(self): var_tensor = _load_variable_data(self._scope, var_name) var_tensor = var_tensor.flatten() abs_max_value = float(np.max(np.abs(var_tensor))) + if abs_max_value == 0.0: + abs_max_value = 1e-8 s = 0.3 if var_name not in self._best_mse_loss: self._best_mse_loss[var_name] = float('inf') diff --git a/python/paddle/fluid/contrib/slim/quantization/quantization_pass.py b/python/paddle/fluid/contrib/slim/quantization/quantization_pass.py index b3b12a477e2a0a..857486b3fc46cc 100644 --- a/python/paddle/fluid/contrib/slim/quantization/quantization_pass.py +++ b/python/paddle/fluid/contrib/slim/quantization/quantization_pass.py @@ -1312,6 +1312,8 @@ def _insert_post_dequant_op(self, graph, op_node): assert self._is_float( scale_v), 'The scale of parameter %s is not a float.' 
% ( original_var_name) + if scale_v == 0.0: + scale_v = 1e-8 max_range *= param_range / scale_v else: max_range *= act_range @@ -1413,6 +1415,8 @@ def _clip(x, scale): x[:, i] = _clip(x[:, i], s) x[:, i] = np.round(x[:, i] / s * bnt) else: + if scale == 0.0: + scale = 1e-8 x = _clip(x, scale) x = np.round(x / scale * bnt) return x From 350048ef509d720d31782e607f571e8e0719e4f6 Mon Sep 17 00:00:00 2001 From: XGZhang <46363693+XGZhang11@users.noreply.github.com> Date: Sun, 8 Aug 2021 16:27:40 +0800 Subject: [PATCH 03/27] Update post_training_quantization.py --- .../contrib/slim/quantization/post_training_quantization.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/python/paddle/fluid/contrib/slim/quantization/post_training_quantization.py b/python/paddle/fluid/contrib/slim/quantization/post_training_quantization.py index 5272d9f59903d7..5996e752c8c22d 100644 --- a/python/paddle/fluid/contrib/slim/quantization/post_training_quantization.py +++ b/python/paddle/fluid/contrib/slim/quantization/post_training_quantization.py @@ -578,8 +578,6 @@ def _sample_mse(self): var_tensor = _load_variable_data(self._scope, var_name) var_tensor = var_tensor.flatten() abs_max_value = float(np.max(np.abs(var_tensor))) - if abs_max_value == 0.0: - abs_max_value = 1e-8 s = 0.3 if var_name not in self._best_mse_loss: self._best_mse_loss[var_name] = float('inf') From cdfa3fe87f6649a5083ae9c4a8475ae5d87ac98d Mon Sep 17 00:00:00 2001 From: XGZhang <46363693+XGZhang11@users.noreply.github.com> Date: Sun, 8 Aug 2021 16:28:51 +0800 Subject: [PATCH 04/27] Update quantization_pass.py --- .../fluid/contrib/slim/quantization/quantization_pass.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/python/paddle/fluid/contrib/slim/quantization/quantization_pass.py b/python/paddle/fluid/contrib/slim/quantization/quantization_pass.py index 857486b3fc46cc..b3b12a477e2a0a 100644 --- a/python/paddle/fluid/contrib/slim/quantization/quantization_pass.py +++ 
b/python/paddle/fluid/contrib/slim/quantization/quantization_pass.py @@ -1312,8 +1312,6 @@ def _insert_post_dequant_op(self, graph, op_node): assert self._is_float( scale_v), 'The scale of parameter %s is not a float.' % ( original_var_name) - if scale_v == 0.0: - scale_v = 1e-8 max_range *= param_range / scale_v else: max_range *= act_range @@ -1415,8 +1413,6 @@ def _clip(x, scale): x[:, i] = _clip(x[:, i], s) x[:, i] = np.round(x[:, i] / s * bnt) else: - if scale == 0.0: - scale = 1e-8 x = _clip(x, scale) x = np.round(x / scale * bnt) return x From 4b047da84ab21cbd0748aeeff1c8bc0a1dd4d376 Mon Sep 17 00:00:00 2001 From: XGZhang <46363693+XGZhang11@users.noreply.github.com> Date: Mon, 9 Aug 2021 08:28:11 +0000 Subject: [PATCH 05/27] update docs --- .../paddle/fluid/contrib/slim/quantization/imperative/qat.py | 4 ++-- .../fluid/contrib/slim/quantization/imperative/utils.py | 3 +-- python/paddle/nn/quant/quant_layers.py | 5 +++++ 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/python/paddle/fluid/contrib/slim/quantization/imperative/qat.py b/python/paddle/fluid/contrib/slim/quantization/imperative/qat.py index 32a3ebfe047030..aa3269b0f2541d 100644 --- a/python/paddle/fluid/contrib/slim/quantization/imperative/qat.py +++ b/python/paddle/fluid/contrib/slim/quantization/imperative/qat.py @@ -307,10 +307,10 @@ def __init__( def apply(self, model): """ Quantize the weights and activations to calculate for specific - layers in the dygraph model. + layers. Args: - model(fluid.dygraph.Layer): The target model which would + model(paddle.nn.Layer): The target model which would calculate the input quantization scale. 
Returns: diff --git a/python/paddle/fluid/contrib/slim/quantization/imperative/utils.py b/python/paddle/fluid/contrib/slim/quantization/imperative/utils.py index 5a98ac80549f18..009ce372b4f29c 100644 --- a/python/paddle/fluid/contrib/slim/quantization/imperative/utils.py +++ b/python/paddle/fluid/contrib/slim/quantization/imperative/utils.py @@ -47,7 +47,6 @@ } # Apply fake quant for the inputs of these layers -# TODO (jc): support paddle.nn.Conv2DTranspose fake_quant_input_layers = [ paddle.nn.Conv2D, paddle.nn.Linear, paddle.nn.Conv2DTranspose ] @@ -125,7 +124,7 @@ def find_parent_layer_and_sub_name(model, name): For example, if name is 'block_1/convbn_1/conv_1', the parent layer is 'block_1/convbn_1' and the sub_name is `conv_1`. Args: - model(fluid.dygraph.Layer): the model to be quantized. + model(paddle.nn.Layer): the model to be quantized. name(string): the name of a layer Returns: diff --git a/python/paddle/nn/quant/quant_layers.py b/python/paddle/nn/quant/quant_layers.py index cc98f73174b4b3..9d07febd955481 100644 --- a/python/paddle/nn/quant/quant_layers.py +++ b/python/paddle/nn/quant/quant_layers.py @@ -499,6 +499,11 @@ def __init__(self, act_pre_layer=None, weight_quant_layer=None, act_quant_layer=None): + r""" + Constructor. + + The arguments are the same as ImperativeQuantAware. 
+ """ super(QuantizedConv2DTranspose, self).__init__() # For Conv2DTranspose self._groups = getattr(layer, '_groups') From e5ea4eb4c2430653b47ebaa53c7c77f5bb3fe976 Mon Sep 17 00:00:00 2001 From: XGZhang <46363693+XGZhang11@users.noreply.github.com> Date: Mon, 9 Aug 2021 09:12:13 +0000 Subject: [PATCH 06/27] add tests for quantized_conv2d_transpose --- .../fluid/contrib/slim/tests/test_imperative_qat.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/python/paddle/fluid/contrib/slim/tests/test_imperative_qat.py b/python/paddle/fluid/contrib/slim/tests/test_imperative_qat.py index 14fa291ee077c6..2dc118aa7e1a9f 100644 --- a/python/paddle/fluid/contrib/slim/tests/test_imperative_qat.py +++ b/python/paddle/fluid/contrib/slim/tests/test_imperative_qat.py @@ -28,10 +28,10 @@ from paddle.fluid.optimizer import AdamOptimizer from paddle.fluid.contrib.slim.quantization import ImperativeQuantAware from paddle.fluid.dygraph.container import Sequential -from paddle.nn import Linear, Conv2D, Softmax +from paddle.nn import Linear, Conv2D, Softmax, Conv2DTranspose from paddle.fluid.log_helper import get_logger from paddle.fluid.dygraph.io import INFER_MODEL_SUFFIX, INFER_PARAMS_SUFFIX -from paddle.nn.quant.quant_layers import QuantizedConv2D +from paddle.nn.quant.quant_layers import QuantizedConv2D, QuantizedConv2DTranspose from imperative_test_utils import fix_model_dict, ImperativeLenet @@ -74,6 +74,11 @@ def test_qat(self): quant_conv1 = QuantizedConv2D(conv1) data = np.random.uniform(-1, 1, [10, 3, 32, 32]).astype('float32') quant_conv1(fluid.dygraph.to_variable(data)) + + conv_transpose = Conv2DTranspose(4, 6, (3, 3)) + quant_conv_transpose = QuantizedConv2DTranspose(conv_transpose) + x_var = paddle.uniform((2, 4, 8, 8), dtype='float32', min=-1., max=1.) 
+ quant_conv_transpose(x_var) seed = 1 np.random.seed(seed) From 3231853193666ca376e56745d4d98a0912b9f739 Mon Sep 17 00:00:00 2001 From: XGZhang <46363693+XGZhang11@users.noreply.github.com> Date: Mon, 9 Aug 2021 10:17:21 +0000 Subject: [PATCH 07/27] update codestyle --- .../paddle/fluid/contrib/slim/tests/test_imperative_qat.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/python/paddle/fluid/contrib/slim/tests/test_imperative_qat.py b/python/paddle/fluid/contrib/slim/tests/test_imperative_qat.py index 2dc118aa7e1a9f..bec486185ba171 100644 --- a/python/paddle/fluid/contrib/slim/tests/test_imperative_qat.py +++ b/python/paddle/fluid/contrib/slim/tests/test_imperative_qat.py @@ -74,10 +74,11 @@ def test_qat(self): quant_conv1 = QuantizedConv2D(conv1) data = np.random.uniform(-1, 1, [10, 3, 32, 32]).astype('float32') quant_conv1(fluid.dygraph.to_variable(data)) - + conv_transpose = Conv2DTranspose(4, 6, (3, 3)) quant_conv_transpose = QuantizedConv2DTranspose(conv_transpose) - x_var = paddle.uniform((2, 4, 8, 8), dtype='float32', min=-1., max=1.) + x_var = paddle.uniform( + (2, 4, 8, 8), dtype='float32', min=-1., max=1.) quant_conv_transpose(x_var) seed = 1 From da48df7ea923cbf86c03561dc57a312a578c9ba8 Mon Sep 17 00:00:00 2001 From: XGZhang <46363693+XGZhang11@users.noreply.github.com> Date: Mon, 9 Aug 2021 10:20:50 +0000 Subject: [PATCH 08/27] update docs --- .../paddle/fluid/contrib/slim/quantization/imperative/qat.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/paddle/fluid/contrib/slim/quantization/imperative/qat.py b/python/paddle/fluid/contrib/slim/quantization/imperative/qat.py index aa3269b0f2541d..8ebad9974ace4f 100644 --- a/python/paddle/fluid/contrib/slim/quantization/imperative/qat.py +++ b/python/paddle/fluid/contrib/slim/quantization/imperative/qat.py @@ -213,7 +213,7 @@ def quantize(self, model): the out_scale value of outputs would be calculated. 
Args: - model(fluid.dygraph.Layer): the model to be quantized. + model(paddle.nn.Layer): the model to be quantized. Returns: None """ @@ -368,7 +368,7 @@ def apply(self, model): output scales for specific layers in the dygraph model. Args: - model(fluid.dygraph.Layer): The target model which would be + model(paddle.nn.Layer): The target model which would be calculate the output quantization scale. Returns: From 43976beaba3c1381e03e955cb1a50f5f442efdb0 Mon Sep 17 00:00:00 2001 From: XGZhang <46363693+XGZhang11@users.noreply.github.com> Date: Sat, 14 Aug 2021 12:11:47 +0000 Subject: [PATCH 09/27] update tests and conv2dtranspose layer --- .../contrib/slim/tests/test_imperative_qat.py | 2 +- .../tests/test_imperative_qat_user_defined.py | 20 +++++++++++++++++++ python/paddle/nn/quant/quant_layers.py | 15 ++++++++++++++ 3 files changed, 36 insertions(+), 1 deletion(-) diff --git a/python/paddle/fluid/contrib/slim/tests/test_imperative_qat.py b/python/paddle/fluid/contrib/slim/tests/test_imperative_qat.py index bec486185ba171..677ccb52e242cf 100644 --- a/python/paddle/fluid/contrib/slim/tests/test_imperative_qat.py +++ b/python/paddle/fluid/contrib/slim/tests/test_imperative_qat.py @@ -78,7 +78,7 @@ def test_qat(self): conv_transpose = Conv2DTranspose(4, 6, (3, 3)) quant_conv_transpose = QuantizedConv2DTranspose(conv_transpose) x_var = paddle.uniform( - (2, 4, 8, 8), dtype='float32', min=-1., max=1.) 
+ (2, 4, 8, 8), dtype='float32', min=-1.0, max=1.0) quant_conv_transpose(x_var) seed = 1 diff --git a/python/paddle/fluid/contrib/slim/tests/test_imperative_qat_user_defined.py b/python/paddle/fluid/contrib/slim/tests/test_imperative_qat_user_defined.py index 621213beb31cd7..186789ff03bad0 100644 --- a/python/paddle/fluid/contrib/slim/tests/test_imperative_qat_user_defined.py +++ b/python/paddle/fluid/contrib/slim/tests/test_imperative_qat_user_defined.py @@ -28,6 +28,7 @@ from paddle.fluid.dygraph import Conv2D from paddle.fluid.dygraph import Pool2D from paddle.fluid.dygraph import Linear +from paddle.nn.quant.quant_layers import QuantizedConv2DTranspose from paddle.fluid.log_helper import get_logger os.environ["CPU_NUM"] = "1" @@ -100,6 +101,19 @@ def dequantize(x, lower_bound, delta, interval): return x +class ModelForConv2dT(nn.Layer): + def __init__(self, num_classes=10): + super(ModelForConv2dT, self).__init__() + self.features = nn.Conv2DTranspose(4, 6, (3, 3)) + self.fc = Linear(input_dim=600, output_dim=num_classes) + + def forward(self, inputs): + x = self.features(inputs) + x = paddle.flatten(x, 1) + x = self.fc(x) + return x + + class ImperativeLenet(paddle.nn.Layer): def __init__(self, num_classes=10, classifier_activation='softmax'): super(ImperativeLenet, self).__init__() @@ -168,6 +182,12 @@ def test_quant_aware_training(self): imperative_qat.quantize(lenet) adam = Adam(learning_rate=0.001, parameters=lenet.parameters()) dynamic_loss_rec = [] + #for CI coverage + conv_transpose = ModelForConv2dT() + imperative_qat.quantize(conv_transpose) + x_var = paddle.uniform( + (2, 4, 8, 8), dtype='float32', min=-1.0, max=1.0) + conv_transpose(x_var) def train(model): adam = Adam(learning_rate=0.001, parameters=model.parameters()) diff --git a/python/paddle/nn/quant/quant_layers.py b/python/paddle/nn/quant/quant_layers.py index 9d07febd955481..040b04f5e7bf1e 100644 --- a/python/paddle/nn/quant/quant_layers.py +++ b/python/paddle/nn/quant/quant_layers.py @@ 
-486,6 +486,21 @@ class QuantizedConv2DTranspose(layers.Layer): """ The computational logic of QuantizedConv2DTranspose is the same with Conv2DTranspose. The only difference is that its inputs are all fake quantized. + + Examples: + .. code-block:: python + import paddle + import paddle.nn as nn + from paddle.nn.quant.quant_layers import QuantizedConv2DTranspose + x_var = paddle.uniform((2, 4, 8, 8), dtype='float32', min=-1., max=1.) + conv = nn.Conv2DTranspose(4, 6, (3, 3)) + conv_quantized = QuantizedConv2DTranspose(conv) + y_quantized = conv_quantized(x_var) + y_var = conv(x_var) + y_quantized_np = y_quantized.numpy() + y_np = y_var.numpy() + print(y_np.shape, y_quantized_np.shape) + # (2, 6, 10, 10), (2, 6, 10, 10) """ def __init__(self, From 8ec36b6fe5ffbf6a3c6d253652cf2e1031804b59 Mon Sep 17 00:00:00 2001 From: XGZhang <46363693+XGZhang11@users.noreply.github.com> Date: Sat, 14 Aug 2021 18:12:44 +0000 Subject: [PATCH 10/27] update quant tests --- .../contrib/slim/tests/test_imperative_qat_user_defined.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/python/paddle/fluid/contrib/slim/tests/test_imperative_qat_user_defined.py b/python/paddle/fluid/contrib/slim/tests/test_imperative_qat_user_defined.py index 186789ff03bad0..270e8ee566ab57 100644 --- a/python/paddle/fluid/contrib/slim/tests/test_imperative_qat_user_defined.py +++ b/python/paddle/fluid/contrib/slim/tests/test_imperative_qat_user_defined.py @@ -106,7 +106,7 @@ def __init__(self, num_classes=10): super(ModelForConv2dT, self).__init__() self.features = nn.Conv2DTranspose(4, 6, (3, 3)) self.fc = Linear(input_dim=600, output_dim=num_classes) - + def forward(self, inputs): x = self.features(inputs) x = paddle.flatten(x, 1) @@ -183,10 +183,9 @@ def test_quant_aware_training(self): adam = Adam(learning_rate=0.001, parameters=lenet.parameters()) dynamic_loss_rec = [] #for CI coverage - conv_transpose = ModelForConv2dT() + conv_transpose = ModelForConv2dT() 
imperative_qat.quantize(conv_transpose) - x_var = paddle.uniform( - (2, 4, 8, 8), dtype='float32', min=-1.0, max=1.0) + x_var = paddle.uniform((2, 4, 8, 8), dtype='float32', min=-1., max=1.) conv_transpose(x_var) def train(model): From fc74ab00f6333ef6b8cc5c946efa26c2c449d731 Mon Sep 17 00:00:00 2001 From: XGZhang <46363693+XGZhang11@users.noreply.github.com> Date: Mon, 16 Aug 2021 04:35:08 +0000 Subject: [PATCH 11/27] update sampcd_processor for tests --- tools/sampcd_processor.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/sampcd_processor.py b/tools/sampcd_processor.py index 3ec12c11a7045a..d8cb70c9dd107b 100644 --- a/tools/sampcd_processor.py +++ b/tools/sampcd_processor.py @@ -440,6 +440,7 @@ def get_filenames(full_test=False): ''' global whl_error import paddle + import paddle.fluid.contrib.slim.quantization whl_error = [] if full_test: get_full_api_from_pr_spec() From ccd16757c122d20b4a28a6622bf2ef86bb1333a5 Mon Sep 17 00:00:00 2001 From: XGZhang <46363693+XGZhang11@users.noreply.github.com> Date: Mon, 16 Aug 2021 05:58:51 +0000 Subject: [PATCH 12/27] update code examples --- .../slim/quantization/imperative/qat.py | 35 +++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/python/paddle/fluid/contrib/slim/quantization/imperative/qat.py b/python/paddle/fluid/contrib/slim/quantization/imperative/qat.py index 8ebad9974ace4f..6208b43c9e9e48 100644 --- a/python/paddle/fluid/contrib/slim/quantization/imperative/qat.py +++ b/python/paddle/fluid/contrib/slim/quantization/imperative/qat.py @@ -216,6 +216,41 @@ def quantize(self, model): model(paddle.nn.Layer): the model to be quantized. Returns: None + + Examples: + .. code-block:: python + + import paddle + from paddle.fluid.contrib.slim.quantization \ + import ImperativeQuantAware + + class ImperativeModel(paddle.nn.Layer): + def __init__(self): + super(ImperativeModel, self).__init__() + # self.linear_0 would skip the quantization. 
+ self.linear_0 = paddle.nn.Linear(784, 400) + self.linear_0.skip_quant = True + + # self.linear_1 would not skip the quantization. + self.linear_1 = paddle.nn.Linear(400, 10) + self.linear_1.skip_quant = False + + def forward(self, inputs): + x = self.linear_0(inputs) + x = self.linear_1(inputs) + return x + + model = ImperativeModel() + imperative_qat = ImperativeQuantAware( + weight_quantize_type='abs_max', + activation_quantize_type='moving_average_abs_max') + + # Add the fake quant logical. + # The original model will be rewrite. + # + # There is only one Layer(self.linear1) would be added the + # fake quant logical. + imperative_qat.quantize(model) """ assert isinstance(model, dygraph.Layer), \ "The model must be the instance of dygraph.Layer." From ff95292cc27f351d2a603078a36c7d2c663e8258 Mon Sep 17 00:00:00 2001 From: XGZhang <46363693+XGZhang11@users.noreply.github.com> Date: Thu, 19 Aug 2021 06:06:25 +0000 Subject: [PATCH 13/27] support fuse for eval --- .../quantization/imperative/fuse_utils.py | 158 ++++++++++++++++++ .../slim/quantization/imperative/ptq.py | 10 +- 2 files changed, 166 insertions(+), 2 deletions(-) create mode 100644 python/paddle/fluid/contrib/slim/quantization/imperative/fuse_utils.py diff --git a/python/paddle/fluid/contrib/slim/quantization/imperative/fuse_utils.py b/python/paddle/fluid/contrib/slim/quantization/imperative/fuse_utils.py new file mode 100644 index 00000000000000..9b8b0cc8c79ee7 --- /dev/null +++ b/python/paddle/fluid/contrib/slim/quantization/imperative/fuse_utils.py @@ -0,0 +1,158 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import copy +import paddle +import paddle.nn as nn +from . import utils + + +class Identity(nn.Layer): + '''a layer to replace bn or relu layers''' + + def __init__(self, *args, **kwargs): + super(Identity, self).__init__() + + def forward(self, input): + return input + + +def fuse_layers(model, layers_to_fuse, inplace=False): + '''fuse layers in layers_to_fuse''' + if inplace == False: + model = copy.deepcopy(model) + for layers in layers_to_fuse: + _fuse_layers(model, layers) + return model + + +def _fuse_layers(model, layers_list): + '''fuse all the layers in layers_list''' + lay_list = [] + for layer_name in layers_list: + parent_layer, sub_name = utils.find_parent_layer_and_sub_name( + model, layer_name) + lay_list.append(getattr(parent_layer, sub_name)) + new_layers = fuse_func(lay_list) + for i, item in enumerate(layers_list): + parent_layer, sub_name = utils.find_parent_layer_and_sub_name(model, + item) + setattr(parent_layer, sub_name, new_layers[i]) + + +def fuse_func(lay_list): + '''choose the fuser method and fuse layers''' + types = tuple(type(m) for m in lay_list) + fuser_method = layer_list_to_fuse_method.get(types, None) + new_layers = [None] * len(lay_list) + fused = fuser_method(*lay_list) + for handle_id, pre_hook_fn in lay_list[0]._forward_pre_hooks.items(): + fused.register_forward_pre_hook(pre_hook_fn) + del lay_list[0]._forward_pre_hooks[handle_id] + for handle_id, hook_fn in lay_list[-1]._forward_post_hooks.items(): + fused.register_forward_post_hook(hook_fn) + del lay_list[-1]._forward_post_hooks[handle_id] + new_layers[0] = 
fused + for i in range(1, len(lay_list)): + identity = Identity() + identity.training = lay_list[0].training + new_layers[i] = identity + return new_layers + + +def fuse_conv_bn(conv, bn): + '''fuse conv and bn for train or eval''' + assert(conv.training == bn.training),\ + "Conv and BN both must be in the same mode (train or eval)." + if conv.training: + assert bn.num_channels == conv.out_channels, 'Output channel of Conv2d must match num_features of BatchNorm2d' + if fused_module_class is not None: + raise NotImplementedError + else: + return fuse_conv_bn_eval(conv, bn) + + +def fuse_conv_bn_eval(conv, bn): + '''fuse conv and bn for eval''' + assert (not (conv.training or bn.training)), "Fusion only for eval!" + fused_conv = copy.deepcopy(conv) + + fused_weight, fused_bias = fuse_conv_bn_weights( + fused_conv.weight, fused_conv.bias, bn._mean, bn._variance, bn._epsilon, + bn.weight, bn.bias) + fused_conv.weight.set_value(fused_weight) + if fused_conv.bias is None: + fused_conv.bias = paddle.create_parameter( + shape=[fused_conv._out_channels], is_bias=True, dtype='float32') + fused_conv.bias.set_value(fused_bias) + return fused_conv + + +def fuse_conv_bn_weights(conv_w, conv_b, bn_rm, bn_rv, bn_eps, bn_w, bn_b): + '''fuse weights and bias of conv and bn''' + if conv_b is None: + conv_b = paddle.zeros_like(bn_rm) + if bn_w is None: + bn_w = paddle.ones_like(bn_rm) + if bn_b is None: + bn_b = paddle.zeros_like(bn_rm) + bn_var_rsqrt = paddle.rsqrt(bn_rv + bn_eps) + conv_w = conv_w * + (bn_w * bn_var_rsqrt).reshape([-1] + [1] * (len(conv_w.shape) - 1)) + conv_b = (conv_b - bn_rm) * bn_var_rsqrt * bn_w + bn_b + return conv_w, conv_b + + +def fuse_linear_bn(linear, bn): + '''fuse linear and bn''' + assert (linear.training == bn.training),\ + "Conv and BN both must be in the same mode (train or eval)." 
+ if linear.training: + assert bn.num_channels == conv.out_channels, 'Output channel of Conv2d must match num_features of BatchNorm2d' + if fused_module_class is not None: + raise NotImplementedError + else: + return fuse_linear_bn_eval(linear, bn) + + +def fuse_linear_bn_eval(linear, bn): + '''fuse linear and bn for eval''' + assert(not (linear.training or bn.training)), "Fusion only for eval!" + fused_linear = copy.deepcopy(linear) + + fused_weight, fused_bias = fuse_linear_bn_weights( + fused_linear.weight, fused_linear.bias, bn._mean, bn._variance, bn._epsilon, + bn.weight, bn.bias) + fused_linear.weight.set_value(fused_weight) + if fused_linear.bias is None: + fused_linear.bias = paddle.create_parameter( + shape=[fused_linear.weight.shape[1]], is_bias=True, dtype='float32') + fused_linear.bias.set_value(fused_bias) + return fused_linear + + +def fuse_linear_bn_weights(linear_w, linear_b, bn_rm, bn_rv, bn_eps, bn_w, bn_b): + '''fuse weights and bias of linear and bn''' + if linear_b is None: + linear_b = torch.zeros_like(bn_rm) + bn_scale = bn_w * torch.rsqrt(bn_rv + bn_eps) + fused_w = linear_w * bn_scale.unsqueeze(-1) + fused_b = (linear_b - bn_rm) * bn_scale + bn_b + return fused_w, fused_b + + +layer_list_to_fuse_method = { +(nn.Conv2D, nn.BatchNorm2D): fuse_conv_bn, +(nn.Linear, nn.BatchNorm): fuse_linear_bn, +} diff --git a/python/paddle/fluid/contrib/slim/quantization/imperative/ptq.py b/python/paddle/fluid/contrib/slim/quantization/imperative/ptq.py index 3a536ab1d20376..e70b9099dc455a 100644 --- a/python/paddle/fluid/contrib/slim/quantization/imperative/ptq.py +++ b/python/paddle/fluid/contrib/slim/quantization/imperative/ptq.py @@ -22,6 +22,7 @@ from paddle.fluid.log_helper import get_logger from paddle.fluid.dygraph.io import INFER_MODEL_SUFFIX, INFER_PARAMS_SUFFIX +from . import fuse_utils from . import utils from . import ptq_hooks from . 
import ptq_config @@ -55,7 +56,7 @@ def __init__(self, quant_config=ptq_config.default_ptq_config): self._quant_config = quant_config - def quantize(self, model, inplace=False): + def quantize(self, model, inplace=False, fuse=False, fuse_list=None): """ Add quant config and hook to the target layer. @@ -63,6 +64,10 @@ def quantize(self, model, inplace=False): model(paddle.nn.Layer): The model to be quantized. inplace(bool): Whether apply quantization to the input model. Default: False. + fuse(bool): Whether fuse layers. + Default: False. + fuse_list(list): The layers to fuse. + Default: None. Returns: quantized_model(paddle.nn.Layer): The quantized model. """ @@ -71,7 +76,8 @@ def quantize(self, model, inplace=False): if not inplace: model = copy.deepcopy(model) - + if fuse: + model = fuse_utils.fuse_layers(model, fuse_list) for name, layer in model.named_sublayers(): if PTQRegistry.is_supported_layer(layer) \ and utils.is_leaf_layer(layer) \ From 4140a486f0393e731f4fbfe56425ce7b6480b381 Mon Sep 17 00:00:00 2001 From: XGZhang <46363693+XGZhang11@users.noreply.github.com> Date: Thu, 19 Aug 2021 06:25:49 +0000 Subject: [PATCH 14/27] add tests for fuse --- python/paddle/fluid/contrib/slim/tests/test_imperative_ptq.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/paddle/fluid/contrib/slim/tests/test_imperative_ptq.py b/python/paddle/fluid/contrib/slim/tests/test_imperative_ptq.py index 575a91642a7e76..6dae73b394abce 100644 --- a/python/paddle/fluid/contrib/slim/tests/test_imperative_ptq.py +++ b/python/paddle/fluid/contrib/slim/tests/test_imperative_ptq.py @@ -177,9 +177,9 @@ def test_ptq(self): model = ImperativeLenet() model_state_dict = paddle.load(params_path) model.set_state_dict(model_state_dict) - # Quantize, calibrate and save - quant_model = self.ptq.quantize(model) + f_l = [['features.0', 'features.1']] + quant_model = self.ptq.quantize(model, fuse=True, fuse_list=f_l) before_acc_top1 = self.model_test(quant_model, 
self.batch_num, self.batch_size) From b75e4cfbeb66878054b610bbb136d5c85354ce1f Mon Sep 17 00:00:00 2001 From: XGZhang <46363693+XGZhang11@users.noreply.github.com> Date: Thu, 19 Aug 2021 07:16:08 +0000 Subject: [PATCH 15/27] support fuse conv/linear+bn --- .../slim/quantization/imperative/fuse_utils.py | 18 ++++++++---------- .../slim/quantization/imperative/ptq.py | 2 +- 2 files changed, 9 insertions(+), 11 deletions(-) diff --git a/python/paddle/fluid/contrib/slim/quantization/imperative/fuse_utils.py b/python/paddle/fluid/contrib/slim/quantization/imperative/fuse_utils.py index 9b8b0cc8c79ee7..098a2571a763ad 100644 --- a/python/paddle/fluid/contrib/slim/quantization/imperative/fuse_utils.py +++ b/python/paddle/fluid/contrib/slim/quantization/imperative/fuse_utils.py @@ -76,9 +76,8 @@ def fuse_conv_bn(conv, bn): assert(conv.training == bn.training),\ "Conv and BN both must be in the same mode (train or eval)." if conv.training: - assert bn.num_channels == conv.out_channels, 'Output channel of Conv2d must match num_features of BatchNorm2d' - if fused_module_class is not None: - raise NotImplementedError + assert bn._num_features == conv._out_channels, 'Output channel of Conv2d must match num_features of BatchNorm2d' + raise NotImplementedError else: return fuse_conv_bn_eval(conv, bn) @@ -108,7 +107,7 @@ def fuse_conv_bn_weights(conv_w, conv_b, bn_rm, bn_rv, bn_eps, bn_w, bn_b): if bn_b is None: bn_b = paddle.zeros_like(bn_rm) bn_var_rsqrt = paddle.rsqrt(bn_rv + bn_eps) - conv_w = conv_w * + conv_w = conv_w * \ (bn_w * bn_var_rsqrt).reshape([-1] + [1] * (len(conv_w.shape) - 1)) conv_b = (conv_b - bn_rm) * bn_var_rsqrt * bn_w + bn_b return conv_w, conv_b @@ -117,11 +116,10 @@ def fuse_conv_bn_weights(conv_w, conv_b, bn_rm, bn_rv, bn_eps, bn_w, bn_b): def fuse_linear_bn(linear, bn): '''fuse linear and bn''' assert (linear.training == bn.training),\ - "Conv and BN both must be in the same mode (train or eval)." 
+ "Linear and BN both must be in the same mode (train or eval)." if linear.training: - assert bn.num_channels == conv.out_channels, 'Output channel of Conv2d must match num_features of BatchNorm2d' - if fused_module_class is not None: - raise NotImplementedError + assert bn._num_features == linear.weight.shape[1], 'Output channel of Linear must match num_features of BatchNorm' + raise NotImplementedError else: return fuse_linear_bn_eval(linear, bn) @@ -145,8 +143,8 @@ def fuse_linear_bn_eval(linear, bn): def fuse_linear_bn_weights(linear_w, linear_b, bn_rm, bn_rv, bn_eps, bn_w, bn_b): '''fuse weights and bias of linear and bn''' if linear_b is None: - linear_b = torch.zeros_like(bn_rm) - bn_scale = bn_w * torch.rsqrt(bn_rv + bn_eps) + linear_b = paddle.zeros_like(bn_rm) + bn_scale = bn_w * paddle.rsqrt(bn_rv + bn_eps) fused_w = linear_w * bn_scale.unsqueeze(-1) fused_b = (linear_b - bn_rm) * bn_scale + bn_b return fused_w, fused_b diff --git a/python/paddle/fluid/contrib/slim/quantization/imperative/ptq.py b/python/paddle/fluid/contrib/slim/quantization/imperative/ptq.py index e70b9099dc455a..fd0e263fddd417 100644 --- a/python/paddle/fluid/contrib/slim/quantization/imperative/ptq.py +++ b/python/paddle/fluid/contrib/slim/quantization/imperative/ptq.py @@ -73,10 +73,10 @@ def quantize(self, model, inplace=False, fuse=False, fuse_list=None): """ assert isinstance(model, paddle.nn.Layer), \ "The model must be the instance of paddle.nn.Layer." 
- if not inplace: model = copy.deepcopy(model) if fuse: + model.eval() model = fuse_utils.fuse_layers(model, fuse_list) for name, layer in model.named_sublayers(): if PTQRegistry.is_supported_layer(layer) \ From efb1acdd966d12ca27b3ad0ec206dfe72fad4a5e Mon Sep 17 00:00:00 2001 From: XGZhang <46363693+XGZhang11@users.noreply.github.com> Date: Fri, 20 Aug 2021 06:45:05 +0000 Subject: [PATCH 16/27] update test --- .../quantization/imperative/fuse_utils.py | 18 +++--- .../contrib/slim/tests/test_imperative_ptq.py | 56 ++++++++++++++++++- 2 files changed, 65 insertions(+), 9 deletions(-) diff --git a/python/paddle/fluid/contrib/slim/quantization/imperative/fuse_utils.py b/python/paddle/fluid/contrib/slim/quantization/imperative/fuse_utils.py index 098a2571a763ad..4dba0224dc48e0 100644 --- a/python/paddle/fluid/contrib/slim/quantization/imperative/fuse_utils.py +++ b/python/paddle/fluid/contrib/slim/quantization/imperative/fuse_utils.py @@ -47,7 +47,7 @@ def _fuse_layers(model, layers_list): new_layers = fuse_func(lay_list) for i, item in enumerate(layers_list): parent_layer, sub_name = utils.find_parent_layer_and_sub_name(model, - item) + item) setattr(parent_layer, sub_name, new_layers[i]) @@ -118,7 +118,8 @@ def fuse_linear_bn(linear, bn): assert (linear.training == bn.training),\ "Linear and BN both must be in the same mode (train or eval)." if linear.training: - assert bn._num_features == linear.weight.shape[1], 'Output channel of Linear must match num_features of BatchNorm' + assert bn._num_features == linear.weight.shape[ + 1], 'Output channel of Linear must match num_features of BatchNorm' raise NotImplementedError else: return fuse_linear_bn_eval(linear, bn) @@ -126,12 +127,12 @@ def fuse_linear_bn(linear, bn): def fuse_linear_bn_eval(linear, bn): '''fuse linear and bn for eval''' - assert(not (linear.training or bn.training)), "Fusion only for eval!" + assert (not (linear.training or bn.training)), "Fusion only for eval!" 
fused_linear = copy.deepcopy(linear) fused_weight, fused_bias = fuse_linear_bn_weights( - fused_linear.weight, fused_linear.bias, bn._mean, bn._variance, bn._epsilon, - bn.weight, bn.bias) + fused_linear.weight, fused_linear.bias, bn._mean, bn._variance, + bn._epsilon, bn.weight, bn.bias) fused_linear.weight.set_value(fused_weight) if fused_linear.bias is None: fused_linear.bias = paddle.create_parameter( @@ -140,7 +141,8 @@ def fuse_linear_bn_eval(linear, bn): return fused_linear -def fuse_linear_bn_weights(linear_w, linear_b, bn_rm, bn_rv, bn_eps, bn_w, bn_b): +def fuse_linear_bn_weights(linear_w, linear_b, bn_rm, bn_rv, bn_eps, bn_w, + bn_b): '''fuse weights and bias of linear and bn''' if linear_b is None: linear_b = paddle.zeros_like(bn_rm) @@ -151,6 +153,6 @@ def fuse_linear_bn_weights(linear_w, linear_b, bn_rm, bn_rv, bn_eps, bn_w, bn_b) layer_list_to_fuse_method = { -(nn.Conv2D, nn.BatchNorm2D): fuse_conv_bn, -(nn.Linear, nn.BatchNorm): fuse_linear_bn, + (nn.Conv2D, nn.BatchNorm2D): fuse_conv_bn, + (nn.Linear, nn.BatchNorm): fuse_linear_bn, } diff --git a/python/paddle/fluid/contrib/slim/tests/test_imperative_ptq.py b/python/paddle/fluid/contrib/slim/tests/test_imperative_ptq.py index 6dae73b394abce..353a6cd98ffe02 100644 --- a/python/paddle/fluid/contrib/slim/tests/test_imperative_ptq.py +++ b/python/paddle/fluid/contrib/slim/tests/test_imperative_ptq.py @@ -178,8 +178,62 @@ def test_ptq(self): model_state_dict = paddle.load(params_path) model.set_state_dict(model_state_dict) # Quantize, calibrate and save - f_l = [['features.0', 'features.1']] + quant_model = self.ptq.quantize(model) + before_acc_top1 = self.model_test(quant_model, self.batch_num, + self.batch_size) + + input_spec = [ + paddle.static.InputSpec( + shape=[None, 1, 28, 28], dtype='float32') + ] + self.ptq.save_quantized_model( + model=quant_model, path=self.save_path, input_spec=input_spec) + print('Quantized model saved in {%s}' % self.save_path) + + after_acc_top1 = 
self.model_test(quant_model, self.batch_num, + self.batch_size) + + paddle.enable_static() + infer_acc_top1 = self.program_test(self.save_path, self.batch_num, + self.batch_size) + paddle.disable_static() + + # Check + print('Before converted acc_top1: %s' % before_acc_top1) + print('After converted acc_top1: %s' % after_acc_top1) + print('Infer acc_top1: %s' % infer_acc_top1) + + self.assertTrue( + after_acc_top1 >= self.eval_acc_top1, + msg="The test acc {%f} is less than {%f}." % + (after_acc_top1, self.eval_acc_top1)) + self.assertTrue( + infer_acc_top1 >= after_acc_top1, + msg='The acc is lower after converting model.') + + end_time = time.time() + print("total time: %ss \n" % (end_time - start_time)) + + +class TestImperativePTQfuse(TestImperativePTQ): + def test_ptq(self): + start_time = time.time() + + self.set_vars() + + # Load model + params_path = self.download_model(self.lenet_url, self.lenet_md5, + "lenet") + params_path += "/lenet_pretrained/lenet.pdparams" + + model = ImperativeLenet() + model_state_dict = paddle.load(params_path) + model.set_state_dict(model_state_dict) + # Quantize, calibrate and save + f_l = [['features.0', 'features.1'], ['features.4', 'features.5']] quant_model = self.ptq.quantize(model, fuse=True, fuse_list=f_l) + for name, layer in quant_model.named_sublayers(): + print(name, layer) before_acc_top1 = self.model_test(quant_model, self.batch_num, self.batch_size) From 8870cc3631de2cb61e1d3030b091acba2ebe160c Mon Sep 17 00:00:00 2001 From: XGZhang <46363693+XGZhang11@users.noreply.github.com> Date: Fri, 20 Aug 2021 10:55:50 +0000 Subject: [PATCH 17/27] update test --- .../quantization/imperative/fuse_utils.py | 2 +- .../contrib/slim/tests/test_imperative_ptq.py | 21 ++++++++++++++++++- 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/python/paddle/fluid/contrib/slim/quantization/imperative/fuse_utils.py b/python/paddle/fluid/contrib/slim/quantization/imperative/fuse_utils.py index 4dba0224dc48e0..4e4c38120be8c2 
100644 --- a/python/paddle/fluid/contrib/slim/quantization/imperative/fuse_utils.py +++ b/python/paddle/fluid/contrib/slim/quantization/imperative/fuse_utils.py @@ -154,5 +154,5 @@ def fuse_linear_bn_weights(linear_w, linear_b, bn_rm, bn_rv, bn_eps, bn_w, layer_list_to_fuse_method = { (nn.Conv2D, nn.BatchNorm2D): fuse_conv_bn, - (nn.Linear, nn.BatchNorm): fuse_linear_bn, + (nn.Linear, nn.BatchNorm1D): fuse_linear_bn, } diff --git a/python/paddle/fluid/contrib/slim/tests/test_imperative_ptq.py b/python/paddle/fluid/contrib/slim/tests/test_imperative_ptq.py index 353a6cd98ffe02..033893b7e41bb9 100644 --- a/python/paddle/fluid/contrib/slim/tests/test_imperative_ptq.py +++ b/python/paddle/fluid/contrib/slim/tests/test_imperative_ptq.py @@ -23,18 +23,37 @@ import copy import logging +import paddle.nn as nn import paddle import paddle.fluid as fluid from paddle.fluid.contrib.slim.quantization import * from paddle.fluid.log_helper import get_logger from paddle.dataset.common import download -from imperative_test_utils import fix_model_dict, ImperativeLenet +from imperative_test_utils import fix_model_dict, ImperativeLenet, ImperativeLinearBn _logger = get_logger( __name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s') +class TestFuseLinearBn(unittest.TestCase): + """ + """ + def test_fuse(self): + model = ImperativeLinearBn() + inputs = paddle.randn((3, 10), dtype="float32") + config = PTQConfig(AbsmaxQuantizer(), AbsmaxQuantizer()) + ptq = ImperativePTQ(config) + f_l = [['linear', 'bn']] + quant_model = ptq.quantize(model, fuse=True, fuse_list=f_l) + for name, layer in quant_model.named_sublayers(): + print(name, layer) + out = model(inputs) + out_quant = quant_model(inputs) + cos_sim_func = nn.CosineSimilarity(axis=0) + print(cos_sim_func(out.flatten(), out_quant.flatten())) + + class TestImperativePTQ(unittest.TestCase): """ """ From eca29a66d788afa0b38e61092b7620d6262e11a5 Mon Sep 17 00:00:00 2001 From: XGZhang 
<46363693+XGZhang11@users.noreply.github.com> Date: Fri, 20 Aug 2021 11:34:18 +0000 Subject: [PATCH 18/27] update test --- .../paddle/fluid/contrib/slim/tests/test_imperative_ptq.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/python/paddle/fluid/contrib/slim/tests/test_imperative_ptq.py b/python/paddle/fluid/contrib/slim/tests/test_imperative_ptq.py index 033893b7e41bb9..b2ec0a8027c010 100644 --- a/python/paddle/fluid/contrib/slim/tests/test_imperative_ptq.py +++ b/python/paddle/fluid/contrib/slim/tests/test_imperative_ptq.py @@ -39,6 +39,7 @@ class TestFuseLinearBn(unittest.TestCase): """ """ + def test_fuse(self): model = ImperativeLinearBn() inputs = paddle.randn((3, 10), dtype="float32") @@ -47,13 +48,13 @@ def test_fuse(self): f_l = [['linear', 'bn']] quant_model = ptq.quantize(model, fuse=True, fuse_list=f_l) for name, layer in quant_model.named_sublayers(): - print(name, layer) + print(name, layer) out = model(inputs) out_quant = quant_model(inputs) cos_sim_func = nn.CosineSimilarity(axis=0) print(cos_sim_func(out.flatten(), out_quant.flatten())) - - + + class TestImperativePTQ(unittest.TestCase): """ """ From 323cc3d7290ea38b2ba61a144a096e4633eb8b40 Mon Sep 17 00:00:00 2001 From: XGZhang <46363693+XGZhang11@users.noreply.github.com> Date: Fri, 20 Aug 2021 11:36:25 +0000 Subject: [PATCH 19/27] add test --- .../slim/tests/imperative_test_utils.py | 33 ++++++++++++++++--- 1 file changed, 29 insertions(+), 4 deletions(-) diff --git a/python/paddle/fluid/contrib/slim/tests/imperative_test_utils.py b/python/paddle/fluid/contrib/slim/tests/imperative_test_utils.py index 5c91f01d0bdda4..65b54b73d61e69 100644 --- a/python/paddle/fluid/contrib/slim/tests/imperative_test_utils.py +++ b/python/paddle/fluid/contrib/slim/tests/imperative_test_utils.py @@ -16,10 +16,7 @@ import paddle import paddle.fluid as fluid -from paddle.fluid import core -from paddle.fluid.dygraph.container import Sequential -from paddle.nn import ReLU, ReLU6, LeakyReLU, 
Sigmoid, Softmax, PReLU -from paddle.nn import Linear, Conv2D, Softmax, BatchNorm2D, MaxPool2D +from paddle.nn import Linear, BatchNorm1D from paddle.fluid.log_helper import get_logger @@ -224,3 +221,31 @@ def forward(self, inputs): x = self.softmax_0(x) return x + + +class ImperativeLinearBn(fluid.dygraph.Layer): + def __init__(self): + super(ImperativeLinearBn, self).__init__() + + fc_w_attr = paddle.ParamAttr( + name="fc_weight", + initializer=paddle.nn.initializer.Constant(value=0.5)) + fc_b_attr = paddle.ParamAttr( + name="fc_bias", + initializer=paddle.nn.initializer.Constant(value=1.0)) + bn_w_attr = paddle.ParamAttr( + name="bn_weight", + initializer=paddle.nn.initializer.Constant(value=0.5)) + + self.linear = Linear( + in_features=10, + out_features=10, + weight_attr=fc_w_attr, + bias_attr=fc_b_attr) + self.bn = BatchNorm1D(10, weight_attr=bn_w_attr) + + def forward(self, inputs): + x = self.linear(inputs) + x = self.bn(x) + + return x From 7a34ddba28bc48ed959fc6d146f912e7d19b18af Mon Sep 17 00:00:00 2001 From: XGZhang <46363693+XGZhang11@users.noreply.github.com> Date: Fri, 20 Aug 2021 11:42:12 +0000 Subject: [PATCH 20/27] add test --- .../fluid/contrib/slim/tests/imperative_test_utils.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/python/paddle/fluid/contrib/slim/tests/imperative_test_utils.py b/python/paddle/fluid/contrib/slim/tests/imperative_test_utils.py index 65b54b73d61e69..f8e370082d4ecf 100644 --- a/python/paddle/fluid/contrib/slim/tests/imperative_test_utils.py +++ b/python/paddle/fluid/contrib/slim/tests/imperative_test_utils.py @@ -16,7 +16,11 @@ import paddle import paddle.fluid as fluid -from paddle.nn import Linear, BatchNorm1D +from paddle.fluid import core +from paddle.fluid.dygraph.container import Sequential +from paddle.nn import ReLU, ReLU6, LeakyReLU, Sigmoid, Softmax, PReLU +from paddle.nn import Linear, Conv2D, Softmax, BatchNorm2D, MaxPool2D +from paddle.nn import BatchNorm1D from paddle.fluid.log_helper 
import get_logger From 97a16660d04c5e64cc4fc6e44a05cc5ba4f50032 Mon Sep 17 00:00:00 2001 From: XGZhang <46363693+XGZhang11@users.noreply.github.com> Date: Fri, 20 Aug 2021 11:58:56 +0000 Subject: [PATCH 21/27] add test --- python/paddle/fluid/contrib/slim/tests/imperative_test_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/paddle/fluid/contrib/slim/tests/imperative_test_utils.py b/python/paddle/fluid/contrib/slim/tests/imperative_test_utils.py index f8e370082d4ecf..1d502e6a5aff26 100644 --- a/python/paddle/fluid/contrib/slim/tests/imperative_test_utils.py +++ b/python/paddle/fluid/contrib/slim/tests/imperative_test_utils.py @@ -246,7 +246,7 @@ def __init__(self): out_features=10, weight_attr=fc_w_attr, bias_attr=fc_b_attr) - self.bn = BatchNorm1D(10, weight_attr=bn_w_attr) + self.bn = BatchNorm1D(10, weight_attr=bn_w_attr) def forward(self, inputs): x = self.linear(inputs) From 4b9c5dd00134ee135e553eb53f4985eb400a336f Mon Sep 17 00:00:00 2001 From: XGZhang <46363693+XGZhang11@users.noreply.github.com> Date: Sat, 21 Aug 2021 15:10:18 +0000 Subject: [PATCH 22/27] add test --- .../slim/tests/imperative_test_utils.py | 30 +++++++++++++++++++ .../contrib/slim/tests/test_imperative_ptq.py | 8 ++++- 2 files changed, 37 insertions(+), 1 deletion(-) diff --git a/python/paddle/fluid/contrib/slim/tests/imperative_test_utils.py b/python/paddle/fluid/contrib/slim/tests/imperative_test_utils.py index 1d502e6a5aff26..c6df2164447f4c 100644 --- a/python/paddle/fluid/contrib/slim/tests/imperative_test_utils.py +++ b/python/paddle/fluid/contrib/slim/tests/imperative_test_utils.py @@ -44,6 +44,13 @@ def fix_model_dict(model): return model +def pre_hook(layer, input): + input_return = (input[0] * 2) + return input_return + +def post_hook(layer, input, output): + return output * 2 + def train_lenet(lenet, reader, optimizer): loss_list = [] lenet.train() @@ -253,3 +260,26 @@ def forward(self, inputs): x = self.bn(x) return x + +class 
ImperativeLinearBn_hook(fluid.dygraph.Layer): + def __init__(self): + super(ImperativeLinearBn_hook, self).__init__() + + fc_w_attr = paddle.ParamAttr( + name="linear_weight", + initializer=paddle.nn.initializer.Constant(value=0.5)) + + self.linear = Linear( + in_features=10, + out_features=10, + weight_attr=fc_w_attr) + self.bn = BatchNorm1D(10) + + forward_pre = self.linear.register_forward_pre_hook(pre_hook) + forward_post = self.bn.register_forward_post_hook(post_hook) + + def forward(self, inputs): + x = self.linear(inputs) + x = self.bn(x) + + return x diff --git a/python/paddle/fluid/contrib/slim/tests/test_imperative_ptq.py b/python/paddle/fluid/contrib/slim/tests/test_imperative_ptq.py index b2ec0a8027c010..c2f644f1474f2c 100644 --- a/python/paddle/fluid/contrib/slim/tests/test_imperative_ptq.py +++ b/python/paddle/fluid/contrib/slim/tests/test_imperative_ptq.py @@ -31,6 +31,7 @@ from paddle.dataset.common import download from imperative_test_utils import fix_model_dict, ImperativeLenet, ImperativeLinearBn +from imperative_test_utils import ImperativeLinearBn_hook _logger = get_logger( __name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s') @@ -42,17 +43,22 @@ class TestFuseLinearBn(unittest.TestCase): def test_fuse(self): model = ImperativeLinearBn() + model_h = ImperativeLinearBn_hook() inputs = paddle.randn((3, 10), dtype="float32") config = PTQConfig(AbsmaxQuantizer(), AbsmaxQuantizer()) ptq = ImperativePTQ(config) f_l = [['linear', 'bn']] quant_model = ptq.quantize(model, fuse=True, fuse_list=f_l) + quant_h = ptq.quantize(model, fuse=True, fuse_list=f_l) for name, layer in quant_model.named_sublayers(): print(name, layer) out = model(inputs) + out_h = model_h(inputs) out_quant = quant_model(inputs) + out_quant_h = quant_h(inputs) cos_sim_func = nn.CosineSimilarity(axis=0) - print(cos_sim_func(out.flatten(), out_quant.flatten())) + print('fuse linear+bn', cos_sim_func(out.flatten(), out_quant.flatten())) + 
print(cos_sim_func(out_h.flatten(), out_quant_h.flatten())) class TestImperativePTQ(unittest.TestCase): From ba689a74f1f2a9b0e2995f04bdac88ecc269692a Mon Sep 17 00:00:00 2001 From: XGZhang <46363693+XGZhang11@users.noreply.github.com> Date: Sat, 21 Aug 2021 15:44:53 +0000 Subject: [PATCH 23/27] add test --- .../fluid/contrib/slim/tests/imperative_test_utils.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/python/paddle/fluid/contrib/slim/tests/imperative_test_utils.py b/python/paddle/fluid/contrib/slim/tests/imperative_test_utils.py index c6df2164447f4c..466cc14eae0984 100644 --- a/python/paddle/fluid/contrib/slim/tests/imperative_test_utils.py +++ b/python/paddle/fluid/contrib/slim/tests/imperative_test_utils.py @@ -48,9 +48,11 @@ def pre_hook(layer, input): input_return = (input[0] * 2) return input_return + def post_hook(layer, input, output): return output * 2 + def train_lenet(lenet, reader, optimizer): loss_list = [] lenet.train() @@ -261,6 +263,7 @@ def forward(self, inputs): return x + class ImperativeLinearBn_hook(fluid.dygraph.Layer): def __init__(self): super(ImperativeLinearBn_hook, self).__init__() @@ -270,9 +273,7 @@ def __init__(self): initializer=paddle.nn.initializer.Constant(value=0.5)) self.linear = Linear( - in_features=10, - out_features=10, - weight_attr=fc_w_attr) + in_features=10, out_features=10, weight_attr=fc_w_attr) self.bn = BatchNorm1D(10) forward_pre = self.linear.register_forward_pre_hook(pre_hook) From 6c74925c0a096a1dc4e8ddd325fb6179a673d601 Mon Sep 17 00:00:00 2001 From: XGZhang <46363693+XGZhang11@users.noreply.github.com> Date: Sun, 22 Aug 2021 06:21:08 +0000 Subject: [PATCH 24/27] add test --- .../paddle/fluid/contrib/slim/tests/test_imperative_ptq.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/python/paddle/fluid/contrib/slim/tests/test_imperative_ptq.py b/python/paddle/fluid/contrib/slim/tests/test_imperative_ptq.py index c2f644f1474f2c..1dbd291728c66b 100644 --- 
a/python/paddle/fluid/contrib/slim/tests/test_imperative_ptq.py +++ b/python/paddle/fluid/contrib/slim/tests/test_imperative_ptq.py @@ -49,7 +49,7 @@ def test_fuse(self): ptq = ImperativePTQ(config) f_l = [['linear', 'bn']] quant_model = ptq.quantize(model, fuse=True, fuse_list=f_l) - quant_h = ptq.quantize(model, fuse=True, fuse_list=f_l) + quant_h = ptq.quantize(model_h, fuse=True, fuse_list=f_l) for name, layer in quant_model.named_sublayers(): print(name, layer) out = model(inputs) @@ -57,7 +57,8 @@ def test_fuse(self): out_quant = quant_model(inputs) out_quant_h = quant_h(inputs) cos_sim_func = nn.CosineSimilarity(axis=0) - print('fuse linear+bn', cos_sim_func(out.flatten(), out_quant.flatten())) + print('fuse linear+bn', + cos_sim_func(out.flatten(), out_quant.flatten())) print(cos_sim_func(out_h.flatten(), out_quant_h.flatten())) From 5d3396a178cdbf4109154202c947c26ba629cd69 Mon Sep 17 00:00:00 2001 From: XGZhang <46363693+XGZhang11@users.noreply.github.com> Date: Sun, 22 Aug 2021 08:16:23 +0000 Subject: [PATCH 25/27] add test --- python/paddle/fluid/contrib/slim/tests/test_imperative_ptq.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/paddle/fluid/contrib/slim/tests/test_imperative_ptq.py b/python/paddle/fluid/contrib/slim/tests/test_imperative_ptq.py index 1dbd291728c66b..4505d2b48d9971 100644 --- a/python/paddle/fluid/contrib/slim/tests/test_imperative_ptq.py +++ b/python/paddle/fluid/contrib/slim/tests/test_imperative_ptq.py @@ -57,7 +57,7 @@ def test_fuse(self): out_quant = quant_model(inputs) out_quant_h = quant_h(inputs) cos_sim_func = nn.CosineSimilarity(axis=0) - print('fuse linear+bn', + print('fuse linear+bn', cos_sim_func(out.flatten(), out_quant.flatten())) print(cos_sim_func(out_h.flatten(), out_quant_h.flatten())) From fb67b1523ffd9d5e92d12fc4a617c5bd3c020259 Mon Sep 17 00:00:00 2001 From: XGZhang11 <46363693+XGZhang11@users.noreply.github.com> Date: Fri, 27 Aug 2021 05:08:40 +0000 Subject: [PATCH 26/27] modified 
according to reviewers' suggestions --- .../quantization/imperative/fuse_utils.py | 85 +++++++++++-------- .../slim/quantization/imperative/ptq.py | 11 ++- .../contrib/slim/tests/test_imperative_ptq.py | 11 ++- 3 files changed, 67 insertions(+), 40 deletions(-) diff --git a/python/paddle/fluid/contrib/slim/quantization/imperative/fuse_utils.py b/python/paddle/fluid/contrib/slim/quantization/imperative/fuse_utils.py index 4e4c38120be8c2..14282df23d3650 100644 --- a/python/paddle/fluid/contrib/slim/quantization/imperative/fuse_utils.py +++ b/python/paddle/fluid/contrib/slim/quantization/imperative/fuse_utils.py @@ -29,7 +29,22 @@ def forward(self, input): def fuse_layers(model, layers_to_fuse, inplace=False): - '''fuse layers in layers_to_fuse''' + ''' + fuse layers in layers_to_fuse + + Args: + model(paddle.nn.Layer): The model to be fused. + layers_to_fuse(list): The layers' names to be fused. For + example,"fuse_list = [["conv1", "bn1"], ["conv2", "bn2"]]". + A TypeError would be raised if "fuse" was set as + True but "fuse_list" was None. + Default: None. + inplace(bool): Whether apply fusing to the input model. + Default: False. + + Return + fused_model(paddle.nn.Layer): The fused model. 
+ ''' if inplace == False: model = copy.deepcopy(model) for layers in layers_to_fuse: @@ -39,39 +54,39 @@ def fuse_layers(model, layers_to_fuse, inplace=False): def _fuse_layers(model, layers_list): '''fuse all the layers in layers_list''' - lay_list = [] + layer_list = [] for layer_name in layers_list: parent_layer, sub_name = utils.find_parent_layer_and_sub_name( model, layer_name) - lay_list.append(getattr(parent_layer, sub_name)) - new_layers = fuse_func(lay_list) + layer_list.append(getattr(parent_layer, sub_name)) + new_layers = _fuse_func(layer_list) for i, item in enumerate(layers_list): parent_layer, sub_name = utils.find_parent_layer_and_sub_name(model, item) setattr(parent_layer, sub_name, new_layers[i]) -def fuse_func(lay_list): +def _fuse_func(layer_list): '''choose the fuser method and fuse layers''' - types = tuple(type(m) for m in lay_list) - fuser_method = layer_list_to_fuse_method.get(types, None) - new_layers = [None] * len(lay_list) - fused = fuser_method(*lay_list) - for handle_id, pre_hook_fn in lay_list[0]._forward_pre_hooks.items(): - fused.register_forward_pre_hook(pre_hook_fn) - del lay_list[0]._forward_pre_hooks[handle_id] - for handle_id, hook_fn in lay_list[-1]._forward_post_hooks.items(): - fused.register_forward_post_hook(hook_fn) - del lay_list[-1]._forward_post_hooks[handle_id] - new_layers[0] = fused - for i in range(1, len(lay_list)): + types = tuple(type(m) for m in layer_list) + fusion_method = types_to_fusion_method.get(types, None) + new_layers = [None] * len(layer_list) + fused_layer = fusion_method(*layer_list) + for handle_id, pre_hook_fn in layer_list[0]._forward_pre_hooks.items(): + fused_layer.register_forward_pre_hook(pre_hook_fn) + del layer_list[0]._forward_pre_hooks[handle_id] + for handle_id, hook_fn in layer_list[-1]._forward_post_hooks.items(): + fused_layer.register_forward_post_hook(hook_fn) + del layer_list[-1]._forward_post_hooks[handle_id] + new_layers[0] = fused_layer + for i in range(1, len(layer_list)): 
identity = Identity() - identity.training = lay_list[0].training + identity.training = layer_list[0].training new_layers[i] = identity return new_layers -def fuse_conv_bn(conv, bn): +def _fuse_conv_bn(conv, bn): '''fuse conv and bn for train or eval''' assert(conv.training == bn.training),\ "Conv and BN both must be in the same mode (train or eval)." @@ -79,26 +94,26 @@ def fuse_conv_bn(conv, bn): assert bn._num_features == conv._out_channels, 'Output channel of Conv2d must match num_features of BatchNorm2d' raise NotImplementedError else: - return fuse_conv_bn_eval(conv, bn) + return _fuse_conv_bn_eval(conv, bn) -def fuse_conv_bn_eval(conv, bn): +def _fuse_conv_bn_eval(conv, bn): '''fuse conv and bn for eval''' assert (not (conv.training or bn.training)), "Fusion only for eval!" fused_conv = copy.deepcopy(conv) - fused_weight, fused_bias = fuse_conv_bn_weights( + fused_weight, fused_bias = _fuse_conv_bn_weights( fused_conv.weight, fused_conv.bias, bn._mean, bn._variance, bn._epsilon, bn.weight, bn.bias) fused_conv.weight.set_value(fused_weight) if fused_conv.bias is None: fused_conv.bias = paddle.create_parameter( - shape=[fused_conv._out_channels], is_bias=True, dtype='float32') + shape=[fused_conv._out_channels], is_bias=True, dtype=bn.bias.dtype) fused_conv.bias.set_value(fused_bias) return fused_conv -def fuse_conv_bn_weights(conv_w, conv_b, bn_rm, bn_rv, bn_eps, bn_w, bn_b): +def _fuse_conv_bn_weights(conv_w, conv_b, bn_rm, bn_rv, bn_eps, bn_w, bn_b): '''fuse weights and bias of conv and bn''' if conv_b is None: conv_b = paddle.zeros_like(bn_rm) @@ -113,7 +128,7 @@ def fuse_conv_bn_weights(conv_w, conv_b, bn_rm, bn_rv, bn_eps, bn_w, bn_b): return conv_w, conv_b -def fuse_linear_bn(linear, bn): +def _fuse_linear_bn(linear, bn): '''fuse linear and bn''' assert (linear.training == bn.training),\ "Linear and BN both must be in the same mode (train or eval)." 
@@ -122,27 +137,29 @@ def fuse_linear_bn(linear, bn): 1], 'Output channel of Linear must match num_features of BatchNorm' raise NotImplementedError else: - return fuse_linear_bn_eval(linear, bn) + return _fuse_linear_bn_eval(linear, bn) -def fuse_linear_bn_eval(linear, bn): +def _fuse_linear_bn_eval(linear, bn): '''fuse linear and bn for eval''' assert (not (linear.training or bn.training)), "Fusion only for eval!" fused_linear = copy.deepcopy(linear) - fused_weight, fused_bias = fuse_linear_bn_weights( + fused_weight, fused_bias = _fuse_linear_bn_weights( fused_linear.weight, fused_linear.bias, bn._mean, bn._variance, bn._epsilon, bn.weight, bn.bias) fused_linear.weight.set_value(fused_weight) if fused_linear.bias is None: fused_linear.bias = paddle.create_parameter( - shape=[fused_linear.weight.shape[1]], is_bias=True, dtype='float32') + shape=[fused_linear.weight.shape[1]], + is_bias=True, + dtype=bn.bias.dtype) fused_linear.bias.set_value(fused_bias) return fused_linear -def fuse_linear_bn_weights(linear_w, linear_b, bn_rm, bn_rv, bn_eps, bn_w, - bn_b): +def _fuse_linear_bn_weights(linear_w, linear_b, bn_rm, bn_rv, bn_eps, bn_w, + bn_b): '''fuse weights and bias of linear and bn''' if linear_b is None: linear_b = paddle.zeros_like(bn_rm) @@ -152,7 +169,7 @@ def fuse_linear_bn_weights(linear_w, linear_b, bn_rm, bn_rv, bn_eps, bn_w, return fused_w, fused_b -layer_list_to_fuse_method = { - (nn.Conv2D, nn.BatchNorm2D): fuse_conv_bn, - (nn.Linear, nn.BatchNorm1D): fuse_linear_bn, +types_to_fusion_method = { + (nn.Conv2D, nn.BatchNorm2D): _fuse_conv_bn, + (nn.Linear, nn.BatchNorm1D): _fuse_linear_bn, } diff --git a/python/paddle/fluid/contrib/slim/quantization/imperative/ptq.py b/python/paddle/fluid/contrib/slim/quantization/imperative/ptq.py index fd0e263fddd417..64d9cd321016c5 100644 --- a/python/paddle/fluid/contrib/slim/quantization/imperative/ptq.py +++ b/python/paddle/fluid/contrib/slim/quantization/imperative/ptq.py @@ -64,11 +64,14 @@ def quantize(self, 
model, inplace=False, fuse=False, fuse_list=None): model(paddle.nn.Layer): The model to be quantized. inplace(bool): Whether apply quantization to the input model. Default: False. - fuse(bool): Whether fuse layers. + fuse(bool): Whether to fuse layers. Default: False. - fuse_list(list): The layers to fuse. - Default: None. - Returns: + fuse_list(list): The layers' names to be fused. For example, + "fuse_list = [["conv1", "bn1"], ["conv2", "bn2"]]". + A TypeError would be raised if "fuse" was set as + True but "fuse_list" was None. + Default: None. + Returns: quantized_model(paddle.nn.Layer): The quantized model. """ assert isinstance(model, paddle.nn.Layer), \ diff --git a/python/paddle/fluid/contrib/slim/tests/test_imperative_ptq.py b/python/paddle/fluid/contrib/slim/tests/test_imperative_ptq.py index 4505d2b48d9971..0710112ec191be 100644 --- a/python/paddle/fluid/contrib/slim/tests/test_imperative_ptq.py +++ b/python/paddle/fluid/contrib/slim/tests/test_imperative_ptq.py @@ -39,6 +39,7 @@ class TestFuseLinearBn(unittest.TestCase): """ + Fuse the linear and bn layers, and then quantize the model.
""" def test_fuse(self): @@ -51,7 +52,8 @@ def test_fuse(self): quant_model = ptq.quantize(model, fuse=True, fuse_list=f_l) quant_h = ptq.quantize(model_h, fuse=True, fuse_list=f_l) for name, layer in quant_model.named_sublayers(): - print(name, layer) + assert not (isinstance(layer, nn.BatchNorm1D) or + isinstance(layer, nn.BatchNorm2D)) out = model(inputs) out_h = model_h(inputs) out_quant = quant_model(inputs) @@ -260,7 +262,8 @@ def test_ptq(self): f_l = [['features.0', 'features.1'], ['features.4', 'features.5']] quant_model = self.ptq.quantize(model, fuse=True, fuse_list=f_l) for name, layer in quant_model.named_sublayers(): - print(name, layer) + assert not (isinstance(layer, nn.BatchNorm1D) or + isinstance(layer, nn.BatchNorm2D)) before_acc_top1 = self.model_test(quant_model, self.batch_num, self.batch_size) @@ -285,10 +288,14 @@ def test_ptq(self): print('After converted acc_top1: %s' % after_acc_top1) print('Infer acc_top1: %s' % infer_acc_top1) + #Check whether the quant_model is correct after converting. + #The acc of quantized model should be higher than 0.95. self.assertTrue( after_acc_top1 >= self.eval_acc_top1, msg="The test acc {%f} is less than {%f}." % (after_acc_top1, self.eval_acc_top1)) + #Check the saved infer_model.The acc of infer model + #should not be lower than the one of dygraph model. 
self.assertTrue( infer_acc_top1 >= after_acc_top1, msg='The acc is lower after converting model.') From 1c334c0703812c5a6d59456c4061f821006c6377 Mon Sep 17 00:00:00 2001 From: XGZhang11 <46363693+XGZhang11@users.noreply.github.com> Date: Fri, 27 Aug 2021 05:16:54 +0000 Subject: [PATCH 27/27] update test --- .../fluid/contrib/slim/tests/test_imperative_ptq.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/python/paddle/fluid/contrib/slim/tests/test_imperative_ptq.py b/python/paddle/fluid/contrib/slim/tests/test_imperative_ptq.py index 0710112ec191be..fb92b12cb0d870 100644 --- a/python/paddle/fluid/contrib/slim/tests/test_imperative_ptq.py +++ b/python/paddle/fluid/contrib/slim/tests/test_imperative_ptq.py @@ -52,8 +52,9 @@ def test_fuse(self): quant_model = ptq.quantize(model, fuse=True, fuse_list=f_l) quant_h = ptq.quantize(model_h, fuse=True, fuse_list=f_l) for name, layer in quant_model.named_sublayers(): - assert not (isinstance(layer, nn.BatchNorm1D) or - isinstance(layer, nn.BatchNorm2D)) + if name in f_l: + assert not (isinstance(layer, nn.BatchNorm1D) or + isinstance(layer, nn.BatchNorm2D)) out = model(inputs) out_h = model_h(inputs) out_quant = quant_model(inputs) @@ -262,8 +263,9 @@ def test_ptq(self): f_l = [['features.0', 'features.1'], ['features.4', 'features.5']] quant_model = self.ptq.quantize(model, fuse=True, fuse_list=f_l) for name, layer in quant_model.named_sublayers(): - assert not (isinstance(layer, nn.BatchNorm1D) or - isinstance(layer, nn.BatchNorm2D)) + if name in f_l: + assert not (isinstance(layer, nn.BatchNorm1D) or + isinstance(layer, nn.BatchNorm2D)) before_acc_top1 = self.model_test(quant_model, self.batch_num, self.batch_size)