[dygraph qat] Refine saving output scale to infer program #31784
@@ -251,8 +251,8 @@ def __init__(self,
         super(ImperativeQuantizeInputs, self).__init__()

         self._quantizable_layer_type = tuple(
-            utils.supported_quant_layers_map[layer]
-            if layer in utils.supported_quant_layers_map else layer
+            utils.quant_input_layers_map[layer]
+            if layer in utils.quant_input_layers_map else layer
             for layer in quantizable_layer_type)
         for layer in self._quantizable_layer_type:
             assert not isinstance(layer, str), \
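For reference, the renamed map only normalizes user-supplied layer types, which may arrive as names or as classes. A minimal sketch of that pattern, assuming a toy two-entry map rather than the full Paddle one:

```python
import paddle

# Toy stand-in for utils.quant_input_layers_map (the real map has
# many more entries).
quant_input_layers_map = {
    'Conv2D': paddle.nn.Conv2D,
    'Linear': paddle.nn.Linear,
}

# Users may mix layer names and layer classes; both normalize to classes.
quantizable_layer_type = ['Conv2D', paddle.nn.Linear]
normalized = tuple(
    quant_input_layers_map[layer]
    if layer in quant_input_layers_map else layer
    for layer in quantizable_layer_type)

for layer in normalized:
    assert not isinstance(layer, str)  # every entry is now a class
```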
@@ -324,12 +324,11 @@ def apply(self, model):
             target = name[last_idx:idx]

             quant_layer = self._get_quantized_layer(layer)
-            setattr(quant_layer, "layer_name", layer.full_name())
             setattr(obj, target, quant_layer)

     def _get_quantized_layer(self, layer):
         quant_layer_name = None
-        for key, value in utils.supported_quant_layers_map.items():
+        for key, value in utils.quant_input_layers_map.items():
             if isinstance(layer, value):
                 quant_layer_name = 'Quantized' + key
                 break
@@ -372,6 +371,9 @@ def apply(self, model):
         """
         assert isinstance(model, dygraph.Layer), \
             "The model must be the instance of dygraph.Layer."

+        # Calculate the output scale of the target ops, and don't
+        # consider the skip_quant attr
         for _, layer in model.named_sublayers():
             if self._is_target_layer(layer):
                 self._init_scale_params(layer)
@@ -411,24 +413,21 @@ def save_quantized_model(self, layer, path, input_spec=None, **config):
         assert isinstance(layer, dygraph.Layer), \
             "The model must be the instance of dygraph.Layer."

-        with dygraph.guard():
-            layer.eval()
-            for handle in self._register_hook_handle_list:
-                handle.remove()
-            for _, sub_layer in layer.named_sublayers():
-                if self._is_target_layer(sub_layer):
-                    if hasattr(sub_layer, "layer_name"):
-                        layer_name = sub_layer.layer_name
-                    else:
-                        layer_name = sub_layer.full_name()
-                    if hasattr(sub_layer, "_quant_out_scale"):
-                        self._out_scale_dict[layer_name] = float(
-                            sub_layer._quant_out_scale)
+        # remove handles and collect output scales
+        self._gather_output_scale(layer)

+        # save the quantized model that doesn't have output scales
         paddle.jit.save(layer=layer, path=path, input_spec=input_spec, **config)

+        if len(self._out_scale_dict) == 0:
+            warnings.warn("No Layer of the model to be saved contains "
+                          "the out_threshold attribute, so the generated "
+                          "inference model would not contain the "
+                          "out_threshold.")
+            return

         # load static model
         is_dynamic_mode = False
         if paddle.in_dynamic_mode():
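For orientation, here is a hedged usage sketch of the saving flow above; `imperative_out_scale` and `model` are placeholder names for this sketch, not the PR's API surface:

```python
import paddle
from paddle.static import InputSpec

# `imperative_out_scale` stands for the object whose save_quantized_model
# is shown above; `model` is a trained quantized dygraph model. Both are
# placeholder names.
imperative_out_scale.save_quantized_model(
    layer=model,
    path="./quant_model/lenet",
    input_spec=[InputSpec(shape=[None, 1, 28, 28], dtype='float32')])
# Expected artifacts: ./quant_model/lenet.pdmodel and .pdiparams, with
# out_threshold attrs attached to the matched ops in the program.
```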
@@ -443,96 +442,103 @@ def save_quantized_model(self, layer, path, input_spec=None, **config):
         basename = os.path.basename(path)
         model_filename = basename + INFER_MODEL_SUFFIX
         params_filename = basename + INFER_PARAMS_SUFFIX

-        [inference_program, feed_target_names, fetch_targets] = (
+        [infer_program, feed_target_names, fetch_targets] = (
             load_inference_model(
                 dirname=dirname,
                 executor=exe,
                 model_filename=model_filename,
                 params_filename=params_filename))

+        # TODO(jc): analyse whether the dygraph model has
+        # several blocks before applying qat
+        assert infer_program.num_blocks == 1, \
+            "Quantization aware training (QAT) requires the program " \
+            "to have only one block for now. When the model contains " \
+            "if-else or while, the program will have several blocks."
-        # set output scales to the static model
-        check_behind_op = False
-        op_count = 0
-        ops_list = [key for key, _ in self._out_scale_dict.items()]
-        if len(ops_list) == 0:
-            warnings.warn(
-                "Warning: No Layer of the model while to be saved contains "
-                "the out_threshold attribute, so the generated inference "
-                "model would not contain the out_threshold.")
-        else:
-            # Because the Layer in dygraph may correspond to multiple ops
-            # in static program after being saved. To ensure correctness,
-            # the outscale collected for output of dygraph Layer can only
-            # be set to the last op in the corresponding ops in static program.
-            #
-            # We can judge the execution order of the ops which corresponding
-            # to dygraph Layer by check_behind_op
-            forward_op = None
-            for block in inference_program.blocks:
-                for op in block.ops:
-                    if op.type in utils.op_real_in_out_name:
-                        if op_count > len(ops_list):
-                            warnings.warn(
-                                "The number of Layer which has "
-                                "out_threshold attribute should be bigger than "
-                                "the op in inference model")
-                            break
-                        if check_behind_op:
-                            check_behind_op = False
-                            if op.type == "elementwise_add":
-                                if self._is_op_matched(ops_list[op_count], op,
-                                                       block):
-                                    op._set_attr("out_threshold",
-                                                 self._out_scale_dict[ops_list[
-                                                     op_count]])
-                                    op_count += 1
-                                    forward_op = None
-                                continue
-                            else:
-                                if forward_op is None:
-                                    raise ValueError(
-                                        "forward_op should not be None")
-                                if self._is_op_matched(ops_list[op_count],
-                                                       forward_op, block):
-                                    forward_op._set_attr(
-                                        "out_threshold", self._out_scale_dict[
-                                            ops_list[op_count]])
-                                    op_count += 1
-                                forward_op = None
-
-                        if op.type in ["conv2d", "depthwise_conv2d", "matmul"]:
-                            check_behind_op = True
-                            forward_op = op
-                            continue
-                        if op_count >= len(ops_list):
-                            warnings.warn(
-                                "The number of Layer which has out_threshold "
-                                "attribute should be bigger than the op in "
-                                "inference model")
-                            break
-                        if self._is_op_matched(ops_list[op_count], op, block):
-                            op._set_attr(
-                                "out_threshold",
-                                self._out_scale_dict[ops_list[op_count]])
-                            op_count += 1
-
-        self._set_skip_quant_attr(inference_program)
+        self._save_output_scale(infer_program)

+        # process skip quant
+        self._set_skip_quant_attr(infer_program)
+        # save the final quantized model that has output scales
         save_inference_model(
             dirname=dirname,
             feeded_var_names=feed_target_names,
             target_vars=fetch_targets,
             executor=exe,
-            main_program=inference_program.clone(),
+            main_program=infer_program.clone(),
             model_filename=model_filename,
             params_filename=params_filename)

         if is_dynamic_mode:
             paddle.disable_static()
+    def _gather_output_scale(self, layer):
+        """
+        Gather all output scales to self._out_scale_dict
+        """
+        with dygraph.guard():
+            layer.eval()
+            for _, sub_layer in layer.named_sublayers():
+                if self._is_target_layer(sub_layer):
+                    layer_name = sub_layer.full_name()
+                    if hasattr(sub_layer, "_quant_out_scale"):
+                        self._out_scale_dict[layer_name] = float(
+                            sub_layer._quant_out_scale)
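A minimal sketch of what the gathering step does, assuming a toy model whose sublayer already carries a recorded `_quant_out_scale` (in the real flow the attribute is set by the QAT hooks, not by hand):

```python
import paddle

class Demo(paddle.nn.Layer):
    def __init__(self):
        super(Demo, self).__init__()
        self.conv = paddle.nn.Conv2D(1, 4, 3)
        # Pretend the QAT hooks already recorded an output scale.
        self.conv._quant_out_scale = paddle.to_tensor(0.25)

# Each target sublayer contributes one entry, keyed by its unique
# full_name().
out_scale_dict = {}
model = Demo()
model.eval()
for _, sub_layer in model.named_sublayers():
    if hasattr(sub_layer, "_quant_out_scale"):
        out_scale_dict[sub_layer.full_name()] = float(
            sub_layer._quant_out_scale)
print(out_scale_dict)  # e.g. {'conv2d_0': 0.25}
```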
+    def _save_output_scale(self, infer_program):
+        """
+        Save all output scales to the corresponding ops in the static
+        inference program.
+
+        Because a Layer in dygraph may correspond to multiple ops in
+        the static program after being saved, the output scale collected
+        for a dygraph Layer can only be set on the last of its
+        corresponding ops, to ensure correctness.
+        """
+        assert infer_program.num_blocks == 1, \
+            "The inference program should only have a block."
+
+        global_block = infer_program.global_block()
+        target_ops = global_block.ops
+
+        scale_idx = 0
+        op_idx = 0
+        attr_name = "out_threshold"
+
+        for scale_name, scale_value in self._out_scale_dict.items():
+            while True:
+                if op_idx >= len(target_ops):
+                    break
+
+                op = target_ops[op_idx]
+                if not self._is_scale_op_matched(scale_name, op, global_block):
+                    op_idx += 1
+                else:
+                    weight_ops = ["conv2d", "depthwise_conv2d", "matmul"]
+                    if op.type in weight_ops and op_idx + 1 < len(target_ops) \
+                        and target_ops[op_idx + 1].type == "elementwise_add":
+                        target_ops[op_idx + 1]._set_attr(attr_name, scale_value)
+                        op_idx += 2
+                    else:
+                        op._set_attr(attr_name, scale_value)
+                        op_idx += 1
+                    scale_idx += 1
+                    break
+
+        if scale_idx != len(self._out_scale_dict):
+            _logger.warning("The model has %s output scales, but only "
+                            "%s output scales are saved."
+                            % (len(self._out_scale_dict), scale_idx))
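To see why the scale must land on the last op, here is a small pure-Python simulation of the walk, with invented op lists and scale names (no Paddle required): a conv2d with bias is exported as conv2d followed by elementwise_add, so the collected scale belongs to the add's output.

```python
# Simulated program: Conv2D (with bias) became conv2d + elementwise_add.
ops = ["conv2d", "elementwise_add", "relu"]
scales = {"conv2d_0": 0.5, "re_lu_0": 0.9}
weight_ops = ["conv2d", "depthwise_conv2d", "matmul"]

def matches(scale_name, op_type):
    # Crude stand-in for _is_scale_op_matched: substring check after
    # mapping the static op name to the dygraph naming style.
    return op_type.replace("relu", "re_lu") in scale_name

out_threshold = {}
op_idx = 0
for scale_name, scale_value in scales.items():
    while op_idx < len(ops):
        op = ops[op_idx]
        if not matches(scale_name, op):
            op_idx += 1
            continue
        if op in weight_ops and op_idx + 1 < len(ops) \
                and ops[op_idx + 1] == "elementwise_add":
            out_threshold[op_idx + 1] = scale_value  # last op gets the scale
            op_idx += 2
        else:
            out_threshold[op_idx] = scale_value
            op_idx += 1
        break
print(out_threshold)  # {1: 0.5, 2: 0.9}
```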
     def _is_target_layer(self, layer):
-        return isinstance(layer, utils.out_scale_layers_list) \
-            or 'quantized_' in layer.full_name()
+        return isinstance(layer, tuple(utils.quant_output_layers_map.values())) \
+            or ('quantized_' in layer.full_name() and \
+                'quantized_noweight' not in layer.full_name())

     def _init_scale_params(self, layer, name=None):
         """
@@ -570,27 +576,37 @@ def _create_param(in_layer, first_name, last_name, dtype):
         layer._quant_out_accum = _create_param(layer, name, "accum", dtype)
         layer._quant_out_accum.stop_gradient = True

-    # Judge whether the op in program matches the Layer in dynamic model
-    def _is_op_matched(self, layer_name, op, block):
-        output_var_names = quantization_pass._get_op_output_var_names(op)
-        for output_var_name in output_var_names:
-            output_var_tensor = block.var(output_var_name)
-            if output_var_tensor.dtype not in [
-                    core.VarDesc.VarType.FP64, core.VarDesc.VarType.FP32
-            ]:
-                return False
-
-        # Because the naming styles of static and dynamic graph are different,
-        # in order to avoid mistakes, we unify the name here.
-        op_type = output_var_names[0].split(".")[0]
-        op_type = op_type.rsplit("_", 1)[0]
-        if op_type == 'depthwise_conv2d':
-            op_type = 'conv2d'
-        if 'prelu' in op_type:
-            op_type = op_type.replace('prelu', 'p_re_lu')
-        if 'relu' in op_type:
-            op_type = op_type.replace('relu', 're_lu')
-        return op_type in layer_name
+    def _is_scale_op_matched(self, scale_name, op, block):
+        """
+        Judge from the op name and attrs whether the op in the program
+        matches the scale_name. We must know the name correspondence
+        between the dygraph and static models.
+        """
+        fp_type = [core.VarDesc.VarType.FP64, core.VarDesc.VarType.FP32]
+        if op.type in quantization_pass._op_real_in_out_name.keys():
+            output_var_names = quantization_pass._get_op_output_var_names(op)
+            for output_var_name in output_var_names:
+                output_var_tensor = block.var(output_var_name)
+                if output_var_tensor.dtype not in fp_type:
+                    return False
+
+        # Note that the items in corresponding_dict are checked in
+        # priority order
+        corresponding_dict = {
+            'conv2d_transpose': [['conv2d_transpose'], None],
+            'conv2d': [['conv2d', 'depthwise_conv2d'], None],
+            'linear': [['matmul'], None],
+            're_lu6': [['relu6'], None],
+            'p_re_lu': [['prelu'], None],
+            'leaky_re_lu': [['leaky_relu'], None],
+            're_lu': [['relu'], None],
+        }
+
+        for key, value in corresponding_dict.items():
+            if key in scale_name:
+                return (op.type in value[0]) and \
+                    (len(value) == 1 or value[1] is None or value[1](op))
+
+        return op.type in scale_name
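A small sketch of why `corresponding_dict` is order-sensitive, using only the two ReLU entries: since 're_lu' is a substring of a ReLU6 layer's name, 're_lu6' must be checked first (scale names below are illustrative).

```python
# Python 3.7+ dicts preserve insertion order, which gives the priority.
corresponding_dict = {
    're_lu6': [['relu6'], None],
    're_lu': [['relu'], None],
}

def match(scale_name, op_type):
    for key, value in corresponding_dict.items():
        if key in scale_name:
            return op_type in value[0]
    return op_type in scale_name

print(match('re_lu6_0.tmp_0', 'relu6'))  # True: matched by 're_lu6' entry
print(match('re_lu6_0.tmp_0', 'relu'))   # False: 'relu' not in ['relu6']
print(match('re_lu_1.tmp_0', 'relu'))    # True: falls through to 're_lu'
```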
     def _set_skip_quant_attr(self, program):
         block = program.global_block()
The second changed file (the quantization utils module):
@@ -30,7 +30,7 @@
     "swish": [["X"], ["Out"]],
 }

-supported_quant_layers_map = {
+quant_input_layers_map = {
     'Conv2D': paddle.nn.Conv2D,
     'Linear': paddle.nn.Linear,
     'AdaptiveAvgPool2D': paddle.nn.AdaptiveAvgPool2D,
@@ -58,8 +58,28 @@
     "fake_quantize_dequantize_moving_average_abs_max"
 ]

-out_scale_layers_list = (
-    paddle.nn.Conv2D, paddle.nn.Linear, paddle.nn.MaxPool2D,
-    paddle.nn.BatchNorm, paddle.nn.BatchNorm2D, paddle.nn.SyncBatchNorm,
-    paddle.nn.LeakyReLU, paddle.nn.PReLU, paddle.nn.ReLU, paddle.nn.ReLU6,
-    paddle.nn.Sigmoid, paddle.nn.Softmax, paddle.nn.Tanh, paddle.nn.Swish)
+quant_output_layers_map = {
+    'Conv2D': paddle.nn.Conv2D,
+    'Conv2DTranspose': paddle.nn.Conv2DTranspose,
+    'Linear': paddle.nn.Linear,
+    'AdaptiveAvgPool2D': paddle.nn.AdaptiveAvgPool2D,
+    'AdaptiveMaxPool2D': paddle.nn.AdaptiveMaxPool2D,
+    'AvgPool2D': paddle.nn.AvgPool2D,
+    'MaxPool2D': paddle.nn.MaxPool2D,
+    'BatchNorm': paddle.nn.BatchNorm,
+    'BatchNorm2D': paddle.nn.BatchNorm2D,
+    'GroupNorm': paddle.nn.GroupNorm,
+    'InstanceNorm2D': paddle.nn.InstanceNorm2D,
+    'LayerNorm': paddle.nn.LayerNorm,
+    'SyncBatchNorm': paddle.nn.SyncBatchNorm,
+    'ELU': paddle.nn.ELU,
+    'GELU': paddle.nn.GELU,
+    'LeakyReLU': paddle.nn.LeakyReLU,
+    'PReLU': paddle.nn.PReLU,
+    'ReLU': paddle.nn.ReLU,
+    'ReLU6': paddle.nn.ReLU6,
+    'Sigmoid': paddle.nn.Sigmoid,
+    'Softmax': paddle.nn.Softmax,
+    'Tanh': paddle.nn.Tanh,
+    'Swish': paddle.nn.Swish,
+}
Review comment: conv2d_transpose and depthwise_conv2d_transpose still need to be added to weight_ops.

Reply: Done