|
21 | 21 | from paddle.fluid.initializer import NumpyArrayInitializer |
22 | 22 | from paddle.fluid.framework import convert_np_dtype_to_dtype_ |
23 | 23 |
|
| 24 | +from paddle.fluid.contrib.slim.quantization import QuantizationTransformPass |
| 25 | +from paddle.fluid.contrib.slim.quantization import QuantizationFreezePass |
| 26 | +from paddle.fluid.framework import IrGraph, IrNode, Operator |
| 27 | +from paddle.fluid.executor import global_scope |
| 28 | + |
24 | 29 |
|
25 | 30 | class TensorConfig: |
26 | 31 | ''' |
@@ -160,3 +165,181 @@ def create_fake_model(program_config): |
160 | 165 | executor.run(util_program) |
161 | 166 | params = scope.find_var("out_var_0").get_bytes() |
162 | 167 | return model, params |
| 168 | + |
| 169 | + |
def create_quant_model(model,
                       params,
                       activation_quantize_type='moving_average_abs_max',
                       weight_quantize_type='channel_wise_abs_max',
                       save=False):
    """Build a fake-quantized inference model from serialized model/params.

    Pipeline: load the inference program, insert fake quant/dequant ops with
    ``QuantizationTransformPass``, stamp a dummy ``out_threshold`` attribute
    (3.0) onto the outputs of common quantizable ops, freeze the graph with
    ``QuantizationFreezePass``, overwrite the quantization scale tensors with
    dummy values (all ones), and re-serialize the result. The produced model
    is structurally a quantized model but its scales are synthetic — suitable
    only for testing inference passes, not for accuracy.

    Args:
        model: serialized program, passed to ``load_inference_model`` as
            ``model_filename``.
        params: serialized persistables, passed as ``params_filename``.
        activation_quantize_type (str): activation quantization strategy
            forwarded to ``QuantizationTransformPass``.
        weight_quantize_type (str): weight quantization strategy forwarded to
            both the transform and freeze passes.
        save (bool): when True, additionally dump the quantized model to the
            ``'test_inference_model'`` directory on disk.

    Returns:
        tuple: ``(serialized_program, serialized_params)`` bytes.

    NOTE(review): hard-codes ``CUDAPlace(0)`` — presumably only run in GPU
    test jobs; confirm before reusing elsewhere.
    """
    place = paddle.CUDAPlace(0)
    scope = global_scope()
    exe = paddle.static.Executor(place)
    [inference_program, feed_target_names,
     fetch_targets] = paddle.static.load_inference_model(
         path_prefix=None,
         executor=exe,
         model_filename=model,
         params_filename=params)
    graph = IrGraph(core.Graph(inference_program.desc), for_test=True)

    # Insert fake_quantize / fake_dequantize ops around quantizable ops.
    transform_pass = QuantizationTransformPass(
        scope=scope,
        place=place,
        activation_quantize_type=activation_quantize_type,
        weight_quantize_type=weight_quantize_type)
    transform_pass.apply(graph)

    # Ops whose float outputs receive the dummy "out_threshold" attribute.
    # Fix: removed a duplicate "layer_norm" entry present in the original
    # list (harmless for membership tests, but redundant).
    out_scale_op_list = [
        "conv2d",
        "depthwise_conv2d",
        "mul",
        "matmul",
        "relu",
        "leaky_relu",
        "relu6",
        "sigmoid",
        "tanh",
        "prelu",
        "swish",
        "softmax",
        "batch_norm",
        "layer_norm",
        "elementwise_add",
        "pool2d",
        "reshape2",
        "transpose2",
        "concat",
        "elementwise_mul",
        "scale",
        "slice",
        "hard_swish",
        "hard_sigmoid",
        "conv2d_transpose",
        "gru",
        "bilinear_interp",
        "nearest_interp",
        "trilinear_interp",
        "flatten",
        "flatten2",
        "transpose",
        "pad2d",
        "reshape",
    ]
    # Maps op type -> ([real input slot names], [real output slot names]).
    # Only the output slots (index 1) are consulted below.
    op_real_in_out_name = {
        "conv2d": [["Input", "Filter"], ["Output"]],
        "depthwise_conv2d": [["Input", "Filter"], ["Output"]],
        "conv2d_transpose": [["Input", "Filter"], ["Output"]],
        "mul": [["X", "Y"], ["Out"]],
        "matmul": [["X", "Y"], ["Out"]],
        "pool2d": [["X"], ["Out"]],
        "elementwise_add": [["X", "Y"], ["Out"]],
        "concat": [["X"], ["Out"]],
        "softmax": [["X"], ["Out"]],
        "argmax": [["X"], ["Out"]],
        "transpose": [["X"], ["Out"]],
        "equal": [["X", "Y"], ["Out"]],
        "gather": [["X"], ["Out"]],
        "greater_equal": [["X", "Y"], ["Out"]],
        "greater_than": [["X", "Y"], ["Out"]],
        "less_equal": [["X", "Y"], ["Out"]],
        "less_than": [["X", "Y"], ["Out"]],
        "mean": [["X"], ["Out"]],
        "not_equal": [["X", "Y"], ["Out"]],
        "reshape": [["X"], ["Out"]],
        "reshape2": [["X"], ["Out"]],
        "transpose2": [["X"], ["Out"]],
        "bilinear_interp": [["X"], ["Out"]],
        "nearest_interp": [["X"], ["Out"]],
        "trilinear_interp": [["X"], ["Out"]],
        "slice": [["Input"], ["Out"]],
        "squeeze": [["X"], ["Out"]],
        "elementwise_sub": [["X", "Y"], ["Out"]],
        "relu": [["X"], ["Out"]],
        "relu6": [["X"], ["Out"]],
        "leaky_relu": [["X"], ["Out"]],
        "prelu": [["X"], ["Out"]],
        "tanh": [["X"], ["Out"]],
        "swish": [["X"], ["Out"]],
        "dropout": [["X"], ["Out"]],
        "batch_norm": [["X"], ["Y"]],
        "layer_norm": [["X"], ["Y"]],
        "sigmoid": [["X"], ["Out"]],
        "elementwise_mul": [["X", "Y"], ["Out"]],
        "scale": [["X"], ["Out"]],
        "hard_swish": [["X"], ["Out"]],
        "hard_sigmoid": [["X"], ["Out"]],
        "gru": [["Input", "Weight"], ["Hidden"]],
        "lstm": [["Input", "Weight"], ["Hidden"]],
        "pad2d": [["X"], ["Out"]],
        "flatten": [["X"], ["Out"]],
        "flatten2": [["X"], ["Out"]],
    }

    def _get_op_output_var_names(op):
        """Return the real output variable names of ``op``.

        ``op`` may be an IrNode or an Operator. Ops not present in
        ``op_real_in_out_name`` yield an empty list.
        """
        assert isinstance(op, (IrNode, Operator)), \
            "The input op should be IrNode or Operator."
        var_names = []
        op_name = op.name() if isinstance(op, IrNode) \
            else op.type
        if op_name not in op_real_in_out_name:
            return []

        name_list = op_real_in_out_name[op_name][1]
        for name in name_list:
            var_name = op.output(name)
            if isinstance(var_name, list):
                var_names.extend(var_name)
            else:
                var_names.append(var_name)
        return var_names

    # Stamp a dummy output scale on every float output of quantizable ops;
    # non-float outputs (e.g. integer indices) are skipped.
    op_nodes = graph.all_op_nodes()
    for op_node in op_nodes:
        if op_node.name() in out_scale_op_list:
            var_names = _get_op_output_var_names(op_node)
            for var_name in var_names:
                in_node = graph._find_node_by_name(op_node.outputs, var_name)
                if in_node.dtype() not in \
                    [core.VarDesc.VarType.FP64, core.VarDesc.VarType.FP32]:
                    continue

                op_node.op()._set_attr("out_threshold", 3.0)

    # Freeze graph for inference, but the weight of fc/conv is still float type.
    freeze_pass = QuantizationFreezePass(
        scope=scope, place=place, weight_quantize_type=weight_quantize_type)
    freeze_pass.apply(graph)

    main_program = graph.to_program()

    # modify fake_quantize_moving_average_abs_max(InScale) and fake_channel_wise_dequantize_max_abs(Scales)
    op_nodes = graph.all_op_nodes()
    for op_node in op_nodes:
        if op_node.name() == 'fake_quantize_moving_average_abs_max':
            var_name = op_node.input("InScale")[0]
            tensor = scope.var(var_name).get_tensor()
            tensor.set(np.array([1], dtype=np.float32), place)
        elif op_node.name() == 'fake_channel_wise_dequantize_max_abs':
            var_name = op_node.input("Scales")[0]
            tensor = scope.var(var_name).get_tensor()
            tensor.set(np.ones(tensor.shape(), dtype=np.float32), place)

    if save:
        fluid.io.save_inference_model(
            'test_inference_model',
            feed_target_names,
            fetch_targets,
            exe,
            main_program=main_program)

    feed_vars = [
        main_program.global_block().var(name) for name in feed_target_names
    ]
    serialized_program = paddle.static.serialize_program(
        feed_vars, fetch_targets, program=main_program)
    serialized_params = paddle.static.serialize_persistables(
        feed_vars, fetch_targets, executor=exe, program=main_program)
    return serialized_program, serialized_params
0 commit comments