Skip to content

Commit 17188e8

Browse files
authored
trt convert ut add dynamic_shape and int8, etc. (#35061)
1 parent a95db6a commit 17188e8

File tree

7 files changed

+308
-17
lines changed

7 files changed

+308
-17
lines changed

paddle/fluid/framework/ir/graph_helper.cc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -535,6 +535,7 @@ void GraphToProgram(const Graph &graph, ProgramDesc *program,
535535

536536
block = program_pb.add_blocks();
537537
block->set_idx(idx);
538+
block->set_parent_idx(kRootBlockIndex);
538539
GraphToBlock(*graph.GetSubGraph(idx), block, sort_kind);
539540
}
540541
} else {

paddle/fluid/framework/ir/graph_viz_pass.cc

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,11 @@ See the License for the specific language governing permissions and
1313
limitations under the License. */
1414

1515
#include "paddle/fluid/framework/ir/graph_viz_pass.h"
16+
#include <string>
17+
#include "paddle/fluid/framework/ir/graph_helper.h"
1618
#include "paddle/fluid/framework/ir/graph_printer.h"
1719
#include "paddle/fluid/framework/op_proto_maker.h"
20+
#include "paddle/fluid/framework/program_desc.h"
1821
#include "paddle/fluid/inference/analysis/dot.h"
1922

2023
namespace paddle {
@@ -44,6 +47,31 @@ void GraphVizPass::ApplyImpl(ir::Graph* graph) const {
4447
"Can not open file %s for printing the graph.", graph_viz_path));
4548
std::ostream& sout = *fout;
4649

50+
// serialize only model file.
51+
std::string program_path;
52+
std::size_t found1 = graph_viz_path.find("_ir_");
53+
std::size_t found2 = graph_viz_path.find(".dot");
54+
if (found1 != std::string::npos && found2 != std::string::npos) {
55+
ProgramDesc program_desc;
56+
GraphToProgram(*graph, &program_desc);
57+
// TODO(wilber): GraphToProgram seems have bugs.
58+
for (size_t i = 0; i < program_desc.Size(); ++i) {
59+
for (size_t j = 0; j < program_desc.Block(i).OpSize(); ++j) {
60+
if (program_desc.Block(i).Op(j)->Type() == "tensorrt_engine") {
61+
program_desc.Block(i).Op(j)->RemoveAttr("sub_block");
62+
}
63+
}
64+
}
65+
std::string program_bytes = program_desc.Proto()->SerializeAsString();
66+
// rename from "17_ir_fc_fuse_pass.dot" to "fc_fuse_pass.pdmodel"
67+
program_path =
68+
graph_viz_path.substr(found1 + 4, found2 - found1 - 4) + ".pdmodel";
69+
std::ofstream file(program_path.c_str(), std::ios::binary);
70+
file.write(program_bytes.c_str(), program_bytes.size());
71+
file.close();
72+
VLOG(3) << "serialize program to " << program_path;
73+
}
74+
4775
std::unordered_map<const ir::Node*, std::string> node2dot;
4876

4977
Dot dot;

python/paddle/fluid/tests/unittests/ir/inference/auto_scan_test.py

Lines changed: 34 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -15,21 +15,21 @@
1515
import numpy as np
1616
import unittest
1717
import abc
18+
import os
1819
import paddle
1920
import paddle.fluid as fluid
2021
from paddle.fluid.initializer import NumpyArrayInitializer
2122
import paddle.fluid.core as core
2223
from paddle import compat as cpt
2324
import paddle.inference as paddle_infer
2425
from typing import Optional, List, Callable, Dict, Any, Set
25-
from program_config import TensorConfig, OpConfig, ProgramConfig, create_fake_model
26+
from program_config import TensorConfig, OpConfig, ProgramConfig, create_fake_model, create_quant_model
2627

2728

2829
class AutoScanTest(unittest.TestCase):
2930
def __init__(self, methodName='runTest'):
3031
paddle.enable_static()
3132
super(AutoScanTest, self).__init__(methodName)
32-
self.threshold = 1e-5
3333

3434
@abc.abstractmethod
3535
def sample_program_configs(self) -> List[ProgramConfig]:
@@ -56,11 +56,31 @@ def run_test_config(self, model, params, prog_config, pred_config,
5656
input_tensor.copy_from_cpu(feed_data[name])
5757
predictor.run()
5858
result = {}
59-
for out_name in prog_config.outputs:
60-
result[out_name] = predictor.get_output_handle(
61-
out_name).copy_to_cpu()
59+
for out_name, o_name in zip(prog_config.outputs,
60+
predictor.get_output_names()):
61+
result[out_name] = predictor.get_output_handle(o_name).copy_to_cpu()
6262
return result
6363

64+
def assert_op_size(self, trt_engine_num, paddle_op_num):
65+
cur_path = os.path.dirname(__file__)
66+
last_passed_program = os.path.join(
67+
cur_path, 'transpose_flatten_concat_fuse_pass.pdmodel')
68+
model_bytes = paddle.static.load_from_file(last_passed_program)
69+
pg = paddle.static.deserialize_program(model_bytes)
70+
main_block = pg.desc.block(0)
71+
op_size = main_block.op_size()
72+
op_types = [
73+
main_block.op(i).type() == 'tensorrt_engine' for i in range(op_size)
74+
]
75+
trt_engine_size = sum(op_types)
76+
paddle_op_size = op_size - trt_engine_size
77+
self.assertTrue(trt_engine_size == trt_engine_num,
78+
'trt_engine_num is {}, but got {}!'.format(
79+
trt_engine_size, trt_engine_num))
80+
self.assertTrue(paddle_op_size == paddle_op_num,
81+
'paddle_op_num is {}, but got {}!'.format(
82+
paddle_op_size, paddle_op_num))
83+
6484
def assert_tensors_near(self,
6585
threshold: float,
6686
tensors: List[Dict[str, np.array]]):
@@ -73,9 +93,15 @@ def assert_tensors_near(self,
7393
first[key], arr, atol=threshold),
7494
"Output has diff between GPU and TensorRT. ")
7595

76-
def run_test(self):
96+
def run_test(self,
97+
trt_engine_num: int,
98+
paddle_op_num: int,
99+
threshold=1e-5,
100+
quant=False):
77101
for prog_config in self.sample_program_configs():
78102
model, params = create_fake_model(prog_config)
103+
if quant:
104+
model, params = create_quant_model(model, params)
79105
for batch_size in self.batch_size_set:
80106
feed_data = {}
81107
for name, tensor_config in prog_config.inputs.items():
@@ -88,5 +114,5 @@ def run_test(self):
88114
results.append(
89115
self.run_test_config(model, params, prog_config,
90116
pred_config, feed_data))
91-
self.assert_tensors_near(
92-
threshold=self.threshold, tensors=results)
117+
self.assert_tensors_near(threshold=threshold, tensors=results)
118+
self.assert_op_size(trt_engine_num, paddle_op_num)

python/paddle/fluid/tests/unittests/ir/inference/program_config.py

Lines changed: 183 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,11 @@
2121
from paddle.fluid.initializer import NumpyArrayInitializer
2222
from paddle.fluid.framework import convert_np_dtype_to_dtype_
2323

24+
from paddle.fluid.contrib.slim.quantization import QuantizationTransformPass
25+
from paddle.fluid.contrib.slim.quantization import QuantizationFreezePass
26+
from paddle.fluid.framework import IrGraph, IrNode, Operator
27+
from paddle.fluid.executor import global_scope
28+
2429

2530
class TensorConfig:
2631
'''
@@ -160,3 +165,181 @@ def create_fake_model(program_config):
160165
executor.run(util_program)
161166
params = scope.find_var("out_var_0").get_bytes()
162167
return model, params
168+
169+
170+
def create_quant_model(model,
                       params,
                       activation_quantize_type='moving_average_abs_max',
                       weight_quantize_type='channel_wise_abs_max',
                       save=False):
    '''
    Insert fake quant/dequant ops into a serialized inference program so the
    result can exercise the TensorRT int8 path.

    Args:
        model: serialized program bytes (as produced by create_fake_model).
        params: serialized persistable parameters bytes.
        activation_quantize_type: quantization strategy for activations.
        weight_quantize_type: quantization strategy for weights.
        save: if True, additionally dump the quantized model to
            'test_inference_model' for debugging.

    Returns:
        (serialized_program, serialized_params) of the quantized model.
    '''
    place = paddle.CUDAPlace(0)
    scope = global_scope()
    exe = paddle.static.Executor(place)
    [inference_program, feed_target_names,
     fetch_targets] = paddle.static.load_inference_model(
         path_prefix=None,
         executor=exe,
         model_filename=model,
         params_filename=params)
    graph = IrGraph(core.Graph(inference_program.desc), for_test=True)

    transform_pass = QuantizationTransformPass(
        scope=scope,
        place=place,
        activation_quantize_type=activation_quantize_type,
        weight_quantize_type=weight_quantize_type)
    transform_pass.apply(graph)

    # Ops whose outputs receive a fake "out_threshold" attribute so the
    # downstream int8 passes see a calibration scale.
    # NOTE: the original list contained "layer_norm" twice; the duplicate
    # is removed (membership tests are unaffected).
    out_scale_op_list = [
        "conv2d",
        "depthwise_conv2d",
        "mul",
        "matmul",
        "relu",
        "leaky_relu",
        "relu6",
        "sigmoid",
        "tanh",
        "prelu",
        "swish",
        "softmax",
        "batch_norm",
        "layer_norm",
        "elementwise_add",
        "pool2d",
        "reshape2",
        "transpose2",
        "concat",
        "elementwise_mul",
        "scale",
        "slice",
        "hard_swish",
        "hard_sigmoid",
        "conv2d_transpose",
        "gru",
        "bilinear_interp",
        "nearest_interp",
        "trilinear_interp",
        "flatten",
        "flatten2",
        "transpose",
        "pad2d",
        "reshape",
    ]
    # Maps op type -> ([input slot names], [output slot names]) so the real
    # output variables can be located for each op.
    op_real_in_out_name = {
        "conv2d": [["Input", "Filter"], ["Output"]],
        "depthwise_conv2d": [["Input", "Filter"], ["Output"]],
        "conv2d_transpose": [["Input", "Filter"], ["Output"]],
        "mul": [["X", "Y"], ["Out"]],
        "matmul": [["X", "Y"], ["Out"]],
        "pool2d": [["X"], ["Out"]],
        "elementwise_add": [["X", "Y"], ["Out"]],
        "concat": [["X"], ["Out"]],
        "softmax": [["X"], ["Out"]],
        "argmax": [["X"], ["Out"]],
        "transpose": [["X"], ["Out"]],
        "equal": [["X", "Y"], ["Out"]],
        "gather": [["X"], ["Out"]],
        "greater_equal": [["X", "Y"], ["Out"]],
        "greater_than": [["X", "Y"], ["Out"]],
        "less_equal": [["X", "Y"], ["Out"]],
        "less_than": [["X", "Y"], ["Out"]],
        "mean": [["X"], ["Out"]],
        "not_equal": [["X", "Y"], ["Out"]],
        "reshape": [["X"], ["Out"]],
        "reshape2": [["X"], ["Out"]],
        "transpose2": [["X"], ["Out"]],
        "bilinear_interp": [["X"], ["Out"]],
        "nearest_interp": [["X"], ["Out"]],
        "trilinear_interp": [["X"], ["Out"]],
        "slice": [["Input"], ["Out"]],
        "squeeze": [["X"], ["Out"]],
        "elementwise_sub": [["X", "Y"], ["Out"]],
        "relu": [["X"], ["Out"]],
        "relu6": [["X"], ["Out"]],
        "leaky_relu": [["X"], ["Out"]],
        "prelu": [["X"], ["Out"]],
        "tanh": [["X"], ["Out"]],
        "swish": [["X"], ["Out"]],
        "dropout": [["X"], ["Out"]],
        "batch_norm": [["X"], ["Y"]],
        "layer_norm": [["X"], ["Y"]],
        "sigmoid": [["X"], ["Out"]],
        "elementwise_mul": [["X", "Y"], ["Out"]],
        "scale": [["X"], ["Out"]],
        "hard_swish": [["X"], ["Out"]],
        "hard_sigmoid": [["X"], ["Out"]],
        "gru": [["Input", "Weight"], ["Hidden"]],
        "lstm": [["Input", "Weight"], ["Hidden"]],
        "pad2d": [["X"], ["Out"]],
        "flatten": [["X"], ["Out"]],
        "flatten2": [["X"], ["Out"]],
    }

    def _get_op_output_var_names(op):
        """Return the names of the real output variables of ``op``."""
        assert isinstance(op, (IrNode, Operator)), \
            "The input op should be IrNode or Operator."
        var_names = []
        op_name = op.name() if isinstance(op, IrNode) \
            else op.type
        if op_name not in op_real_in_out_name:
            return []

        name_list = op_real_in_out_name[op_name][1]
        for name in name_list:
            var_name = op.output(name)
            if isinstance(var_name, list):
                var_names.extend(var_name)
            else:
                var_names.append(var_name)
        return var_names

    # Attach a fake calibration threshold to every float output of the
    # quantizable ops; skip non-float outputs.
    op_nodes = graph.all_op_nodes()
    for op_node in op_nodes:
        if op_node.name() in out_scale_op_list:
            var_names = _get_op_output_var_names(op_node)
            for var_name in var_names:
                in_node = graph._find_node_by_name(op_node.outputs, var_name)
                if in_node.dtype() not in \
                    [core.VarDesc.VarType.FP64, core.VarDesc.VarType.FP32]:
                    continue

                op_node.op()._set_attr("out_threshold", 3.0)

    # Freeze graph for inference, but the weight of fc/conv is still float type.
    freeze_pass = QuantizationFreezePass(
        scope=scope, place=place, weight_quantize_type=weight_quantize_type)
    freeze_pass.apply(graph)

    main_program = graph.to_program()

    # Fill deterministic dummy scales for
    # fake_quantize_moving_average_abs_max(InScale) and
    # fake_channel_wise_dequantize_max_abs(Scales) so inference is runnable.
    op_nodes = graph.all_op_nodes()
    for op_node in op_nodes:
        if op_node.name() == 'fake_quantize_moving_average_abs_max':
            var_name = op_node.input("InScale")[0]
            tensor = scope.var(var_name).get_tensor()
            tensor.set(np.array([1], dtype=np.float32), place)
        elif op_node.name() == 'fake_channel_wise_dequantize_max_abs':
            var_name = op_node.input("Scales")[0]
            tensor = scope.var(var_name).get_tensor()
            tensor.set(np.ones(tensor.shape(), dtype=np.float32), place)

    if save:
        fluid.io.save_inference_model(
            'test_inference_model',
            feed_target_names,
            fetch_targets,
            exe,
            main_program=main_program)

    feed_vars = [
        main_program.global_block().var(name) for name in feed_target_names
    ]
    serialized_program = paddle.static.serialize_program(
        feed_vars, fetch_targets, program=main_program)
    serialized_params = paddle.static.serialize_persistables(
        feed_vars, fetch_targets, executor=exe, program=main_program)
    return serialized_program, serialized_params

python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_conv2d.py

Lines changed: 28 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
from trt_layer_auto_scan_test import TrtLayerAutoScanTest
1616
from program_config import TensorConfig
1717
import numpy as np
18+
import paddle.inference as paddle_infer
1819

1920

2021
class TrtConvertConv2dTest(TrtLayerAutoScanTest):
@@ -59,8 +60,33 @@ def update_program_input_and_weight_with_attr(self, op_attr_list):
5960
self.program_inputs = {"input_data": input_data}
6061
self.program_outputs = ["relu_output_data"]
6162

62-
def test_check_output(self):
63-
self.run_test()
63+
def test_check_fp32_output(self):
64+
self.trt_param.precision == paddle_infer.PrecisionType.Float32
65+
# the fused tensorrt engine num is 1, and paddle op num is 2(feed and fetch).
66+
self.run_test(trt_engine_num=1, paddle_op_num=2, threshold=1e-5)
67+
68+
def test_check_fp16_output(self):
69+
self.trt_param.precision == paddle_infer.PrecisionType.Half
70+
self.run_test(trt_engine_num=1, paddle_op_num=2, threshold=1e-2)
71+
72+
def test_dynamic_shape_fp32_check_output(self):
73+
self.trt_param.precision = paddle_infer.PrecisionType.Float32
74+
self.dynamic_shape.min_input_shape = {"input_data": [1, 3, 32, 32]}
75+
self.dynamic_shape.max_input_shape = {"input_data": [4, 3, 64, 64]}
76+
self.dynamic_shape.opt_input_shape = {"input_data": [1, 3, 64, 64]}
77+
self.run_test(trt_engine_num=1, paddle_op_num=2, threshold=1e-5)
78+
79+
def test_dynamic_shape_fp16_check_output(self):
80+
self.trt_param.precision = paddle_infer.PrecisionType.Half
81+
self.dynamic_shape.min_input_shape = {"input_data": [1, 3, 32, 32]}
82+
self.dynamic_shape.max_input_shape = {"input_data": [4, 3, 64, 64]}
83+
self.dynamic_shape.opt_input_shape = {"input_data": [1, 3, 64, 64]}
84+
self.run_test(trt_engine_num=1, paddle_op_num=2, threshold=1e-2)
85+
86+
def test_trt_int8_check_output(self):
87+
self.trt_param.precision = paddle_infer.PrecisionType.Int8
88+
self.run_test(
89+
trt_engine_num=1, paddle_op_num=2, quant=True, threshold=1e-1)
6490

6591

6692
if __name__ == "__main__":

0 commit comments

Comments
 (0)