Skip to content

Commit cffa15c

Browse files
authored
Tile supported (#34388)
* tile op * more uts * disable tile if trt6.0 * typo * fix timeout issue * opteller * opteller remove duplicate code * comments. test=document_fix * modify PADDLE_ENFORCE. * fix reduce_mean issue
1 parent e958316 commit cffa15c

File tree

6 files changed

+223
-1
lines changed

6 files changed

+223
-1
lines changed

paddle/fluid/inference/api/analysis_predictor.cc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1256,6 +1256,7 @@ USE_TRT_CONVERTER(reshape);
12561256
USE_TRT_CONVERTER(reduce_sum);
12571257
USE_TRT_CONVERTER(gather_nd);
12581258
USE_TRT_CONVERTER(reduce_mean);
1259+
USE_TRT_CONVERTER(tile);
12591260
#endif
12601261

12611262
namespace paddle_infer {

paddle/fluid/inference/tensorrt/convert/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ nv_library(tensorrt_converter
1515
reshape_op.cc
1616
reduce_op.cc
1717
gather_nd_op.cc
18+
tile_op.cc
1819
DEPS tensorrt_engine tensorrt_plugin operator scope framework_proto op_registry)
1920

2021
nv_test(test_op_converter SRCS test_op_converter.cc DEPS
Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/inference/tensorrt/convert/op_converter.h"

namespace paddle {
namespace framework {
class Scope;
namespace proto {
class OpDesc;
}  // namespace proto
}  // namespace framework
}  // namespace paddle

namespace paddle {
namespace inference {
namespace tensorrt {

/*
 * TileOp -> TensorRT ISliceLayer (SliceMode::kWRAP).
 *
 * Tiling is expressed as a slice whose output extent is
 * input_extent * repeat_times[i] with unit stride: under kWRAP the slice
 * reads the input cyclically, which repeats (tiles) the data per axis.
 * Requires TensorRT >= 7.0; on older versions the body compiles away
 * (op_teller also rejects the op there).
 */
class TileOpConverter : public OpConverter {
 public:
  void operator()(const framework::proto::OpDesc& op,
                  const framework::Scope& scope, bool test_mode) override {
#if IS_TRT_VERSION_GE(7000)
    VLOG(4) << "convert a fluid tile op to tensorrt tile layer";

    framework::OpDesc op_desc(op, nullptr);
    // Declare inputs.
    auto* input = engine_->GetITensor(op_desc.Input("X")[0]);
    nvinfer1::Dims input_shape = input->getDimensions();
    std::vector<int> repeat_times =
        BOOST_GET_CONST(std::vector<int>, op_desc.GetAttr("repeat_times"));

    nvinfer1::Dims output_dim = input_shape;
    nvinfer1::Dims output_stride;
    // In static-shape mode TensorRT dims exclude the batch axis, so the
    // effective rank is input_shape.nbDims + 1. If repeat_times is longer
    // than that, tile would have to expand a new dimension in front of
    // the batch, which TensorRT does not support. Cast to size_t so the
    // enforce compares like-signed values.
    PADDLE_ENFORCE_GE(
        static_cast<size_t>(input_shape.nbDims + 1), repeat_times.size(),
        platform::errors::InvalidArgument(
            "Can't change batchsize, please check repeat_times"));
    // Left-pad repeat_times with 1s so it covers every axis incl. batch.
    int diff = input_shape.nbDims + 1 - static_cast<int>(repeat_times.size());
    if (diff > 0) repeat_times.insert(repeat_times.begin(), diff, 1);

    // Can't expand on batchsize.
    PADDLE_ENFORCE_EQ(
        repeat_times[0], 1,
        platform::errors::InvalidArgument(
            "Can't expand on batchsize, please check repeat_times"));
    output_stride.nbDims = input_shape.nbDims;
    // repeat_times[i + 1] skips the batch entry padded/checked above.
    for (int i = 0; i < input_shape.nbDims; i++) {
      output_dim.d[i] = output_dim.d[i] * repeat_times[i + 1];
      output_stride.d[i] = 1;
    }

    // NOTE(review): input_shape is passed as the slice *start*; under
    // kWRAP each start coordinate wraps modulo the input extent, so this
    // is equivalent to a zero start — confirm before changing.
    auto* layer = TRT_ENGINE_ADD_LAYER(engine_, Slice, *input, input_shape,
                                       output_dim, output_stride);
    layer->setMode(nvinfer1::SliceMode::kWRAP);
    auto output_name = op_desc.Output("Out")[0];
    RreplenishLayerAndOutput(layer, "tile", {output_name}, test_mode);
#endif
  }
};

}  // namespace tensorrt
}  // namespace inference
}  // namespace paddle

REGISTER_TRT_OP_CONVERTER(tile, TileOpConverter);

paddle/fluid/inference/tensorrt/op_teller.cc

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,9 @@ struct SimpleOpTypeSetTeller : public Teller {
5151
#if IS_TRT_VERSION_GE(7130)
5252
teller_set.insert("group_norm");
5353
#endif
54+
#if IS_TRT_VERSION_GE(7000)
55+
teller_set.insert("tile");
56+
#endif
5457
#if CUDA_VERSION >= 10020
5558
teller_set.insert("reshape");
5659
teller_set.insert("reshape2");
@@ -716,19 +719,36 @@ bool OpTeller::Tell(const framework::ir::Node* node, bool use_no_calib_int8,
716719
VLOG(3) << "the " << op_type
717720
<< " does not have attr (keep_dim or dim or "
718721
"reduce_all)";
722+
std::cout << "attr " << desc.HasAttr("keep_dim") << " "
723+
<< desc.HasAttr("dim") << " " << desc.HasAttr("reduce_all");
719724
return false;
720725
}
721726

722727
// The batch size dimension cannot be reduced if it's not dynamic shape.
723728
if (!with_dynamic_shape) {
724-
if (desc.HasAttr("reduce_all")) return false;
729+
if (BOOST_GET_CONST(bool, desc.GetAttr("reduce_all"))) return false;
725730
std::vector<int32_t> dim =
726731
BOOST_GET_CONST(std::vector<int32_t>, desc.GetAttr("dim"));
727732
for (auto x : dim) {
728733
if (!x) return false;
729734
}
730735
}
731736
}
737+
#if IS_TRT_VERSION_GE(7000)
738+
if (op_type == "tile") {
739+
// Paddle-TRT does not support the input tensors.
740+
auto inputs = desc.InputArgumentNames();
741+
for (auto& input : inputs) {
742+
if (input == "repeat_times_tensor" &&
743+
desc.Input("repeat_times_tensor").size() > 0)
744+
return false;
745+
if (input == "RepeatTimes" && desc.Input("RepeatTimes").size() > 0)
746+
return false;
747+
}
748+
if (with_dynamic_shape) return false;
749+
if (!with_dynamic_shape && !desc.HasAttr("repeat_times")) return false;
750+
}
751+
#endif
732752

733753
if ((*teller)(op_type, desc, use_no_calib_int8)) return true;
734754
}

python/paddle/fluid/tests/unittests/ir/inference/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,4 +37,5 @@ set_tests_properties(test_trt_conv_pass PROPERTIES TIMEOUT 120)
3737
set_tests_properties(test_trt_dynamic_shape PROPERTIES TIMEOUT 120)
3838
set_tests_properties(test_trt_pool_op PROPERTIES ENVIRONMENT FLAGS_fraction_of_gpu_memory_to_use=0.1 TIMEOUT 45)
3939
set_tests_properties(test_trt_reduce_mean_op PROPERTIES TIMEOUT 60)
40+
set_tests_properties(test_trt_tile_op PROPERTIES TIMEOUT 60)
4041
endif()
Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,121 @@
1+
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
from __future__ import print_function
16+
17+
import unittest
18+
import numpy as np
19+
from inference_pass_test import InferencePassTest
20+
import paddle
21+
import paddle.fluid as fluid
22+
import paddle.fluid.core as core
23+
from paddle.fluid.core import PassVersionChecker
24+
from paddle.fluid.core import AnalysisConfig
25+
26+
27+
class TRTTileTest(InferencePassTest):
    """Identity tile (all-ones repeat_times) on a static 4-D input, FP32."""

    def setUp(self):
        input_shape = [4, 3, 224, 256]
        with fluid.program_guard(self.main_program, self.startup_program):
            inp = fluid.data(name="data", shape=input_shape, dtype="float32")
            tiled = paddle.tile(x=inp, repeat_times=[1, 1, 1, 1])
            result = fluid.layers.batch_norm(tiled, is_test=True)

        self.feeds = {
            "data": np.random.random(input_shape).astype("float32"),
        }
        self.enable_trt = True
        self.trt_parameters = TRTTileTest.TensorRTParam(
            1 << 30, 16, 1, AnalysisConfig.Precision.Float32, False, False)
        self.fetch_list = [result]

    def test_check_output(self):
        # Skip entirely on CPU-only builds; TRT needs CUDA.
        if not core.is_compiled_with_cuda():
            return
        self.check_output_with_option(True, flatten=True)
        self.assertTrue(
            PassVersionChecker.IsCompatible('tensorrt_subgraph_pass'))
49+
50+
51+
class TRTTileExpandTest(InferencePassTest):
    """Tile that expands a [1,1,1,1] input to [1,4,1080,1920], FP32 dynamic."""

    def setUp(self):
        with fluid.program_guard(self.main_program, self.startup_program):
            inp = fluid.data(name="data", shape=[1, 1, 1, 1], dtype="float32")
            tiled = paddle.tile(x=inp, repeat_times=[1, 4, 1080, 1920])
            result = fluid.layers.batch_norm(tiled, is_test=True)

        self.feeds = {
            "data": np.random.random([1, 1, 1, 1]).astype("float32"),
        }
        self.enable_trt = True
        self.trt_parameters = TRTTileExpandTest.TensorRTParam(
            1 << 30, 1, 1, AnalysisConfig.Precision.Float32, False, False)
        self.fetch_list = [result]

    def test_check_output(self):
        # Skip entirely on CPU-only builds; TRT needs CUDA.
        if not core.is_compiled_with_cuda():
            return
        self.check_output_with_option(True, flatten=True)
        self.assertTrue(
            PassVersionChecker.IsCompatible('tensorrt_subgraph_pass'))
72+
73+
74+
class TRTTileExpandStaticTest(InferencePassTest):
    """Expanding tile with static-engine serialization enabled, FP32."""

    def setUp(self):
        with fluid.program_guard(self.main_program, self.startup_program):
            inp = fluid.data(name="data", shape=[1, 1, 1, 1], dtype="float32")
            tiled = paddle.tile(x=inp, repeat_times=[1, 4, 1080, 1920])
            result = fluid.layers.batch_norm(tiled, is_test=True)

        self.feeds = {
            "data": np.random.random([1, 1, 1, 1]).astype("float32"),
        }
        self.enable_trt = True
        # use_static=True: serialize the built TRT engine to disk.
        self.trt_parameters = TRTTileExpandStaticTest.TensorRTParam(
            1 << 30, 1, 1, AnalysisConfig.Precision.Float32, True, False)
        self.fetch_list = [result]

    def test_check_output(self):
        # Skip entirely on CPU-only builds; TRT needs CUDA.
        if not core.is_compiled_with_cuda():
            return
        self.check_output_with_option(True, flatten=True)
        self.assertTrue(
            PassVersionChecker.IsCompatible('tensorrt_subgraph_pass'))
95+
96+
97+
class TRTTileExpandHalfTest(InferencePassTest):
    """Expanding tile executed with FP16 (Half) TRT precision."""

    def setUp(self):
        with fluid.program_guard(self.main_program, self.startup_program):
            inp = fluid.data(name="data", shape=[1, 1, 1, 1], dtype="float32")
            tiled = paddle.tile(x=inp, repeat_times=[1, 4, 1080, 1920])
            result = fluid.layers.batch_norm(tiled, is_test=True)

        self.feeds = {
            "data": np.random.random([1, 1, 1, 1]).astype("float32"),
        }
        self.enable_trt = True
        self.trt_parameters = TRTTileExpandHalfTest.TensorRTParam(
            1 << 30, 1, 1, AnalysisConfig.Precision.Half, False, False)
        self.fetch_list = [result]

    def test_check_output(self):
        # Skip entirely on CPU-only builds; TRT needs CUDA.
        if not core.is_compiled_with_cuda():
            return
        self.check_output_with_option(True, flatten=True)
        self.assertTrue(
            PassVersionChecker.IsCompatible('tensorrt_subgraph_pass'))
118+
119+
120+
if __name__ == "__main__":
121+
unittest.main()

0 commit comments

Comments
 (0)